1 #ifndef ROSE_Partitioner2_Engine_H
2 #define ROSE_Partitioner2_Engine_H
4 #include <featureTests.h>
5 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
7 #include <BinaryLoader.h>
8 #include <BinarySerialIo.h>
9 #include <boost/noncopyable.hpp>
10 #include <boost/regex.hpp>
11 #include <Disassembler.h>
12 #include <FileSystem.h>
13 #include <Partitioner2/Function.h>
14 #include <Partitioner2/ModulesLinux.h>
15 #include <Partitioner2/Partitioner.h>
16 #include <Partitioner2/Thunk.h>
17 #include <Partitioner2/Utility.h>
19 #include <RoseException.h>
20 #include <Sawyer/DistinctList.h>
23 #ifdef ROSE_ENABLE_PYTHON_API
24 #undef slots // stupid Qt pollution
25 #include <boost/python.hpp>
30 namespace Partitioner2 {
115 class ROSE_DLL_API
Engine:
private boost::noncopyable {
128 friend class boost::serialization::access;
131 void serialize(S &s,
unsigned version) {
132 s & loader & disassembler & partitioner & engine & astConstruction;
/** Allocating constructor: creates a new BasicBlockFinalizer on the heap (no constructor arguments)
 *  and returns it wrapped in a @c Ptr. */
154 static Ptr instance() {
return Ptr(
new BasicBlockFinalizer); }
155 virtual bool operator()(
bool chain,
const Args &args) ROSE_OVERRIDE;
157 void fixFunctionReturnEdge(
const Args&);
158 void fixFunctionCallEdges(
const Args&);
159 void addPossibleIndeterminateEdge(
const Args&);
164 class BasicBlockWorkList:
public CfgAdjustmentCallback {
/** Construct a work list bound to an engine.
 *
 *  Stores @p engine in @c engine_ and @p maxSorts in @c maxSorts_; the body does nothing else.
 *  The Engine constructors in this file pass @c this and
 *  @c settings_.partitioner.functionReturnAnalysisMaxSorts for these two arguments.
 *  NOTE(review): the engine is kept as a raw pointer; presumably the engine must outlive this
 *  object -- confirm against the implementation. */
190 BasicBlockWorkList(Engine *engine,
size_t maxSorts): engine_(engine), maxSorts_(maxSorts) {}
/** Allocating constructor: forwards @p engine and @p maxSorts to the BasicBlockWorkList constructor
 *  and returns the newly heap-allocated object wrapped in a @c Ptr. */
193 static Ptr instance(Engine *engine,
size_t maxSorts) {
return Ptr(
new BasicBlockWorkList(engine, maxSorts)); }
194 virtual bool operator()(
bool chain,
const AttachedBasicBlock &args) ROSE_OVERRIDE;
195 virtual bool operator()(
bool chain,
const DetachedBasicBlock &args) ROSE_OVERRIDE;
200 void moveAndSortCallReturn(
const Partitioner&);
204 class CodeConstants:
public CfgAdjustmentCallback {
209 std::set<rose_addr_t> toBeExamined_;
210 std::set<rose_addr_t> wasExamined_;
211 rose_addr_t inProgress_;
212 std::vector<rose_addr_t> constants_;
/** Default constructor: sets @c inProgress_ to zero. The other data members declared with it
 *  (@c toBeExamined_, @c wasExamined_, @c constants_) are not in the initializer list and
 *  therefore start out as empty containers. */
215 CodeConstants(): inProgress_(0) {}
/** Allocating constructor: creates a default-constructed CodeConstants on the heap and returns it
 *  wrapped in a @c Ptr. */
218 static Ptr instance() {
return Ptr(
new CodeConstants); }
221 virtual bool operator()(
bool chain,
const AttachedBasicBlock &attached) ROSE_OVERRIDE;
224 virtual bool operator()(
bool chain,
const DetachedBasicBlock &detached) ROSE_OVERRIDE;
/** Returns the current value of @c inProgress_ (by value; zero after default construction).
 *  NOTE(review): presumably the address of the instruction whose constants are currently being
 *  examined -- confirm against the implementation. */
230 rose_addr_t inProgress()
const {
return inProgress_; }
240 Disassembler *disassembler_;
242 BasicBlockWorkList::Ptr basicBlockWorkList_;
243 CodeConstants::Ptr codeFunctionPointers_;
246 ThunkPredicates::Ptr functionMatcherThunks_;
247 ThunkPredicates::Ptr functionSplittingThunks_;
255 : interp_(NULL), disassembler_(NULL),
256 basicBlockWorkList_(BasicBlockWorkList::instance(this, settings_.partitioner.functionReturnAnalysisMaxSorts)),
263 : settings_(settings), interp_(NULL), disassembler_(NULL),
264 basicBlockWorkList_(BasicBlockWorkList::instance(this, settings_.partitioner.functionReturnAnalysisMaxSorts)),
300 const std::string &purpose,
const std::string &description);
301 virtual SgAsmBlock* frontend(
const std::vector<std::string> &args,
302 const std::string &purpose,
const std::string &description);
343 const std::string &purpose,
const std::string &description) ;
345 const std::string &purpose,
const std::string &description);
391 virtual MemoryMap::Ptr loadSpecimens(
const std::vector<std::string> &fileNames = std::vector<std::string>());
414 virtual Partitioner partition(
const std::vector<std::string> &fileNames = std::vector<std::string>());
415 Partitioner partition(
const std::string &fileName) ;
432 SgAsmBlock* buildAst(
const std::vector<std::string> &fileNames = std::vector<std::string>()) ;
433 SgAsmBlock* buildAst(
const std::string &fileName) ;
491 static std::string specimenNameDocumentation();
512 virtual void checkSettings();
524 virtual bool isRbaFile(
const std::string&);
530 virtual bool isNonContainer(
const std::string&);
537 virtual bool areContainersParsed()
const;
543 virtual void loadVxCore(
const std::string &spec);
554 virtual bool areSpecimensLoaded()
const;
578 virtual void loadContainers(
const std::vector<std::string> &fileNames);
584 virtual void loadNonContainers(
const std::vector<std::string> &names);
589 virtual void adjustMemoryMap();
637 virtual void checkCreatePartitionerPrerequisites()
const;
682 virtual void runPartitionerRecursive(
Partitioner&);
785 virtual std::vector<Function::Ptr> makeUserFunctions(
Partitioner&,
const std::vector<rose_addr_t>&);
829 virtual std::vector<Function::Ptr> makeCalledFunctions(
Partitioner&);
846 virtual std::vector<Function::Ptr> makeNextPrologueFunction(
Partitioner&, rose_addr_t startVa);
847 virtual std::vector<Function::Ptr> makeNextPrologueFunction(
Partitioner&, rose_addr_t startVa, rose_addr_t &lastSearchedVa);
868 virtual std::vector<Function::Ptr>
869 makeFunctionFromInterFunctionCalls(
Partitioner &partitioner, rose_addr_t &startVa );
893 virtual std::set<rose_addr_t> attachDeadCodeToFunction(
Partitioner&,
const Function::Ptr&,
size_t maxIterations=
size_t(-1));
908 virtual std::vector<DataBlock::Ptr> attachPaddingToFunctions(
Partitioner&);
920 virtual size_t attachAllSurroundedCodeToFunctions(
Partitioner&);
929 virtual size_t attachSurroundedCodeToFunctions(
Partitioner&);
935 virtual void attachBlocksToFunctions(
Partitioner&);
943 virtual std::set<rose_addr_t> attachDeadCodeToFunctions(
Partitioner&,
size_t maxIterations=
size_t(-1));
954 virtual std::vector<DataBlock::Ptr> attachSurroundedDataToFunctions(
Partitioner&);
981 virtual bool makeNextCallReturnEdge(
Partitioner&, boost::logic::tribool assumeCallReturns);
/** Property: Error handling.
 *
 *  Setter: stores @p b in @c settings_.engine.exitOnError. */
1038 virtual void exitOnError(
bool b) { settings_.engine.exitOnError = b; }
/** Property: Linker command.
 *
 *  Setter: copies @p cmd into @c settings_.loader.linker. */
1151 virtual void linkerCommand(
const std::string &cmd) { settings_.loader.linker = cmd; }
/** Property: Perform disassembly.
 *
 *  Setter: stores @p b in @c settings_.disassembler.doDisassemble. */
1190 virtual void doDisassemble(
bool b) { settings_.disassembler.doDisassemble = b; }
/** Property: Instruction set architecture name.
 *
 *  Getter: returns a const reference to @c settings_.disassembler.isaName; the reference aliases
 *  the stored setting rather than a copy. */
1209 const std::string&
isaName() const {
return settings_.disassembler.isaName; }
/** Setter for the same property: copies @p s into @c settings_.disassembler.isaName. */
1210 virtual void isaName(
const std::string &s) { settings_.disassembler.isaName = s; }
/** Deprecated accessors for the function starting addresses ("use functionStartingVas").
 *
 *  Both overloads return a reference to @c settings_.partitioner.functionStartingVas, the same
 *  vector exposed by functionStartingVas(). This const overload gives read-only access. */
1216 const std::vector<rose_addr_t>& startingVas() const ROSE_DEPRECATED("use functionStartingVas") {
return settings_.partitioner.functionStartingVas; }
/** Non-const overload: the returned reference lets the caller modify the stored vector in place. */
1217 std::vector<rose_addr_t>& startingVas() ROSE_DEPRECATED("use functionStartingVas") {
return settings_.partitioner.functionStartingVas; }
/** Property: Starting addresses for disassembly.
 *
 *  Const getter: returns a read-only reference to @c settings_.partitioner.functionStartingVas. */
1225 const std::vector<rose_addr_t>&
functionStartingVas() const {
return settings_.partitioner.functionStartingVas; }
/** Property: Whether to use instruction semantics.
 *
 *  Setter: stores @p b in @c settings_.partitioner.base.usingSemantics. */
1236 virtual void usingSemantics(
bool b) { settings_.partitioner.base.usingSemantics = b; }
/** Property: CFG edge rewrite pairs.
 *
 *  Getter: returns a read-only reference to @c settings_.partitioner.ipRewrites.
 *  NOTE(review): the flat vector presumably holds consecutive (from, to) address pairs -- confirm
 *  against the code that consumes it. */
1321 const std::vector<rose_addr_t>&
ipRewrites() const {
return settings_.partitioner.ipRewrites; }
/** Setter for the same property: replaces @c settings_.partitioner.ipRewrites with a copy of @p v. */
1322 virtual void ipRewrites(
const std::vector<rose_addr_t> &v) { settings_.partitioner.ipRewrites = v; }
/** Property: Configuration files.
 *
 *  Const getter: returns a read-only reference to @c settings_.engine.configurationNames. */
1623 const std::vector<std::string>&
configurationNames()
const {
return settings_.engine.configurationNames; }
/** Property: Header file in which system calls are defined.
 *
 *  Getter: returns a read-only reference to @c settings_.partitioner.syscallHeader. */
1662 const boost::filesystem::path&
systemCallHeader() const {
return settings_.partitioner.syscallHeader; }
/** Setter for the same property: copies @p filename into @c settings_.partitioner.syscallHeader. */
1663 virtual void systemCallHeader(
const boost::filesystem::path &filename) { settings_.partitioner.syscallHeader = filename; }
1693 return settings_.astConstruction.allowFunctionWithNoBasicBlocks;
1696 settings_.astConstruction.allowFunctionWithNoBasicBlocks = b;
1726 #ifdef ROSE_ENABLE_PYTHON_API
1729 Partitioner pythonParseVector(boost::python::list &pyArgs,
const std::string &purpose,
const std::string &description);
1730 Partitioner pythonParseSingle(
const std::string &specimen,
const std::string &purpose,
const std::string &description);
1741 SgProject* roseFrontendReplacement(
const std::vector<boost::filesystem::path> &fileNames);
size_t maxBasicBlockSize() const
Property: Maximum size for basic blocks.
const Settings & settings() const
Property: All settings.
bool doingPostFunctionStackDelta() const
Property: Whether to run the function stack delta analysis.
virtual void linkObjectFiles(bool b)
Property: Link object files.
virtual void memoryIsExecutable(bool b)
Property: Global adjustment to executability.
virtual void systemCallHeader(const boost::filesystem::path &filename)
Property: Header file in which system calls are defined.
size_t findingIntraFunctionCode() const
Property: Whether to find intra-function code.
virtual void disassembler(Disassembler *d)
Property: Disassembler.
bool checkingCallBranch() const
Property: Whether to look for function calls used as branches.
virtual void maxBasicBlockSize(size_t n)
Property: Maximum size for basic blocks.
Settings for controlling the engine behavior.
virtual void ignoringUnknownInsns(bool b)
Property: Whether unknown instructions are ignored.
virtual void namingStrings(bool b)
Property: Give names to string literal addresses.
MemoryDataAdjustment
How the partitioner should globally treat memory.
bool findingInterFunctionCalls() const
Property: Whether to search for function calls between existing functions.
const std::vector< rose_addr_t > & functionStartingVas() const
Property: Starting addresses for disassembly.
virtual void findingExportFunctions(bool b)
Property: Whether to make functions at export addresses.
Progress::Ptr progress() const
Property: progress reporting.
Base class for adjusting basic blocks during discovery.
virtual void findingFunctionCallFunctions(bool b)
Property: Whether to turn function call targets into functions.
virtual void followingGhostEdges(bool b)
Property: Whether to follow ghost edges.
virtual void interruptVector(const AddressInterval &i)
Property: Location of machine interrupt vector.
virtual void astCopyAllInstructions(bool b)
Property: Whether to copy instructions when building the AST.
virtual void findingSymbolFunctions(bool b)
Property: Whether to make functions according to symbol tables.
Sawyer::SharedPointer< BinaryLoader > Ptr
Reference counting pointer to BinaryLoader.
bool namingStrings() const
Property: Give names to string literal addresses.
bool findingDeadCode() const
Property: Whether to find dead code.
virtual void findingCodeFunctionPointers(bool b)
Property: Whether to search existing instructions for function pointers.
size_t functionReturnAnalysisMaxSorts() const
Property: Maximum number of function may-return sorting operations.
virtual void functionSplittingThunks(const ThunkPredicates::Ptr &p)
Property: Predicate for finding thunks at the start of functions.
const std::vector< std::string > & configurationNames() const
Property: Configuration files.
virtual void doingPostFunctionNoop(bool b)
Property: Whether to run no-op function analysis.
std::vector< rose_addr_t > & functionStartingVas()
Property: Starting addresses for disassembly.
Settings that control the disassembler.
Base class for engines driving the partitioner.
Engine(const Settings &settings)
Construct engine with settings.
The result from parsing a command line.
bool namingConstants() const
Property: Give names to constants.
Disassembler * disassembler() const
Property: Disassembler.
virtual void exitOnError(bool b)
Property: Error handling.
const std::vector< std::string > environmentInsertions() const
Property: Environment variables to insert.
virtual void peScramblerDispatcherVa(rose_addr_t va)
Property: PE-Scrambler dispatcher address.
SemanticMemoryParadigm semanticMemoryParadigm() const
Property: Type of container for semantic memory.
List of things to work on.
FunctionReturnAnalysis functionReturnAnalysis() const
Property: Whether to run the function may-return analysis.
ThunkPredicates::Ptr functionSplittingThunks() const
Property: Predicate for finding thunks at the start of functions.
size_t deExecuteZerosLeaveAtBack() const
Property: when to remove execute permission from zero bytes.
const std::string & isaName() const
Property: Instruction set architecture name.
virtual void findingIntraFunctionData(bool b)
Property: Whether to find intra-function data.
Settings & settings()
Property: All settings.
A collection of related switch declarations.
MemoryDataAdjustment memoryDataAdjustment() const
Property: Global adjustments to memory map data access bits.
bool linkObjectFiles() const
Property: Link object files.
virtual void findingDataFunctionPointers(bool b)
Property: Whether to search static data for function pointers.
Sawyer::SharedPointer< LibcStartMain > Ptr
Shared ownership pointer to LibcStartMain callback.
virtual void isaName(const std::string &s)
Property: Instruction set architecture name.
virtual void findingErrorFunctions(bool b)
Property: Whether to make error handling functions.
virtual void memoryMap(const MemoryMap::Ptr &m)
Property: memory map.
bool findingErrorFunctions() const
Property: Whether to make error handling functions.
Main namespace for the ROSE library.
Settings for loading specimens.
Settings that control building the AST.
BinaryLoader::Ptr binaryLoader() const
Property: binary loader.
virtual void findingEntryFunctions(bool b)
Property: Whether to make functions at program entry points.
bool followingGhostEdges() const
Property: Whether to follow ghost edges.
bool demangleNames() const
Property: Demangle names.
bool astAllowEmptyGlobalBlock() const
Property: Whether to allow empty global block in the AST.
bool findingFunctionPadding() const
Property: Whether to find function padding.
bool doingPostCallingConvention() const
Property: Whether to run calling-convention analysis.
bool discontiguousBlocks() const
Property: Whether to allow discontiguous basic blocks.
virtual void namingConstants(bool b)
Property: Give names to constants.
bool doDisassemble() const
Property: Perform disassembly.
bool findingIntraFunctionData() const
Property: Whether to find intra-function data.
virtual void findingIntraFunctionCode(size_t n)
Property: Whether to find intra-function code.
ThunkPredicates::Ptr functionMatcherThunks() const
Property: Predicate for finding functions that are thunks.
virtual void astAllowEmptyGlobalBlock(bool b)
Property: Whether to allow empty global block in the AST.
bool memoryIsExecutable() const
Property: Global adjustment to executability.
bool doingPostFunctionMayReturn() const
Property: Whether to run the function may-return analysis.
virtual void doingPostFunctionStackDelta(bool b)
Property: Whether to run the function stack delta analysis.
virtual void findingThunks(bool b)
Property: Whether to find thunk patterns.
virtual void usingSemantics(bool b)
Property: Whether to use instruction semantics.
virtual void findingFunctionPadding(bool b)
Property: Whether to find function padding.
size_t deExecuteZerosLeaveAtFront() const
Property: when to remove execute permission from zero bytes.
bool astAllowFunctionWithNoBasicBlocks() const
Property: Whether to allow empty functions in the AST.
const std::vector< boost::regex > environmentErasePatterns() const
Property: Environment variable erasure patterns.
bool namingSystemCalls() const
Property: Give names to system calls.
virtual void astAllowEmptyBasicBlock(bool b)
Property: Whether to allow empty basic blocks in the AST.
The parser for a program command line.
virtual void semanticMemoryParadigm(SemanticMemoryParadigm p)
Property: Type of container for semantic memory.
virtual void interpretation(SgAsmInterpretation *interp)
Property: interpretation.
const AddressInterval & interruptVector() const
Property: Location of machine interrupt vector.
Settings that control the engine partitioning.
virtual void deExecuteZerosLeaveAtBack(size_t n)
Property: when to remove execute permission from zero bytes.
std::vector< std::string > & configurationNames()
Property: Configuration files.
bool findingSymbolFunctions() const
Property: Whether to make functions according to symbol tables.
virtual void environmentInsertions(const std::vector< std::string > &vars)
Property: Environment variables to insert.
bool findingDataFunctionPointers() const
Property: Whether to search static data for function pointers.
DisassemblerSettings disassembler
Settings for creating the disassembler.
virtual void ipRewrites(const std::vector< rose_addr_t > &v)
Property: CFG edge rewrite pairs.
SemanticMemoryParadigm
Organization of semantic memory.
virtual void findingImportFunctions(bool b)
Property: Whether to make functions at import addresses.
virtual void findingDeadCode(bool b)
Property: Whether to find dead code.
bool findingCodeFunctionPointers() const
Property: Whether to search existing instructions for function pointers.
virtual void deExecuteZerosThreshold(size_t n)
Property: when to remove execute permission from zero bytes.
FunctionReturnAnalysis
Controls whether the function may-return analysis runs.
bool findingEntryFunctions() const
Property: Whether to make functions at program entry points.
SgAsmInterpretation * interpretation() const
Property: interpretation.
virtual void checkingCallBranch(bool b)
Property: Whether to look for function calls used as branches.
AstConstructionSettings astConstruction
Settings for constructing the AST.
bool astCopyAllInstructions() const
Property: Whether to copy instructions when building the AST.
size_t deExecuteZerosThreshold() const
Property: when to remove execute permission from zero bytes.
virtual void demangleNames(bool b)
Property: Demangle names.
bool doingPostAnalysis() const
Property: Whether to perform post-partitioning analysis steps.
bool usingSemantics() const
Property: Whether to use instruction semantics.
virtual void environmentEraseNames(const std::vector< std::string > &names)
Property: Environment variable erasure names.
void basicBlockSemanticsAutoDrop(bool b)
Property: Automatically drop semantics for attached basic blocks.
MemoryMap::Ptr memoryMap() const
Property: memory map.
bool exitOnError() const
Property: Error handling.
bool doingPostFunctionNoop() const
Property: Whether to run no-op function analysis.
const boost::filesystem::path & systemCallHeader() const
Property: Header file in which system calls are defined.
A general, thread-safe way to report progress made on some task.
bool findingExportFunctions() const
Property: Whether to make functions at export addresses.
virtual void splittingThunks(bool b)
Property: Whether to split thunk instructions into mini functions.
virtual void astAllowFunctionWithNoBasicBlocks(bool b)
Property: Whether to allow empty functions in the AST.
bool linkStaticArchives() const
Property: Link library archives.
bool astAllowEmptyBasicBlock() const
Property: Whether to allow empty basic blocks in the AST.
virtual void functionMatcherThunks(const ThunkPredicates::Ptr &p)
Property: Predicate for finding functions that are thunks.
virtual void deExecuteZerosLeaveAtFront(size_t n)
Property: when to remove execute permission from zero bytes.
rose_addr_t peScramblerDispatcherVa() const
Property: PE-Scrambler dispatcher address.
virtual void environmentErasePatterns(const std::vector< boost::regex > &res)
Property: Environment variable erasure patterns.
bool findingImportFunctions() const
Property: Whether to make functions at import addresses.
virtual void linkerCommand(const std::string &cmd)
Property: Linker command.
Format
Format of the state file.
virtual void findingInterFunctionCalls(bool b)
Property: Whether to search for function calls between existing functions.
This class represents a source project, with a list of SgFile objects and global information about th...
Engine()
Default constructor.
PartitionerSettings partitioner
Settings for creating a partitioner.
bool findingFunctionCallFunctions() const
Property: Whether to turn function call targets into functions.
LoaderSettings loader
Settings used during specimen loading.
virtual void binaryLoader(const BinaryLoader::Ptr &loader)
Property: binary loader.
virtual void doingPostCallingConvention(bool b)
Property: Whether to run calling-convention analysis.
virtual void namingSystemCalls(bool b)
Property: Give names to system calls.
virtual void discontiguousBlocks(bool b)
Property: Whether to allow discontiguous basic blocks.
bool findingThunks() const
Property: Whether to find thunk patterns.
virtual void doDisassemble(bool b)
Property: Perform disassembly.
Partitions instructions into basic blocks and functions.
Base class for all ROSE exceptions.
const std::vector< std::string > environmentEraseNames() const
Property: Environment variable erasure names.
virtual void doingPostFunctionMayReturn(bool b)
Property: Whether to run the function may-return analysis.
Binary state files are smaller and faster than the other formats, but are not portable across archite...
Virtual base class for instruction disassemblers.
Represents an interpretation of a binary container.
virtual void functionReturnAnalysis(FunctionReturnAnalysis x)
Property: Whether to run the function may-return analysis.
EngineSettings engine
Settings that control engine behavior.
bool basicBlockSemanticsAutoDrop() const
Property: Automatically drop semantics for attached basic blocks.
virtual void linkStaticArchives(bool b)
Property: Link library archives.
Container associating values with keys.
virtual void functionReturnAnalysisMaxSorts(size_t n)
Property: Maximum number of function may-return sorting operations.
virtual void doingPostAnalysis(bool b)
Property: Whether to perform post-partitioning analysis steps.
virtual void progress(const Progress::Ptr &progress)
Property: progress reporting.
Sawyer::SharedPointer< Progress > Ptr
Progress objects are reference counted.
bool ignoringUnknownInsns() const
Property: Whether unknown instructions are ignored.
virtual void memoryDataAdjustment(MemoryDataAdjustment x)
Property: Global adjustments to memory map data access bits.
Holds configuration information.
const std::string & linkerCommand() const
Property: Linker command.
bool splittingThunks() const
Property: Whether to split thunk instructions into mini functions.
const std::vector< rose_addr_t > & ipRewrites() const
Property: CFG edge rewrite pairs.