1 #ifndef ROSE_BinaryAnalysis_Partitioner2_Partitioner_H
2 #define ROSE_BinaryAnalysis_Partitioner2_Partitioner_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
6 #include <Rose/BinaryAnalysis/Partitioner2/AddressUsageMap.h>
7 #include <Rose/BinaryAnalysis/Partitioner2/BasicBlock.h>
8 #include <Rose/BinaryAnalysis/Partitioner2/BasicTypes.h>
9 #include <Rose/BinaryAnalysis/Partitioner2/Configuration.h>
10 #include <Rose/BinaryAnalysis/Partitioner2/ControlFlowGraph.h>
11 #include <Rose/BinaryAnalysis/Partitioner2/DataBlock.h>
12 #include <Rose/BinaryAnalysis/Partitioner2/Function.h>
13 #include <Rose/BinaryAnalysis/Partitioner2/FunctionCallGraph.h>
14 #include <Rose/BinaryAnalysis/Partitioner2/InstructionProvider.h>
15 #include <Rose/BinaryAnalysis/Partitioner2/Modules.h>
16 #include <Rose/BinaryAnalysis/Partitioner2/Reference.h>
18 #include <Sawyer/Attribute.h>
19 #include <Sawyer/Callbacks.h>
20 #include <Sawyer/IntervalSet.h>
21 #include <Sawyer/Map.h>
22 #include <Sawyer/Message.h>
23 #include <Sawyer/Optional.h>
24 #include <Sawyer/ProgressBar.h>
25 #include <Sawyer/SharedPointer.h>
27 #include <Rose/BinaryAnalysis/SourceLocations.h>
28 #include <Rose/BinaryAnalysis/Unparser/Settings.h>
29 #include <Rose/Progress.h>
31 #include <boost/filesystem.hpp>
32 #include <boost/move/utility_core.hpp>
33 #include <boost/serialization/access.hpp>
34 #include <boost/serialization/split_member.hpp>
35 #include <boost/serialization/version.hpp>
43 #include <Rose/BinaryAnalysis/Z3Solver.h>
44 #include <Rose/BinaryAnalysis/InstructionSemantics/DispatcherAarch32.h>
45 #include <Rose/BinaryAnalysis/InstructionSemantics/DispatcherAarch64.h>
46 #include <Rose/BinaryAnalysis/InstructionSemantics/DispatcherM68k.h>
47 #include <Rose/BinaryAnalysis/InstructionSemantics/DispatcherPowerpc.h>
48 #include <Rose/BinaryAnalysis/InstructionSemantics/DispatcherX86.h>
/** Namespace alias: makes Rose::BinaryAnalysis::InstructionSemantics2 refer to the same namespace as
 *  Rose::BinaryAnalysis::InstructionSemantics.
 *
 *  NOTE(review): presumably a compatibility alias for code written against the older "2"-suffixed name -- confirm
 *  before removing. */
53 namespace Rose {
namespace BinaryAnalysis {
namespace InstructionSemantics2 = InstructionSemantics; }}
67 #if __cplusplus >= 201103L
68 #define ROSE_PARTITIONER_MOVE
69 #elif defined(__GNUC__)
71 #define ROSE_PARTITIONER_MOVE
72 #elif BOOST_VERSION >= 106900 // 1.68.0 might be okay too, but ROSE blacklists it for other reasons
73 #define ROSE_PARTITIONER_MOVE
78 namespace BinaryAnalysis {
79 namespace Partitioner2 {
295 #ifdef ROSE_PARTITIONER_MOVE
325 bool autoAddCallReturnEdges_;
326 bool assumeFunctionsReturn_;
327 size_t stackDeltaInterproceduralLimit_;
328 AddressNameMap addressNames_;
336 CfgAdjustmentCallbacks cfgAdjustmentCallbacks_;
337 BasicBlockCallbacks basicBlockCallbacks_;
338 FunctionPrologueMatchers functionPrologueMatchers_;
339 FunctionPaddingMatchers functionPaddingMatchers_;
342 ControlFlowGraph::VertexIterator undiscoveredVertex_;
343 ControlFlowGraph::VertexIterator indeterminateVertex_;
344 ControlFlowGraph::VertexIterator nonexistingVertex_;
345 static const size_t nSpecialVertices = 3;
351 mutable SAWYER_THREAD_TRAITS::Mutex mutex_;
353 mutable size_t cfgProgressTotal_;
364 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
366 friend class boost::serialization::access;
369 void serializeCommon(S &s,
const unsigned version) {
370 s.template register_type<InstructionSemantics::SymbolicSemantics::SValue>();
371 s.template register_type<InstructionSemantics::SymbolicSemantics::RiscOperators>();
372 #ifdef ROSE_ENABLE_ASM_AARCH64
373 s.template register_type<InstructionSemantics::DispatcherAarch64>();
375 #ifdef ROSE_ENABLE_ASM_AARCH32
376 s.template register_type<InstructionSemantics::DispatcherAarch32>();
378 s.template register_type<InstructionSemantics::DispatcherX86>();
379 s.template register_type<InstructionSemantics::DispatcherM68k>();
380 s.template register_type<InstructionSemantics::DispatcherPowerpc>();
381 s.template register_type<SymbolicExpression::Interior>();
382 s.template register_type<SymbolicExpression::Leaf>();
383 s.template register_type<Z3Solver>();
384 s.template register_type<Semantics::SValue>();
385 s.template register_type<Semantics::MemoryListState>();
386 s.template register_type<Semantics::MemoryMapState>();
387 s.template register_type<Semantics::RegisterState>();
388 s.template register_type<Semantics::State>();
389 s.template register_type<Semantics::RiscOperators>();
390 s & BOOST_SERIALIZATION_NVP(settings_);
392 s & BOOST_SERIALIZATION_NVP(instructionProvider_);
393 s & BOOST_SERIALIZATION_NVP(memoryMap_);
394 s & BOOST_SERIALIZATION_NVP(cfg_);
396 s & BOOST_SERIALIZATION_NVP(aum_);
398 s & BOOST_SERIALIZATION_NVP(functions_);
399 s & BOOST_SERIALIZATION_NVP(autoAddCallReturnEdges_);
400 s & BOOST_SERIALIZATION_NVP(assumeFunctionsReturn_);
401 s & BOOST_SERIALIZATION_NVP(stackDeltaInterproceduralLimit_);
402 s & BOOST_SERIALIZATION_NVP(addressNames_);
404 s & BOOST_SERIALIZATION_NVP(sourceLocations_);
405 s & BOOST_SERIALIZATION_NVP(semanticMemoryParadigm_);
415 s & BOOST_SERIALIZATION_NVP(elfGotVa_);
421 void save(S &s,
const unsigned version)
const {
422 const_cast<Partitioner*
>(
this)->serializeCommon(s, version);
426 void load(S &s,
const unsigned version) {
427 serializeCommon(s, version);
428 rebuildVertexIndices();
431 BOOST_SERIALIZATION_SPLIT_MEMBER();
456 #ifdef ROSE_PARTITIONER_MOVE
458 Partitioner(BOOST_RV_REF(Partitioner));
461 Partitioner& operator=(BOOST_RV_REF(Partitioner));
464 Partitioner(
const Partitioner&);
465 Partitioner& operator=(
const Partitioner&);
517 return memoryMap_!=NULL && memoryMap_->at(va).require(MemoryMap::EXECUTABLE).exists();
538 void unparser(const Unparser::BasePtr&) ;
550 Unparser::BasePtr insnUnparser() const ;
551 void insnUnparser(const Unparser::BasePtr&) ;
555 void configureInsnUnparser(const Unparser::BasePtr&) const ;
560 void configureInsnPlainUnparser(const Unparser::BasePtr&) const ;
573 void unparse(
std::ostream&, const
DataBlock::Ptr&) const;
574 void unparse(
std::ostream&, const
Function::Ptr&) const;
575 void unparse(
std::ostream&) const;
/** Returns the size reported by the address usage map member (aum_).
 *
 *  NOTE(review): presumably this is the number of addresses currently represented by the address usage map --
 *  confirm against AddressUsageMap::size. */
596 size_t nBytes() const {
return aum_.
size(); }
606 return undiscoveredVertex_;
609 return undiscoveredVertex_;
624 return indeterminateVertex_;
627 return indeterminateVertex_;
641 return nonexistingVertex_;
644 return nonexistingVertex_;
680 std::set<rose_addr_t> ghostSuccessors() const ;
773 size_t nInstructions() const ;
798 ControlFlowGraph::ConstVertexIterator instructionVertex(rose_addr_t insnVa)
const;
808 std::vector<SgAsmInstruction*> instructionsOverlapping(
const AddressInterval&) const ;
879 size_t nPlaceholders() const ;
889 bool placeholderExists(rose_addr_t startVa) const ;
929 ControlFlowGraph::VertexIterator insertPlaceholder(rose_addr_t startVa) ;
944 BasicBlock::Ptr erasePlaceholder(
const ControlFlowGraph::ConstVertexIterator &placeholder) ;
983 void basicBlockDropSemantics() const ;
993 size_t nBasicBlocks() const ;
1025 BasicBlock::Ptr basicBlockExists(rose_addr_t startVa) const ;
1069 BasicBlock::Ptr basicBlockContainingInstruction(rose_addr_t insnVa) const ;
1113 BasicBlock::Ptr detachBasicBlock(rose_addr_t startVa) ;
1164 void attachBasicBlock(const
BasicBlock::Ptr&) ;
1253 BasicBlock::Ptr discoverBasicBlock(rose_addr_t startVa) const ;
1272 Precision::Level precision = Precision::HIGH) const ;
1282 std::vector<rose_addr_t> basicBlockConcreteSuccessors(const
BasicBlock::Ptr&,
bool *isComplete=NULL) const ;
1302 std::set<rose_addr_t> basicBlockGhostSuccessors(const
BasicBlock::Ptr&) const ;
1313 bool basicBlockIsFunctionCall(const
BasicBlock::Ptr&, Precision::Level precision = Precision::HIGH) const ;
1325 bool basicBlockIsFunctionReturn(const
BasicBlock::Ptr&) const ;
1331 bool basicBlockPopsStack(const
BasicBlock::Ptr&) const ;
1385 void forgetStackDeltas() const ;
1386 void forgetStackDeltas(const
Function::Ptr&) const ;
/** Property: max depth for inter-procedural stack delta analysis.
 *
 *  This overload is the reader: it returns the currently stored limit (stackDeltaInterproceduralLimit_) without
 *  modifying it. The overload taking a size_t argument is the corresponding writer. */
1398 size_t stackDeltaInterproceduralLimit() const {
return stackDeltaInterproceduralLimit_; }
1463 void basicBlockMayReturnReset() const ;
1467 struct MayReturnVertexInfo {
1468 enum State {INIT, CALCULATING, FINISHED};
1470 bool processedCallees;
1471 boost::logic::tribool anyCalleesReturn;
1472 boost::logic::tribool result;
1473 MayReturnVertexInfo(): state(INIT), processedCallees(false), anyCalleesReturn(false), result(
boost::indeterminate) {}
1477 bool mayReturnIsSignificantEdge(
const ControlFlowGraph::ConstEdgeIterator &edge,
1478 std::vector<MayReturnVertexInfo> &vertexInfo)
const;
1481 boost::logic::tribool mayReturnDoesCalleeReturn(
const ControlFlowGraph::ConstVertexIterator &vertex,
1482 std::vector<MayReturnVertexInfo> &vertexInfo)
const;
1485 boost::logic::tribool mayReturnDoesSuccessorReturn(
const ControlFlowGraph::ConstVertexIterator &vertex,
1486 std::vector<MayReturnVertexInfo> &vertexInfo)
const;
1490 std::vector<MayReturnVertexInfo> &vertexInfo)
const;
1507 size_t nDataBlocks() const ;
1515 DataBlock::Ptr dataBlockExists(const DataBlock::Ptr&) const ;
1538 DataBlock::Ptr attachDataBlock(const DataBlock::Ptr&) ;
1546 void detachDataBlock(const DataBlock::Ptr&) ;
1557 DataBlock::Ptr attachDataBlockToFunction(const DataBlock::Ptr&, const
Function::Ptr&) ;
1571 DataBlock::Ptr attachDataBlockToBasicBlock(const DataBlock::Ptr&, const BasicBlock::Ptr&) ;
1619 std::vector<DataBlock::Ptr> dataBlocks() const ;
/** Returns the number of entries held in the functions_ container.
 *
 *  NOTE(review): presumably these are the functions currently attached to this partitioner -- confirm. */
1636 size_t nFunctions() const {
return functions_.
size(); }
1745 size_t attachFunction(const
Function::Ptr&) ;
1746 size_t attachFunctions(const
Functions&) ;
1780 size_t attachFunctionBasicBlocks(const Functions&) ;
1781 size_t attachFunctionBasicBlocks(const
Function::Ptr&) ;
1797 void detachFunction(const
Function::Ptr&) ;
1837 functionsOwningBasicBlock(const
ControlFlowGraph::ConstVertexIterator&,
bool doSort = true) const ;
1840 functionsOwningBasicBlock(rose_addr_t bblockVa,
bool doSort = true) const ;
1843 functionsOwningBasicBlock(const
BasicBlock::Ptr&,
bool doSort = true) const ;
1845 template<class Container>
1847 functionsOwningBasicBlocks(const Container &bblocks) const {
1848 std::vector<Function::Ptr> retval;
1849 for (
const typename Container::value_type& bblock: bblocks) {
1850 for (
const Function::Ptr &
function: functionsOwningBasicBlock(bblock,
false))
1851 insertUnique(retval,
function, sortFunctionsByAddress);
1866 std::vector<Function::Ptr> discoverCalledFunctions() const ;
1879 std::vector<
Function::Ptr> discoverFunctionEntryVertices() const ;
1890 Sawyer::Optional<Thunk> functionIsThunk(const
Function::Ptr&) const ;
1902 void discoverFunctionBasicBlocks(const
Function::Ptr &function) const ;
1910 std::set<rose_addr_t> functionGhostSuccessors(const
Function::Ptr&) const ;
1919 FunctionCallGraph functionCallGraph(AllowParallelEdges::Type allowParallelEdges) const ;
1945 void allFunctionStackDelta() const ;
1953 Sawyer::Optional<
bool> functionOptionalMayReturn(const
Function::Ptr &function) const ;
1958 void allFunctionMayReturn() const ;
1987 functionCallingConvention(const
Function::Ptr&,
1988 const CallingConvention::Definition::Ptr &dflt = CallingConvention::Definition::Ptr())
2004 allFunctionCallingConvention(const CallingConvention::Definition::Ptr &dflt = CallingConvention::Definition::Ptr())
2032 CallingConvention::Dictionary
2033 functionCallingConventionDefinitions(const
Function::Ptr&,
2034 const CallingConvention::Definition::Ptr &dflt = CallingConvention::Definition::Ptr())
2050 allFunctionCallingConventionDefinition(const CallingConvention::Definition::Ptr &dflt =
2051 CallingConvention::Definition::Ptr()) const ;
2061 void fixInterFunctionEdges() ;
2082 bool functionIsNoop(const
Function::Ptr&) const ;
2089 void allFunctionIsNoop() const ;
2098 void forgetFunctionIsNoop() const ;
2099 void forgetFunctionIsNoop(const
Function::Ptr&) const ;
2106 std::set<rose_addr_t> functionDataFlowConstants(const
Function::Ptr&) const ;
/** List of all callbacks invoked when the CFG is adjusted.
 *
 *  This non-const overload returns a mutable reference to the callback list member (cfgAdjustmentCallbacks_), so
 *  changes made through the returned reference affect this object's list directly. */
2135 CfgAdjustmentCallbacks& cfgAdjustmentCallbacks() {
return cfgAdjustmentCallbacks_; }
2187 std::vector<Function::Ptr> nextFunctionPrologue(rose_addr_t startVa) ;
2188 std::vector<Function::Ptr> nextFunctionPrologue(rose_addr_t startVa, rose_addr_t &lastSearchedVa ) ;
2231 void dumpCfg(std::ostream&,
const std::string &prefix=
"",
bool showBlocks=
true,
2232 bool computeProperties=
true) const ;
2248 bool showNeighbors=true) const ;
2299 static
std::
string dataBlockName(const
DataBlock::Ptr&) ;
2304 static
std::
string functionName(const
Function::Ptr&) ;
2310 void expandIndeterminateCalls();
2330 void progress(const
Progress::Ptr&) ;
2337 void updateProgress(const
std::
string &phase,
double completion) const;
2340 void showStatistics() const;
2343 void checkConsistency() const;
2355 Sawyer::Optional<rose_addr_t> elfGotVa() const;
2444 void addressName(rose_addr_t,
const std::string&) ;
2533 #ifdef ROSE_ENABLE_PYTHON_API
2534 void pythonUnparse()
const;
2549 void updateCfgProgress();
2554 ControlFlowGraph::VertexIterator convertFrom(
const Partitioner &other,
2555 ControlFlowGraph::ConstVertexIterator otherIter);
2559 ControlFlowGraph::EdgeIterator adjustPlaceholderEdges(
const ControlFlowGraph::VertexIterator &placeholder);
2563 ControlFlowGraph::EdgeIterator adjustNonexistingEdges(
const ControlFlowGraph::VertexIterator &vertex);
2566 BasicBlock::Ptr discoverBasicBlockInternal(rose_addr_t startVa)
const;
2570 void bblockAttached(
const ControlFlowGraph::VertexIterator &newVertex);
2574 void bblockDetached(rose_addr_t startVa,
const BasicBlock::Ptr &removedBlock);
2577 void rebuildVertexIndices();
Base classes for instruction semantics.
Represents information about a thunk.
size_t size() const
Number of addresses represented by the map.
Sawyer::Callbacks< BasicBlockCallback::Ptr > BasicBlockCallbacks
See basicBlockCallbacks.
const FunctionPrologueMatchers & functionPrologueMatchers() const
Ordered list of function prologue matchers.
Contiguous region of a file.
SmtSolverPtr smtSolver() const
SMT solver.
BasicBlock::Ptr bblock
The one and only basic block for the thunk.
FunctionPrologueMatchers & functionPrologueMatchers()
Ordered list of function prologue matchers.
const std::string & addressName(rose_addr_t va) const
Property: Name for address.
void disableSymbolicSemantics()
Use or do not use symbolic semantics.
void checkingCallBranch(bool b)
Property: Whether to look for function calls used as branches.
ControlFlowGraph::ConstVertexIterator indeterminateVertex() const
Returns the special "indeterminate" vertex.
const SourceLocations & sourceLocations() const
Property: Source locations.
const Configuration & configuration() const
Configuration information.
boost::shared_ptr< RiscOperators > RiscOperatorsPtr
Shared-ownership pointer to a RISC operators object.
Base class for machine instructions.
Bidirectional mapping between addresses and source locations.
const FunctionPaddingMatchers & functionPaddingMatchers() const
Ordered list of function padding matchers.
AddressUser instructionExists(SgAsmInstruction *insn) const
Determines whether an instruction is attached to the CFG/AUM.
const ControlFlowGraph & cfg() const
Returns the control flow graph.
Provides and caches instructions.
Settings that directly control a partitioner.
ControlFlowGraph::VertexIterator indeterminateVertex()
Returns the special "indeterminate" vertex.
InstructionProvider & instructionProvider()
Returns the instruction provider.
void sourceLocations(const SourceLocations &locs)
Property: Source locations.
boost::iterator_range< VertexIterator > vertices()
Iterators for all vertices.
Main namespace for the ROSE library.
Sawyer::Container::Map< rose_addr_t, std::string > AddressNameMap
Map address to name.
Function call information.
BasicBlockCallbacks & basicBlockCallbacks()
Callbacks for adjusting basic block during discovery.
ControlFlowGraph::VertexIterator undiscoveredVertex()
Returns the special "undiscovered" vertex.
SemanticMemoryParadigm semanticMemoryParadigm() const
Property: Whether to use map- or list-based memory states.
Namespace for the entire library.
Configuration & configuration()
Configuration information.
bool assumeFunctionsReturn() const
Property: Assume (or not) that function calls return.
ControlFlowGraph::ConstVertexIterator findPlaceholder(rose_addr_t startVa) const
Find the CFG vertex for a basic block placeholder.
const AddressUsageMap & aum() const
Returns the address usage map.
const AddressNameMap & addressNames() const
Property: Name for address.
MemoryMapPtr Ptr
Reference counting pointer.
Optional< Value > getOptional(const Key &key) const
Lookup and return a value or nothing.
void settings(const BasePartitionerSettings &s)
Partitioner settings.
void assumeFunctionsReturn(bool b)
Property: Assume (or not) that function calls return.
rose_addr_t target
The one and only successor for the basic block.
ControlFlowGraph::VertexIterator nonexistingVertex()
Returns the special "non-existing" vertex.
bool isDefaultConstructed() const
Return true if this is a default constructed partitioner.
bool usingSymbolicSemantics() const
Use or do not use symbolic semantics.
SemanticMemoryParadigm
Organization of semantic memory.
const BasicBlockCallbacks & basicBlockCallbacks() const
Callbacks for adjusting basic block during discovery.
void basicBlockSemanticsAutoDrop(bool b)
Property: Automatically drop semantics for attached basic blocks.
const Value & getOrDefault(const Key &key) const
Lookup and return a value or a default.
void stackDeltaInterproceduralLimit(size_t n)
Property: max depth for inter-procedural stack delta analysis.
const InstructionProvider & instructionProvider() const
Returns the instruction provider.
bool checkingCallBranch
Check for situations where CALL is used as a branch.
bool usingSemantics
Whether instruction semantics are used.
A general, thread-safe way to report progress made on some task.
void autoAddCallReturnEdges(bool b)
Property: Insert (or not) function call return edges.
AddressUser findInstruction(SgAsmInstruction *) const
Find an AddressUser record for the specified instruction, or equivalent.
bool basicBlockSemanticsAutoDrop() const
Property: Automatically drop semantics for attached basic blocks.
bool basicBlockSemanticsAutoDrop
Conserve memory by dropping semantics for attached basic blocks.
ControlFlowGraph::ConstVertexIterator undiscoveredVertex() const
Returns the special "undiscovered" vertex.
ControlFlowGraph::ConstVertexIterator nonexistingVertex() const
Returns the special "non-existing" vertex.
std::shared_ptr< SmtSolver > SmtSolverPtr
Reference counting pointer.
std::vector< FunctionPrologueMatcher::Ptr > FunctionPrologueMatchers
See functionPrologueMatchers.
FunctionPaddingMatchers & functionPaddingMatchers()
Ordered list of function padding matchers.
const CfgAdjustmentCallbacks & cfgAdjustmentCallbacks() const
List of all callbacks invoked when the CFG is adjusted.
API and storage for attributes.
MemoryMap::Ptr memoryMap() const
Returns the memory map.
Sawyer::SharedPointer< Base > BasePtr
Reference counted pointer for disassemblers.
Partitions instructions into basic blocks and functions.
bool checkingCallBranch() const
Property: Whether to look for function calls used as branches.
bool addressIsExecutable(rose_addr_t va) const
Returns true if address is executable.
size_t size() const
Number of nodes, keys, or values in this container.
void enableSymbolicSemantics(bool b=true)
Use or do not use symbolic semantics.
bool autoAddCallReturnEdges() const
Property: Insert (or not) function call return edges.
Sawyer::Callbacks< CfgAdjustmentCallback::Ptr > CfgAdjustmentCallbacks
See cfgAdjustmentCallbacks.
std::vector< FunctionPaddingMatcher::Ptr > FunctionPaddingMatchers
See functionPaddingMatchers.
void semanticMemoryParadigm(SemanticMemoryParadigm p)
Property: Whether to use map- or list-based memory states.
Holds configuration information.
SourceLocations & sourceLocations()
Property: Source locations.