1 #ifndef ROSE_BinaryAnalysis_Partitioner2_Partitioner_H
2 #define ROSE_BinaryAnalysis_Partitioner2_Partitioner_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
6 #include <Rose/BinaryAnalysis/Partitioner2/AddressUsageMap.h>
7 #include <Rose/BinaryAnalysis/Partitioner2/BasicBlock.h>
8 #include <Rose/BinaryAnalysis/Partitioner2/BasicTypes.h>
9 #include <Rose/BinaryAnalysis/Partitioner2/Config.h>
10 #include <Rose/BinaryAnalysis/Partitioner2/ControlFlowGraph.h>
11 #include <Rose/BinaryAnalysis/Partitioner2/DataBlock.h>
12 #include <Rose/BinaryAnalysis/Partitioner2/Function.h>
13 #include <Rose/BinaryAnalysis/Partitioner2/FunctionCallGraph.h>
14 #include <Rose/BinaryAnalysis/Partitioner2/InstructionProvider.h>
15 #include <Rose/BinaryAnalysis/Partitioner2/Modules.h>
16 #include <Rose/BinaryAnalysis/Partitioner2/Reference.h>
18 #include <Sawyer/Attribute.h>
19 #include <Sawyer/Callbacks.h>
20 #include <Sawyer/IntervalSet.h>
21 #include <Sawyer/Map.h>
22 #include <Sawyer/Message.h>
23 #include <Sawyer/Optional.h>
24 #include <Sawyer/ProgressBar.h>
25 #include <Sawyer/SharedPointer.h>
27 #include <Rose/BinaryAnalysis/SourceLocations.h>
28 #include <Rose/BinaryAnalysis/Unparser/Settings.h>
29 #include <Rose/Progress.h>
31 #include <boost/filesystem.hpp>
32 #include <boost/move/utility_core.hpp>
33 #include <boost/serialization/access.hpp>
34 #include <boost/serialization/split_member.hpp>
35 #include <boost/serialization/version.hpp>
43 #include <Rose/BinaryAnalysis/Z3Solver.h>
44 #include <Rose/BinaryAnalysis/InstructionSemantics2/DispatcherAarch32.h>
45 #include <Rose/BinaryAnalysis/InstructionSemantics2/DispatcherAarch64.h>
46 #include <Rose/BinaryAnalysis/InstructionSemantics2/DispatcherM68k.h>
47 #include <Rose/BinaryAnalysis/InstructionSemantics2/DispatcherPowerpc.h>
48 #include <Rose/BinaryAnalysis/InstructionSemantics2/DispatcherX86.h>
62 #if __cplusplus >= 201103L
63 #define ROSE_PARTITIONER_MOVE
64 #elif defined(__GNUC__)
66 #define ROSE_PARTITIONER_MOVE
67 #elif BOOST_VERSION >= 106900 // 1.68.0 might be okay too, but ROSE blacklists it for other reasons
68 #define ROSE_PARTITIONER_MOVE
73 namespace BinaryAnalysis {
74 namespace Partitioner2 {
290 #ifdef ROSE_PARTITIONER_MOVE
// Property storage. The matching accessors are documented as:
//   autoAddCallReturnEdges        -- "Insert (or not) function call return edges."
//   assumeFunctionsReturn         -- "Assume (or not) that function calls return."
//   stackDeltaInterproceduralLimit -- "max depth for inter-procedural stack delta analysis."
// NOTE(review): only the stackDeltaInterproceduralLimit() getter body is visible here; the other
// accessor-to-member pairings are presumed from the naming convention -- confirm.
320 bool autoAddCallReturnEdges_;
321 bool assumeFunctionsReturn_;
322 size_t stackDeltaInterproceduralLimit_;
// AddressNameMap is a Sawyer::Container::Map from rose_addr_t to std::string ("Map address to name").
323 AddressNameMap addressNames_;
// Callback and matcher lists.
// cfgAdjustmentCallbacks_ is handed out by reference from cfgAdjustmentCallbacks(), whose brief is
// "List of all callbacks invoked when the CFG is adjusted."
331 CfgAdjustmentCallbacks cfgAdjustmentCallbacks_;
// presumably backs basicBlockCallbacks() ("Callbacks for adjusting basic block during discovery") -- confirm.
332 BasicBlockCallbacks basicBlockCallbacks_;
// presumably backs functionPrologueMatchers() ("Ordered list of function prologue matchers") -- confirm.
333 FunctionPrologueMatchers functionPrologueMatchers_;
// presumably backs functionPaddingMatchers() ("Ordered list of function padding matchers") -- confirm.
334 FunctionPaddingMatchers functionPaddingMatchers_;
// Iterators to the special CFG vertices. The undiscoveredVertex(), indeterminateVertex() and
// nonexistingVertex() accessors return these members directly.
337 ControlFlowGraph::VertexIterator undiscoveredVertex_;
338 ControlFlowGraph::VertexIterator indeterminateVertex_;
339 ControlFlowGraph::VertexIterator nonexistingVertex_;
// NOTE(review): presumably the count of the three special vertices declared just above -- confirm
// against the code that creates them.
340 static const size_t nSpecialVertices = 3;
346 mutable SAWYER_THREAD_TRAITS::Mutex mutex_;
348 mutable size_t cfgProgressTotal_;
359 #ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
361 friend class boost::serialization::access;
364 void serializeCommon(S &s,
const unsigned version) {
365 s.template register_type<InstructionSemantics2::SymbolicSemantics::SValue>();
366 s.template register_type<InstructionSemantics2::SymbolicSemantics::RiscOperators>();
367 #ifdef ROSE_ENABLE_ASM_AARCH64
368 s.template register_type<InstructionSemantics2::DispatcherAarch64>();
370 #ifdef ROSE_ENABLE_ASM_AARCH32
371 s.template register_type<InstructionSemantics2::DispatcherAarch32>();
373 s.template register_type<InstructionSemantics2::DispatcherX86>();
374 s.template register_type<InstructionSemantics2::DispatcherM68k>();
375 s.template register_type<InstructionSemantics2::DispatcherPowerpc>();
376 s.template register_type<SymbolicExpr::Interior>();
377 s.template register_type<SymbolicExpr::Leaf>();
378 s.template register_type<Z3Solver>();
379 s.template register_type<Semantics::SValue>();
380 s.template register_type<Semantics::MemoryListState>();
381 s.template register_type<Semantics::MemoryMapState>();
382 s.template register_type<Semantics::RegisterState>();
383 s.template register_type<Semantics::State>();
384 s.template register_type<Semantics::RiscOperators>();
385 s & BOOST_SERIALIZATION_NVP(settings_);
387 s & BOOST_SERIALIZATION_NVP(instructionProvider_);
388 s & BOOST_SERIALIZATION_NVP(memoryMap_);
389 s & BOOST_SERIALIZATION_NVP(cfg_);
391 s & BOOST_SERIALIZATION_NVP(aum_);
393 s & BOOST_SERIALIZATION_NVP(functions_);
394 s & BOOST_SERIALIZATION_NVP(autoAddCallReturnEdges_);
395 s & BOOST_SERIALIZATION_NVP(assumeFunctionsReturn_);
396 s & BOOST_SERIALIZATION_NVP(stackDeltaInterproceduralLimit_);
397 s & BOOST_SERIALIZATION_NVP(addressNames_);
399 s & BOOST_SERIALIZATION_NVP(sourceLocations_);
400 s & BOOST_SERIALIZATION_NVP(semanticMemoryParadigm_);
410 s & BOOST_SERIALIZATION_NVP(elfGotVa_);
416 void save(S &s,
const unsigned version)
const {
417 const_cast<Partitioner*
>(
this)->serializeCommon(s, version);
421 void load(S &s,
const unsigned version) {
422 serializeCommon(s, version);
423 rebuildVertexIndices();
426 BOOST_SERIALIZATION_SPLIT_MEMBER();
449 Partitioner(Disassembler *disassembler,
const MemoryMap::Ptr &map);
451 #ifdef ROSE_PARTITIONER_MOVE
453 Partitioner(BOOST_RV_REF(Partitioner));
456 Partitioner& operator=(BOOST_RV_REF(Partitioner));
459 Partitioner(
const Partitioner&);
460 Partitioner& operator=(
const Partitioner&);
512 return memoryMap_!=NULL && memoryMap_->at(va).require(MemoryMap::EXECUTABLE).exists();
533 void unparser(const Unparser::BasePtr&) ;
545 Unparser::BasePtr insnUnparser() const ;
546 void insnUnparser(const Unparser::BasePtr&) ;
550 void configureInsnUnparser(const Unparser::BasePtr&) const ;
555 void configureInsnPlainUnparser(const Unparser::BasePtr&) const ;
568 void unparse(
std::ostream&, const
DataBlock::Ptr&) const;
569 void unparse(
std::ostream&, const
Function::Ptr&) const;
570 void unparse(
std::ostream&) const;
591 size_t nBytes() const {
return aum_.
size(); }
601 return undiscoveredVertex_;
604 return undiscoveredVertex_;
619 return indeterminateVertex_;
622 return indeterminateVertex_;
636 return nonexistingVertex_;
639 return nonexistingVertex_;
675 std::set<rose_addr_t> ghostSuccessors() const ;
768 size_t nInstructions() const ;
793 ControlFlowGraph::ConstVertexIterator instructionVertex(rose_addr_t insnVa)
const;
803 std::vector<SgAsmInstruction*> instructionsOverlapping(
const AddressInterval&) const ;
874 size_t nPlaceholders() const ;
884 bool placeholderExists(rose_addr_t startVa) const ;
924 ControlFlowGraph::VertexIterator insertPlaceholder(rose_addr_t startVa) ;
939 BasicBlock::Ptr erasePlaceholder(
const ControlFlowGraph::ConstVertexIterator &placeholder) ;
978 void basicBlockDropSemantics() const ;
988 size_t nBasicBlocks() const ;
1020 BasicBlock::Ptr basicBlockExists(rose_addr_t startVa) const ;
1064 BasicBlock::Ptr basicBlockContainingInstruction(rose_addr_t insnVa) const ;
1108 BasicBlock::Ptr detachBasicBlock(rose_addr_t startVa) ;
1159 void attachBasicBlock(const
BasicBlock::Ptr&) ;
1248 BasicBlock::Ptr discoverBasicBlock(rose_addr_t startVa) const ;
1267 Precision::Level precision = Precision::HIGH) const ;
1277 std::vector<rose_addr_t> basicBlockConcreteSuccessors(const
BasicBlock::Ptr&,
bool *isComplete=NULL) const ;
1297 std::set<rose_addr_t> basicBlockGhostSuccessors(const
BasicBlock::Ptr&) const ;
1308 bool basicBlockIsFunctionCall(const
BasicBlock::Ptr&, Precision::Level precision = Precision::HIGH) const ;
1320 bool basicBlockIsFunctionReturn(const
BasicBlock::Ptr&) const ;
1326 bool basicBlockPopsStack(const
BasicBlock::Ptr&) const ;
1380 void forgetStackDeltas() const ;
1381 void forgetStackDeltas(const
Function::Ptr&) const ;
1393 size_t stackDeltaInterproceduralLimit() const {
return stackDeltaInterproceduralLimit_; }
1458 void basicBlockMayReturnReset() const ;
1462 struct MayReturnVertexInfo {
1463 enum State {INIT, CALCULATING, FINISHED};
1465 bool processedCallees;
1466 boost::logic::tribool anyCalleesReturn;
1467 boost::logic::tribool result;
1468 MayReturnVertexInfo(): state(INIT), processedCallees(false), anyCalleesReturn(false), result(
boost::indeterminate) {}
1472 bool mayReturnIsSignificantEdge(
const ControlFlowGraph::ConstEdgeIterator &edge,
1473 std::vector<MayReturnVertexInfo> &vertexInfo)
const;
1476 boost::logic::tribool mayReturnDoesCalleeReturn(
const ControlFlowGraph::ConstVertexIterator &vertex,
1477 std::vector<MayReturnVertexInfo> &vertexInfo)
const;
1480 boost::logic::tribool mayReturnDoesSuccessorReturn(
const ControlFlowGraph::ConstVertexIterator &vertex,
1481 std::vector<MayReturnVertexInfo> &vertexInfo)
const;
1485 std::vector<MayReturnVertexInfo> &vertexInfo)
const;
1502 size_t nDataBlocks() const ;
1510 DataBlock::Ptr dataBlockExists(const DataBlock::Ptr&) const ;
1533 DataBlock::Ptr attachDataBlock(const DataBlock::Ptr&) ;
1541 void detachDataBlock(const DataBlock::Ptr&) ;
1552 DataBlock::Ptr attachDataBlockToFunction(const DataBlock::Ptr&, const
Function::Ptr&) ;
1566 DataBlock::Ptr attachDataBlockToBasicBlock(const DataBlock::Ptr&, const BasicBlock::Ptr&) ;
1614 std::vector<DataBlock::Ptr> dataBlocks() const ;
1631 size_t nFunctions() const {
return functions_.
size(); }
1740 size_t attachFunction(const
Function::Ptr&) ;
1741 size_t attachFunctions(const
Functions&) ;
1775 size_t attachFunctionBasicBlocks(const Functions&) ;
1776 size_t attachFunctionBasicBlocks(const
Function::Ptr&) ;
1792 void detachFunction(const
Function::Ptr&) ;
1832 functionsOwningBasicBlock(const
ControlFlowGraph::ConstVertexIterator&,
bool doSort = true) const ;
1835 functionsOwningBasicBlock(rose_addr_t bblockVa,
bool doSort = true) const ;
1838 functionsOwningBasicBlock(const
BasicBlock::Ptr&,
bool doSort = true) const ;
1840 template<class Container>
1842 functionsOwningBasicBlocks(const Container &bblocks) const {
1843 std::vector<Function::Ptr> retval;
1844 for (
const typename Container::value_type& bblock: bblocks) {
1845 for (
const Function::Ptr &
function: functionsOwningBasicBlock(bblock,
false))
1846 insertUnique(retval,
function, sortFunctionsByAddress);
1861 std::vector<Function::Ptr> discoverCalledFunctions() const ;
1874 std::vector<
Function::Ptr> discoverFunctionEntryVertices() const ;
1885 Sawyer::Optional<Thunk> functionIsThunk(const
Function::Ptr&) const ;
1897 void discoverFunctionBasicBlocks(const
Function::Ptr &function) const ;
1905 std::set<rose_addr_t> functionGhostSuccessors(const
Function::Ptr&) const ;
1914 FunctionCallGraph functionCallGraph(AllowParallelEdges::Type allowParallelEdges) const ;
1940 void allFunctionStackDelta() const ;
1948 Sawyer::Optional<
bool> functionOptionalMayReturn(const
Function::Ptr &function) const ;
1953 void allFunctionMayReturn() const ;
1982 functionCallingConvention(const
Function::Ptr&,
1983 const CallingConvention::Definition::Ptr &dflt = CallingConvention::Definition::Ptr())
1999 allFunctionCallingConvention(const CallingConvention::Definition::Ptr &dflt = CallingConvention::Definition::Ptr())
2027 CallingConvention::Dictionary
2028 functionCallingConventionDefinitions(const
Function::Ptr&,
2029 const CallingConvention::Definition::Ptr &dflt = CallingConvention::Definition::Ptr())
2045 allFunctionCallingConventionDefinition(const CallingConvention::Definition::Ptr &dflt =
2046 CallingConvention::Definition::Ptr()) const ;
2056 void fixInterFunctionEdges() ;
2077 bool functionIsNoop(const
Function::Ptr&) const ;
2084 void allFunctionIsNoop() const ;
2093 void forgetFunctionIsNoop() const ;
2094 void forgetFunctionIsNoop(const
Function::Ptr&) const ;
2101 std::set<rose_addr_t> functionDataFlowConstants(const
Function::Ptr&) const ;
2130 CfgAdjustmentCallbacks& cfgAdjustmentCallbacks() {
return cfgAdjustmentCallbacks_; }
2182 std::vector<Function::Ptr> nextFunctionPrologue(rose_addr_t startVa) ;
2183 std::vector<Function::Ptr> nextFunctionPrologue(rose_addr_t startVa, rose_addr_t &lastSearchedVa ) ;
2226 void dumpCfg(std::ostream&,
const std::string &prefix=
"",
bool showBlocks=
true,
2227 bool computeProperties=
true) const ;
2243 bool showNeighbors=true) const ;
2294 static
std::
string dataBlockName(const
DataBlock::Ptr&) ;
2299 static
std::
string functionName(const
Function::Ptr&) ;
2305 void expandIndeterminateCalls();
2325 void progress(const
Progress::Ptr&) ;
2332 void updateProgress(const
std::
string &phase,
double completion) const;
2335 void showStatistics() const;
2338 void checkConsistency() const;
2350 Sawyer::Optional<rose_addr_t> elfGotVa() const;
2439 void addressName(rose_addr_t,
const std::string&) ;
2528 #ifdef ROSE_ENABLE_PYTHON_API
2529 void pythonUnparse()
const;
2544 void updateCfgProgress();
2549 ControlFlowGraph::VertexIterator convertFrom(
const Partitioner &other,
2550 ControlFlowGraph::ConstVertexIterator otherIter);
2554 ControlFlowGraph::EdgeIterator adjustPlaceholderEdges(
const ControlFlowGraph::VertexIterator &placeholder);
2558 ControlFlowGraph::EdgeIterator adjustNonexistingEdges(
const ControlFlowGraph::VertexIterator &vertex);
2561 BasicBlock::Ptr discoverBasicBlockInternal(rose_addr_t startVa)
const;
2565 void bblockAttached(
const ControlFlowGraph::VertexIterator &newVertex);
2569 void bblockDetached(rose_addr_t startVa,
const BasicBlock::Ptr &removedBlock);
2572 void rebuildVertexIndices();
Represents information about a thunk.
size_t size() const
Number of addresses represented by the map.
boost::shared_ptr< RiscOperators > RiscOperatorsPtr
Shared-ownership pointer to a RISC operators object.
Sawyer::Callbacks< BasicBlockCallback::Ptr > BasicBlockCallbacks
See basicBlockCallbacks.
const FunctionPrologueMatchers & functionPrologueMatchers() const
Ordered list of function prologue matchers.
Contiguous region of a file.
SmtSolverPtr smtSolver() const
SMT solver.
BasicBlock::Ptr bblock
The one and only basic block for the thunk.
FunctionPrologueMatchers & functionPrologueMatchers()
Ordered list of function prologue matchers.
const std::string & addressName(rose_addr_t va) const
Property: Name for address.
void disableSymbolicSemantics()
Whether or not to use symbolic semantics.
void checkingCallBranch(bool b)
Property: Whether to look for function calls used as branches.
ControlFlowGraph::ConstVertexIterator indeterminateVertex() const
Returns the special "indeterminate" vertex.
const SourceLocations & sourceLocations() const
Property: Source locations.
const Configuration & configuration() const
Configuration information.
Base class for machine instructions.
Bidirectional mapping between addresses and source locations.
const FunctionPaddingMatchers & functionPaddingMatchers() const
Ordered list of function padding matchers.
AddressUser instructionExists(SgAsmInstruction *insn) const
Determines whether an instruction is attached to the CFG/AUM.
const ControlFlowGraph & cfg() const
Returns the control flow graph.
Provides and caches instructions.
Settings that directly control a partitioner.
ControlFlowGraph::VertexIterator indeterminateVertex()
Returns the special "indeterminate" vertex.
InstructionProvider & instructionProvider()
Returns the instruction provider.
void sourceLocations(const SourceLocations &locs)
Property: Source locations.
boost::iterator_range< VertexIterator > vertices()
Iterators for all vertices.
Main namespace for the ROSE library.
Sawyer::Container::Map< rose_addr_t, std::string > AddressNameMap
Map address to name.
Function call information.
BasicBlockCallbacks & basicBlockCallbacks()
Callbacks for adjusting basic block during discovery.
ControlFlowGraph::VertexIterator undiscoveredVertex()
Returns the special "undiscovered" vertex.
SemanticMemoryParadigm semanticMemoryParadigm() const
Property: Whether to use map- or list-based memory states.
Namespace for the entire library.
Configuration & configuration()
Configuration information.
bool assumeFunctionsReturn() const
Property: Assume (or not) that function calls return.
ControlFlowGraph::ConstVertexIterator findPlaceholder(rose_addr_t startVa) const
Find the CFG vertex for a basic block placeholder.
const AddressUsageMap & aum() const
Returns the address usage map.
const AddressNameMap & addressNames() const
Property: Name for address.
Optional< Value > getOptional(const Key &key) const
Lookup and return a value or nothing.
Base classes for instruction semantics.
void settings(const BasePartitionerSettings &s)
Partitioner settings.
void assumeFunctionsReturn(bool b)
Property: Assume (or not) that function calls return.
rose_addr_t target
The one and only successor for the basic block.
ControlFlowGraph::VertexIterator nonexistingVertex()
Returns the special "non-existing" vertex.
bool isDefaultConstructed() const
Return true if this is a default constructed partitioner.
bool usingSymbolicSemantics() const
Whether or not to use symbolic semantics.
SemanticMemoryParadigm
Organization of semantic memory.
const BasicBlockCallbacks & basicBlockCallbacks() const
Callbacks for adjusting basic block during discovery.
void basicBlockSemanticsAutoDrop(bool b)
Property: Automatically drop semantics for attached basic blocks.
const Value & getOrDefault(const Key &key) const
Lookup and return a value or a default.
void stackDeltaInterproceduralLimit(size_t n)
Property: Maximum depth for inter-procedural stack delta analysis.
const InstructionProvider & instructionProvider() const
Returns the instruction provider.
bool checkingCallBranch
Check for situations where CALL is used as a branch.
bool usingSemantics
Whether instruction semantics are used.
A general, thread-safe way to report progress made on some task.
void autoAddCallReturnEdges(bool b)
Property: Insert (or not) function call return edges.
AddressUser findInstruction(SgAsmInstruction *) const
Find an AddressUser record for the specified instruction, or equivalent.
bool basicBlockSemanticsAutoDrop() const
Property: Automatically drop semantics for attached basic blocks.
bool basicBlockSemanticsAutoDrop
Conserve memory by dropping semantics for attached basic blocks.
ControlFlowGraph::ConstVertexIterator undiscoveredVertex() const
Returns the special "undiscovered" vertex.
ControlFlowGraph::ConstVertexIterator nonexistingVertex() const
Returns the special "non-existing" vertex.
std::vector< FunctionPrologueMatcher::Ptr > FunctionPrologueMatchers
See functionPrologueMatchers.
FunctionPaddingMatchers & functionPaddingMatchers()
Ordered list of function padding matchers.
const CfgAdjustmentCallbacks & cfgAdjustmentCallbacks() const
List of all callbacks invoked when the CFG is adjusted.
API and storage for attributes.
MemoryMap::Ptr memoryMap() const
Returns the memory map.
Partitions instructions into basic blocks and functions.
bool checkingCallBranch() const
Property: Whether to look for function calls used as branches.
bool addressIsExecutable(rose_addr_t va) const
Returns true if address is executable.
Virtual base class for instruction disassemblers.
size_t size() const
Number of nodes, keys, or values in this container.
void enableSymbolicSemantics(bool b=true)
Whether or not to use symbolic semantics.
bool autoAddCallReturnEdges() const
Property: Insert (or not) function call return edges.
Sawyer::Callbacks< CfgAdjustmentCallback::Ptr > CfgAdjustmentCallbacks
See cfgAdjustmentCallbacks.
std::vector< FunctionPaddingMatcher::Ptr > FunctionPaddingMatchers
See functionPaddingMatchers.
void semanticMemoryParadigm(SemanticMemoryParadigm p)
Property: Whether to use map- or list-based memory states.
std::shared_ptr< class SmtSolver > SmtSolverPtr
Reference-counting pointer for SMT solvers.
Holds configuration information.
SourceLocations & sourceLocations()
Property: Source locations.