ROSE  0.11.109.0
Partitioner2/Engine.h
1 #ifndef ROSE_BinaryAnalysis_Partitioner2_Engine_H
2 #define ROSE_BinaryAnalysis_Partitioner2_Engine_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
5 
6 #include <Rose/BinaryAnalysis/BinaryLoader.h>
7 #include <Rose/BinaryAnalysis/SerialIo.h>
8 #include <boost/noncopyable.hpp>
9 #include <boost/regex.hpp>
10 #include <Rose/BinaryAnalysis/Disassembler/BasicTypes.h>
11 #include <Rose/FileSystem.h>
12 #include <Rose/BinaryAnalysis/Partitioner2/Function.h>
13 #include <Rose/BinaryAnalysis/Partitioner2/ModulesLinux.h>
14 #include <Rose/BinaryAnalysis/Partitioner2/Partitioner.h>
15 #include <Rose/BinaryAnalysis/Partitioner2/Thunk.h>
16 #include <Rose/BinaryAnalysis/Partitioner2/Utility.h>
17 #include <Rose/Progress.h>
18 #include <Rose/Exception.h>
19 #include <Sawyer/DistinctList.h>
20 #include <stdexcept>
21 
22 #ifdef ROSE_ENABLE_PYTHON_API
23 #undef slots // stupid Qt pollution
24 #include <boost/python.hpp>
25 #endif
26 
27 namespace Rose {
28 namespace BinaryAnalysis {
29 namespace Partitioner2 {
30 
114 class ROSE_DLL_API Engine: private boost::noncopyable {
115 public:
119  struct Settings { // Bundle of all engine settings; only its serialization support appears in this listing.
126  private:
127  friend class boost::serialization::access; // grants Boost.Serialization access to the private serialize() below
128 
129  template<class S> // S: type of the object the members are streamed through (presumably a Boost archive — confirm)
130  void serialize(S &s, unsigned version) { // `version` is accepted but never read in this body
131  s & loader & disassembler & partitioner & engine & astConstruction; // streams the five settings members in this fixed order
132  }
133  };
134 
136  class Exception: public Rose::Exception { // Exception class nested in Engine; adds no state or behavior to Rose::Exception.
137  public:
138  Exception(const std::string &mesg) // mesg: message text, forwarded unchanged to the Rose::Exception constructor
139  : Rose::Exception(mesg) {}
140  ~Exception() noexcept {} // non-throwing; `noexcept` is the non-deprecated spelling of the old `throw ()` specification
141  };
142 
144  // Internal data structures
146 private:
147  // Engine callback for handling instructions added to basic blocks. This is called when a basic block is discovered,
148  // before it's attached to a partitioner, so it shouldn't really be modifying any state in the engine, but rather only
149  // preparing the basic block to be processed.
150  class BasicBlockFinalizer: public BasicBlockCallback {
151  typedef Sawyer::Container::Map<rose_addr_t /*target*/, std::vector<rose_addr_t> /*sources*/> WorkList; // private alias: target address -> list of source addresses; not referenced elsewhere in this listing
152  public:
153  static Ptr instance() { return Ptr(new BasicBlockFinalizer); } // factory: wraps a newly allocated finalizer in a Ptr
154  virtual bool operator()(bool chain, const Args &args) override; // callback entry point overriding the base class; only declared here
155  private:
156  void fixFunctionReturnEdge(const Args&); // the three private helpers below are only declared here; each receives the callback's Args
157  void fixFunctionCallEdges(const Args&);
158  void addPossibleIndeterminateEdge(const Args&);
159  };
160 
161  // Basic blocks that need to be worked on next. These lists are adjusted whenever a new basic block (or placeholder) is
162  // inserted into or erased from the CFG.
163  class BasicBlockWorkList: public CfgAdjustmentCallback {
164  // The following lists are used for adding outgoing E_CALL_RETURN edges to basic blocks based on whether the basic
165  // block is a call to a function that might return. When a new basic block is inserted into the CFG (or a previous
166  // block is removed, modified, and re-inserted), the operator() is called and conditionally inserts the block into the
167  // "pendingCallReturn" list (if the block is a function call that lacks an E_CALL_RETURN edge and the function is known
168  // to return or the analysis was incomplete).
169  //
170  // When we run out of other ways to create basic blocks, we process the pendingCallReturn list from back to front. If
171  // the back block (which gets popped) has a positive may-return result then an E_CALL_RETURN edge is added to the CFG
172  // and the normal recursive BB discovery is resumed. Otherwise, if the analysis is incomplete, the basic block is moved
173  // to the processedCallReturn list. The entire pendingCallReturn list is processed before proceeding.
174  //
175  // If there is no more pendingCallReturn work to be done, then the processedCallReturn blocks are moved to the
176  // finalCallReturn list and finalCallReturn is sorted by approximate CFG height (i.e., leaves first). The contents
177  // of the finalCallReturn list are then analyzed and the result (or the default may-return value for failed analyses)
178  // is used to decide whether a new CFG edge should be created, possibly adding new basic block addresses to the
179  // list of undiscovered blocks.
180  //
181  Sawyer::Container::DistinctList<rose_addr_t> pendingCallReturn_; // blocks that might need an E_CALL_RETURN edge
182  Sawyer::Container::DistinctList<rose_addr_t> processedCallReturn_; // call sites whose may-return was indeterminate
183  Sawyer::Container::DistinctList<rose_addr_t> finalCallReturn_; // indeterminate call sites awaiting final analysis
184 
185  Sawyer::Container::DistinctList<rose_addr_t> undiscovered_; // undiscovered basic block list (last-in-first-out)
186  Engine *engine_; // engine to which this callback belongs
187  size_t maxSorts_; // max sorts before using unsorted lists
188  protected:
189  BasicBlockWorkList(Engine *engine, size_t maxSorts): engine_(engine), maxSorts_(maxSorts) {} // protected: objects are created via instance(); stores both arguments, the four lists are default-constructed
190  public:
192  static Ptr instance(Engine *engine, size_t maxSorts) { return Ptr(new BasicBlockWorkList(engine, maxSorts)); } // factory: wraps a newly allocated work list in a Ptr
193  virtual bool operator()(bool chain, const AttachedBasicBlock &args) override; // callback overload for an AttachedBasicBlock event; only declared here
194  virtual bool operator()(bool chain, const DetachedBasicBlock &args) override; // callback overload for a DetachedBasicBlock event; only declared here
195  Sawyer::Container::DistinctList<rose_addr_t>& pendingCallReturn() { return pendingCallReturn_; } // the four accessors return mutable references to the private lists
196  Sawyer::Container::DistinctList<rose_addr_t>& processedCallReturn() { return processedCallReturn_; }
197  Sawyer::Container::DistinctList<rose_addr_t>& finalCallReturn() { return finalCallReturn_; }
198  Sawyer::Container::DistinctList<rose_addr_t>& undiscovered() { return undiscovered_; }
199  void moveAndSortCallReturn(const Partitioner&); // only declared here; presumably the processed -> final move-and-sort step described above — confirm
200  };
201 
202  // A work list providing constants from instructions that are part of the CFG.
203  class CodeConstants: public CfgAdjustmentCallback {
204  public: // no public declarations are visible in this section of the listing
206 
207  private:
208  std::set<rose_addr_t> toBeExamined_; // instructions waiting to be examined
209  std::set<rose_addr_t> wasExamined_; // instructions we've already examined
210  rose_addr_t inProgress_; // instruction that is currently in progress
211  std::vector<rose_addr_t> constants_; // constants for the instruction in progress
212 
213  protected:
214  CodeConstants(): inProgress_(0) {} // protected: objects are created via instance(); inProgress_ starts at 0 and the containers start empty
215 
216  public:
217  static Ptr instance() { return Ptr(new CodeConstants); } // factory: wraps a newly allocated CodeConstants in a Ptr
218 
219  // Possibly insert more instructions into the work list when a basic block is added to the CFG
220  virtual bool operator()(bool chain, const AttachedBasicBlock &attached) override;
221 
222  // Possibly remove instructions from the worklist when a basic block is removed from the CFG
223  virtual bool operator()(bool chain, const DetachedBasicBlock &detached) override;
224 
225  // Return the next available constant if any.
226  Sawyer::Optional<rose_addr_t> nextConstant(const Partitioner &partitioner);
227 
228  // Address of instruction being examined.
229  rose_addr_t inProgress() const { return inProgress_; } // returns the stored address by value
230  };
231 
233  // Data members
235 private:
236  Settings settings_; // Settings for the partitioner; returned by settings().
237  SgAsmInterpretation *interp_; // interpretation set by loadSpecimen; also read and written by interpretation()
238  BinaryLoader::Ptr binaryLoader_; // how to remap, link, and fixup
239  Disassembler::BasePtr disassembler_; // not ref-counted yet, but don't destroy it since user owns it. NOTE(review): BasePtr is described elsewhere as a reference-counted pointer, so this remark may be stale — confirm
240  MemoryMap::Ptr map_; // memory map initialized by load(); also read and written by memoryMap()
241  BasicBlockWorkList::Ptr basicBlockWorkList_; // what blocks to work on next
242  CodeConstants::Ptr codeFunctionPointers_; // generates constants that are found in instruction ASTs
243  Progress::Ptr progress_; // optional progress reporting; read and written by progress()
244  ModulesLinux::LibcStartMain::Ptr libcStartMain_; // looking for "main" by analyzing libc_start_main?
245  ThunkPredicates::Ptr functionMatcherThunks_; // predicates to find thunks when looking for functions
246  ThunkPredicates::Ptr functionSplittingThunks_; // predicates for splitting thunks from front of functions
247 
249  // Constructors. Both constructors are private or deleted, so code outside the class obtains engines through instance().
251 private:
253  Engine() = delete; // no default construction
254 
256  explicit Engine(const Settings &settings); // only declared here; `explicit` prevents implicit conversion from Settings
257 
258 public:
259  virtual ~Engine(); // virtual destructor; only declared here
260 
262  static Engine *instance() { return new Engine(Settings{}); }; // factory using value-initialized Settings; returns a raw pointer from `new` (presumably caller-owned — confirm). NOTE(review): the generated docs mention "EngineBinary", but this body constructs an Engine
263 
265  static Engine *instance(const Settings &settings) { return new Engine(settings); }; // factory using the caller's settings; same raw-pointer return as the overload above
266 
268  // The very top-level use case: two frontend() overloads, both returning an SgAsmBlock*.
270 public:
295  SgAsmBlock* frontend(int argc, char *argv[], // non-virtual overload: arguments as a C-style argc/argv pair
296  const std::string &purpose, const std::string &description);
297  virtual SgAsmBlock* frontend(const std::vector<std::string> &args, // virtual overload: arguments as a vector of strings
298  const std::string &purpose, const std::string &description);
301  // Basic top-level steps. Overloads tagged /*final*/ are non-virtual; their vector-taking counterparts are virtual (both buildAst overloads are non-virtual).
304 public:
310  void reset(); // non-virtual; only declared here
311 
338  Sawyer::CommandLine::ParserResult parseCommandLine(int argc, char *argv[], // argc/argv overload, non-virtual
339  const std::string &purpose, const std::string &description) /*final*/;
340  virtual Sawyer::CommandLine::ParserResult parseCommandLine(const std::vector<std::string> &args, // vector-of-strings overload, virtual
341  const std::string &purpose, const std::string &description);
363  virtual SgAsmInterpretation* parseContainers(const std::vector<std::string> &fileNames); // no default argument, unlike loadSpecimens/partition/buildAst
364  SgAsmInterpretation* parseContainers(const std::string &fileName) /*final*/; // single-name overload
387  virtual MemoryMap::Ptr loadSpecimens(const std::vector<std::string> &fileNames = std::vector<std::string>()); // fileNames defaults to an empty vector
388  MemoryMap::Ptr loadSpecimens(const std::string &fileName) /*final*/; // single-name overload
410  virtual Partitioner partition(const std::vector<std::string> &fileNames = std::vector<std::string>()); // fileNames defaults to an empty vector; returns the Partitioner by value
411  Partitioner partition(const std::string &fileName) /*final*/; // single-name overload
428  SgAsmBlock* buildAst(const std::vector<std::string> &fileNames = std::vector<std::string>()) /*final*/; // fileNames defaults to an empty vector
429  SgAsmBlock* buildAst(const std::string &fileName) /*final*/; // single-name overload
437  virtual void savePartitioner(const Partitioner&, const boost::filesystem::path&, SerialIo::Format fmt = SerialIo::BINARY); // fmt defaults to the binary state-file format
438 
443  virtual Partitioner loadPartitioner(const boost::filesystem::path&, SerialIo::Format fmt = SerialIo::BINARY); // same default format as savePartitioner
444 
446  // Command-line parsing
447  //
448  // top-level: parseCommandLine
450 public:
454  virtual Sawyer::CommandLine::SwitchGroup loaderSwitches(); // each *Switches() pair below: a virtual no-argument overload plus a static overload taking its settings struct by non-const reference
455  static Sawyer::CommandLine::SwitchGroup loaderSwitches(LoaderSettings&);
461  virtual Sawyer::CommandLine::SwitchGroup disassemblerSwitches();
462  static Sawyer::CommandLine::SwitchGroup disassemblerSwitches(DisassemblerSettings&);
468  virtual Sawyer::CommandLine::SwitchGroup partitionerSwitches();
469  static Sawyer::CommandLine::SwitchGroup partitionerSwitches(PartitionerSettings&);
475  virtual Sawyer::CommandLine::SwitchGroup engineSwitches();
476  static Sawyer::CommandLine::SwitchGroup engineSwitches(EngineSettings&);
482  virtual Sawyer::CommandLine::SwitchGroup astConstructionSwitches();
483  static Sawyer::CommandLine::SwitchGroup astConstructionSwitches(AstConstructionSettings&);
487  static std::string specimenNameDocumentation(); // static; returns the documentation text by value
488 
500  virtual Sawyer::CommandLine::Parser commandLineParser(const std::string &purpose, const std::string &description); // same purpose/description parameters as parseCommandLine
501 
508  virtual void checkSettings(); // only declared here; non-const
509 
511  // Container parsing
512  //
513  // top-level: parseContainers
515 public:
520  virtual bool isRbaFile(const std::string&); // predicate on a name string; not const-qualified
521 
526  virtual bool isNonContainer(const std::string&); // predicate on a name string; not const-qualified
527 
533  virtual bool areContainersParsed() const; // const query on engine state
534 
539  virtual void loadVxCore(const std::string &spec); // only declared here; the format of `spec` is not shown in this listing
540 
542  // Load specimens
543  //
544  // top-level: loadSpecimens
546 public:
550  virtual bool areSpecimensLoaded() const; // const query on engine state
551 
567  virtual BinaryLoader::Ptr obtainLoader(const BinaryLoader::Ptr &hint = BinaryLoader::Ptr()); // hint defaults to a default-constructed Ptr
568 
574  virtual void loadContainers(const std::vector<std::string> &fileNames);
575 
580  virtual void loadNonContainers(const std::vector<std::string> &names);
581 
585  virtual void adjustMemoryMap();
586 
595  MemoryMap::Ptr memoryMap() const /*final*/ { return map_; } // property getter: returns the stored memory map pointer
596  virtual void memoryMap(const MemoryMap::Ptr &m) { map_ = m; } // property setter: replaces the stored memory map pointer
600  // Disassembler
603 public:
622  virtual Disassembler::BasePtr obtainDisassembler(); // overload without a hint; only declared here
623  virtual Disassembler::BasePtr obtainDisassembler(const Disassembler::BasePtr &hint); // overload taking a caller-supplied hint; only declared here
627  // Partitioner high-level functions
629  //
630  // top-level: partition
632 public:
634  virtual void checkCreatePartitionerPrerequisites() const; // const check that returns nothing; presumably reports failure by throwing — confirm
635 
643  virtual Partitioner createBarePartitioner(); // the create*Partitioner functions all return a Partitioner by value
644 
650  virtual Partitioner createGenericPartitioner();
651 
657  virtual Partitioner createTunedPartitioner();
658 
663  virtual Partitioner createPartitionerFromAst(SgAsmInterpretation*);
664 
669  virtual Partitioner createPartitioner();
670 
674  virtual void runPartitionerInit(Partitioner&); // the runPartitioner* functions all take the partitioner by non-const reference
675 
679  virtual void runPartitionerRecursive(Partitioner&);
680 
685  virtual void runPartitionerFinal(Partitioner&);
686 
692  virtual void runPartitioner(Partitioner&);
693 
694 
696  // Partitioner mid-level functions
697  //
698  // These are the functions called by the partitioner high-level stuff. These are sometimes overridden in subclasses,
699  // although it is more likely that the high-level stuff is overridden.
701 public:
706  virtual void labelAddresses(Partitioner&, const Configuration&);
707 
711  virtual std::vector<DataBlock::Ptr> makeConfiguredDataBlocks(Partitioner&, const Configuration&);
712 
716  virtual std::vector<Function::Ptr> makeConfiguredFunctions(Partitioner&, const Configuration&);
717 
724  virtual std::vector<Function::Ptr> makeEntryFunctions(Partitioner&, SgAsmInterpretation*); // this and the next five declarations share one signature: (Partitioner&, SgAsmInterpretation*) -> vector of Function::Ptr
725 
732  virtual std::vector<Function::Ptr> makeErrorHandlingFunctions(Partitioner&, SgAsmInterpretation*);
733 
743  virtual std::vector<Function::Ptr> makeImportFunctions(Partitioner&, SgAsmInterpretation*);
744 
751  virtual std::vector<Function::Ptr> makeExportFunctions(Partitioner&, SgAsmInterpretation*);
752 
759  virtual std::vector<Function::Ptr> makeSymbolFunctions(Partitioner&, SgAsmInterpretation*);
760 
768  virtual std::vector<Function::Ptr> makeContainerFunctions(Partitioner&, SgAsmInterpretation*);
769 
776  virtual std::vector<Function::Ptr> makeInterruptVectorFunctions(Partitioner&, const AddressInterval &vector);
777 
782  virtual std::vector<Function::Ptr> makeUserFunctions(Partitioner&, const std::vector<rose_addr_t>&);
783 
790  virtual void discoverBasicBlocks(Partitioner&);
791 
803  virtual Function::Ptr makeNextDataReferencedFunction(const Partitioner&, rose_addr_t &startVa /*in,out*/); // takes the partitioner as const; startVa is both read and updated
804 
818  virtual Function::Ptr makeNextCodeReferencedFunction(const Partitioner&); // takes the partitioner as const
819 
826  virtual std::vector<Function::Ptr> makeCalledFunctions(Partitioner&);
827 
843  virtual std::vector<Function::Ptr> makeNextPrologueFunction(Partitioner&, rose_addr_t startVa);
844  virtual std::vector<Function::Ptr> makeNextPrologueFunction(Partitioner&, rose_addr_t startVa, rose_addr_t &lastSearchedVa); // lastSearchedVa is a non-const reference, presumably an output — confirm
865  virtual std::vector<Function::Ptr>
866  makeFunctionFromInterFunctionCalls(Partitioner &partitioner, rose_addr_t &startVa /*in,out*/); // startVa is both read and updated
867 
874  virtual void discoverFunctions(Partitioner&);
875 
890  virtual std::set<rose_addr_t> attachDeadCodeToFunction(Partitioner&, const Function::Ptr&, size_t maxIterations=size_t(-1)); // maxIterations defaults to the largest size_t value
891 
899  virtual DataBlock::Ptr attachPaddingToFunction(Partitioner&, const Function::Ptr&);
900 
905  virtual std::vector<DataBlock::Ptr> attachPaddingToFunctions(Partitioner&);
906 
917  virtual size_t attachAllSurroundedCodeToFunctions(Partitioner&);
918 
926  virtual size_t attachSurroundedCodeToFunctions(Partitioner&);
927 
932  virtual void attachBlocksToFunctions(Partitioner&);
933 
940  virtual std::set<rose_addr_t> attachDeadCodeToFunctions(Partitioner&, size_t maxIterations=size_t(-1)); // maxIterations defaults to the largest size_t value
941 
951  virtual std::vector<DataBlock::Ptr> attachSurroundedDataToFunctions(Partitioner&);
952 
958  virtual void updateAnalysisResults(Partitioner&);
959 
960 
962  // Partitioner low-level functions
963  //
964  // These are functions that a subclass seldom overrides, and maybe even shouldn't override because of their complexity or
965  // the way they interact with one another.
967 public:
978  virtual bool makeNextCallReturnEdge(Partitioner&, boost::logic::tribool assumeCallReturns); // assumeCallReturns is three-valued (boost tribool), passed by value
979 
985  virtual BasicBlock::Ptr makeNextBasicBlockFromPlaceholder(Partitioner&);
986 
1001  virtual BasicBlock::Ptr makeNextBasicBlock(Partitioner&);
1002 
1003 
1005  // Build AST
1007 public:
1008  // Used internally by ROSE's ::frontend to disassemble instructions and build the AST that goes under each SgAsmInterpretation.
1009  static void disassembleForRoseFrontend(SgAsmInterpretation*); // static: callable without an Engine object
1010 
1011 
1013  // Settings and properties. Each property is a getter/setter overload pair; getters tagged /*final*/ are non-virtual.
1015 public:
1022  const Settings& settings() const /*final*/ { return settings_; } // read-only reference to all settings
1023  Settings& settings() /*final*/ { return settings_; } // mutable reference to the same settings object
1032  Disassembler::BasePtr disassembler() const; // getter; only declared here
1033  virtual void disassembler(const Disassembler::BasePtr&); // setter; only declared here
1043  SgAsmInterpretation* interpretation() const /*final*/ { return interp_; } // returns the stored raw pointer
1044  virtual void interpretation(SgAsmInterpretation *interp) { interp_ = interp; } // stores the pointer as given
1052  Progress::Ptr progress() const /*final*/ { return progress_; } // returns the stored progress pointer
1053  virtual void progress(const Progress::Ptr &progress) { progress_ = progress; } // replaces the stored progress pointer
1056  // Python API support functions (compiled only when ROSE_ENABLE_PYTHON_API is defined)
1059 #ifdef ROSE_ENABLE_PYTHON_API
1060 
1061  // Similar to frontend, but returns a partitioner rather than an AST since the Python API doesn't yet support ASTs.
1062  Partitioner pythonParseVector(boost::python::list &pyArgs, const std::string &purpose, const std::string &description); // arguments arrive as a Python list
1063  Partitioner pythonParseSingle(const std::string &specimen, const std::string &purpose, const std::string &description); // single specimen given as one string
1064 
1065 #endif
1066 
1068  // Internal stuff
1070 private:
1071  void init(); // private; only declared here
1072 
1073  // Similar to ::frontend but a lot less complicated.
1074  SgProject* roseFrontendReplacement(const std::vector<boost::filesystem::path> &fileNames); // takes filesystem paths, unlike the string-based public functions
1075 };
1076 
1077 } // namespace
1078 } // namespace
1079 } // namespace
1080 
1081 #endif
1082 #endif
Rose::BinaryAnalysis::DataFlow::Engine< DfCfg, BaseSemantics::StatePtr, TransferFunction, MergeFunction > Engine
Data-Flow engine.
const Settings & settings() const
Property: All settings.
BinaryLoaderPtr Ptr
Reference counting pointer to BinaryLoader.
Definition: BinaryLoader.h:60
Instruction basic block.
Progress::Ptr progress() const
Property: progress reporting.
Base class for adjusting basic blocks during discovery.
Definition: Modules.h:42
Base class for engines driving the partitioner.
The result from parsing a command line.
List of things to work on.
Definition: WorkLists.h:60
Settings & settings()
Property: All settings.
A collection of related switch declarations.
static Engine * instance()
Factory method returning an Engine instance of type EngineBinary.
Sawyer::SharedPointer< LibcStartMain > Ptr
Shared ownership pointer to LibcStartMain callback.
Definition: ModulesLinux.h:45
virtual void memoryMap(const MemoryMap::Ptr &m)
Property: memory map.
Main namespace for the ROSE library.
static Engine * instance(const Settings &settings)
Factory method returning an Engine instance whose type is based on settings.
MemoryMapPtr Ptr
Reference counting pointer.
Definition: MemoryMap.h:115
The parser for a program command line.
virtual void interpretation(SgAsmInterpretation *interp)
Property: interpretation.
DisassemblerSettings disassembler
Settings for creating the disassembler.
SgAsmInterpretation * interpretation() const
Property: interpretation.
AstConstructionSettings astConstruction
Settings for constructing the AST.
MemoryMap::Ptr memoryMap() const
Property: memory map.
Format
Format of the state file.
Definition: SerialIo.h:122
This class represents a source project, with a list of SgFile objects and global information about th...
PartitionerSettings partitioner
Settings for creating a partitioner.
LoaderSettings loader
Settings used during specimen loading.
Sawyer::SharedPointer< Base > BasePtr
Reference counted pointer for disassemblers.
Partitions instructions into basic blocks and functions.
Definition: Partitioner.h:294
Base class for all ROSE exceptions.
Definition: Rose/Exception.h:9
Binary state files are smaller and faster than the other formats, but are not portable across archite...
Definition: SerialIo.h:123
Represents an interpretation of a binary container.
EngineSettings engine
Settings that control engine behavior.
Container associating values with keys.
Definition: Sawyer/Map.h:66
virtual void progress(const Progress::Ptr &progress)
Property: progress reporting.
Sawyer::SharedPointer< Progress > Ptr
Progress objects are reference counted.
Definition: Progress.h:168