ROSE 0.11.145.192
Partitioner2/Engine.h
1#ifndef ROSE_BinaryAnalysis_Partitioner2_Engine_H
2#define ROSE_BinaryAnalysis_Partitioner2_Engine_H
3#include <featureTests.h>
4#ifdef ROSE_ENABLE_BINARY_ANALYSIS
5
6#include <Rose/BasicTypes.h>
7#include <Rose/BinaryAnalysis/Architecture/BasicTypes.h>
8#include <Rose/BinaryAnalysis/Partitioner2/Exception.h>
9#include <Rose/BinaryAnalysis/Partitioner2/Modules.h>
10#include <Rose/BinaryAnalysis/SerialIo.h>
11
12#include <Sawyer/DistinctList.h>
13#include <Sawyer/SharedObject.h>
14#include <Sawyer/SharedPointer.h>
15
16namespace Rose {
17namespace BinaryAnalysis {
18namespace Partitioner2 {
19
158 // Internal data structures
160public:
162 using Ptr = EnginePtr;
163
164 //--------------------------------------------------------------------------------------------------------------------------
165public:
169 struct Settings { // settings bundle for an Engine: Engine keeps one in settings_ and its constructor accepts one
176#ifdef ROSE_HAVE_BOOST_SERIALIZATION_LIB
177 private:
178 friend class boost::serialization::access; // grants Boost.Serialization access to the private serialize() below
179 template<class S> void serialize(S&, unsigned version); // declaration only; compiled only when Boost.Serialization is available
180#endif
181
182 public:
183 ~Settings(); // declared here, defined out of line
184 Settings(); // default constructor, defined out of line
185 };
186
187 //--------------------------------------------------------------------------------------------------------------------------
188public:
191 public:
192 // WARNING: Defined in Engine.C with different behavior
193 // ~Exception() throw();
194 ~Exception();
195
197 explicit Exception(const std::string&);
198 };
199
200 //--------------------------------------------------------------------------------------------------------------------------
201public:
204 public:
207
214 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const = 0;
215 };
216
219 public:
220 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
221 };
222
225 size_t n_;
226 public:
228 explicit FirstPositionalArguments(size_t n);
229 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
230 };
231
234 size_t n_;
235 public:
237 explicit AllButLastArguments(size_t n);
238 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
239 };
240
243 size_t n_ = 0;
244 public:
249 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
250 };
251
252 //--------------------------------------------------------------------------------------------------------------------------
253protected:
254 // Engine callback for handling instructions added to basic blocks. This is called when a basic block is discovered,
255 // before it's attached to a partitioner, so it shouldn't really be modifying any state in the engine, but rather only
256 // preparing the basic block to be processed.
258 typedef Sawyer::Container::Map<rose_addr_t /*target*/, std::vector<rose_addr_t> /*sources*/> WorkList;
259 public:
261 protected:
263 public:
264 static Ptr instance();
265 virtual bool operator()(bool chain, const Args &args) override;
266 private:
267 void fixFunctionReturnEdge(const Args&);
268 void fixFunctionCallEdges(const Args&);
269 void addPossibleIndeterminateEdge(const Args&);
270 };
271
272 //--------------------------------------------------------------------------------------------------------------------------
273private:
274 // Basic blocks that need to be worked on next. These lists are adjusted whenever a new basic block (or placeholder) is
275 // inserted or erased from the CFG.
276 class BasicBlockWorkList: public CfgAdjustmentCallback {
277 // The following lists are used for adding outgoing E_CALL_RETURN edges to basic blocks based on whether the basic
278 // block is a call to a function that might return. When a new basic block is inserted into the CFG (or a previous
279 // block is removed, modified, and re-inserted), the operator() is called and conditionally inserts the block into the
280 // "pendingCallReturn" list (if the block is a function call that lacks an E_CALL_RETURN edge and the function is known
281 // to return or the analysis was incomplete).
282 //
283 // When we run out of other ways to create basic blocks, we process the pendingCallReturn list from back to front. If
284 // the back block (which gets popped) has a positive may-return result then an E_CALL_RETURN edge is added to the CFG
285 // and the normal recursive BB discovery is resumed. Otherwise, if the analysis is incomplete, the basic block is moved
286 // to the processedCallReturn list. The entire pendingCallReturn list is processed before proceeding.
287 //
288 // If there is no more pendingCallReturn work to be done, then the processedCallReturn blocks are moved to the
289 // finalCallReturn list and finalCallReturn is sorted by approximate CFG height (i.e., leaves first). The contents
290 // of the finalCallReturn list are then analyzed and the result (or the default may-return value for failed analyses)
291 // is used to decide whether a new CFG edge should be created, possibly adding new basic block addresses to the
292 // list of undiscovered blocks.
293 //
294 Sawyer::Container::DistinctList<rose_addr_t> pendingCallReturn_; // blocks that might need an E_CALL_RETURN edge
295 Sawyer::Container::DistinctList<rose_addr_t> processedCallReturn_; // call sites whose may-return was indeterminate
296 Sawyer::Container::DistinctList<rose_addr_t> finalCallReturn_; // indeterminate call sites awaiting final analysis
297
298 Sawyer::Container::DistinctList<rose_addr_t> undiscovered_; // undiscovered basic block list (last-in-first-out)
299 EnginePtr engine_; // engine to which this callback belongs
300 size_t maxSorts_; // max sorts before using unsorted lists
301 public:
302 ~BasicBlockWorkList();
303 protected:
304 BasicBlockWorkList(const EnginePtr &engine, size_t maxSorts); // protected; presumably reached only through instance() -- confirm in Engine.C
305 public:
307 static Ptr instance(const EnginePtr &engine, size_t maxSorts); // returns a Ptr to a work list for the given engine and sort limit
308 virtual bool operator()(bool chain, const AttachedBasicBlock &args) override; // CFG callback: a basic block (or placeholder) was inserted
309 virtual bool operator()(bool chain, const DetachedBasicBlock &args) override; // CFG callback: a basic block (or placeholder) was erased
314 void moveAndSortCallReturn(const PartitionerConstPtr&); // presumably the processedCallReturn -> finalCallReturn move-and-sort step described above -- confirm
315 };
316
317 //--------------------------------------------------------------------------------------------------------------------------
318protected:
319 // A work list providing constants from instructions that are part of the CFG.
321 public:
323
324 private:
325 std::set<rose_addr_t> toBeExamined_; // instructions waiting to be examined
326 std::set<rose_addr_t> wasExamined_; // instructions we've already examined
327 rose_addr_t inProgress_; // instruction that is currently in progress
328 std::vector<rose_addr_t> constants_; // constants for the instruction in progress
329
330 public:
332 protected:
334
335 public:
336 static Ptr instance();
337
338 // Address of instruction being examined.
339 rose_addr_t inProgress();
340
341 // Possibly insert more instructions into the work list when a basic block is added to the CFG
342 virtual bool operator()(bool chain, const AttachedBasicBlock &attached) override;
343
344 // Possibly remove instructions from the worklist when a basic block is removed from the CFG
345 virtual bool operator()(bool chain, const DetachedBasicBlock &detached) override;
346
347 // Return the next available constant if any.
348 Sawyer::Optional<rose_addr_t> nextConstant(const PartitionerConstPtr &partitioner);
349 };
350
352 // Data members
354private:
355 std::string name_; // factory name
356 Settings settings_; // Settings for the partitioner.
357 SgAsmInterpretation *interp_; // interpretation set by loadSpecimen
358 Architecture::BaseConstPtr architecture_; // architecture-specific information
359 MemoryMapPtr map_; // memory map initialized by load()
360 BasicBlockWorkList::Ptr basicBlockWorkList_; // what blocks to work on next
361 CodeConstants::Ptr codeFunctionPointers_; // generates constants that are found in instruction ASTs
362 ProgressPtr progress_; // optional progress reporting
363 std::vector<std::string> specimen_; // list of additional command line arguments (often file names)
364
366 // Construction and destruction
368public:
369 virtual ~Engine();
370
371protected:
373 Engine() = delete;
374 Engine(const Engine&) = delete;
375 Engine& operator=(const Engine&) = delete;
376
377protected:
379 Engine(const std::string &name, const Settings &settings);
380
381public:
382 // [Robb Matzke 2023-03-03]: deprecated.
383 // This used to create a binary engine, so we leave it in place for a while for improved backward compatibility
384 static EngineBinaryPtr instance() ROSE_DEPRECATED("use Engine::forge or EngineBinary::instance");
385
386private:
387 void init();
388
390 // Command-line processing
392public:
393
410 virtual std::list<Sawyer::CommandLine::SwitchGroup> commandLineSwitches();
411
417 std::list<Sawyer::CommandLine::SwitchGroup> allCommandLineSwitches();
418
430 virtual std::pair<std::string/*title*/, std::string /*doc*/> specimenNameDocumentation() = 0;
431
438 static std::list<std::pair<std::string /*title*/, std::string /*doc*/>> allSpecimenNameDocumentation();
439
447 virtual void addToParser(Sawyer::CommandLine::Parser&);
448
454 void addAllToParser(Sawyer::CommandLine::Parser&);
455
468 virtual Sawyer::CommandLine::Parser commandLineParser(const std::string &purpose, const std::string &description);
469
471 // Factories
473public:
482 static void registerFactory(const EnginePtr &factory);
483
490 static bool deregisterFactory(const EnginePtr &factory);
491
498 static std::vector<EnginePtr> registeredFactories();
499
525 //---------------------------------------------------------
526 // These operate on specimens
527 //---------------------------------------------------------
528
529 static EnginePtr forge(const std::vector<std::string> &specimen);
530 static EnginePtr forge(const std::string &specimen);
531
532 //---------------------------------------------------------
533 // These operate on arguments as std::vector<std::string>
534 //---------------------------------------------------------
535
536 // all args
537 static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&,
538 const PositionalArgumentParser&, const Settings&);
539
540 // default settings
541 static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&,
543
544 // default positional parser
545 static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&, const Settings&);
546
547 // default positional parser and settings
548 static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&);
549
550 //---------------------------------------------------------
551 // These operate on arguments as argc and argv
552 //---------------------------------------------------------
553
554 // all args
555 static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&, const PositionalArgumentParser&, const Settings&);
556
557 // default settings
558 static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&, const PositionalArgumentParser&);
559
560 // default positional parser
561 static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&, const Settings&);
562
563 // default positional parser and settings
564 static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&);
568 virtual bool matchFactory(const std::vector<std::string> &specimen) const = 0;
569
575
581 bool isFactory() const;
582
584 // Top-level, do everything functions
586public:
611 SgAsmBlock* frontend(int argc, char *argv[],
612 const std::string &purpose, const std::string &description);
613 virtual SgAsmBlock* frontend(const std::vector<std::string> &args,
614 const std::string &purpose, const std::string &description) = 0;
618 // Basic top-level steps
620public:
626 virtual void reset();
627
654 Sawyer::CommandLine::ParserResult parseCommandLine(int argc, char *argv[],
655 const std::string &purpose, const std::string &description) /*final*/;
656 virtual Sawyer::CommandLine::ParserResult parseCommandLine(const std::vector<std::string> &args,
657 const std::string &purpose, const std::string &description);
674 virtual SgAsmBlock* buildAst(const std::vector<std::string> &fileNames = std::vector<std::string>()) = 0;
675 SgAsmBlock *buildAst(const std::string &fileName) /*final*/;
678 // [Robb Matzke 2023-03-03]: deprecated
679 // Save a partitioner and AST to a file.
680 //
681 // The specified partitioner and the binary analysis components of the AST are saved into the specified file, which is
682 // created if it doesn't exist and truncated if it does exist. The name should end with a ".rba" extension. The file can
683 // be loaded by passing its name to the @ref partition function or by calling @ref loadPartitioner.
684 virtual void savePartitioner(const PartitionerConstPtr&, const boost::filesystem::path&, SerialIo::Format = SerialIo::BINARY)
685 ROSE_DEPRECATED("use Partitioner::saveAsRbaFile");
686
687 // [Robb Matzke 2023-03-03]: deprecated
688 // Load a partitioner and an AST from a file.
689 //
690 // The specified RBA file is opened and read to create a new @ref Partitioner object and associated AST. The @ref
691 // partition function also understands how to open RBA files.
692 virtual PartitionerPtr loadPartitioner(const boost::filesystem::path&, SerialIo::Format = SerialIo::BINARY)
693 ROSE_DEPRECATED("use Partitioner::instanceFromRbaFile");
694
696 // Command-line parsing
698public:
717 virtual SgAsmInterpretation* parseContainers(const std::vector<std::string> &fileNames) = 0;
718 SgAsmInterpretation* parseContainers(const std::string &fileName) /*final*/;
741 virtual MemoryMapPtr loadSpecimens(const std::vector<std::string> &fileNames = std::vector<std::string>()) = 0;
742 MemoryMapPtr loadSpecimens(const std::string &fileName) /*final*/;
764 virtual PartitionerPtr partition(const std::vector<std::string> &fileNames = std::vector<std::string>()) = 0;
765 PartitionerPtr partition(const std::string &fileName) /*final*/;
774 virtual void checkSettings();
775
777 // Container parsing
778 //
779 // top-level: parseContainers
781public:
786 virtual bool isRbaFile(const std::string&);
787
792 virtual bool isNonContainer(const std::string&) = 0;
793
799 virtual bool areContainersParsed() const = 0;
800
802 // Load specimens
803 //
804 // top-level: loadSpecimens
806public:
810 virtual bool areSpecimensLoaded() const;
811
815 virtual void adjustMemoryMap();
816
824 MemoryMapPtr memoryMap() const /*final*/;
825 virtual void memoryMap(const MemoryMapPtr&);
829 // Architecture
831public:
850 virtual Architecture::BaseConstPtr obtainArchitecture();
851 virtual Architecture::BaseConstPtr obtainArchitecture(const Architecture::BaseConstPtr &hint);
855 // Partitioner high-level functions
856 //
857 // top-level: partition
859public:
862
869
874
878 virtual void runPartitionerInit(const PartitionerPtr&) = 0;
879
883 virtual void runPartitionerRecursive(const PartitionerPtr&) = 0;
884
889 virtual void runPartitionerFinal(const PartitionerPtr&) = 0;
890
896 virtual void runPartitioner(const PartitionerPtr&);
897
899 // Partitioner mid-level functions
900 //
901 // These are the functions called by the partitioner high-level stuff. These are sometimes overridden in subclasses,
902 // although it is more likely that the high-level stuff is overridden.
904public:
909 virtual void labelAddresses(const PartitionerPtr&, const Configuration&);
910
915
920
927
929 // Partitioner low-level functions
930 //
931 // These are functions that a subclass seldom overrides, and maybe even shouldn't override because of their complexity or
932 // the way they interact with one another.
934public:
935
936
938 // Build AST
940public:
941 // Used internally by ROSE's ::frontend disassemble instructions to build the AST that goes under each SgAsmInterpretation.
942 static void disassembleForRoseFrontend(SgAsmInterpretation*);
943
945 // Settings and properties
947public:
953 const std::string& name() const /*final*/;
954 void name(const std::string&);
961 Architecture::BaseConstPtr architecture();
962
969 const Settings& settings() const /*final*/;
970 Settings& settings() /*final*/;
971 void settings(const Settings&) /*final*/;
979 BasicBlockWorkList::Ptr basicBlockWorkList() const /*final*/;
980 void basicBlockWorkList(const BasicBlockWorkList::Ptr&) /*final*/;
981
988 void codeFunctionPointers(const CodeConstants::Ptr&) /*final*/;
1007 ProgressPtr progress() const /*final*/;
1008 virtual void progress(const ProgressPtr&);
1016 const std::vector<std::string>& specimen() const /*final*/;
1017 virtual void specimen(const std::vector<std::string>&);
1021 // Internal stuff
1023protected:
1024 // Similar to ::frontend but a lot less complicated.
1025 virtual SgProject* roseFrontendReplacement(const std::vector<boost::filesystem::path> &fileNames) = 0;
1026};
1027
1028} // namespace
1029} // namespace
1030} // namespace
1031
1032#endif
1033#endif
Base class for adjusting basic blocks during discovery.
Definition Modules.h:39
Engine for specimens containing machine instructions.
AllButLastArguments(size_t n)
Constructor returning all but last n arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
virtual bool operator()(bool chain, const Args &args) override
Callback method.
virtual bool operator()(bool chain, const DetachedBasicBlock &detached) override
Called when basic block is detached or placeholder erased.
virtual bool operator()(bool chain, const AttachedBasicBlock &attached) override
Called when basic block is attached or placeholder inserted.
Exception(const std::string &)
Construct an exception with a message string.
FirstPositionalArguments(size_t n)
Constructor returning up to n arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
GroupedPositionalArguments(size_t)
Constructor returning nth group of arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
GroupedPositionalArguments()
Constructor returning first group of arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const =0
Return specimen from positional arguments.
Base class for engines driving the partitioner.
virtual PartitionerPtr createBarePartitioner()
Create a bare partitioner.
virtual Sawyer::CommandLine::Parser commandLineParser(const std::string &purpose, const std::string &description)
Creates a command-line parser.
void addAllToParser(Sawyer::CommandLine::Parser &)
Add switches and sections to command-line parser.
virtual bool isRbaFile(const std::string &)
Determine whether a specimen is an RBA file.
MemoryMapPtr memoryMap() const
Property: memory map.
virtual void runPartitionerRecursive(const PartitionerPtr &)=0
Runs the recursive part of partitioning.
virtual void addToParser(Sawyer::CommandLine::Parser &)
Add switches and sections to command-line parser.
virtual std::pair< std::string, std::string > specimenNameDocumentation()=0
Documentation about how the specimen is specified.
virtual std::vector< DataBlockPtr > makeConfiguredDataBlocks(const PartitionerPtr &, const Configuration &)
Make data blocks based on configuration.
static std::list< std::pair< std::string, std::string > > allSpecimenNameDocumentation()
Documentation for all specimen specifications.
Sawyer::CommandLine::ParserResult parseCommandLine(int argc, char *argv[], const std::string &purpose, const std::string &description)
Parse the command-line.
SgAsmInterpretation * interpretation() const
Property: interpretation.
virtual bool areSpecimensLoaded() const
Returns true if specimens are loaded.
const std::vector< std::string > & specimen() const
Property: specimen.
static bool deregisterFactory(const EnginePtr &factory)
Remove a concrete engine factory from the registry.
ProgressPtr progress() const
Property: progress reporting.
virtual void runPartitionerFinal(const PartitionerPtr &)=0
Runs the final parts of partitioning.
virtual bool areContainersParsed() const =0
Returns true if containers are parsed.
SgAsmBlock * frontend(int argc, char *argv[], const std::string &purpose, const std::string &description)
Most basic usage of the partitioner.
virtual EnginePtr instanceFromFactory(const Settings &)=0
Virtual constructor for factories.
virtual std::vector< FunctionPtr > makeConfiguredFunctions(const PartitionerPtr &, const Configuration &)
Make functions based on configuration information.
static void registerFactory(const EnginePtr &factory)
Register an engine as a factory.
virtual void adjustMemoryMap()
Adjust memory map post-loading.
Architecture::BaseConstPtr architecture()
Property: Architecture.
virtual void checkSettings()
Check settings after command-line is processed.
Engine()=delete
Default constructor.
virtual void runPartitionerInit(const PartitionerPtr &)=0
Finds interesting things to work on initially.
virtual PartitionerPtr createPartitioner()=0
Create partitioner.
static EnginePtr forge(const std::vector< std::string > &specimen)
Creates a suitable engine based on the specimen.
std::list< Sawyer::CommandLine::SwitchGroup > allCommandLineSwitches()
List of command-line switches for all engines.
bool isFactory() const
Returns true if this object is a factory.
virtual bool matchFactory(const std::vector< std::string > &specimen) const =0
Predicate for matching a concrete engine factory by settings and specimen.
virtual std::list< Sawyer::CommandLine::SwitchGroup > commandLineSwitches()
Command-line switches for a particular engine.
BasicBlockWorkList::Ptr basicBlockWorkList() const
Property: BasicBlock work list.
Engine(const std::string &name, const Settings &settings)
Allocating instance constructors are implemented by the non-abstract subclasses.
virtual SgAsmBlock * buildAst(const std::vector< std::string > &fileNames=std::vector< std::string >())=0
Obtain an abstract syntax tree.
CodeConstants::Ptr codeFunctionPointers() const
Property: Instruction AST constants.
const Settings & settings() const
Property: All settings.
virtual bool isNonContainer(const std::string &)=0
Determine whether a specimen name is a non-container.
const std::string & name() const
Property: Name.
virtual SgAsmInterpretation * parseContainers(const std::vector< std::string > &fileNames)=0
Parse specimen binary containers.
virtual void reset()
Reset the engine to its initial state.
virtual void labelAddresses(const PartitionerPtr &, const Configuration &)
Label addresses.
virtual MemoryMapPtr loadSpecimens(const std::vector< std::string > &fileNames=std::vector< std::string >())=0
Load and/or link interpretation.
virtual void updateAnalysisResults(const PartitionerPtr &)
Runs various analysis passes.
virtual void checkCreatePartitionerPrerequisites() const
Check that we have everything necessary to create a partitioner.
virtual void runPartitioner(const PartitionerPtr &)
Partitions instructions into basic blocks and functions.
virtual Architecture::BaseConstPtr obtainArchitecture()
Determine the architecture.
static std::vector< EnginePtr > registeredFactories()
List of all registered factories.
virtual PartitionerPtr partition(const std::vector< std::string > &fileNames=std::vector< std::string >())=0
Partition instructions into basic blocks and functions.
Partitions instructions into basic blocks and functions.
Base class for binary state input and output.
Definition SerialIo.h:114
A doubly-linked list of distinct items.
Container associating values with keys.
Definition Sawyer/Map.h:72
Holds a value or nothing.
Definition Optional.h:56
Creates SharedPointer from this.
Base class for reference counted objects.
Instruction basic block.
Represents an interpretation of a binary container.
This class represents a source project, with a list of SgFile objects and global information about th...
std::shared_ptr< const Base > BaseConstPtr
Reference counted pointer for Architecture::Base.
Sawyer::SharedPointer< Engine > EnginePtr
Shared-ownership pointer for Engine.
The ROSE library.
Sawyer support library.
EngineSettings engine
Settings that control engine behavior.
LoaderSettings loader
Settings used during specimen loading.
DisassemblerSettings disassembler
Settings for creating the disassembler.
AstConstructionSettings astConstruction
Settings for constructing the AST.
PartitionerSettings partitioner
Settings for creating a partitioner.