ROSE 0.11.145.272
Partitioner2/Engine.h
1#ifndef ROSE_BinaryAnalysis_Partitioner2_Engine_H
2#define ROSE_BinaryAnalysis_Partitioner2_Engine_H
3#include <featureTests.h>
4#ifdef ROSE_ENABLE_BINARY_ANALYSIS
5
6#include <Rose/BasicTypes.h>
7#include <Rose/BinaryAnalysis/Architecture/BasicTypes.h>
8#include <Rose/BinaryAnalysis/Partitioner2/Exception.h>
9#include <Rose/BinaryAnalysis/Partitioner2/Modules.h>
10#include <Rose/BinaryAnalysis/SerialIo.h>
11
12#include <Sawyer/DistinctList.h>
13#include <Sawyer/SharedObject.h>
14#include <Sawyer/SharedPointer.h>
15
16namespace Rose {
17namespace BinaryAnalysis {
18namespace Partitioner2 {
19
158 // Internal data structures
160public:
162 using Ptr = EnginePtr;
163
164 //--------------------------------------------------------------------------------------------------------------------------
165public:
// Settings for the engine. Only the `icf` member is visible in this listing; the
// other members are not shown here.
169 struct Settings {
// Settings for indirect control flow recovery.
173 IndirectControlFlow::Settings icf;
// Serialization support is compiled only when ROSE_ENABLE_BOOST_SERIALIZATION is defined.
178#ifdef ROSE_ENABLE_BOOST_SERIALIZATION
179 private:
// Grants boost::serialization access to the private serialize() template below.
180 friend class boost::serialization::access;
181 template<class S> void serialize(S&, unsigned version);
182#endif
183
184 public:
// Constructor and destructor are declared here and defined elsewhere.
185 Settings();
186 ~Settings();
187 };
188
189 //--------------------------------------------------------------------------------------------------------------------------
190public:
193 public:
194 // WARNING: Defined in Engine.C with different behavior
195 // ~Exception() throw();
196 ~Exception();
197
199 explicit Exception(const std::string&);
200 };
201
202 //--------------------------------------------------------------------------------------------------------------------------
203public:
206 public:
209
216 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const = 0;
217 };
218
221 public:
222 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
223 };
224
227 size_t n_;
228 public:
230 explicit FirstPositionalArguments(size_t n);
231 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
232 };
233
236 size_t n_;
237 public:
239 explicit AllButLastArguments(size_t n);
240 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
241 };
242
245 size_t n_ = 0;
246 public:
251 virtual std::vector<std::string> specimen(const std::vector<std::string>&) const override;
252 };
253
254 //--------------------------------------------------------------------------------------------------------------------------
255protected:
256 // Engine callback for handling instructions added to basic blocks. This is called when a basic block is discovered,
257 // before it's attached to a partitioner, so it shouldn't really be modifying any state in the engine, but rather only
258 // preparing the basic block to be processed.
260 typedef Sawyer::Container::Map<Address /*target*/, std::vector<Address> /*sources*/> WorkList;
261 public:
263 protected:
265 public:
266 static Ptr instance();
267 virtual bool operator()(bool chain, const Args &args) override;
268 private:
269 void fixFunctionReturnEdge(const Args&);
270 void fixFunctionCallEdges(const Args&);
271 void addPossibleIndeterminateEdge(const Args&);
272 };
273
274 //--------------------------------------------------------------------------------------------------------------------------
275private:
276 // Basic blocks that need to be worked on next. These lists are adjusted whenever a new basic block (or placeholder) is
277 // inserted or erased from the CFG.
278 class BasicBlockWorkList: public CfgAdjustmentCallback {
279 // The following lists are used for adding outgoing E_CALL_RETURN edges to basic blocks based on whether the basic
280 // block is a call to a function that might return. When a new basic block is inserted into the CFG (or a previous
281 // block is removed, modified, and re-inserted), the operator() is called and conditionally inserts the block into the
282 // "pendingCallReturn" list (if the block is a function call that lacks an E_CALL_RETURN edge and the function is known
283 // to return or the analysis was incomplete).
284 //
285 // When we run out of other ways to create basic blocks, we process the pendingCallReturn list from back to front. If
286 // the back block (which gets popped) has a positive may-return result then an E_CALL_RETURN edge is added to the CFG
287 // and the normal recursive BB discovery is resumed. Otherwise if the analysis is incomplete the basic block is moved
288 // to the processedCallReturn list. The entire pendingCallReturn list is processed before proceeding.
289 //
290 // If there is no more pendingCallReturn work to be done, then the processedCallReturn blocks are moved to the
291 // finalCallReturn list and finalCallReturn is sorted by approximate CFG height (i.e., leaves first). The contents
292 // of the finalCallReturn list are then analyzed and the result (or the default may-return value for failed analyses)
293 // is used to decide whether a new CFG edge should be created, possibly adding new basic block addresses to the
294 // list of undiscovered blocks.
295 //
296 Sawyer::Container::DistinctList<Address> pendingCallReturn_; // blocks that might need an E_CALL_RETURN edge
297 Sawyer::Container::DistinctList<Address> processedCallReturn_; // call sites whose may-return was indeterminate
298 Sawyer::Container::DistinctList<Address> finalCallReturn_; // indeterminate call sites awaiting final analysis
299
300 Sawyer::Container::DistinctList<Address> undiscovered_; // undiscovered basic block list (last-in-first-out)
301 EnginePtr engine_; // engine to which this callback belongs
302 size_t maxSorts_; // max sorts before using unsorted lists
303 public:
304 ~BasicBlockWorkList();
305 protected:
// Protected constructor; objects are obtained through the static instance() function below.
306 BasicBlockWorkList(const EnginePtr &engine, size_t maxSorts);
307 public:
// Static allocating function taking the same arguments as the protected constructor.
309 static Ptr instance(const EnginePtr &engine, size_t maxSorts);
// Callback for a basic block being inserted into the CFG; per the class comment above, this is
// where a block is conditionally added to the pendingCallReturn list.
310 virtual bool operator()(bool chain, const AttachedBasicBlock &args) override;
// Callback counterpart for detachment. NOTE(review): presumably invoked when a basic block or
// placeholder is erased from the CFG -- confirm against the definition in Engine.C.
311 virtual bool operator()(bool chain, const DetachedBasicBlock &args) override;
// Accessors returning non-const references to work lists. NOTE(review): presumably these expose
// pendingCallReturn_ and processedCallReturn_ respectively -- confirm against the definitions.
312 Sawyer::Container::DistinctList<Address>& pendingCallReturn();
313 Sawyer::Container::DistinctList<Address>& processedCallReturn();
// NOTE(review): presumably performs the "move processedCallReturn to finalCallReturn and sort by
// approximate CFG height" step described in the class comment -- confirm against the definition.
316 void moveAndSortCallReturn(const PartitionerConstPtr&);
317 };
318
319 //--------------------------------------------------------------------------------------------------------------------------
320protected:
321 // A work list providing constants from instructions that are part of the CFG.
323 public:
325
326 private:
327 std::set<Address> toBeExamined_; // instructions waiting to be examined
328 std::set<Address> wasExamined_; // instructions we've already examined
329 Address inProgress_; // instruction that is currently in progress
330 std::vector<Address> constants_; // constants for the instruction in progress
331
332 public:
334 protected:
336
337 public:
338 static Ptr instance();
339
340 // Address of instruction being examined.
341 Address inProgress();
342
343 // Possibly insert more instructions into the work list when a basic block is added to the CFG
344 virtual bool operator()(bool chain, const AttachedBasicBlock &attached) override;
345
346 // Possibly remove instructions from the worklist when a basic block is removed from the CFG
347 virtual bool operator()(bool chain, const DetachedBasicBlock &detached) override;
348
349 // Return the next available constant if any.
350 Sawyer::Optional<Address> nextConstant(const PartitionerConstPtr &partitioner);
351 };
352
354 // Data members
356private:
357 std::string name_; // factory name
358 Settings settings_; // Settings for the partitioner.
359 SgAsmInterpretation *interp_; // interpretation set by loadSpecimen
360 Architecture::BaseConstPtr architecture_; // architecture-specific information
361 MemoryMapPtr map_; // memory map initialized by load()
362 BasicBlockWorkList::Ptr basicBlockWorkList_; // what blocks to work on next
363 CodeConstants::Ptr codeFunctionPointers_; // generates constants that are found in instruction ASTs
364 ProgressPtr progress_; // optional progress reporting
365 std::vector<std::string> specimen_; // list of additional command line arguments (often file names)
366
368 // Construction and destruction
370public:
371 virtual ~Engine();
372
373protected:
375 Engine() = delete;
376 Engine(const Engine&) = delete;
377 Engine& operator=(const Engine&) = delete;
378
379protected:
381 Engine(const std::string &name, const Settings &settings);
382
383public:
384 // [Robb Matzke 2023-03-03]: deprecated.
385 // This used to create a binary engine, so we leave it in place for a while for improved backward compatibility
386 static EngineBinaryPtr instance() ROSE_DEPRECATED("use Engine::forge or EngineBinary::instance");
387
388private:
389 void init();
390
392 // Command-line processing
394public:
395
// Command-line switches for a particular engine.
412 virtual std::list<Sawyer::CommandLine::SwitchGroup> commandLineSwitches();
413
// List of command-line switches for all engines.
419 std::list<Sawyer::CommandLine::SwitchGroup> allCommandLineSwitches();
420
// Documentation about how the specimen is specified. Returns a (title, doc) pair; pure virtual.
432 virtual std::pair<std::string/*title*/, std::string /*doc*/> specimenNameDocumentation() = 0;
433
// Documentation for all specimen specifications, as a list of (title, doc) pairs.
440 static std::list<std::pair<std::string /*title*/, std::string /*doc*/>> allSpecimenNameDocumentation();
441
// Add switches and sections to command-line parser (virtual, per-engine variant).
449 virtual void addToParser(Sawyer::CommandLine::Parser&);
450
// Add switches and sections to command-line parser (non-virtual "all" variant).
456 void addAllToParser(Sawyer::CommandLine::Parser&);
457
// Creates a command-line parser from a purpose string and a description string.
470 virtual Sawyer::CommandLine::Parser commandLineParser(const std::string &purpose, const std::string &description);
471
473 // Factories
475public:
// Register an engine as a factory.
484 static void registerFactory(const EnginePtr &factory);
485
// Remove a concrete engine factory from the registry. Returns bool.
// NOTE(review): presumably true when a factory was actually removed -- confirm against the definition.
492 static bool deregisterFactory(const EnginePtr &factory);
493
// List of all registered factories.
500 static std::vector<EnginePtr> registeredFactories();
501
527 //---------------------------------------------------------
528 // These operate on specimens
529 //---------------------------------------------------------
530
// Creates a suitable engine based on the specimen. The second overload accepts a single
// specimen string instead of a vector of strings.
531 static EnginePtr forge(const std::vector<std::string> &specimen);
532 static EnginePtr forge(const std::string &specimen);
533
534 //---------------------------------------------------------
535 // These operate on arguments as std::vector<std::string>
536 //---------------------------------------------------------
537
538 // all args
539 static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&,
540 const PositionalArgumentParser&, const Settings&);
541
542 // default settings
// Overload taking the argument vector, the command-line parser, and a positional-argument parser,
// with no Settings argument. The final parameter and the terminating ");" were missing from this
// listing (line 544 was dropped); they are restored to mirror the argc/argv "default settings"
// overload declared further below.
543 static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&,
544 const PositionalArgumentParser&);
545
546 // default positional parser
547 static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&, const Settings&);
548
549 // default positional parser and settings
550 static EnginePtr forge(const std::vector<std::string> &arguments, Sawyer::CommandLine::Parser&);
551
552 //---------------------------------------------------------
553 // These operate on arguments as argc and argv
554 //---------------------------------------------------------
555
556 // all args
557 static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&, const PositionalArgumentParser&, const Settings&);
558
559 // default settings
560 static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&, const PositionalArgumentParser&);
561
562 // default positional parser
563 static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&, const Settings&);
564
565 // default positional parser and settings
566 static EnginePtr forge(int argc, char *argv[], Sawyer::CommandLine::Parser&);
570 virtual bool matchFactory(const Sawyer::CommandLine::ParserResult &result, const std::vector<std::string> &specimen) const = 0;
571
577
583 bool isFactory() const;
584
586 // Top-level, do everything functions
588public:
// Most basic usage of the partitioner. Two overloads are declared: a non-virtual one taking
// argc/argv, and a pure virtual one taking the arguments as a vector of strings. Both also take
// a purpose string and a description string and return an SgAsmBlock pointer.
613 SgAsmBlock* frontend(int argc, char *argv[],
614 const std::string &purpose, const std::string &description);
615 virtual SgAsmBlock* frontend(const std::vector<std::string> &args,
616 const std::string &purpose, const std::string &description) = 0;
620 // Basic top-level steps
622public:
// Reset the engine to its initial state.
628 virtual void reset();
629
// Parse the command-line. The argc/argv overload is non-virtual (marked final by comment only);
// the vector-of-strings overload is virtual.
656 Sawyer::CommandLine::ParserResult parseCommandLine(int argc, char *argv[],
657 const std::string &purpose, const std::string &description) /*final*/;
658 virtual Sawyer::CommandLine::ParserResult parseCommandLine(const std::vector<std::string> &args,
659 const std::string &purpose, const std::string &description);
// Obtain an abstract syntax tree. The pure virtual overload takes a list of file names that
// defaults to an empty vector; the non-virtual overload takes a single file name.
676 virtual SgAsmBlock* buildAst(const std::vector<std::string> &fileNames = std::vector<std::string>()) = 0;
677 SgAsmBlock *buildAst(const std::string &fileName) /*final*/;
680 // [Robb Matzke 2023-03-03]: deprecated
681 // Save a partitioner and AST to a file.
682 //
683 // The specified partitioner and the binary analysis components of the AST are saved into the specified file, which is
684 // created if it doesn't exist and truncated if it does exist. The name should end with a ".rba" extension. The file can
685 // be loaded by passing its name to the @ref partition function or by calling @ref loadPartitioner.
686 virtual void savePartitioner(const PartitionerConstPtr&, const boost::filesystem::path&, SerialIo::Format = SerialIo::BINARY)
687 ROSE_DEPRECATED("use Partitioner::saveAsRbaFile");
688
689 // [Robb Matzke 2023-03-03]: deprecated
690 // Load a partitioner and an AST from a file.
691 //
692 // The specified RBA file is opened and read to create a new @ref Partitioner object and associated AST. The @ref
693 // partition function also understands how to open RBA files.
694 virtual PartitionerPtr loadPartitioner(const boost::filesystem::path&, SerialIo::Format = SerialIo::BINARY)
695 ROSE_DEPRECATED("use Partitioner::instanceFromRbaFile");
696
698 // Command-line parsing
700public:
// Parse specimen binary containers. Pure virtual for a list of file names; the non-virtual
// overload takes a single file name.
719 virtual SgAsmInterpretation* parseContainers(const std::vector<std::string> &fileNames) = 0;
720 SgAsmInterpretation* parseContainers(const std::string &fileName) /*final*/;
// Load and/or link interpretation. The file-name list defaults to an empty vector.
743 virtual MemoryMapPtr loadSpecimens(const std::vector<std::string> &fileNames = std::vector<std::string>()) = 0;
744 MemoryMapPtr loadSpecimens(const std::string &fileName) /*final*/;
// Partition instructions into basic blocks and functions. The file-name list defaults to an
// empty vector.
766 virtual PartitionerPtr partition(const std::vector<std::string> &fileNames = std::vector<std::string>()) = 0;
767 PartitionerPtr partition(const std::string &fileName) /*final*/;
// Check settings after command-line is processed.
776 virtual void checkSettings();
777
779 // Container parsing
780 //
781 // top-level: parseContainers
783public:
// Determine whether a specimen is an RBA file.
788 virtual bool isRbaFile(const std::string&);
789
// Determine whether a specimen name is a non-container. Pure virtual.
794 virtual bool isNonContainer(const std::string&) = 0;
795
// Returns true if containers are parsed. Pure virtual.
801 virtual bool areContainersParsed() const = 0;
802
804 // Load specimens
805 //
806 // top-level: loadSpecimens
808public:
// Returns true if specimens are loaded.
812 virtual bool areSpecimensLoaded() const;
813
// Adjust memory map post-loading.
817 virtual void adjustMemoryMap();
818
// Property: memory map. Non-virtual getter and virtual setter.
826 MemoryMapPtr memoryMap() const /*final*/;
827 virtual void memoryMap(const MemoryMapPtr&);
831 // Architecture
833public:
852 virtual Architecture::BaseConstPtr obtainArchitecture();
853 virtual Architecture::BaseConstPtr obtainArchitecture(const Architecture::BaseConstPtr &hint);
857 // Partitioner high-level functions
858 //
859 // top-level: partition
861public:
864
871
876
// Finds interesting things to work on initially. Pure virtual.
880 virtual void runPartitionerInit(const PartitionerPtr&) = 0;
881
// Runs the recursive part of partitioning. Pure virtual.
885 virtual void runPartitionerRecursive(const PartitionerPtr&) = 0;
886
// Runs the final parts of partitioning. Pure virtual.
891 virtual void runPartitionerFinal(const PartitionerPtr&) = 0;
892
// Partitions instructions into basic blocks and functions.
// NOTE(review): presumably drives the three phases declared above -- confirm against the definition.
898 virtual void runPartitioner(const PartitionerPtr&);
899
901 // Partitioner mid-level functions
902 //
903 // These are the functions called by the partitioner high-level stuff. These are sometimes overridden in subclasses,
904 // although it is more likely that the high-level stuff is overridden.
906public:
911 virtual void labelAddresses(const PartitionerPtr&, const Configuration&);
912
917
922
929
931 // Partitioner low-level functions
932 //
933 // These are functions that a subclass seldom overrides, and maybe even shouldn't override because of their complexity or
934 // the way they interact with one another.
936public:
937
938
940 // Build AST
942public:
943 // Used internally by ROSE's ::frontend disassemble instructions to build the AST that goes under each SgAsmInterpretation.
944 static void disassembleForRoseFrontend(SgAsmInterpretation*);
945
947 // Settings and properties
949public:
// Property: Name. Getter returns a const reference; setter takes the new name.
955 const std::string& name() const /*final*/;
956 void name(const std::string&);
// Property: Architecture. Note that this getter is declared non-const.
963 Architecture::BaseConstPtr architecture();
964
// Property: All settings. Const and mutable reference getters plus a setter.
971 const Settings& settings() const /*final*/;
972 Settings& settings() /*final*/;
973 void settings(const Settings&) /*final*/;
// Property: BasicBlock work list.
981 BasicBlockWorkList::Ptr basicBlockWorkList() const /*final*/;
982 void basicBlockWorkList(const BasicBlockWorkList::Ptr&) /*final*/;
983
// Property: Instruction AST constants. Only the setter is visible in this listing.
990 void codeFunctionPointers(const CodeConstants::Ptr&) /*final*/;
// Property: progress reporting. Non-virtual getter and virtual setter.
1009 ProgressPtr progress() const /*final*/;
1010 virtual void progress(const ProgressPtr&);
// Property: specimen. Non-virtual getter returning a const reference, and a virtual setter.
1018 const std::vector<std::string>& specimen() const /*final*/;
1019 virtual void specimen(const std::vector<std::string>&);
1023 // Internal stuff
1025protected:
1026 // Similar to ::frontend but a lot less complicated.
1027 virtual SgProject* roseFrontendReplacement(const std::vector<boost::filesystem::path> &fileNames) = 0;
1028};
1029
1030} // namespace
1031} // namespace
1032} // namespace
1033
1034#endif
1035#endif
Base class for adjusting basic blocks during discovery.
Definition Modules.h:39
Engine for specimens containing machine instructions.
AllButLastArguments(size_t n)
Constructor returning all but last n arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
virtual bool operator()(bool chain, const Args &args) override
Callback method.
virtual bool operator()(bool chain, const DetachedBasicBlock &detached) override
Called when basic block is detached or placeholder erased.
virtual bool operator()(bool chain, const AttachedBasicBlock &attached) override
Called when basic block is attached or placeholder inserted.
Exception(const std::string &)
Construct an exception with a message string.
FirstPositionalArguments(size_t n)
Constructor returning up to n arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
GroupedPositionalArguments(size_t)
Constructor returning nth group of arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const override
Return specimen from positional arguments.
GroupedPositionalArguments()
Constructor returning first group of arguments.
virtual std::vector< std::string > specimen(const std::vector< std::string > &) const =0
Return specimen from positional arguments.
Base class for engines driving the partitioner.
virtual PartitionerPtr createBarePartitioner()
Create a bare partitioner.
virtual Sawyer::CommandLine::Parser commandLineParser(const std::string &purpose, const std::string &description)
Creates a command-line parser.
virtual bool matchFactory(const Sawyer::CommandLine::ParserResult &result, const std::vector< std::string > &specimen) const =0
Predicate for matching a concrete engine factory by parser result and specimen.
void addAllToParser(Sawyer::CommandLine::Parser &)
Add switches and sections to command-line parser.
virtual bool isRbaFile(const std::string &)
Determine whether a specimen is an RBA file.
MemoryMapPtr memoryMap() const
Property: memory map.
virtual void runPartitionerRecursive(const PartitionerPtr &)=0
Runs the recursive part of partitioning.
virtual void addToParser(Sawyer::CommandLine::Parser &)
Add switches and sections to command-line parser.
virtual std::pair< std::string, std::string > specimenNameDocumentation()=0
Documentation about how the specimen is specified.
virtual std::vector< DataBlockPtr > makeConfiguredDataBlocks(const PartitionerPtr &, const Configuration &)
Make data blocks based on configuration.
static std::list< std::pair< std::string, std::string > > allSpecimenNameDocumentation()
Documentation for all specimen specifications.
Sawyer::CommandLine::ParserResult parseCommandLine(int argc, char *argv[], const std::string &purpose, const std::string &description)
Parse the command-line.
SgAsmInterpretation * interpretation() const
Property: interpretation.
virtual bool areSpecimensLoaded() const
Returns true if specimens are loaded.
const std::vector< std::string > & specimen() const
Property: specimen.
static bool deregisterFactory(const EnginePtr &factory)
Remove a concrete engine factory from the registry.
ProgressPtr progress() const
Property: progress reporting.
virtual void runPartitionerFinal(const PartitionerPtr &)=0
Runs the final parts of partitioning.
virtual bool areContainersParsed() const =0
Returns true if containers are parsed.
SgAsmBlock * frontend(int argc, char *argv[], const std::string &purpose, const std::string &description)
Most basic usage of the partitioner.
virtual EnginePtr instanceFromFactory(const Settings &)=0
Virtual constructor for factories.
virtual std::vector< FunctionPtr > makeConfiguredFunctions(const PartitionerPtr &, const Configuration &)
Make functions based on configuration information.
static void registerFactory(const EnginePtr &factory)
Register an engine as a factory.
virtual void adjustMemoryMap()
Adjust memory map post-loading.
Architecture::BaseConstPtr architecture()
Property: Architecture.
virtual void checkSettings()
Check settings after command-line is processed.
Engine()=delete
Default constructor.
virtual void runPartitionerInit(const PartitionerPtr &)=0
Finds interesting things to work on initially.
virtual PartitionerPtr createPartitioner()=0
Create partitioner.
static EnginePtr forge(const std::vector< std::string > &specimen)
Creates a suitable engine based on the specimen.
std::list< Sawyer::CommandLine::SwitchGroup > allCommandLineSwitches()
List of command-line switches for all engines.
bool isFactory() const
Returns true if this object is a factory.
virtual std::list< Sawyer::CommandLine::SwitchGroup > commandLineSwitches()
Command-line switches for a particular engine.
BasicBlockWorkList::Ptr basicBlockWorkList() const
Property: BasicBlock work list.
Engine(const std::string &name, const Settings &settings)
Allocating instance constructors are implemented by the non-abstract subclasses.
virtual SgAsmBlock * buildAst(const std::vector< std::string > &fileNames=std::vector< std::string >())=0
Obtain an abstract syntax tree.
CodeConstants::Ptr codeFunctionPointers() const
Property: Instruction AST constants.
const Settings & settings() const
Property: All settings.
virtual bool isNonContainer(const std::string &)=0
Determine whether a specimen name is a non-container.
const std::string & name() const
Property: Name.
virtual SgAsmInterpretation * parseContainers(const std::vector< std::string > &fileNames)=0
Parse specimen binary containers.
virtual void reset()
Reset the engine to its initial state.
virtual void labelAddresses(const PartitionerPtr &, const Configuration &)
Label addresses.
virtual MemoryMapPtr loadSpecimens(const std::vector< std::string > &fileNames=std::vector< std::string >())=0
Load and/or link interpretation.
virtual void updateAnalysisResults(const PartitionerPtr &)
Runs various analysis passes.
virtual void checkCreatePartitionerPrerequisites() const
Check that we have everything necessary to create a partitioner.
virtual void runPartitioner(const PartitionerPtr &)
Partitions instructions into basic blocks and functions.
virtual Architecture::BaseConstPtr obtainArchitecture()
Determine the architecture.
static std::vector< EnginePtr > registeredFactories()
List of all registered factories.
virtual PartitionerPtr partition(const std::vector< std::string > &fileNames=std::vector< std::string >())=0
Partition instructions into basic blocks and functions.
Partitions instructions into basic blocks and functions.
Base class for binary state input and output.
Definition SerialIo.h:96
A doubly-linked list of distinct items.
Container associating values with keys.
Definition Sawyer/Map.h:72
Holds a value or nothing.
Definition Optional.h:56
Creates SharedPointer from this.
Base class for reference counted objects.
Instruction basic block.
Represents an interpretation of a binary container.
This class represents a source project, with a list of SgFile objects and global information about th...
std::shared_ptr< const Base > BaseConstPtr
Reference counted pointer for Architecture::Base.
Sawyer::SharedPointer< Engine > EnginePtr
Shared-ownership pointer for Engine.
std::uint64_t Address
Address.
Definition Address.h:11
The ROSE library.
Sawyer support library.
EngineSettings engine
Settings that control engine behavior.
LoaderSettings loader
Settings used during specimen loading.
DisassemblerSettings disassembler
Settings for creating the disassembler.
IndirectControlFlow::Settings icf
Settings for indirect control flow recovery.
AstConstructionSettings astConstruction
Settings for constructing the AST.
PartitionerSettings partitioner
Settings for creating a partitioner.
JvmSettings engineJvm
Settings that control behavior specific to EngineJvm.