ROSE  0.11.87.0
Partitioner2/BasicTypes.h
1 #ifndef ROSE_BinaryAnalysis_Partitioner2_BasicTypes_H
2 #define ROSE_BinaryAnalysis_Partitioner2_BasicTypes_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
5 
6 #include <boost/filesystem.hpp>
7 #include <boost/regex.hpp>
8 #include <boost/serialization/access.hpp>
9 #include <boost/serialization/nvp.hpp>
10 #include <boost/serialization/version.hpp>
11 #include <string>
12 #include <vector>
13 
14 // Define this as one if you want extra invariant checks that are quite expensive, or define as zero. This only makes a
15 // difference if NDEBUG and SAWYER_NDEBUG are both undefined--if either one of them is defined then no expensive (or
16 // inexpensive) checks are performed.
17 #define ROSE_PARTITIONER_EXPENSIVE_CHECKS 0
18 
19 namespace Rose {
20 namespace BinaryAnalysis {
21 
53 namespace Partitioner2 {
54 
56 namespace Precision {
57 enum Level {
58  LOW,
62 };
63 } // namespace
64 
// Two-valued switch. The unscoped enum is wrapped in its own namespace so the enumerators are spelled
// AllowParallelEdges::NO and AllowParallelEdges::YES and do not leak into Partitioner2.
// No explicit values are given, so NO is 0 and YES is 1.
66 namespace AllowParallelEdges {
67 enum Type {
68  NO,
69  YES
70 };
71 } // namespace
72 
74 enum VertexType {
82 };
83 
// Partitioner control flow edge types.
// Every enumerator is a distinct single bit (0x01 through 0x20); presumably this lets several edge types be
// OR-ed together into a mask -- confirm against the callers.
85 enum EdgeType {
86  E_NORMAL = 0x00000001,
87  E_FUNCTION_CALL = 0x00000002,
88  E_FUNCTION_RETURN = 0x00000004,
91  E_CALL_RETURN = 0x00000008,
97  E_FUNCTION_XFER = 0x00000010,
103  E_USER_DEFINED = 0x00000020,
106 };
107 
112 };
113 
118 };
119 
130 
137 
145 
158 
159 private:
160  friend class boost::serialization::access;
161 
// Boost.Serialization hook, reachable through the boost::serialization::access friend declaration above.
// Visits the four boolean settings as name-value pairs; the order of the statements is the order in which the
// fields appear in the archive, so it must not be changed.
// The class version argument is not consulted: every archive version has the same layout.
162  template<class S>
163  void serialize(S &s, unsigned version) {
164  s & BOOST_SERIALIZATION_NVP(allowEmptyGlobalBlock);
165  s & BOOST_SERIALIZATION_NVP(allowFunctionWithNoBasicBlocks);
166  s & BOOST_SERIALIZATION_NVP(allowEmptyBasicBlocks);
167  s & BOOST_SERIALIZATION_NVP(copyAllInstructions);
168  }
169 
170 public:
173  : allowEmptyGlobalBlock(false), allowFunctionWithNoBasicBlocks(false), allowEmptyBasicBlocks(false),
174  copyAllInstructions(true) {}
175 
182  s.allowEmptyGlobalBlock = false;
184  s.allowEmptyBasicBlocks = false;
185  s.copyAllInstructions = true;
186  return s;
187  }
188 
196  s.allowEmptyGlobalBlock = true;
198  s.allowEmptyBasicBlocks = true;
199  s.copyAllInstructions = true; // true keeps the AST a tree instead of a lattice
200  return s;
201  }
202 };
203 
205 // Settings. All settings must act like properties, which means the following:
206 // 1. Each setting must have a name that does not begin with a verb.
207 // 2. Each setting must have a command-line switch to manipulate it.
208 // 3. Each setting must have a method that queries the property (same name as the property and taking no arguments).
209 // 4. Each setting must have a modifier method (same name as property but takes a value and returns void)
211 
223 };
224 
252  std::string linker;
261  std::vector<std::string> envEraseNames;
263  std::vector<boost::regex> envErasePatterns;
266  std::vector<std::string> envInsert;
272  : deExecuteZerosThreshold(0), deExecuteZerosLeaveAtFront(16), deExecuteZerosLeaveAtBack(1),
273  memoryDataAdjustment(DATA_IS_INITIALIZED), memoryIsExecutable(false), linkObjectFiles(true),
274  linkStaticArchives(true), linker("ld -o %o --unresolved-symbols=ignore-all --whole-archive %f") {}
275 
276 private:
277  friend class boost::serialization::access;
278 
// Boost.Serialization hook for the loader settings.
// The five zero-area/memory fields are always visited; the environment lists are only present in archives whose
// class version is at least 1. Statement order is the archive layout and must be preserved.
// NOTE(review): linkObjectFiles, linkStaticArchives and linker are initialized by the constructor but are not
// visited here, so they do not round-trip through an archive -- confirm that this is intentional.
279  template<class S>
280  void serialize(S &s, unsigned version) {
281  s & BOOST_SERIALIZATION_NVP(deExecuteZerosThreshold);
282  s & BOOST_SERIALIZATION_NVP(deExecuteZerosLeaveAtFront);
283  s & BOOST_SERIALIZATION_NVP(deExecuteZerosLeaveAtBack);
284  s & BOOST_SERIALIZATION_NVP(memoryDataAdjustment);
285  s & BOOST_SERIALIZATION_NVP(memoryIsExecutable);
286  if (version >= 1) {
287  s & BOOST_SERIALIZATION_NVP(envEraseNames);
288  s & BOOST_SERIALIZATION_NVP(envInsert);
289 
290  // There's no serialization for boost::regex, so we do it ourselves.
// The patterns travel as their source strings: the strings are collected from the current patterns, passed
// through the archive, and then compiled back into boost::regex objects.
291  std::vector<std::string> reStrings;
292  for (const boost::regex &re: envErasePatterns)
293  reStrings.push_back(re.str());
294  s & BOOST_SERIALIZATION_NVP(reStrings);
// NOTE(review): the rebuild is keyed on the pattern list being empty, not on the archive direction. When loading
// into an object that already holds patterns, the strings read from the archive are never applied and the old
// patterns are kept. Other serialize() methods in this file test S::is_loading::value for this purpose.
295  if (envErasePatterns.empty()) {
296  for (const std::string &reStr: reStrings)
297  envErasePatterns.push_back(boost::regex(reStr));
298  }
299  }
300  }
301 };
302 
309  std::string isaName;
313 private:
314  friend class boost::serialization::access;
315 
// Boost.Serialization hook for the disassembler settings.
// doDisassemble exists only in archives of class version 1 or later; for older archives it is not visited and
// keeps whatever value the object already had. isaName is present in every version.
// The version-gated field comes first, so the statement order must stay as it is.
316  template<class S>
317  void serialize(S &s, unsigned version) {
318  if (version >= 1)
319  s & BOOST_SERIALIZATION_NVP(doDisassemble);
320  s & BOOST_SERIALIZATION_NVP(isaName);
321  }
322 
323 public:
325  : doDisassemble(true) {}
326 };
327 
338 };
339 
352 private:
353  friend class boost::serialization::access;
354 
// Boost.Serialization hook for the basic semantics-related settings.
// Three fields are present in every archive version; ignoringUnknownInsns was appended in class version 1 and is
// skipped (left at its current value) for older archives.
355  template<class S>
356  void serialize(S &s, const unsigned version) {
357  s & BOOST_SERIALIZATION_NVP(usingSemantics);
358  s & BOOST_SERIALIZATION_NVP(checkingCallBranch);
359  s & BOOST_SERIALIZATION_NVP(basicBlockSemanticsAutoDrop);
360  if (version >= 1)
361  s & BOOST_SERIALIZATION_NVP(ignoringUnknownInsns);
362  }
363 
364 public:
366  : usingSemantics(false), checkingCallBranch(false), basicBlockSemanticsAutoDrop(true), ignoringUnknownInsns(false) {}
367 };
368 
378  std::vector<rose_addr_t> functionStartingVas;
393  std::vector<rose_addr_t> ipRewrites;
422  boost::filesystem::path syscallHeader;
425 private:
426  friend class boost::serialization::access;
427 
// Boost.Serialization hook for PartitionerSettings.
// The fields are visited in one fixed sequence that defines the archive layout. Fields added in later class
// versions are gated on `version` at the position they occupy in that sequence, so statements must not be
// reordered. Two fields changed representation over time and are converted when an old archive is read:
//   - findingIntraFunctionCode was a bool before version 2 and is a pass count afterwards;
//   - namingStrings was a bool before version 7 and is an AddressInterval afterwards.
428  template<class S>
429  void serialize(S &s, unsigned version) {
430  s & BOOST_SERIALIZATION_NVP(base);
431  s & BOOST_SERIALIZATION_NVP(functionStartingVas);
432  s & BOOST_SERIALIZATION_NVP(followingGhostEdges);
433  s & BOOST_SERIALIZATION_NVP(discontiguousBlocks);
434  s & BOOST_SERIALIZATION_NVP(maxBasicBlockSize);
435  if (version >= 6)
436  s & BOOST_SERIALIZATION_NVP(ipRewrites);
437  s & BOOST_SERIALIZATION_NVP(findingFunctionPadding);
438  s & BOOST_SERIALIZATION_NVP(findingDeadCode);
439  s & BOOST_SERIALIZATION_NVP(peScramblerDispatcherVa);
440  if (version >= 2) {
441  s & BOOST_SERIALIZATION_NVP(findingIntraFunctionCode);
442  } else {
// Pre-version-2 layout: the setting is stored as a bool under the same name. Saving writes "count > 0";
// loading maps true to 10 passes and false to 0.
443  bool temp = false;
444  if (S::is_saving::value)
445  temp = findingIntraFunctionCode > 0;
446  s & boost::serialization::make_nvp("findingIntraFunctionCode", temp);
447  if (S::is_loading::value)
448  findingIntraFunctionCode = temp ? 10 : 0; // arbitrary number of passes
449  }
450  s & BOOST_SERIALIZATION_NVP(findingIntraFunctionData);
451  s & BOOST_SERIALIZATION_NVP(findingInterFunctionCalls);
452  if (version >= 4)
453  s & BOOST_SERIALIZATION_NVP(findingFunctionCallFunctions);
454  if (version >= 5) {
455  s & BOOST_SERIALIZATION_NVP(findingEntryFunctions);
456  s & BOOST_SERIALIZATION_NVP(findingErrorFunctions);
457  s & BOOST_SERIALIZATION_NVP(findingImportFunctions);
458  s & BOOST_SERIALIZATION_NVP(findingExportFunctions);
459  s & BOOST_SERIALIZATION_NVP(findingSymbolFunctions);
460  }
461  s & BOOST_SERIALIZATION_NVP(interruptVector);
462  s & BOOST_SERIALIZATION_NVP(doingPostAnalysis);
463  s & BOOST_SERIALIZATION_NVP(doingPostFunctionMayReturn);
464  s & BOOST_SERIALIZATION_NVP(doingPostFunctionStackDelta);
465  s & BOOST_SERIALIZATION_NVP(doingPostCallingConvention);
466  s & BOOST_SERIALIZATION_NVP(doingPostFunctionNoop);
467  s & BOOST_SERIALIZATION_NVP(functionReturnAnalysis);
468  if (version >= 3)
469  s & BOOST_SERIALIZATION_NVP(functionReturnAnalysisMaxSorts);
470  s & BOOST_SERIALIZATION_NVP(findingDataFunctionPointers);
471  s & BOOST_SERIALIZATION_NVP(findingCodeFunctionPointers);
472  s & BOOST_SERIALIZATION_NVP(findingThunks);
473  s & BOOST_SERIALIZATION_NVP(splittingThunks);
474  s & BOOST_SERIALIZATION_NVP(semanticMemoryParadigm);
475  s & BOOST_SERIALIZATION_NVP(namingConstants);
476  if (version >= 7) {
477  s & BOOST_SERIALIZATION_NVP(namingStrings);
// Pre-version-7 layout: the setting is a bool. True becomes the whole address space, false becomes a
// default-constructed AddressInterval (presumably the empty interval -- confirm).
// NOTE(review): this branch only handles loading; nothing is written for namingStrings if an archive is ever
// saved with a class version below 7. It also spells the test S::is_loading() where the rest of this method
// uses S::is_loading::value.
478  } else if (S::is_loading()) {
479  bool b;
480  s & boost::serialization::make_nvp("namingStrings", b);
481  if (b) {
482  namingStrings = AddressInterval::whole();
483  } else {
484  namingStrings = AddressInterval();
485  }
486  }
487  s & BOOST_SERIALIZATION_NVP(demangleNames);
488  if (version >= 1) {
489  s & BOOST_SERIALIZATION_NVP(namingSyscalls);
490 
491  // There is no support for boost::filesystem serialization due to arguments by the maintainers over who has
492  // responsibility, so we do it the hard way.
// The path is converted to a std::string before saving and assigned back from the string after loading.
493  std::string temp;
494  if (S::is_saving::value)
495  temp = syscallHeader.string();
496  s & boost::serialization::make_nvp("syscallHeader", temp);
497  if (S::is_loading::value)
498  syscallHeader = temp;
499  }
500  }
501 
502 public:
// Default constructor: gives every scalar setting its default through the member initializer list.
// Members that are not listed (for example functionStartingVas, ipRewrites, interruptVector and syscallHeader)
// are default-constructed.
// Defaults worth knowing: intra-function code search runs 10 passes, the calling-convention and no-op
// post-analyses are off, thunk splitting is off, and function-pointer searches in code and data are off.
// namingStrings defaults to the hull of 4096 and the greatest address of the whole interval, i.e. it starts
// at address 4096 rather than 0 (assuming hull(lo, hi) yields the interval from lo to hi -- confirm).
503  PartitionerSettings()
504  : followingGhostEdges(false), discontiguousBlocks(true), maxBasicBlockSize(0), findingFunctionPadding(true),
505  findingDeadCode(true), peScramblerDispatcherVa(0), findingIntraFunctionCode(10), findingIntraFunctionData(true),
506  findingInterFunctionCalls(true), findingFunctionCallFunctions(true), findingEntryFunctions(true),
507  findingErrorFunctions(true), findingImportFunctions(true), findingExportFunctions(true), findingSymbolFunctions(true),
508  doingPostAnalysis(true), doingPostFunctionMayReturn(true), doingPostFunctionStackDelta(true),
509  doingPostCallingConvention(false), doingPostFunctionNoop(false), functionReturnAnalysis(MAYRETURN_DEFAULT_YES),
510  functionReturnAnalysisMaxSorts(50), findingDataFunctionPointers(false), findingCodeFunctionPointers(false),
511  findingThunks(true), splittingThunks(false), semanticMemoryParadigm(LIST_BASED_MEMORY), namingConstants(true),
512  namingStrings(AddressInterval::hull(4096, AddressInterval::whole().greatest())),
513  namingSyscalls(true), demangleNames(true) {}
514 };
515 
516 // BOOST_CLASS_VERSION(PartitionerSettings, N); -- see end of file (cannot be in a namespace). N must be at least 7
// because serialize() above handles archive versions up to 7.
517 
523  std::vector<std::string> configurationNames;
524  bool exitOnError;
527  : exitOnError(true) {}
528 
529 private:
530  friend class boost::serialization::access;
531 
// Boost.Serialization hook for the engine settings.
// Visits the list of configuration names and the exit-on-error flag, in that order, as name-value pairs.
// The class version argument is not consulted.
532  template<class S>
533  void serialize(S &s, unsigned version) {
534  s & BOOST_SERIALIZATION_NVP(configurationNames);
535  s & BOOST_SERIALIZATION_NVP(exitOnError);
536  }
537 };
538 
539 // Additional declarations w/out definitions yet.
540 class Partitioner;
541 class Function;
543 class BasicBlock;
545 class DataBlock;
547 class ThunkPredicates;
549 
550 } // namespace
551 } // namespace
552 } // namespace
553 
554 // Class versions must be at global scope
559 
560 #endif
561 #endif
bool findingInterFunctionCalls
Look for function calls between functions.
bool splittingThunks
Split thunks into their own separate functions.
bool memoryIsExecutable
Determines whether all of memory should be made executable.
bool linkStaticArchives
Link static libraries before parsing.
Settings for controlling the engine behavior.
bool findingExportFunctions
Create functions at export addresses.
MemoryDataAdjustment
How the partitioner should globally treat memory.
bool findingDataFunctionPointers
Look for function pointers in static data.
Assume a function returns if the may-return analysis cannot decide whether it may return...
Allow parallel edges, so each edge has a unit count.
std::string isaName
Name of the instruction set architecture.
bool doingPostFunctionStackDelta
Run function-stack-delta analysis if doingPostAnalysis is set?
EdgeType
Partitioner control flow edge types.
rose_addr_t peScramblerDispatcherVa
Run the PeDescrambler module if non-zero.
Treat all memory as if it were initialized.
size_t deExecuteZerosLeaveAtBack
Number of bytes at the end of each zero area to leave unaffected.
AddressInterval namingStrings
Addresses that might be string literals for commenting integers.
bool copyAllInstructions
Whether to allow shared instructions in the AST.
Special vertex destination for indeterminate edges.
static AstConstructionSettings permissive()
Default permissive settings.
bool doingPostFunctionNoop
Find and name functions that are effectively no-ops.
Assume that all functions return without ever running the may-return analysis.
bool namingSyscalls
Give names (comments) to system calls if possible.
bool followingGhostEdges
Should ghost edges be followed during disassembly? A ghost edge is a CFG edge that is apparent from t...
Main namespace for the ROSE library.
bool doingPostCallingConvention
Run calling-convention analysis if doingPostAnalysis is set?
std::vector< rose_addr_t > functionStartingVas
Addresses at which to start recursive disassembly.
bool linkObjectFiles
Link object files before parsing.
VertexType
Partitioner control flow vertex types.
static AstConstructionSettings strict()
Default strict settings.
bool discontiguousBlocks
Should basic blocks be allowed to be discontiguous.
bool allowEmptyGlobalBlock
Whether to allow an empty global block.
size_t functionReturnAnalysisMaxSorts
Number of times functions are sorted before using unsorted lists.
Sawyer::SharedPointer< Function > FunctionPtr
Shared-ownership pointer for function.
FunctionReturnAnalysis functionReturnAnalysis
How to run the function may-return analysis.
bool demangleNames
Run all names through a demangling step.
std::vector< std::string > configurationNames
List of configuration files and/or directories.
std::vector< rose_addr_t > ipRewrites
Pairs of addresses for rewriting CFG edges.
bool doingPostFunctionMayReturn
Run function-may-return analysis if doingPostAnalysis is set?
bool ignoringUnknownInsns
Whether to ignore unknown insns when extending basic blocks.
bool findingErrorFunctions
Create functions from error handling and exception information.
std::vector< std::string > envInsert
List of environment variable names and values to be inserted before launching a "run:" specimen...
The value is an assumption without any proof.
Settings that control the engine partitioning.
Confidence
How sure are we of something.
bool doingPostAnalysis
Perform enabled post-partitioning analyses?
size_t findingIntraFunctionCode
Suck up unused addresses as intra-function code (number of passes).
Do not make any global changes to the memory map.
bool findingThunks
Look for common thunk patterns in undiscovered areas.
static Interval whole()
Construct an interval that covers the entire domain.
Definition: Interval.h:180
bool findingImportFunctions
Create functions at import addresses.
A basic block or placeholder for a basic block.
SemanticMemoryParadigm
Organization of semantic memory.
Edge is a function return from the call site.
Normal control flow edge, nothing special.
MemoryDataAdjustment memoryDataAdjustment
How to globally adjust memory segment access bits for data areas.
FunctionReturnAnalysis
Controls whether the function may-return analysis runs.
size_t deExecuteZerosLeaveAtFront
Number of bytes at the beginning of each zero area to leave unaffected.
bool findingEntryFunctions
Create functions at the program entry point(s).
SemanticMemoryParadigm semanticMemoryParadigm
Container used for semantic memory states.
bool checkingCallBranch
Check for situations where CALL is used as a branch.
bool allowEmptyBasicBlocks
Whether to allow a basic block to be empty.
bool usingSemantics
Whether instruction semantics are used.
bool findingSymbolFunctions
Create functions according to symbol tables.
bool exitOnError
If true, emit error message and exit non-zero, else throw.
std::vector< boost::regex > envErasePatterns
List of regular expressions for removing environment variables before launching a "run:" specimen...
bool basicBlockSemanticsAutoDrop
Conserve memory by dropping semantics for attached basic blocks.
Assume a function cannot return if the may-return analysis cannot decide whether it may return...
bool namingConstants
Give names to constants by calling Modules::nameConstants.
size_t deExecuteZerosThreshold
Size threshold for removing execute permission from zero data.
std::vector< std::string > envEraseNames
List of environment variable names that should be removed before launching a "run:" specimen...
bool findingCodeFunctionPointers
Look for function pointers in instructions.
Special vertex destination for non-existing basic blocks.
bool findingIntraFunctionData
Suck up unused addresses as intra-function data.
Assume that a function cannot return without ever running the may-return analysis.
bool allowFunctionWithNoBasicBlocks
Whether to allow functions with no basic blocks.
AddressInterval interruptVector
Table of interrupt handling functions.
bool findingFunctionPadding
Look for padding before each function entry point?
std::string linker
Command to run to link object and archives.
bool findingFunctionCallFunctions
Create functions from function calls.
boost::filesystem::path syscallHeader
Name of header file containing system call numbers.