ROSE  0.11.109.0
BinaryAnalysis/Partitioner2/BasicTypes.h
1 #ifndef ROSE_BinaryAnalysis_Partitioner2_BasicTypes_H
2 #define ROSE_BinaryAnalysis_Partitioner2_BasicTypes_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
5 
6 #include <boost/filesystem.hpp>
7 #include <boost/regex.hpp>
8 #include <boost/serialization/access.hpp>
9 #include <boost/serialization/nvp.hpp>
10 #include <boost/serialization/version.hpp>
11 #include <string>
12 #include <vector>
13 
14 // Define this as 1 to enable extra invariant checks that are quite expensive, or as 0 to disable them. This only makes a
15 // difference if NDEBUG and SAWYER_NDEBUG are both undefined--if either one of them is defined then no expensive (or
16 // inexpensive) checks are performed.
17 #define ROSE_PARTITIONER_EXPENSIVE_CHECKS 0
18 
19 namespace Rose {
20 namespace BinaryAnalysis {
21 
53 namespace Partitioner2 {
54 
56 namespace Precision {
57 enum Level {
58  LOW,
62 };
63 } // namespace
64 
// Two-valued switch wrapped in its own namespace so the enumerators are spelled AllowParallelEdges::NO and
// AllowParallelEdges::YES. NO has the value zero and YES has the value one.
namespace AllowParallelEdges {
enum Type { NO = 0, YES = 1 };
} // namespace
72 
74 enum VertexType {
82 };
83 
// Edge types. Every enumerator occupies its own bit, so values can be OR-ed together to form a mask.
enum EdgeType {
    E_NORMAL          = 1 << 0,                             // 0x00000001
    E_FUNCTION_CALL   = 1 << 1,                             // 0x00000002
    E_FUNCTION_RETURN = 1 << 2,                             // 0x00000004
    E_CALL_RETURN     = 1 << 3,                             // 0x00000008
    E_FUNCTION_XFER   = 1 << 4,                             // 0x00000010
    E_USER_DEFINED    = 1 << 5,                             // 0x00000020
};
107 
112 };
113 
118 };
119 
130 
137 
145 
158 
159 private:
160  friend class boost::serialization::access;
161 
162  template<class S>
163  void serialize(S &s, unsigned version) {
164  s & BOOST_SERIALIZATION_NVP(allowEmptyGlobalBlock);
165  s & BOOST_SERIALIZATION_NVP(allowFunctionWithNoBasicBlocks);
166  s & BOOST_SERIALIZATION_NVP(allowEmptyBasicBlocks);
167  s & BOOST_SERIALIZATION_NVP(copyAllInstructions);
168  }
169 
170 public:
173  : allowEmptyGlobalBlock(false), allowFunctionWithNoBasicBlocks(false), allowEmptyBasicBlocks(false),
174  copyAllInstructions(true) {}
175 
182  s.allowEmptyGlobalBlock = false;
184  s.allowEmptyBasicBlocks = false;
185  s.copyAllInstructions = true;
186  return s;
187  }
188 
196  s.allowEmptyGlobalBlock = true;
198  s.allowEmptyBasicBlocks = true;
199  s.copyAllInstructions = true; // true keeps the AST a tree instead of a lattice
200  return s;
201  }
202 };
203 
205 // Settings. All settings must act like properties, which means the following:
206 // 1. Each setting must have a name that does not begin with a verb.
207 // 2. Each setting must have a command-line switch to manipulate it.
208 // 3. Each setting must have a method that queries the property (same name as the property and taking no arguments).
209 // 4. Each setting must have a modifier method (same name as property but takes a value and returns void)
211 
223 };
224 
253  std::string linker;
262  std::vector<std::string> envEraseNames;
264  std::vector<boost::regex> envErasePatterns;
267  std::vector<std::string> envInsert;
273  : deExecuteZerosThreshold(0), deExecuteZerosLeaveAtFront(16), deExecuteZerosLeaveAtBack(1),
274  memoryDataAdjustment(DATA_IS_INITIALIZED), memoryIsExecutable(false), linkObjectFiles(true),
275  linkStaticArchives(true), linker("ld -o %o --unresolved-symbols=ignore-all --whole-archive %f") {}
276 
277 private:
278  friend class boost::serialization::access;
279 
280  template<class S>
281  void serialize(S &s, unsigned version) {
282  s & BOOST_SERIALIZATION_NVP(deExecuteZerosThreshold);
283  s & BOOST_SERIALIZATION_NVP(deExecuteZerosLeaveAtFront);
284  s & BOOST_SERIALIZATION_NVP(deExecuteZerosLeaveAtBack);
285  s & BOOST_SERIALIZATION_NVP(memoryDataAdjustment);
286  s & BOOST_SERIALIZATION_NVP(memoryIsExecutable);
287  if (version >= 1) {
288  s & BOOST_SERIALIZATION_NVP(envEraseNames);
289  s & BOOST_SERIALIZATION_NVP(envInsert);
290 
291  // There's no serialization for boost::regex, so we do it ourselves.
292  std::vector<std::string> reStrings;
293  for (const boost::regex &re: envErasePatterns)
294  reStrings.push_back(re.str());
295  s & BOOST_SERIALIZATION_NVP(reStrings);
296  if (envErasePatterns.empty()) {
297  for (const std::string &reStr: reStrings)
298  envErasePatterns.push_back(boost::regex(reStr));
299  }
300  }
301  }
302 };
303 
311  std::string isaName;
315 private:
316  friend class boost::serialization::access;
317 
318  template<class S>
319  void serialize(S &s, unsigned version) {
320  if (version >= 1)
321  s & BOOST_SERIALIZATION_NVP(doDisassemble);
322  s & BOOST_SERIALIZATION_NVP(isaName);
323  }
324 
325 public:
327  : doDisassemble(true) {}
328 };
329 
340 };
341 
354 private:
355  friend class boost::serialization::access;
356 
357  template<class S>
358  void serialize(S &s, const unsigned version) {
359  s & BOOST_SERIALIZATION_NVP(usingSemantics);
360  s & BOOST_SERIALIZATION_NVP(checkingCallBranch);
361  s & BOOST_SERIALIZATION_NVP(basicBlockSemanticsAutoDrop);
362  if (version >= 1)
363  s & BOOST_SERIALIZATION_NVP(ignoringUnknownInsns);
364  }
365 
366 public:
368  : usingSemantics(false), checkingCallBranch(false), basicBlockSemanticsAutoDrop(true), ignoringUnknownInsns(false) {}
369 };
370 
381  std::vector<rose_addr_t> functionStartingVas;
396  std::vector<rose_addr_t> ipRewrites;
425  boost::filesystem::path syscallHeader;
428 private:
429  friend class boost::serialization::access;
430 
431  template<class S>
432  void serialize(S &s, unsigned version) {
433  s & BOOST_SERIALIZATION_NVP(base);
434  s & BOOST_SERIALIZATION_NVP(functionStartingVas);
435  s & BOOST_SERIALIZATION_NVP(followingGhostEdges);
436  s & BOOST_SERIALIZATION_NVP(discontiguousBlocks);
437  s & BOOST_SERIALIZATION_NVP(maxBasicBlockSize);
438  if (version >= 6)
439  s & BOOST_SERIALIZATION_NVP(ipRewrites);
440  s & BOOST_SERIALIZATION_NVP(findingFunctionPadding);
441  s & BOOST_SERIALIZATION_NVP(findingDeadCode);
442  s & BOOST_SERIALIZATION_NVP(peScramblerDispatcherVa);
443  if (version >= 2) {
444  s & BOOST_SERIALIZATION_NVP(findingIntraFunctionCode);
445  } else {
446  bool temp = false;
447  if (S::is_saving::value)
448  temp = findingIntraFunctionCode > 0;
449  s & boost::serialization::make_nvp("findingIntraFunctionCode", temp);
450  if (S::is_loading::value)
451  findingIntraFunctionCode = temp ? 10 : 0; // arbitrary number of passes
452  }
453  s & BOOST_SERIALIZATION_NVP(findingIntraFunctionData);
454  s & BOOST_SERIALIZATION_NVP(findingInterFunctionCalls);
455  if (version >= 4)
456  s & BOOST_SERIALIZATION_NVP(findingFunctionCallFunctions);
457  if (version >= 5) {
458  s & BOOST_SERIALIZATION_NVP(findingEntryFunctions);
459  s & BOOST_SERIALIZATION_NVP(findingErrorFunctions);
460  s & BOOST_SERIALIZATION_NVP(findingImportFunctions);
461  s & BOOST_SERIALIZATION_NVP(findingExportFunctions);
462  s & BOOST_SERIALIZATION_NVP(findingSymbolFunctions);
463  }
464  s & BOOST_SERIALIZATION_NVP(interruptVector);
465  s & BOOST_SERIALIZATION_NVP(doingPostAnalysis);
466  s & BOOST_SERIALIZATION_NVP(doingPostFunctionMayReturn);
467  s & BOOST_SERIALIZATION_NVP(doingPostFunctionStackDelta);
468  s & BOOST_SERIALIZATION_NVP(doingPostCallingConvention);
469  s & BOOST_SERIALIZATION_NVP(doingPostFunctionNoop);
470  s & BOOST_SERIALIZATION_NVP(functionReturnAnalysis);
471  if (version >= 3)
472  s & BOOST_SERIALIZATION_NVP(functionReturnAnalysisMaxSorts);
473  s & BOOST_SERIALIZATION_NVP(findingDataFunctionPointers);
474  s & BOOST_SERIALIZATION_NVP(findingCodeFunctionPointers);
475  s & BOOST_SERIALIZATION_NVP(findingThunks);
476  s & BOOST_SERIALIZATION_NVP(splittingThunks);
477  s & BOOST_SERIALIZATION_NVP(semanticMemoryParadigm);
478  if (version >= 8) {
479  s & BOOST_SERIALIZATION_NVP(namingConstants);
480  } else if (S::is_loading()) {
481  bool b;
482  s & boost::serialization::make_nvp("namingConstants", b);
483  if (b) {
484  namingConstants = AddressInterval::whole();
485  } else {
486  namingConstants = AddressInterval();
487  }
488  }
489  if (version >= 7) {
490  s & BOOST_SERIALIZATION_NVP(namingStrings);
491  } else if (S::is_loading()) {
492  bool b;
493  s & boost::serialization::make_nvp("namingStrings", b);
494  if (b) {
495  namingStrings = AddressInterval::whole();
496  } else {
497  namingStrings = AddressInterval();
498  }
499  }
500  s & BOOST_SERIALIZATION_NVP(demangleNames);
501  if (version >= 1) {
502  s & BOOST_SERIALIZATION_NVP(namingSyscalls);
503 
504  // There is no support for boost::filesystem serialization due to arguments by the maintainers over who has
505  // responsibility, so we do it the hard way.
506  std::string temp;
507  if (S::is_saving::value)
508  temp = syscallHeader.string();
509  s & boost::serialization::make_nvp("syscallHeader", temp);
510  if (S::is_loading::value)
511  syscallHeader = temp;
512  }
513  }
514 
515 public:
516  PartitionerSettings()
517  : followingGhostEdges(false), discontiguousBlocks(true), maxBasicBlockSize(0), findingFunctionPadding(true),
518  findingDeadCode(true), peScramblerDispatcherVa(0), findingIntraFunctionCode(10), findingIntraFunctionData(true),
519  findingInterFunctionCalls(true), findingFunctionCallFunctions(true), findingEntryFunctions(true),
520  findingErrorFunctions(true), findingImportFunctions(true), findingExportFunctions(true), findingSymbolFunctions(true),
521  doingPostAnalysis(true), doingPostFunctionMayReturn(true), doingPostFunctionStackDelta(true),
522  doingPostCallingConvention(false), doingPostFunctionNoop(false), functionReturnAnalysis(MAYRETURN_DEFAULT_YES),
523  functionReturnAnalysisMaxSorts(50), findingDataFunctionPointers(false), findingCodeFunctionPointers(false),
524  findingThunks(true), splittingThunks(false), semanticMemoryParadigm(LIST_BASED_MEMORY),
525  namingConstants(AddressInterval::hull(4096, AddressInterval::whole().greatest())),
526  namingStrings(AddressInterval::hull(4096, AddressInterval::whole().greatest())),
527  namingSyscalls(true), demangleNames(true) {}
528 };
529 
530 // BOOST_CLASS_VERSION for PartitionerSettings is declared at the end of the file (it cannot be in a namespace). Note that serialize() above handles class versions up to 8.
531 
537  std::vector<std::string> configurationNames;
538  bool exitOnError;
541  : exitOnError(true) {}
542 
543 private:
544  friend class boost::serialization::access;
545 
546  template<class S>
547  void serialize(S &s, unsigned version) {
548  s & BOOST_SERIALIZATION_NVP(configurationNames);
549  s & BOOST_SERIALIZATION_NVP(exitOnError);
550  }
551 };
552 
553 // Additional declarations w/out definitions yet.
554 class Partitioner;
555 class Function;
557 class BasicBlock;
559 class DataBlock;
561 class ThunkPredicates;
563 
564 } // namespace
565 } // namespace
566 } // namespace
567 
568 // Class versions must be at global scope
573 
574 #endif
575 #endif
bool findingInterFunctionCalls
Look for function calls between functions.
bool splittingThunks
Split thunks into their own separate functions.
bool memoryIsExecutable
Determines whether all of memory should be made executable.
MemoryDataAdjustment
How the partitioner should globally treat memory.
bool findingDataFunctionPointers
Look for function pointers in static data.
Assume a function returns if the may-return analysis cannot decide whether it may return...
std::string isaName
Name of the instruction set architecture.
bool doingPostFunctionStackDelta
Run function-stack-delta analysis if doingPostAnalysis is set?
rose_addr_t peScramblerDispatcherVa
Run the PeDescrambler module if non-zero.
size_t deExecuteZerosLeaveAtBack
Number of bytes at the end of each zero area to leave unaffected.
AddressInterval namingStrings
Addresses that might be string literals for commenting integers.
bool copyAllInstructions
Whether to allow shared instructions in the AST.
AddressInterval namingConstants
Give possible names to constants if they're in this range.
Special vertex destination for indeterminate edges.
static AstConstructionSettings permissive()
Default permissive settings.
bool doingPostFunctionNoop
Find and name functions that are effectively no-ops.
Assume that all functions return without ever running the may-return analysis.
bool namingSyscalls
Give names (comments) to system calls if possible.
bool followingGhostEdges
Should ghost edges be followed during disassembly? A ghost edge is a CFG edge that is apparent from t...
Main namespace for the ROSE library.
bool doingPostCallingConvention
Run calling-convention analysis if doingPostAnalysis is set?
std::vector< rose_addr_t > functionStartingVas
Addresses at which to start recursive disassembly.
VertexType
Partitioner control flow vertex types.
static AstConstructionSettings strict()
Default strict settings.
bool discontiguousBlocks
Should basic blocks be allowed to be discontiguous?
size_t functionReturnAnalysisMaxSorts
Number of times functions are sorted before using unsorted lists.
Sawyer::SharedPointer< Function > FunctionPtr
Shared-ownership pointer for function.
FunctionReturnAnalysis functionReturnAnalysis
How to run the function may-return analysis.
std::vector< std::string > configurationNames
List of configuration files and/or directories.
std::vector< rose_addr_t > ipRewrites
Pairs of addresses for rewriting CFG edges.
bool doingPostFunctionMayReturn
Run function-may-return analysis if doingPostAnalysis is set?
bool ignoringUnknownInsns
Whether to ignore unknown instructions when extending basic blocks.
bool findingErrorFunctions
Create functions from error handling and exception information.
std::vector< std::string > envInsert
List of environment variable names and values to be inserted before launching a "run:" specimen...
The value is an assumption without any proof.
size_t findingIntraFunctionCode
Suck up unused addresses as intra-function code (number of passes).
bool findingThunks
Look for common thunk patterns in undiscovered areas.
static Interval whole()
Construct an interval that covers the entire domain.
Definition: Interval.h:180
MemoryDataAdjustment memoryDataAdjustment
How to globally adjust memory segment access bits for data areas.
FunctionReturnAnalysis
Controls whether the function may-return analysis runs.
size_t deExecuteZerosLeaveAtFront
Number of bytes at the beginning of each zero area to leave unaffected.
bool findingEntryFunctions
Create functions at the program entry point(s).
SemanticMemoryParadigm semanticMemoryParadigm
Container used for semantic memory states.
bool checkingCallBranch
Check for situations where CALL is used as a branch.
bool findingSymbolFunctions
Create functions according to symbol tables.
bool exitOnError
If true, emit error message and exit non-zero, else throw.
std::vector< boost::regex > envErasePatterns
List of regular expressions for removing environment variables before launching a "run:" specimen...
bool basicBlockSemanticsAutoDrop
Conserve memory by dropping semantics for attached basic blocks.
Assume a function cannot return if the may-return analysis cannot decide whether it may return...
size_t deExecuteZerosThreshold
Size threshold for removing execute permission from zero data.
std::vector< std::string > envEraseNames
List of environment variable names that should be removed before launching a "run:" specimen...
bool findingCodeFunctionPointers
Look for function pointers in instructions.
Special vertex destination for non-existing basic blocks.
bool findingIntraFunctionData
Suck up unused addresses as intra-function data.
Assume that a function cannot return without ever running the may-return analysis.
bool allowFunctionWithNoBasicBlocks
Whether to allow functions with no basic blocks.
AddressInterval interruptVector
Table of interrupt handling functions.
bool findingFunctionPadding
Look for padding before each function entry point?
std::string linker
Command to run to link object files and archives.
boost::filesystem::path syscallHeader
Name of header file containing system call numbers.