ROSE  0.11.98.0
BinaryAnalysis/Partitioner2/BasicTypes.h
1 #ifndef ROSE_BinaryAnalysis_Partitioner2_BasicTypes_H
2 #define ROSE_BinaryAnalysis_Partitioner2_BasicTypes_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
5 
6 #include <boost/filesystem.hpp>
7 #include <boost/regex.hpp>
8 #include <boost/serialization/access.hpp>
9 #include <boost/serialization/nvp.hpp>
10 #include <boost/serialization/version.hpp>
11 #include <string>
12 #include <vector>
13 
14 // Define this as one if you want extra invariant checks that are quite expensive, or define as zero. This only makes a
15 // difference if NDEBUG and SAWYER_NDEBUG are both undefined--if either one of them is defined then no expensive (or
16 // inexpensive) checks are performed.
17 #define ROSE_PARTITIONER_EXPENSIVE_CHECKS 0
18 
19 namespace Rose {
20 namespace BinaryAnalysis {
21 
53 namespace Partitioner2 {
54 
56 namespace Precision {
57 enum Level {
58  LOW,
62 };
63 } // namespace
64 
// Two-valued flag type. The unscoped enum is wrapped in its own namespace, so the type is spelled
// AllowParallelEdges::Type and its enumerators are AllowParallelEdges::NO and AllowParallelEdges::YES.
66 namespace AllowParallelEdges {
67 enum Type {
68  NO,
69  YES
70 };
71 } // namespace
72 
74 enum VertexType {
82 };
83 
// Edge type enumerators. Each one is assigned a distinct single-bit value (0x01 through 0x20).
85 enum EdgeType {
86  E_NORMAL = 0x00000001,
87  E_FUNCTION_CALL = 0x00000002,
88  E_FUNCTION_RETURN = 0x00000004,
91  E_CALL_RETURN = 0x00000008,
97  E_FUNCTION_XFER = 0x00000010,
103  E_USER_DEFINED = 0x00000020,
106 };
107 
112 };
113 
118 };
119 
130 
137 
145 
158 
159 private:
160  friend class boost::serialization::access;
161 
// Boost.Serialization hook. Archives the four settings below as name-value pairs, in exactly this order (the order is
// the archive layout, so it must not be rearranged). The class version argument is not consulted.
162  template<class S>
163  void serialize(S &s, unsigned version) {
164  s & BOOST_SERIALIZATION_NVP(allowEmptyGlobalBlock);
165  s & BOOST_SERIALIZATION_NVP(allowFunctionWithNoBasicBlocks);
166  s & BOOST_SERIALIZATION_NVP(allowEmptyBasicBlocks);
167  s & BOOST_SERIALIZATION_NVP(copyAllInstructions);
168  }
169 
170 public:
173  : allowEmptyGlobalBlock(false), allowFunctionWithNoBasicBlocks(false), allowEmptyBasicBlocks(false),
174  copyAllInstructions(true) {}
175 
182  s.allowEmptyGlobalBlock = false;
184  s.allowEmptyBasicBlocks = false;
185  s.copyAllInstructions = true;
186  return s;
187  }
188 
196  s.allowEmptyGlobalBlock = true;
198  s.allowEmptyBasicBlocks = true;
199  s.copyAllInstructions = true; // true keeps the AST a tree instead of a lattice
200  return s;
201  }
202 };
203 
205 // Settings. All settings must act like properties, which means the following:
206 // 1. Each setting must have a name that does not begin with a verb.
207 // 2. Each setting must have a command-line switch to manipulate it.
208 // 3. Each setting must have a method that queries the property (same name as the property and taking no arguments).
209 // 4. Each setting must have a modifier method (same name as the property but taking a value and returning void).
211 
223 };
224 
252  std::string linker;
261  std::vector<std::string> envEraseNames;
263  std::vector<boost::regex> envErasePatterns;
266  std::vector<std::string> envInsert;
272  : deExecuteZerosThreshold(0), deExecuteZerosLeaveAtFront(16), deExecuteZerosLeaveAtBack(1),
273  memoryDataAdjustment(DATA_IS_INITIALIZED), memoryIsExecutable(false), linkObjectFiles(true),
274  linkStaticArchives(true), linker("ld -o %o --unresolved-symbols=ignore-all --whole-archive %f") {}
275 
276 private:
277  friend class boost::serialization::access;
278 
279  template<class S>
280  void serialize(S &s, unsigned version) {
281  s & BOOST_SERIALIZATION_NVP(deExecuteZerosThreshold);
282  s & BOOST_SERIALIZATION_NVP(deExecuteZerosLeaveAtFront);
283  s & BOOST_SERIALIZATION_NVP(deExecuteZerosLeaveAtBack);
284  s & BOOST_SERIALIZATION_NVP(memoryDataAdjustment);
285  s & BOOST_SERIALIZATION_NVP(memoryIsExecutable);
286  if (version >= 1) {
287  s & BOOST_SERIALIZATION_NVP(envEraseNames);
288  s & BOOST_SERIALIZATION_NVP(envInsert);
289 
290  // There's no serialization for boost::regex, so we do it ourselves.
291  std::vector<std::string> reStrings;
292  for (const boost::regex &re: envErasePatterns)
293  reStrings.push_back(re.str());
294  s & BOOST_SERIALIZATION_NVP(reStrings);
295  if (envErasePatterns.empty()) {
296  for (const std::string &reStr: reStrings)
297  envErasePatterns.push_back(boost::regex(reStr));
298  }
299  }
300  }
301 };
302 
309  std::string isaName;
313 private:
314  friend class boost::serialization::access;
315 
// Boost.Serialization hook. doDisassemble is archived only for class version 1 and later (the `if` guards just that
// one statement); isaName is archived for every version.
316  template<class S>
317  void serialize(S &s, unsigned version) {
318  if (version >= 1)
319  s & BOOST_SERIALIZATION_NVP(doDisassemble);
320  s & BOOST_SERIALIZATION_NVP(isaName);
321  }
322 
323 public:
325  : doDisassemble(true) {}
326 };
327 
338 };
339 
352 private:
353  friend class boost::serialization::access;
354 
// Boost.Serialization hook. The first three settings are archived for every class version, in this order;
// ignoringUnknownInsns is archived only for class version 1 and later.
355  template<class S>
356  void serialize(S &s, const unsigned version) {
357  s & BOOST_SERIALIZATION_NVP(usingSemantics);
358  s & BOOST_SERIALIZATION_NVP(checkingCallBranch);
359  s & BOOST_SERIALIZATION_NVP(basicBlockSemanticsAutoDrop);
360  if (version >= 1)
361  s & BOOST_SERIALIZATION_NVP(ignoringUnknownInsns);
362  }
363 
364 public:
366  : usingSemantics(false), checkingCallBranch(false), basicBlockSemanticsAutoDrop(true), ignoringUnknownInsns(false) {}
367 };
368 
378  std::vector<rose_addr_t> functionStartingVas;
393  std::vector<rose_addr_t> ipRewrites;
422  boost::filesystem::path syscallHeader;
425 private:
426  friend class boost::serialization::access;
427 
// Boost.Serialization hook for PartitionerSettings.
//
// The statement order below is the archive layout and must not be rearranged. Members added over time are guarded by
// the class version in which they first appear (versions 1 through 8 are tested below). Where a member changed type
// between versions, the old representation is read into a temporary and converted.
428  template<class S>
429  void serialize(S &s, unsigned version) {
430  s & BOOST_SERIALIZATION_NVP(base);
431  s & BOOST_SERIALIZATION_NVP(functionStartingVas);
432  s & BOOST_SERIALIZATION_NVP(followingGhostEdges);
433  s & BOOST_SERIALIZATION_NVP(discontiguousBlocks);
434  s & BOOST_SERIALIZATION_NVP(maxBasicBlockSize);
// ipRewrites is present in the archive only for version 6 and later (the `if` guards just the next statement).
435  if (version >= 6)
436  s & BOOST_SERIALIZATION_NVP(ipRewrites);
437  s & BOOST_SERIALIZATION_NVP(findingFunctionPadding);
438  s & BOOST_SERIALIZATION_NVP(findingDeadCode);
439  s & BOOST_SERIALIZATION_NVP(peScramblerDispatcherVa);
// findingIntraFunctionCode is archived directly from version 2 on. Before version 2 it is archived as a bool under
// the same name: saving writes whether the value is non-zero, and loading maps true to 10 and false to 0.
440  if (version >= 2) {
441  s & BOOST_SERIALIZATION_NVP(findingIntraFunctionCode);
442  } else {
443  bool temp = false;
444  if (S::is_saving::value)
445  temp = findingIntraFunctionCode > 0;
446  s & boost::serialization::make_nvp("findingIntraFunctionCode", temp);
447  if (S::is_loading::value)
448  findingIntraFunctionCode = temp ? 10 : 0; // arbitrary number of passes
449  }
450  s & BOOST_SERIALIZATION_NVP(findingIntraFunctionData);
451  s & BOOST_SERIALIZATION_NVP(findingInterFunctionCalls);
// Present only for version 4 and later.
452  if (version >= 4)
453  s & BOOST_SERIALIZATION_NVP(findingFunctionCallFunctions);
// The five finding*Functions settings below are present only for version 5 and later.
454  if (version >= 5) {
455  s & BOOST_SERIALIZATION_NVP(findingEntryFunctions);
456  s & BOOST_SERIALIZATION_NVP(findingErrorFunctions);
457  s & BOOST_SERIALIZATION_NVP(findingImportFunctions);
458  s & BOOST_SERIALIZATION_NVP(findingExportFunctions);
459  s & BOOST_SERIALIZATION_NVP(findingSymbolFunctions);
460  }
461  s & BOOST_SERIALIZATION_NVP(interruptVector);
462  s & BOOST_SERIALIZATION_NVP(doingPostAnalysis);
463  s & BOOST_SERIALIZATION_NVP(doingPostFunctionMayReturn);
464  s & BOOST_SERIALIZATION_NVP(doingPostFunctionStackDelta);
465  s & BOOST_SERIALIZATION_NVP(doingPostCallingConvention);
466  s & BOOST_SERIALIZATION_NVP(doingPostFunctionNoop);
467  s & BOOST_SERIALIZATION_NVP(functionReturnAnalysis);
// Present only for version 3 and later.
468  if (version >= 3)
469  s & BOOST_SERIALIZATION_NVP(functionReturnAnalysisMaxSorts);
470  s & BOOST_SERIALIZATION_NVP(findingDataFunctionPointers);
471  s & BOOST_SERIALIZATION_NVP(findingCodeFunctionPointers);
472  s & BOOST_SERIALIZATION_NVP(findingThunks);
473  s & BOOST_SERIALIZATION_NVP(splittingThunks);
474  s & BOOST_SERIALIZATION_NVP(semanticMemoryParadigm);
// namingConstants is archived directly from version 8 on. When loading an older archive, a bool is read under the same
// name and converted: true becomes AddressInterval::whole(), false becomes a default-constructed AddressInterval.
// Nothing is written for this member when saving with a version below 8.
// NOTE(review): this is spelled S::is_loading() here but S::is_loading::value elsewhere in this function --
// presumably equivalent; confirm.
475  if (version >= 8) {
476  s & BOOST_SERIALIZATION_NVP(namingConstants);
477  } else if (S::is_loading()) {
478  bool b;
479  s & boost::serialization::make_nvp("namingConstants", b);
480  if (b) {
481  namingConstants = AddressInterval::whole();
482  } else {
483  namingConstants = AddressInterval();
484  }
485  }
// namingStrings follows the same scheme as namingConstants, with the direct representation starting at version 7.
486  if (version >= 7) {
487  s & BOOST_SERIALIZATION_NVP(namingStrings);
488  } else if (S::is_loading()) {
489  bool b;
490  s & boost::serialization::make_nvp("namingStrings", b);
491  if (b) {
492  namingStrings = AddressInterval::whole();
493  } else {
494  namingStrings = AddressInterval();
495  }
496  }
497  s & BOOST_SERIALIZATION_NVP(demangleNames);
// namingSyscalls and syscallHeader are present only for version 1 and later. The path is archived as a std::string
// under the name "syscallHeader" and converted back on load.
498  if (version >= 1) {
499  s & BOOST_SERIALIZATION_NVP(namingSyscalls);
500 
501  // There is no support for boost::filesystem serialization due to arguments by the maintainers over who has
502  // responsibility, so we do it the hard way.
503  std::string temp;
504  if (S::is_saving::value)
505  temp = syscallHeader.string();
506  s & boost::serialization::make_nvp("syscallHeader", temp);
507  if (S::is_loading::value)
508  syscallHeader = temp;
509  }
510  }
511 
512 public:
// Default constructor: establishes the default value of every setting named in the initializer list below.
// Members that do not appear in the list (for example base, functionStartingVas, ipRewrites, interruptVector and
// syscallHeader) are not given an explicit value here.
// namingConstants and namingStrings both default to the interval built by
// AddressInterval::hull(4096, AddressInterval::whole().greatest()).
513  PartitionerSettings()
514  : followingGhostEdges(false), discontiguousBlocks(true), maxBasicBlockSize(0), findingFunctionPadding(true),
515  findingDeadCode(true), peScramblerDispatcherVa(0), findingIntraFunctionCode(10), findingIntraFunctionData(true),
516  findingInterFunctionCalls(true), findingFunctionCallFunctions(true), findingEntryFunctions(true),
517  findingErrorFunctions(true), findingImportFunctions(true), findingExportFunctions(true), findingSymbolFunctions(true),
518  doingPostAnalysis(true), doingPostFunctionMayReturn(true), doingPostFunctionStackDelta(true),
519  doingPostCallingConvention(false), doingPostFunctionNoop(false), functionReturnAnalysis(MAYRETURN_DEFAULT_YES),
520  functionReturnAnalysisMaxSorts(50), findingDataFunctionPointers(false), findingCodeFunctionPointers(false),
521  findingThunks(true), splittingThunks(false), semanticMemoryParadigm(LIST_BASED_MEMORY),
522  namingConstants(AddressInterval::hull(4096, AddressInterval::whole().greatest())),
523  namingStrings(AddressInterval::hull(4096, AddressInterval::whole().greatest())),
524  namingSyscalls(true), demangleNames(true) {}
525 };
526 
527 // BOOST_CLASS_VERSION(PartitionerSettings, N); -- see end of file for the actual version (cannot be in a namespace); serialize() above tests versions up to 8.
528 
534  std::vector<std::string> configurationNames;
535  bool exitOnError;
538  : exitOnError(true) {}
539 
540 private:
541  friend class boost::serialization::access;
542 
// Boost.Serialization hook. Archives configurationNames and then exitOnError as name-value pairs; the class version
// argument is not consulted.
543  template<class S>
544  void serialize(S &s, unsigned version) {
545  s & BOOST_SERIALIZATION_NVP(configurationNames);
546  s & BOOST_SERIALIZATION_NVP(exitOnError);
547  }
548 };
549 
550 // Additional declarations w/out definitions yet.
551 class Partitioner;
552 class Function;
554 class BasicBlock;
556 class DataBlock;
558 class ThunkPredicates;
560 
561 } // namespace
562 } // namespace
563 } // namespace
564 
565 // Class versions must be at global scope
570 
571 #endif
572 #endif
bool findingInterFunctionCalls
Look for function calls between functions.
bool splittingThunks
Split thunks into their own separate functions.
bool memoryIsExecutable
Determines whether all of memory should be made executable.
MemoryDataAdjustment
How the partitioner should globally treat memory.
bool findingDataFunctionPointers
Look for function pointers in static data.
Assume a function returns if the may-return analysis cannot decide whether it may return...
std::string isaName
Name of the instruction set architecture.
bool doingPostFunctionStackDelta
Run function-stack-delta analysis if doingPostAnalysis is set?
rose_addr_t peScramblerDispatcherVa
Run the PeDescrambler module if non-zero.
size_t deExecuteZerosLeaveAtBack
Number of bytes at the end of each zero area to leave unaffected.
AddressInterval namingStrings
Addresses that might be string literals for commenting integers.
bool copyAllInstructions
Whether to allow shared instructions in the AST.
AddressInterval namingConstants
Give possible names to constants if they're in this range.
Special vertex destination for indeterminate edges.
static AstConstructionSettings permissive()
Default permissive settings.
bool doingPostFunctionNoop
Find and name functions that are effectively no-ops.
Assume that all functions return without ever running the may-return analysis.
bool namingSyscalls
Give names (comments) to system calls if possible.
bool followingGhostEdges
Should ghost edges be followed during disassembly? A ghost edge is a CFG edge that is apparent from t...
Main namespace for the ROSE library.
bool doingPostCallingConvention
Run calling-convention analysis if doingPostAnalysis is set?
std::vector< rose_addr_t > functionStartingVas
Addresses at which to start recursive disassembly.
VertexType
Partitioner control flow vertex types.
static AstConstructionSettings strict()
Default strict settings.
bool discontiguousBlocks
Should basic blocks be allowed to be discontiguous.
size_t functionReturnAnalysisMaxSorts
Number of times functions are sorted before using unsorted lists.
Sawyer::SharedPointer< Function > FunctionPtr
Shared-ownership pointer for function.
FunctionReturnAnalysis functionReturnAnalysis
How to run the function may-return analysis.
std::vector< std::string > configurationNames
List of configuration files and/or directories.
std::vector< rose_addr_t > ipRewrites
Pairs of addresses for rewriting CFG edges.
bool doingPostFunctionMayReturn
Run function-may-return analysis if doingPostAnalysis is set?
bool ignoringUnknownInsns
Whether to ignore unknown insns when extending basic blocks.
bool findingErrorFunctions
Create functions from error handling and exception information.
std::vector< std::string > envInsert
List of environment variable names and values to be inserted before launching a "run:" specimen...
The value is an assumption without any proof.
size_t findingIntraFunctionCode
Suck up unused addresses as intra-function code (number of passes).
bool findingThunks
Look for common thunk patterns in undiscovered areas.
static Interval whole()
Construct an interval that covers the entire domain.
Definition: Interval.h:180
MemoryDataAdjustment memoryDataAdjustment
How to globally adjust memory segment access bits for data areas.
FunctionReturnAnalysis
Controls whether the function may-return analysis runs.
size_t deExecuteZerosLeaveAtFront
Number of bytes at the beginning of each zero area to leave unaffected.
bool findingEntryFunctions
Create functions at the program entry point(s).
Binary analysis.
SemanticMemoryParadigm semanticMemoryParadigm
Container used for semantic memory states.
bool checkingCallBranch
Check for situations where CALL is used as a branch.
bool findingSymbolFunctions
Create functions according to symbol tables.
bool exitOnError
If true, emit error message and exit non-zero, else throw.
std::vector< boost::regex > envErasePatterns
List of regular expressions for removing environment variables before launching a "run:" specimen...
bool basicBlockSemanticsAutoDrop
Conserve memory by dropping semantics for attached basic blocks.
Assume a function cannot return if the may-return analysis cannot decide whether it may return...
size_t deExecuteZerosThreshold
Size threshold for removing execute permission from zero data.
std::vector< std::string > envEraseNames
List of environment variable names that should be removed before launching a "run:" specimen...
bool findingCodeFunctionPointers
Look for function pointers in instructions.
Special vertex destination for non-existing basic blocks.
bool findingIntraFunctionData
Suck up unused addresses as intra-function data.
Assume that a function cannot return without ever running the may-return analysis.
bool allowFunctionWithNoBasicBlocks
Whether to allow functions with no basic blocks.
AddressInterval interruptVector
Table of interrupt handling functions.
bool findingFunctionPadding
Look for padding before each function entry point?
std::string linker
Command to run to link object and archives.
boost::filesystem::path syscallHeader
Name of header file containing system call numbers.