ROSE  0.11.58.0
Partitioner2/BasicTypes.h
1 #ifndef ROSE_BinaryAnalysis_Partitioner2_BasicTypes_H
2 #define ROSE_BinaryAnalysis_Partitioner2_BasicTypes_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
5 
6 #include <boost/filesystem.hpp>
7 #include <boost/foreach.hpp>
8 #include <boost/regex.hpp>
9 #include <boost/serialization/access.hpp>
10 #include <boost/serialization/nvp.hpp>
11 #include <boost/serialization/version.hpp>
12 #include <string>
13 #include <vector>
14 
15 // Define this as one if you want extra invariant checks that are quite expensive, or as zero otherwise. This only makes a
16 // difference if NDEBUG and SAWYER_NDEBUG are both undefined--if either one of them is defined then no expensive (or
17 // inexpensive) checks are performed.
18 #define ROSE_PARTITIONER_EXPENSIVE_CHECKS 0
19 
20 namespace Rose {
21 namespace BinaryAnalysis {
22 
54 namespace Partitioner2 {
55 
57 namespace Precision {
58 enum Level {
59  LOW,
63 };
64 } // namespace
65 
/** Selects whether parallel edges are allowed.
 *
 *  The enumerators live in their own namespace so that call sites read as AllowParallelEdges::YES or
 *  AllowParallelEdges::NO. */
namespace AllowParallelEdges {
enum Type {
    NO  = 0,                                            /**< Parallel edges are not allowed. */
    YES = 1                                             /**< Allow parallel edges, so each edge has a unit count. */
};
} // namespace AllowParallelEdges
73 
75 enum VertexType {
83 };
84 
/** Partitioner control flow edge types.
 *
 *  Each enumerator is a distinct single bit; the shift count below is the bit position. */
enum EdgeType {
    E_NORMAL          = 1 << 0,                         /**< 0x00000001 */
    E_FUNCTION_CALL   = 1 << 1,                         /**< 0x00000002 */
    E_FUNCTION_RETURN = 1 << 2,                         /**< 0x00000004 */
    E_CALL_RETURN     = 1 << 3,                         /**< 0x00000008 */
    E_FUNCTION_XFER   = 1 << 4,                         /**< 0x00000010 */
    E_USER_DEFINED    = 1 << 5                          /**< 0x00000020 */
};
108 
113 };
114 
119 };
120 
131 
138 
146 
159 
160 private:
161  friend class boost::serialization::access;
162 
163  template<class S>
164  void serialize(S &s, unsigned version) {
165  s & BOOST_SERIALIZATION_NVP(allowEmptyGlobalBlock);
166  s & BOOST_SERIALIZATION_NVP(allowFunctionWithNoBasicBlocks);
167  s & BOOST_SERIALIZATION_NVP(allowEmptyBasicBlocks);
168  s & BOOST_SERIALIZATION_NVP(copyAllInstructions);
169  }
170 
171 public:
174  : allowEmptyGlobalBlock(false), allowFunctionWithNoBasicBlocks(false), allowEmptyBasicBlocks(false),
175  copyAllInstructions(true) {}
176 
183  s.allowEmptyGlobalBlock = false;
185  s.allowEmptyBasicBlocks = false;
186  s.copyAllInstructions = true;
187  return s;
188  }
189 
197  s.allowEmptyGlobalBlock = true;
199  s.allowEmptyBasicBlocks = true;
200  s.copyAllInstructions = true; // true keeps the AST a tree instead of a lattice
201  return s;
202  }
203 };
204 
206 // Settings. All settings must act like properties, which means the following:
207 // 1. Each setting must have a name that does not begin with a verb.
208 // 2. Each setting must have a command-line switch to manipulate it.
209 // 3. Each setting must have a method that queries the property (same name as the property and taking no arguments).
210 // 4. Each setting must have a modifier method (same name as property but takes a value and returns void)
212 
224 };
225 
253  std::string linker;
262  std::vector<std::string> envEraseNames;
264  std::vector<boost::regex> envErasePatterns;
267  std::vector<std::string> envInsert;
273  : deExecuteZerosThreshold(0), deExecuteZerosLeaveAtFront(16), deExecuteZerosLeaveAtBack(1),
274  memoryDataAdjustment(DATA_IS_INITIALIZED), memoryIsExecutable(false), linkObjectFiles(true),
275  linkStaticArchives(true), linker("ld -o %o --unresolved-symbols=ignore-all --whole-archive %f") {}
276 
277 private:
278  friend class boost::serialization::access;
279 
280  template<class S>
281  void serialize(S &s, unsigned version) {
282  s & BOOST_SERIALIZATION_NVP(deExecuteZerosThreshold);
283  s & BOOST_SERIALIZATION_NVP(deExecuteZerosLeaveAtFront);
284  s & BOOST_SERIALIZATION_NVP(deExecuteZerosLeaveAtBack);
285  s & BOOST_SERIALIZATION_NVP(memoryDataAdjustment);
286  s & BOOST_SERIALIZATION_NVP(memoryIsExecutable);
287  if (version >= 1) {
288  s & BOOST_SERIALIZATION_NVP(envEraseNames);
289  s & BOOST_SERIALIZATION_NVP(envInsert);
290 
291  // There's no serialization for boost::regex, so we do it ourselves.
292  std::vector<std::string> reStrings;
293  BOOST_FOREACH (const boost::regex &re, envErasePatterns)
294  reStrings.push_back(re.str());
295  s & BOOST_SERIALIZATION_NVP(reStrings);
296  if (envErasePatterns.empty()) {
297  BOOST_FOREACH (const std::string &reStr, reStrings)
298  envErasePatterns.push_back(boost::regex(reStr));
299  }
300  }
301  }
302 };
303 
310  std::string isaName;
314 private:
315  friend class boost::serialization::access;
316 
317  template<class S>
318  void serialize(S &s, unsigned version) {
319  if (version >= 1)
320  s & BOOST_SERIALIZATION_NVP(doDisassemble);
321  s & BOOST_SERIALIZATION_NVP(isaName);
322  }
323 
324 public:
325  DisassemblerSettings()
326  : doDisassemble(true) {}
327 };
328 
339 };
340 
353 private:
354  friend class boost::serialization::access;
355 
356  template<class S>
357  void serialize(S &s, const unsigned version) {
358  s & BOOST_SERIALIZATION_NVP(usingSemantics);
359  s & BOOST_SERIALIZATION_NVP(checkingCallBranch);
360  s & BOOST_SERIALIZATION_NVP(basicBlockSemanticsAutoDrop);
361  if (version >= 1)
362  s & BOOST_SERIALIZATION_NVP(ignoringUnknownInsns);
363  }
364 
365 public:
367  : usingSemantics(false), checkingCallBranch(false), basicBlockSemanticsAutoDrop(true), ignoringUnknownInsns(false) {}
368 };
369 
379  std::vector<rose_addr_t> functionStartingVas;
394  std::vector<rose_addr_t> ipRewrites;
423  boost::filesystem::path syscallHeader;
426 private:
427  friend class boost::serialization::access;
428 
429  template<class S>
430  void serialize(S &s, unsigned version) {
431  s & BOOST_SERIALIZATION_NVP(base);
432  s & BOOST_SERIALIZATION_NVP(functionStartingVas);
433  s & BOOST_SERIALIZATION_NVP(followingGhostEdges);
434  s & BOOST_SERIALIZATION_NVP(discontiguousBlocks);
435  s & BOOST_SERIALIZATION_NVP(maxBasicBlockSize);
436  if (version >= 6)
437  s & BOOST_SERIALIZATION_NVP(ipRewrites);
438  s & BOOST_SERIALIZATION_NVP(findingFunctionPadding);
439  s & BOOST_SERIALIZATION_NVP(findingDeadCode);
440  s & BOOST_SERIALIZATION_NVP(peScramblerDispatcherVa);
441  if (version >= 2) {
442  s & BOOST_SERIALIZATION_NVP(findingIntraFunctionCode);
443  } else {
444  bool temp = false;
445  if (S::is_saving::value)
446  temp = findingIntraFunctionCode > 0;
447  s & boost::serialization::make_nvp("findingIntraFunctionCode", temp);
448  if (S::is_loading::value)
449  findingIntraFunctionCode = temp ? 10 : 0; // arbitrary number of passes
450  }
451  s & BOOST_SERIALIZATION_NVP(findingIntraFunctionData);
452  s & BOOST_SERIALIZATION_NVP(findingInterFunctionCalls);
453  if (version >= 4)
454  s & BOOST_SERIALIZATION_NVP(findingFunctionCallFunctions);
455  if (version >= 5) {
456  s & BOOST_SERIALIZATION_NVP(findingEntryFunctions);
457  s & BOOST_SERIALIZATION_NVP(findingErrorFunctions);
458  s & BOOST_SERIALIZATION_NVP(findingImportFunctions);
459  s & BOOST_SERIALIZATION_NVP(findingExportFunctions);
460  s & BOOST_SERIALIZATION_NVP(findingSymbolFunctions);
461  }
462  s & BOOST_SERIALIZATION_NVP(interruptVector);
463  s & BOOST_SERIALIZATION_NVP(doingPostAnalysis);
464  s & BOOST_SERIALIZATION_NVP(doingPostFunctionMayReturn);
465  s & BOOST_SERIALIZATION_NVP(doingPostFunctionStackDelta);
466  s & BOOST_SERIALIZATION_NVP(doingPostCallingConvention);
467  s & BOOST_SERIALIZATION_NVP(doingPostFunctionNoop);
468  s & BOOST_SERIALIZATION_NVP(functionReturnAnalysis);
469  if (version >= 3)
470  s & BOOST_SERIALIZATION_NVP(functionReturnAnalysisMaxSorts);
471  s & BOOST_SERIALIZATION_NVP(findingDataFunctionPointers);
472  s & BOOST_SERIALIZATION_NVP(findingCodeFunctionPointers);
473  s & BOOST_SERIALIZATION_NVP(findingThunks);
474  s & BOOST_SERIALIZATION_NVP(splittingThunks);
475  s & BOOST_SERIALIZATION_NVP(semanticMemoryParadigm);
476  s & BOOST_SERIALIZATION_NVP(namingConstants);
477  if (version >= 7) {
478  s & BOOST_SERIALIZATION_NVP(namingStrings);
479  } else if (S::is_loading()) {
480  bool b;
481  s & boost::serialization::make_nvp("namingStrings", b);
482  if (b) {
483  namingStrings = AddressInterval::whole();
484  } else {
485  namingStrings = AddressInterval();
486  }
487  }
488  s & BOOST_SERIALIZATION_NVP(demangleNames);
489  if (version >= 1) {
490  s & BOOST_SERIALIZATION_NVP(namingSyscalls);
491 
492  // There is no support for boost::filesystem serialization due to arguments by the maintainers over who has
493  // responsibility, so we do it the hard way.
494  std::string temp;
495  if (S::is_saving::value)
496  temp = syscallHeader.string();
497  s & boost::serialization::make_nvp("syscallHeader", temp);
498  if (S::is_loading::value)
499  syscallHeader = temp;
500  }
501  }
502 
503 public:
504  PartitionerSettings()
505  : followingGhostEdges(false), discontiguousBlocks(true), maxBasicBlockSize(0), findingFunctionPadding(true),
506  findingDeadCode(true), peScramblerDispatcherVa(0), findingIntraFunctionCode(10), findingIntraFunctionData(true),
507  findingInterFunctionCalls(true), findingFunctionCallFunctions(true), findingEntryFunctions(true),
508  findingErrorFunctions(true), findingImportFunctions(true), findingExportFunctions(true), findingSymbolFunctions(true),
509  doingPostAnalysis(true), doingPostFunctionMayReturn(true), doingPostFunctionStackDelta(true),
510  doingPostCallingConvention(false), doingPostFunctionNoop(false), functionReturnAnalysis(MAYRETURN_DEFAULT_YES),
511  functionReturnAnalysisMaxSorts(50), findingDataFunctionPointers(false), findingCodeFunctionPointers(false),
512  findingThunks(true), splittingThunks(false), semanticMemoryParadigm(LIST_BASED_MEMORY), namingConstants(true),
513  namingStrings(AddressInterval::hull(4096, AddressInterval::whole().greatest())),
514  namingSyscalls(true), demangleNames(true) {}
515 };
516 
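517 // BOOST_CLASS_VERSION(PartitionerSettings, N) appears at the end of this file because it cannot be in a namespace. N should be at least 7, since serialize() above branches on class versions up to 7.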
518 
524  std::vector<std::string> configurationNames;
525  bool exitOnError;
528  : exitOnError(true) {}
529 
530 private:
531  friend class boost::serialization::access;
532 
533  template<class S>
534  void serialize(S &s, unsigned version) {
535  s & BOOST_SERIALIZATION_NVP(configurationNames);
536  s & BOOST_SERIALIZATION_NVP(exitOnError);
537  }
538 };
539 
540 // Additional declarations w/out definitions yet.
541 class Partitioner;
542 class Function;
544 class BasicBlock;
546 class DataBlock;
548 class ThunkPredicates;
550 
551 } // namespace
552 } // namespace
553 } // namespace
554 
555 // Class versions must be at global scope
560 
561 #endif
562 #endif
bool findingInterFunctionCalls
Look for function calls between functions.
bool splittingThunks
Split thunks into their own separate functions.
bool memoryIsExecutable
Determines whether all of memory should be made executable.
bool linkStaticArchives
Link static libraries before parsing.
Settings for controlling the engine behavior.
bool findingExportFunctions
Create functions at export addresses.
MemoryDataAdjustment
How the partitioner should globally treat memory.
bool findingDataFunctionPointers
Look for function pointers in static data.
Assume a function returns if the may-return analysis cannot decide whether it may return...
Allow parallel edges, so each edge has a unit count.
std::string isaName
Name of the instruction set architecture.
bool doingPostFunctionStackDelta
Run function-stack-delta analysis if doingPostAnalysis is set?
EdgeType
Partitioner control flow edge types.
rose_addr_t peScramblerDispatcherVa
Run the PeDescrambler module if non-zero.
Treat all memory as if it were initialized.
size_t deExecuteZerosLeaveAtBack
Number of bytes at the end of each zero area to leave unaffected.
AddressInterval namingStrings
Addresses that might be string literals for commenting integers.
bool copyAllInstructions
Whether to allow shared instructions in the AST.
Special vertex destination for indeterminate edges.
static AstConstructionSettings permissive()
Default permissive settings.
bool doingPostFunctionNoop
Find and name functions that are effectively no-ops.
Assume that all functions return without ever running the may-return analysis.
bool namingSyscalls
Give names (comments) to system calls if possible.
bool followingGhostEdges
Should ghost edges be followed during disassembly? A ghost edge is a CFG edge that is apparent from t...
Main namespace for the ROSE library.
bool doingPostCallingConvention
Run calling-convention analysis if doingPostAnalysis is set?
std::vector< rose_addr_t > functionStartingVas
Addresses at which to start recursive disassembly.
bool linkObjectFiles
Link object files before parsing.
VertexType
Partitioner control flow vertex types.
static AstConstructionSettings strict()
Default strict settings.
bool discontiguousBlocks
Should basic blocks be allowed to be discontiguous.
bool allowEmptyGlobalBlock
Whether to allow an empty global block.
size_t functionReturnAnalysisMaxSorts
Number of times functions are sorted before using unsorted lists.
Sawyer::SharedPointer< Function > FunctionPtr
Shared-ownership pointer for function.
FunctionReturnAnalysis functionReturnAnalysis
How to run the function may-return analysis.
bool demangleNames
Run all names through a demangling step.
std::vector< std::string > configurationNames
List of configuration files and/or directories.
std::vector< rose_addr_t > ipRewrites
Pairs of addresses for rewriting CFG edges.
bool doingPostFunctionMayReturn
Run function-may-return analysis if doingPostAnalysis is set?
bool ignoringUnknownInsns
Whether to ignore unknown insns when extending basic blocks.
bool findingErrorFunctions
Create functions from error handling and exception information.
std::vector< std::string > envInsert
List of environment variable names and values to be inserted before launching a "run:" specimen...
The value is an assumption without any proof.
Settings that control the engine partitioning.
Confidence
How sure are we of something.
bool doingPostAnalysis
Perform enabled post-partitioning analyses?
size_t findingIntraFunctionCode
Suck up unused addresses as intra-function code (number of passes).
Do not make any global changes to the memory map.
bool findingThunks
Look for common thunk patterns in undiscovered areas.
static Interval whole()
Construct an interval that covers the entire domain.
Definition: Interval.h:180
bool findingImportFunctions
Create functions at import addresses.
A basic block or placeholder for a basic block.
SemanticMemoryParadigm
Organization of semantic memory.
Edge is a function return from the call site.
Normal control flow edge, nothing special.
MemoryDataAdjustment memoryDataAdjustment
How to globally adjust memory segment access bits for data areas.
FunctionReturnAnalysis
Controls whether the function may-return analysis runs.
size_t deExecuteZerosLeaveAtFront
Number of bytes at the beginning of each zero area to leave unaffected.
bool findingEntryFunctions
Create functions at the program entry point(s).
SemanticMemoryParadigm semanticMemoryParadigm
Container used for semantic memory states.
bool checkingCallBranch
Check for situations where CALL is used as a branch.
bool allowEmptyBasicBlocks
Whether to allow a basic block to be empty.
bool usingSemantics
Whether instruction semantics are used.
bool findingSymbolFunctions
Create functions according to symbol tables.
bool exitOnError
If true, emit error message and exit non-zero, else throw.
std::vector< boost::regex > envErasePatterns
List of regular expressions for removing environment variables before launching a "run:" specimen...
bool basicBlockSemanticsAutoDrop
Conserve memory by dropping semantics for attached basic blocks.
Assume a function cannot return if the may-return analysis cannot decide whether it may return...
bool namingConstants
Give names to constants by calling Modules::nameConstants.
size_t deExecuteZerosThreshold
Size threshold for removing execute permission from zero data.
std::vector< std::string > envEraseNames
List of environment variable names that should be removed before launching a "run:" specimen...
bool findingCodeFunctionPointers
Look for function pointers in instructions.
Special vertex destination for non-existing basic blocks.
bool findingIntraFunctionData
Suck up unused addresses as intra-function data.
Assume that a function cannot return without ever running the may-return analysis.
bool allowFunctionWithNoBasicBlocks
Whether to allow functions with no basic blocks.
AddressInterval interruptVector
Table of interrupt handling functions.
bool findingFunctionPadding
Look for padding before each function entry point?
std::string linker
Command to run to link object and archives.
bool findingFunctionCallFunctions
Create functions from function calls.
boost::filesystem::path syscallHeader
Name of header file containing system call numbers.