ROSE  0.11.22.0
BasicTypes.h
1 #ifndef ROSE_Partitioner2_BasicTypes_H
2 #define ROSE_Partitioner2_BasicTypes_H
3 
4 #include <featureTests.h>
5 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
6 
7 #include <boost/filesystem.hpp>
8 #include <boost/foreach.hpp>
9 #include <boost/regex.hpp>
10 #include <boost/serialization/access.hpp>
11 #include <boost/serialization/nvp.hpp>
12 #include <boost/serialization/version.hpp>
13 #include <string>
14 #include <vector>
15 
16 // Define this as one if you want extra invariant checks that are quite expensive, or define as zero. This only makes a
17 // difference if NDEBUG and SAWYER_NDEBUG are both undefined--if either one of them is defined then no expensive (or
18 // inexpensive) checks are performed.
19 #define ROSE_PARTITIONER_EXPENSIVE_CHECKS 0
20 
21 namespace Rose {
22 namespace BinaryAnalysis {
23 namespace Partitioner2 {
24 
26 namespace Precision {
27 enum Level {
28  LOW,
32 };
33 } // namespace
34 
/** Flag for whether a graph is permitted to contain parallel edges. */
namespace AllowParallelEdges {

/** Enum type for allowing parallel edges. */
enum Type {
    NO = 0,                                             /**< Parallel edges are not allowed. */
    YES = 1                                             /**< Allow parallel edges, so each edge has a unit count. */
};

} // namespace
42 
44 enum VertexType {
52 };
53 
55 enum EdgeType {
56  E_NORMAL = 0x00000001,
57  E_FUNCTION_CALL = 0x00000002,
58  E_FUNCTION_RETURN = 0x00000004,
61  E_CALL_RETURN = 0x00000008,
67  E_FUNCTION_XFER = 0x00000010,
73  E_USER_DEFINED = 0x00000020,
76 };
77 
79 enum Confidence {
82 };
83 
88 };
89 
100 
107 
115 
128 
129 private:
130  friend class boost::serialization::access;
131 
132  template<class S>
133  void serialize(S &s, unsigned version) {
134  s & BOOST_SERIALIZATION_NVP(allowEmptyGlobalBlock);
135  s & BOOST_SERIALIZATION_NVP(allowFunctionWithNoBasicBlocks);
136  s & BOOST_SERIALIZATION_NVP(allowEmptyBasicBlocks);
137  s & BOOST_SERIALIZATION_NVP(copyAllInstructions);
138  }
139 
140 public:
143  : allowEmptyGlobalBlock(false), allowFunctionWithNoBasicBlocks(false), allowEmptyBasicBlocks(false),
144  copyAllInstructions(true) {}
145 
152  s.allowEmptyGlobalBlock = false;
154  s.allowEmptyBasicBlocks = false;
155  s.copyAllInstructions = true;
156  return s;
157  }
158 
166  s.allowEmptyGlobalBlock = true;
168  s.allowEmptyBasicBlocks = true;
169  s.copyAllInstructions = true; // true keeps the AST a tree instead of a lattice
170  return s;
171  }
172 };
173 
175 // Settings. All settings must act like properties, which means the following:
176 // 1. Each setting must have a name that does not begin with a verb.
177 // 2. Each setting must have a command-line switch to manipulate it.
178 // 3. Each setting must have a method that queries the property (same name as the property and taking no arguments).
179 // 4. Each setting must have a modifier method (same name as property but takes a value and returns void)
181 
193 };
194 
222  std::string linker;
231  std::vector<std::string> envEraseNames;
233  std::vector<boost::regex> envErasePatterns;
236  std::vector<std::string> envInsert;
242  : deExecuteZerosThreshold(0), deExecuteZerosLeaveAtFront(16), deExecuteZerosLeaveAtBack(1),
243  memoryDataAdjustment(DATA_IS_INITIALIZED), memoryIsExecutable(false), linkObjectFiles(true),
244  linkStaticArchives(true), linker("ld -o %o --unresolved-symbols=ignore-all --whole-archive %f") {}
245 
246 private:
247  friend class boost::serialization::access;
248 
249  template<class S>
250  void serialize(S &s, unsigned version) {
251  s & BOOST_SERIALIZATION_NVP(deExecuteZerosThreshold);
252  s & BOOST_SERIALIZATION_NVP(deExecuteZerosLeaveAtFront);
253  s & BOOST_SERIALIZATION_NVP(deExecuteZerosLeaveAtBack);
254  s & BOOST_SERIALIZATION_NVP(memoryDataAdjustment);
255  s & BOOST_SERIALIZATION_NVP(memoryIsExecutable);
256  if (version >= 1) {
257  s & BOOST_SERIALIZATION_NVP(envEraseNames);
258  s & BOOST_SERIALIZATION_NVP(envInsert);
259 
260  // There's no serialization for boost::regex, so we do it ourselves.
261  std::vector<std::string> reStrings;
262  BOOST_FOREACH (const boost::regex &re, envErasePatterns)
263  reStrings.push_back(re.str());
264  s & BOOST_SERIALIZATION_NVP(reStrings);
265  if (envErasePatterns.empty()) {
266  BOOST_FOREACH (const std::string &reStr, reStrings)
267  envErasePatterns.push_back(boost::regex(reStr));
268  }
269  }
270  }
271 };
272 
279  std::string isaName;
283 private:
284  friend class boost::serialization::access;
285 
286  template<class S>
287  void serialize(S &s, unsigned version) {
288  if (version >= 1)
289  s & BOOST_SERIALIZATION_NVP(doDisassemble);
290  s & BOOST_SERIALIZATION_NVP(isaName);
291  }
292 
293 public:
294  DisassemblerSettings()
295  : doDisassemble(true) {}
296 };
297 
308 };
309 
322 private:
323  friend class boost::serialization::access;
324 
325  template<class S>
326  void serialize(S &s, const unsigned version) {
327  s & BOOST_SERIALIZATION_NVP(usingSemantics);
328  s & BOOST_SERIALIZATION_NVP(checkingCallBranch);
329  s & BOOST_SERIALIZATION_NVP(basicBlockSemanticsAutoDrop);
330  if (version >= 1)
331  s & BOOST_SERIALIZATION_NVP(ignoringUnknownInsns);
332  }
333 
334 public:
336  : usingSemantics(false), checkingCallBranch(false), basicBlockSemanticsAutoDrop(true), ignoringUnknownInsns(false) {}
337 };
338 
348  std::vector<rose_addr_t> functionStartingVas;
363  std::vector<rose_addr_t> ipRewrites;
392  boost::filesystem::path syscallHeader;
395 private:
396  friend class boost::serialization::access;
397 
398  template<class S>
399  void serialize(S &s, unsigned version) {
400  s & BOOST_SERIALIZATION_NVP(base);
401  s & BOOST_SERIALIZATION_NVP(functionStartingVas);
402  s & BOOST_SERIALIZATION_NVP(followingGhostEdges);
403  s & BOOST_SERIALIZATION_NVP(discontiguousBlocks);
404  s & BOOST_SERIALIZATION_NVP(maxBasicBlockSize);
405  if (version >= 6)
406  s & BOOST_SERIALIZATION_NVP(ipRewrites);
407  s & BOOST_SERIALIZATION_NVP(findingFunctionPadding);
408  s & BOOST_SERIALIZATION_NVP(findingDeadCode);
409  s & BOOST_SERIALIZATION_NVP(peScramblerDispatcherVa);
410  if (version >= 2) {
411  s & BOOST_SERIALIZATION_NVP(findingIntraFunctionCode);
412  } else {
413  bool temp = false;
414  if (S::is_saving::value)
415  temp = findingIntraFunctionCode > 0;
416  s & boost::serialization::make_nvp("findingIntraFunctionCode", temp);
417  if (S::is_loading::value)
418  findingIntraFunctionCode = temp ? 10 : 0; // arbitrary number of passes
419  }
420  s & BOOST_SERIALIZATION_NVP(findingIntraFunctionData);
421  s & BOOST_SERIALIZATION_NVP(findingInterFunctionCalls);
422  if (version >= 4)
423  s & BOOST_SERIALIZATION_NVP(findingFunctionCallFunctions);
424  if (version >= 5) {
425  s & BOOST_SERIALIZATION_NVP(findingEntryFunctions);
426  s & BOOST_SERIALIZATION_NVP(findingErrorFunctions);
427  s & BOOST_SERIALIZATION_NVP(findingImportFunctions);
428  s & BOOST_SERIALIZATION_NVP(findingExportFunctions);
429  s & BOOST_SERIALIZATION_NVP(findingSymbolFunctions);
430  }
431  s & BOOST_SERIALIZATION_NVP(interruptVector);
432  s & BOOST_SERIALIZATION_NVP(doingPostAnalysis);
433  s & BOOST_SERIALIZATION_NVP(doingPostFunctionMayReturn);
434  s & BOOST_SERIALIZATION_NVP(doingPostFunctionStackDelta);
435  s & BOOST_SERIALIZATION_NVP(doingPostCallingConvention);
436  s & BOOST_SERIALIZATION_NVP(doingPostFunctionNoop);
437  s & BOOST_SERIALIZATION_NVP(functionReturnAnalysis);
438  if (version >= 3)
439  s & BOOST_SERIALIZATION_NVP(functionReturnAnalysisMaxSorts);
440  s & BOOST_SERIALIZATION_NVP(findingDataFunctionPointers);
441  s & BOOST_SERIALIZATION_NVP(findingCodeFunctionPointers);
442  s & BOOST_SERIALIZATION_NVP(findingThunks);
443  s & BOOST_SERIALIZATION_NVP(splittingThunks);
444  s & BOOST_SERIALIZATION_NVP(semanticMemoryParadigm);
445  s & BOOST_SERIALIZATION_NVP(namingConstants);
446  s & BOOST_SERIALIZATION_NVP(namingStrings);
447  s & BOOST_SERIALIZATION_NVP(demangleNames);
448  if (version >= 1) {
449  s & BOOST_SERIALIZATION_NVP(namingSyscalls);
450 
451  // There is no support for boost::filesystem serialization due to arguments by the maintainers over who has
452  // responsibility, so we do it the hard way.
453  std::string temp;
454  if (S::is_saving::value)
455  temp = syscallHeader.string();
456  s & boost::serialization::make_nvp("syscallHeader", temp);
457  if (S::is_loading::value)
458  syscallHeader = temp;
459  }
460  }
461 
462 public:
463  PartitionerSettings()
464  : followingGhostEdges(false), discontiguousBlocks(true), maxBasicBlockSize(0), findingFunctionPadding(true),
465  findingDeadCode(true), peScramblerDispatcherVa(0), findingIntraFunctionCode(10), findingIntraFunctionData(true),
466  findingInterFunctionCalls(true), findingFunctionCallFunctions(true), findingEntryFunctions(true),
467  findingErrorFunctions(true), findingImportFunctions(true), findingExportFunctions(true), findingSymbolFunctions(true),
468  doingPostAnalysis(true), doingPostFunctionMayReturn(true), doingPostFunctionStackDelta(true),
469  doingPostCallingConvention(false), doingPostFunctionNoop(false), functionReturnAnalysis(MAYRETURN_DEFAULT_YES),
470  functionReturnAnalysisMaxSorts(50), findingDataFunctionPointers(false), findingCodeFunctionPointers(false),
471  findingThunks(true), splittingThunks(false), semanticMemoryParadigm(LIST_BASED_MEMORY), namingConstants(true),
472  namingStrings(true), namingSyscalls(true), demangleNames(true) {}
473 };
474 
475 // BOOST_CLASS_VERSION(PartitionerSettings, N); -- see end of file (cannot be in a namespace). N must be at least 6 because serialize() tests for version >= 6.
476 
482  std::vector<std::string> configurationNames;
483  bool exitOnError;
486  : exitOnError(true) {}
487 
488 private:
489  friend class boost::serialization::access;
490 
491  template<class S>
492  void serialize(S &s, unsigned version) {
493  s & BOOST_SERIALIZATION_NVP(configurationNames);
494  s & BOOST_SERIALIZATION_NVP(exitOnError);
495  }
496 };
497 
498 // Additional declarations w/out definitions yet.
499 class Partitioner;
500 class Function;
502 class BasicBlock;
504 class DataBlock;
506 class ThunkPredicates;
508 
509 } // namespace
510 } // namespace
511 } // namespace
512 
513 // Class versions must be at global scope
518 
519 #endif
520 #endif
bool findingInterFunctionCalls
Look for function calls between functions.
Definition: BasicTypes.h:369
Edge is a function call transfer.
Definition: BasicTypes.h:67
bool splittingThunks
Split thunks into their own separate functions.
Definition: BasicTypes.h:387
bool memoryIsExecutable
Determines whether all of memory should be made executable.
Definition: BasicTypes.h:214
bool linkStaticArchives
Link static libraries before parsing.
Definition: BasicTypes.h:221
Settings for controlling the engine behavior.
Definition: BasicTypes.h:481
bool findingExportFunctions
Create functions at export addresses.
Definition: BasicTypes.h:374
MemoryDataAdjustment
How the partitioner should globally treat memory.
Definition: BasicTypes.h:183
bool findingDataFunctionPointers
Look for function pointers in static data.
Definition: BasicTypes.h:384
Assume a function returns if the may-return analysis cannot decide whether it may return...
Definition: BasicTypes.h:300
Allow parallel edges, so each edge has a unit count.
Definition: BasicTypes.h:39
std::string isaName
Name of the instruction set architecture.
Definition: BasicTypes.h:279
bool doingPostFunctionStackDelta
Run function-stack-delta analysis if doingPostAnalysis is set?
Definition: BasicTypes.h:379
EdgeType
Partitioner control flow edge types.
Definition: BasicTypes.h:55
rose_addr_t peScramblerDispatcherVa
Run the PeDescrambler module if non-zero.
Definition: BasicTypes.h:366
Settings that control the disassembler.
Definition: BasicTypes.h:276
Treat all memory as if it were initialized.
Definition: BasicTypes.h:186
size_t deExecuteZerosLeaveAtBack
Number of bytes at the end of each zero area to leave unaffected.
Definition: BasicTypes.h:208
bool copyAllInstructions
Whether to allow shared instructions in the AST.
Definition: BasicTypes.h:127
Special vertex destination for indeterminate edges.
Definition: BasicTypes.h:47
static AstConstructionSettings permissive()
Default permissive settings.
Definition: BasicTypes.h:164
Enum type for allowing parallel edges.
Definition: BasicTypes.h:38
Settings that directly control a partitioner.
Definition: BasicTypes.h:313
bool doingPostFunctionNoop
Find and name functions that are effectively no-ops.
Definition: BasicTypes.h:381
Assume that all functions return without ever running the may-return analysis.
Definition: BasicTypes.h:304
bool namingSyscalls
Give names (comments) to system calls if possible.
Definition: BasicTypes.h:391
bool followingGhostEdges
Should ghost edges be followed during disassembly? A ghost edge is a CFG edge that is apparent from t...
Definition: BasicTypes.h:351
Main namespace for the ROSE library.
bool doingPostCallingConvention
Run calling-convention analysis if doingPostAnalysis is set?
Definition: BasicTypes.h:380
Settings for loading specimens.
Definition: BasicTypes.h:198
std::vector< rose_addr_t > functionStartingVas
Addresses at which to start recursive disassembly.
Definition: BasicTypes.h:348
bool linkObjectFiles
Link object files before parsing.
Definition: BasicTypes.h:220
VertexType
Partitioner control flow vertex types.
Definition: BasicTypes.h:44
Settings that control building the AST.
Definition: BasicTypes.h:94
static AstConstructionSettings strict()
Default strict settings.
Definition: BasicTypes.h:150
bool discontiguousBlocks
Should basic blocks be allowed to be discontiguous.
Definition: BasicTypes.h:358
bool allowEmptyGlobalBlock
Whether to allow an empty global block.
Definition: BasicTypes.h:99
bool namingStrings
Give labels to constants that are string literal addresses.
Definition: BasicTypes.h:390
size_t functionReturnAnalysisMaxSorts
Number of times functions are sorted before using unsorted lists.
Definition: BasicTypes.h:383
Sawyer::SharedPointer< Function > FunctionPtr
Shared-ownership pointer for function.
Definition: BasicTypes.h:500
FunctionReturnAnalysis functionReturnAnalysis
How to run the function may-return analysis.
Definition: BasicTypes.h:382
bool demangleNames
Run all names through a demangling step.
Definition: BasicTypes.h:393
std::vector< std::string > configurationNames
List of configuration files and/or directories.
Definition: BasicTypes.h:482
std::vector< rose_addr_t > ipRewrites
Pairs of addresses for rewriting CFG edges.
Definition: BasicTypes.h:363
bool doingPostFunctionMayReturn
Run function-may-return analysis if doingPostAnalysis is set?
Definition: BasicTypes.h:378
bool ignoringUnknownInsns
Whether to ignore unknown insns when extending basic blocks.
Definition: BasicTypes.h:320
The special "undiscovered" vertex.
Definition: BasicTypes.h:46
bool findingErrorFunctions
Create functions from error handling and exception information.
Definition: BasicTypes.h:372
std::vector< std::string > envInsert
List of environment variable names and values to be inserted before launching a "run:" specimen...
Definition: BasicTypes.h:236
The value is an assumption without any proof.
Definition: BasicTypes.h:80
Settings that control the engine partitioning.
Definition: BasicTypes.h:346
Confidence
How sure are we of something.
Definition: BasicTypes.h:79
bool doingPostAnalysis
Perform enabled post-partitioning analyses?
Definition: BasicTypes.h:377
size_t findingIntraFunctionCode
Suck up unused addresses as intra-function code (number of passes).
Definition: BasicTypes.h:367
Do not make any global changes to the memory map.
Definition: BasicTypes.h:192
bool findingThunks
Look for common thunk patterns in undiscovered areas.
Definition: BasicTypes.h:386
bool findingImportFunctions
Create functions at import addresses.
Definition: BasicTypes.h:373
A basic block or placeholder for a basic block.
Definition: BasicTypes.h:45
SemanticMemoryParadigm
Organization of semantic memory.
Definition: BasicTypes.h:85
Edge is a function return from the call site.
Definition: BasicTypes.h:61
Normal control flow edge, nothing special.
Definition: BasicTypes.h:56
MemoryDataAdjustment memoryDataAdjustment
How to globally adjust memory segment access bits for data areas.
Definition: BasicTypes.h:210
FunctionReturnAnalysis
Controls whether the function may-return analysis runs.
Definition: BasicTypes.h:299
size_t deExecuteZerosLeaveAtFront
Number of bytes at the beginning of each zero area to leave unaffected.
Definition: BasicTypes.h:206
The value was somehow proved.
Definition: BasicTypes.h:81
bool findingEntryFunctions
Create functions at the program entry point(s).
Definition: BasicTypes.h:371
bool findingDeadCode
Look for unreachable basic blocks?
Definition: BasicTypes.h:365
SemanticMemoryParadigm semanticMemoryParadigm
Container used for semantic memory states.
Definition: BasicTypes.h:388
bool checkingCallBranch
Check for situations where CALL is used as a branch.
Definition: BasicTypes.h:318
bool allowEmptyBasicBlocks
Whether to allow a basic block to be empty.
Definition: BasicTypes.h:114
bool usingSemantics
Whether instruction semantics are used.
Definition: BasicTypes.h:314
Treat all memory as if it were constant.
Definition: BasicTypes.h:184
bool findingSymbolFunctions
Create functions according to symbol tables.
Definition: BasicTypes.h:375
bool exitOnError
If true, emit error message and exit non-zero, else throw.
Definition: BasicTypes.h:483
std::vector< boost::regex > envErasePatterns
List of regular expressions for removing environment variables before launching a "run:" specimen...
Definition: BasicTypes.h:233
bool basicBlockSemanticsAutoDrop
Conserve memory by dropping semantics for attached basic blocks.
Definition: BasicTypes.h:319
Assume a function cannot return if the may-return analysis cannot decide whether it may return...
Definition: BasicTypes.h:302
bool namingConstants
Give names to constants by calling Modules::nameConstants.
Definition: BasicTypes.h:389
size_t deExecuteZerosThreshold
Size threshold for removing execute permission from zero data.
Definition: BasicTypes.h:199
std::vector< std::string > envEraseNames
List of environment variable names that should be removed before launching a "run:" specimen...
Definition: BasicTypes.h:231
bool findingCodeFunctionPointers
Look for function pointers in instructions.
Definition: BasicTypes.h:385
Special vertex destination for non-existing basic blocks.
Definition: BasicTypes.h:48
bool findingIntraFunctionData
Suck up unused addresses as intra-function data.
Definition: BasicTypes.h:368
Assume that a function cannot return without ever running the may-return analysis.
Definition: BasicTypes.h:306
bool allowFunctionWithNoBasicBlocks
Whether to allow functions with no basic blocks.
Definition: BasicTypes.h:106
AddressInterval interruptVector
Table of interrupt handling functions.
Definition: BasicTypes.h:376
bool findingFunctionPadding
Look for padding before each function entry point?
Definition: BasicTypes.h:364
std::string linker
Command to run to link object and archives.
Definition: BasicTypes.h:222
bool findingFunctionCallFunctions
Create functions from function calls.
Definition: BasicTypes.h:370
size_t maxBasicBlockSize
Maximum basic block size.
Definition: BasicTypes.h:362
boost::filesystem::path syscallHeader
Name of header file containing system call numbers.
Definition: BasicTypes.h:392