ROSE  0.11.2.0
BasicTypes.h
1 #ifndef ROSE_Partitioner2_BasicTypes_H
2 #define ROSE_Partitioner2_BasicTypes_H
3 
4 #include <rosePublicConfig.h>
5 #ifdef ROSE_BUILD_BINARY_ANALYSIS_SUPPORT
6 
7 #include <boost/filesystem.hpp>
8 #include <boost/foreach.hpp>
9 #include <boost/regex.hpp>
10 #include <boost/serialization/access.hpp>
11 #include <boost/serialization/nvp.hpp>
12 #include <boost/serialization/version.hpp>
13 #include <string>
14 #include <vector>
15 
16 // Define this as one if you want extra invariant checks that are quite expensive, or define as zero. This only makes a
17 // difference if NDEBUG and SAWYER_NDEBUG are both undefined--if either one of them is defined then no expensive (or
18 // inexpensive) checks are performed.
19 #define ROSE_PARTITIONER_EXPENSIVE_CHECKS 0
20 
21 namespace Rose {
22 namespace BinaryAnalysis {
23 namespace Partitioner2 {
24 
26 namespace Precision {
27 enum Level {
28  LOW,
32 };
33 } // namespace
34 
36 namespace AllowParallelEdges {
   // Enum type for allowing parallel edges. It is wrapped in its own namespace, so the enumerators are spelled
   // AllowParallelEdges::NO and AllowParallelEdges::YES.
37 enum Type {
   // NOTE(review): presumably parallel edges are not allowed (merged instead) -- confirm against the callers.
38  NO,
   // Allow parallel edges, so each edge has a unit count.
39  YES
40 };
41 } // namespace
42 
44 enum VertexType {
52 };
53 
55 enum EdgeType {
   // Partitioner control flow edge types. Every enumerator below is a distinct single bit.
   // Normal control flow edge, nothing special.
56  E_NORMAL = 0x00000001,
57  E_FUNCTION_CALL = 0x00000002,
58  E_FUNCTION_RETURN = 0x00000004,
   // Edge is a function return from the call site.
61  E_CALL_RETURN = 0x00000008,
   // Edge is a function call transfer.
67  E_FUNCTION_XFER = 0x00000010,
73  E_USER_DEFINED = 0x00000020,
76 };
77 
79 enum Confidence {
82 };
83 
88 };
89 
100 
107 
115 
128 
129 private:
130  friend class boost::serialization::access;
131 
132  template<class S>
133  void serialize(S &s, unsigned version) {
     // Boost.Serialization hook: archives the four AST construction flags as name/value pairs, in this fixed order.
     // The `version` argument is not consulted here.
134  s & BOOST_SERIALIZATION_NVP(allowEmptyGlobalBlock);
135  s & BOOST_SERIALIZATION_NVP(allowFunctionWithNoBasicBlocks);
136  s & BOOST_SERIALIZATION_NVP(allowEmptyBasicBlocks);
137  s & BOOST_SERIALIZATION_NVP(copyAllInstructions);
138  }
139 
140 public:
143  : allowEmptyGlobalBlock(false), allowFunctionWithNoBasicBlocks(false), allowEmptyBasicBlocks(false),
144  copyAllInstructions(true) {}
145 
152  s.allowEmptyGlobalBlock = false;
154  s.allowEmptyBasicBlocks = false;
155  s.copyAllInstructions = true;
156  return s;
157  }
158 
166  s.allowEmptyGlobalBlock = true;
168  s.allowEmptyBasicBlocks = true;
169  s.copyAllInstructions = true; // true keeps the AST a tree instead of a lattice
170  return s;
171  }
172 };
173 
175 // Settings. All settings must act like properties, which means the following:
176 // 1. Each setting must have a name that does not begin with a verb.
177 // 2. Each setting must have a command-line switch to manipulate it.
178 // 3. Each setting must have a method that queries the property (same name as the property and taking no arguments).
179 // 4. Each setting must have a modifier method (same name as property but takes a value and returns void)
181 
193 };
194 
222  std::string linker;
231  std::vector<std::string> envEraseNames;
233  std::vector<boost::regex> envErasePatterns;
236  std::vector<std::string> envInsert;
242  : deExecuteZerosThreshold(0), deExecuteZerosLeaveAtFront(16), deExecuteZerosLeaveAtBack(1),
243  memoryDataAdjustment(DATA_IS_INITIALIZED), memoryIsExecutable(false), linkObjectFiles(true),
244  linkStaticArchives(true), linker("ld -o %o --unresolved-symbols=ignore-all --whole-archive %f") {}
245 
246 private:
247  friend class boost::serialization::access;
248 
249  template<class S>
250  void serialize(S &s, unsigned version) {
     // Boost.Serialization hook for the loader settings. `s` is the archive (saving or loading) and `version` is the
     // class version of the archived data; the environment-related settings exist only in version 1 or later.
     // NOTE(review): linkObjectFiles, linkStaticArchives and linker are initialized by the constructor but are not
     // archived here -- confirm that this is intentional.
251  s & BOOST_SERIALIZATION_NVP(deExecuteZerosThreshold);
252  s & BOOST_SERIALIZATION_NVP(deExecuteZerosLeaveAtFront);
253  s & BOOST_SERIALIZATION_NVP(deExecuteZerosLeaveAtBack);
254  s & BOOST_SERIALIZATION_NVP(memoryDataAdjustment);
255  s & BOOST_SERIALIZATION_NVP(memoryIsExecutable);
256  if (version >= 1) {
257  s & BOOST_SERIALIZATION_NVP(envEraseNames);
258  s & BOOST_SERIALIZATION_NVP(envInsert);
259 
260  // There's no serialization for boost::regex, so we do it ourselves by archiving the pattern strings.
261  std::vector<std::string> reStrings;
     // Only a saving archive needs the current patterns converted to strings.
     if (S::is_saving::value) {
262  BOOST_FOREACH (const boost::regex &re, envErasePatterns)
263  reStrings.push_back(re.str());
     }
264  s & BOOST_SERIALIZATION_NVP(reStrings);
     // When loading, always replace the patterns with the archived ones. Testing the archive direction (instead of
     // testing whether envErasePatterns is empty) ensures that loading into an object that already holds patterns
     // does not silently keep the stale ones.
265  if (S::is_loading::value) {
     envErasePatterns.clear();
266  BOOST_FOREACH (const std::string &reStr, reStrings)
267  envErasePatterns.push_back(boost::regex(reStr));
268  }
269  }
270  }
271 };
272 
277  std::string isaName;
281 private:
282  friend class boost::serialization::access;
283 
284  template<class S>
285  void serialize(S &s, unsigned version) {
     // Boost.Serialization hook: the only archived field is isaName. The `version` argument is not consulted here.
286  s & BOOST_SERIALIZATION_NVP(isaName);
287  }
288 };
289 
300 };
301 
314 private:
315  friend class boost::serialization::access;
316 
317  template<class S>
318  void serialize(S &s, const unsigned version) {
     // Boost.Serialization hook: archives the settings as name/value pairs, in this fixed order.
319  s & BOOST_SERIALIZATION_NVP(usingSemantics);
320  s & BOOST_SERIALIZATION_NVP(checkingCallBranch);
321  s & BOOST_SERIALIZATION_NVP(basicBlockSemanticsAutoDrop);
     // ignoringUnknownInsns is only present in archives of class version 1 or later; for older archives the member
     // is left untouched by this function.
322  if (version >= 1)
323  s & BOOST_SERIALIZATION_NVP(ignoringUnknownInsns);
324  }
325 
326 public:
328  : usingSemantics(false), checkingCallBranch(false), basicBlockSemanticsAutoDrop(true), ignoringUnknownInsns(false) {}
329 };
330 
340  std::vector<rose_addr_t> functionStartingVas;
355  std::vector<rose_addr_t> ipRewrites;
384  boost::filesystem::path syscallHeader;
387 private:
388  friend class boost::serialization::access;
389 
390  template<class S>
391  void serialize(S &s, unsigned version) {
     // Boost.Serialization hook for the partitioner settings. `s` is the archive (saving or loading) and `version`
     // is the class version of the archived data. Fields are archived in the fixed order written below; fields that
     // are guarded by a version check are skipped for archives older than that version, in which case this function
     // leaves the corresponding member untouched.
392  s & BOOST_SERIALIZATION_NVP(base);
393  s & BOOST_SERIALIZATION_NVP(functionStartingVas);
394  s & BOOST_SERIALIZATION_NVP(followingGhostEdges);
395  s & BOOST_SERIALIZATION_NVP(discontiguousBlocks);
396  s & BOOST_SERIALIZATION_NVP(maxBasicBlockSize);
     // ipRewrites is only present in archives of version 6 or later.
397  if (version >= 6)
398  s & BOOST_SERIALIZATION_NVP(ipRewrites);
399  s & BOOST_SERIALIZATION_NVP(findingFunctionPadding);
400  s & BOOST_SERIALIZATION_NVP(findingDeadCode);
401  s & BOOST_SERIALIZATION_NVP(peScramblerDispatcherVa);
     // From version 2 on, findingIntraFunctionCode is archived directly. Older archives hold it as a bool: when
     // saving, the bool records whether the value is greater than zero; when loading, true becomes 10 and false
     // becomes 0.
402  if (version >= 2) {
403  s & BOOST_SERIALIZATION_NVP(findingIntraFunctionCode);
404  } else {
405  bool temp = false;
406  if (S::is_saving::value)
407  temp = findingIntraFunctionCode > 0;
408  s & boost::serialization::make_nvp("findingIntraFunctionCode", temp);
409  if (S::is_loading::value)
410  findingIntraFunctionCode = temp ? 10 : 0; // arbitrary number of passes
411  }
412  s & BOOST_SERIALIZATION_NVP(findingIntraFunctionData);
413  s & BOOST_SERIALIZATION_NVP(findingInterFunctionCalls);
     // findingFunctionCallFunctions is only present in archives of version 4 or later.
414  if (version >= 4)
415  s & BOOST_SERIALIZATION_NVP(findingFunctionCallFunctions);
     // These five function-finding settings are only present in archives of version 5 or later.
416  if (version >= 5) {
417  s & BOOST_SERIALIZATION_NVP(findingEntryFunctions);
418  s & BOOST_SERIALIZATION_NVP(findingErrorFunctions);
419  s & BOOST_SERIALIZATION_NVP(findingImportFunctions);
420  s & BOOST_SERIALIZATION_NVP(findingExportFunctions);
421  s & BOOST_SERIALIZATION_NVP(findingSymbolFunctions);
422  }
423  s & BOOST_SERIALIZATION_NVP(interruptVector);
424  s & BOOST_SERIALIZATION_NVP(doingPostAnalysis);
425  s & BOOST_SERIALIZATION_NVP(doingPostFunctionMayReturn);
426  s & BOOST_SERIALIZATION_NVP(doingPostFunctionStackDelta);
427  s & BOOST_SERIALIZATION_NVP(doingPostCallingConvention);
428  s & BOOST_SERIALIZATION_NVP(doingPostFunctionNoop);
429  s & BOOST_SERIALIZATION_NVP(functionReturnAnalysis);
     // functionReturnAnalysisMaxSorts is only present in archives of version 3 or later.
430  if (version >= 3)
431  s & BOOST_SERIALIZATION_NVP(functionReturnAnalysisMaxSorts);
432  s & BOOST_SERIALIZATION_NVP(findingDataFunctionPointers);
433  s & BOOST_SERIALIZATION_NVP(findingCodeFunctionPointers);
434  s & BOOST_SERIALIZATION_NVP(findingThunks);
435  s & BOOST_SERIALIZATION_NVP(splittingThunks);
436  s & BOOST_SERIALIZATION_NVP(semanticMemoryParadigm);
437  s & BOOST_SERIALIZATION_NVP(namingConstants);
438  s & BOOST_SERIALIZATION_NVP(namingStrings);
439  s & BOOST_SERIALIZATION_NVP(demangleNames);
     // namingSyscalls and syscallHeader are only present in archives of version 1 or later.
440  if (version >= 1) {
441  s & BOOST_SERIALIZATION_NVP(namingSyscalls);
442 
443  // There is no support for boost::filesystem serialization due to arguments by the maintainers over who has
444  // responsibility, so we do it the hard way.
     // The path is converted to a std::string before saving and assigned back from the string after loading.
445  std::string temp;
446  if (S::is_saving::value)
447  temp = syscallHeader.string();
448  s & boost::serialization::make_nvp("syscallHeader", temp);
449  if (S::is_loading::value)
450  syscallHeader = temp;
451  }
452  }
453 
454 public:
455  PartitionerSettings()
     // Default constructor: gives every listed setting its default value. Members that are archived by serialize()
     // but not named in this initializer list (base, functionStartingVas, ipRewrites, interruptVector, syscallHeader)
     // receive no explicit initializer here. The default of 10 for findingIntraFunctionCode is the same value that
     // serialize() substitutes when loading a "true" from an archive older than version 2.
456  : followingGhostEdges(false), discontiguousBlocks(true), maxBasicBlockSize(0), findingFunctionPadding(true),
457  findingDeadCode(true), peScramblerDispatcherVa(0), findingIntraFunctionCode(10), findingIntraFunctionData(true),
458  findingInterFunctionCalls(true), findingFunctionCallFunctions(true), findingEntryFunctions(true),
459  findingErrorFunctions(true), findingImportFunctions(true), findingExportFunctions(true), findingSymbolFunctions(true),
460  doingPostAnalysis(true), doingPostFunctionMayReturn(true), doingPostFunctionStackDelta(true),
461  doingPostCallingConvention(false), doingPostFunctionNoop(false), functionReturnAnalysis(MAYRETURN_DEFAULT_YES),
462  functionReturnAnalysisMaxSorts(50), findingDataFunctionPointers(false), findingCodeFunctionPointers(false),
463  findingThunks(true), splittingThunks(false), semanticMemoryParadigm(LIST_BASED_MEMORY), namingConstants(true),
464  namingStrings(true), namingSyscalls(true), demangleNames(true) {}
465 };
466 
467 // BOOST_CLASS_VERSION(PartitionerSettings, 1); -- see end of file (cannot be in a namespace)
468 
474  std::vector<std::string> configurationNames;
475  bool exitOnError;
478  : exitOnError(true) {}
479 
480 private:
481  friend class boost::serialization::access;
482 
483  template<class S>
484  void serialize(S &s, unsigned version) {
     // Boost.Serialization hook: archives configurationNames and then exitOnError as name/value pairs. The `version`
     // argument is not consulted here.
485  s & BOOST_SERIALIZATION_NVP(configurationNames);
486  s & BOOST_SERIALIZATION_NVP(exitOnError);
487  }
488 };
489 
490 // Additional declarations w/out definitions yet.
491 class Partitioner;
492 class Function;
494 class BasicBlock;
496 class DataBlock;
498 class ThunkPredicates;
500 
501 } // namespace
502 } // namespace
503 } // namespace
504 
505 // Class versions must be at global scope
509 
510 #endif
511 #endif
bool findingInterFunctionCalls
Look for function calls between functions.
Definition: BasicTypes.h:361
Edge is a function call transfer.
Definition: BasicTypes.h:67
bool splittingThunks
Split thunks into their own separate functions.
Definition: BasicTypes.h:379
bool memoryIsExecutable
Determines whether all of memory should be made executable.
Definition: BasicTypes.h:214
bool linkStaticArchives
Link static libraries before parsing.
Definition: BasicTypes.h:221
Settings for controlling the engine behavior.
Definition: BasicTypes.h:473
bool findingExportFunctions
Create functions at export addresses.
Definition: BasicTypes.h:366
MemoryDataAdjustment
How the partitioner should globally treat memory.
Definition: BasicTypes.h:183
bool findingDataFunctionPointers
Look for function pointers in static data.
Definition: BasicTypes.h:376
Assume a function returns if the may-return analysis cannot decide whether it may return...
Definition: BasicTypes.h:292
Allow parallel edges, so each edge has a unit count.
Definition: BasicTypes.h:39
std::string isaName
Name of the instruction set architecture.
Definition: BasicTypes.h:277
bool doingPostFunctionStackDelta
Run function-stack-delta analysis if doingPostAnalysis is set?
Definition: BasicTypes.h:371
EdgeType
Partitioner control flow edge types.
Definition: BasicTypes.h:55
rose_addr_t peScramblerDispatcherVa
Run the PeDescrambler module if non-zero.
Definition: BasicTypes.h:358
Settings that control the disassembler.
Definition: BasicTypes.h:276
Treat all memory as if it were initialized.
Definition: BasicTypes.h:186
size_t deExecuteZerosLeaveAtBack
Number of bytes at the end of each zero area to leave unaffected.
Definition: BasicTypes.h:208
bool copyAllInstructions
Whether to allow shared instructions in the AST.
Definition: BasicTypes.h:127
Special vertex destination for indeterminate edges.
Definition: BasicTypes.h:47
static AstConstructionSettings permissive()
Default permissive settings.
Definition: BasicTypes.h:164
Enum type for allowing parallel edges.
Definition: BasicTypes.h:38
Settings that directly control a partitioner.
Definition: BasicTypes.h:305
bool doingPostFunctionNoop
Find and name functions that are effectively no-ops.
Definition: BasicTypes.h:373
Assume that all functions return without ever running the may-return analysis.
Definition: BasicTypes.h:296
bool namingSyscalls
Give names (comments) to system calls if possible.
Definition: BasicTypes.h:383
bool followingGhostEdges
Should ghost edges be followed during disassembly? A ghost edge is a CFG edge that is apparent from t...
Definition: BasicTypes.h:343
Main namespace for the ROSE library.
bool doingPostCallingConvention
Run calling-convention analysis if doingPostAnalysis is set?
Definition: BasicTypes.h:372
Settings for loading specimens.
Definition: BasicTypes.h:198
std::vector< rose_addr_t > functionStartingVas
Addresses at which to start recursive disassembly.
Definition: BasicTypes.h:340
bool linkObjectFiles
Link object files before parsing.
Definition: BasicTypes.h:220
VertexType
Partitioner control flow vertex types.
Definition: BasicTypes.h:44
Settings that control building the AST.
Definition: BasicTypes.h:94
static AstConstructionSettings strict()
Default strict settings.
Definition: BasicTypes.h:150
bool discontiguousBlocks
Should basic blocks be allowed to be discontiguous.
Definition: BasicTypes.h:350
bool allowEmptyGlobalBlock
Whether to allow an empty global block.
Definition: BasicTypes.h:99
bool namingStrings
Give labels to constants that are string literal addresses.
Definition: BasicTypes.h:382
size_t functionReturnAnalysisMaxSorts
Number of times functions are sorted before using unsorted lists.
Definition: BasicTypes.h:375
Sawyer::SharedPointer< Function > FunctionPtr
Shared-ownership pointer for function.
Definition: BasicTypes.h:492
FunctionReturnAnalysis functionReturnAnalysis
How to run the function may-return analysis.
Definition: BasicTypes.h:374
bool demangleNames
Run all names through a demangling step.
Definition: BasicTypes.h:385
std::vector< std::string > configurationNames
List of configuration files and/or directories.
Definition: BasicTypes.h:474
std::vector< rose_addr_t > ipRewrites
Pairs of addresses for rewriting CFG edges.
Definition: BasicTypes.h:355
bool doingPostFunctionMayReturn
Run function-may-return analysis if doingPostAnalysis is set?
Definition: BasicTypes.h:370
bool ignoringUnknownInsns
Whether to ignore unknown insns when extending basic blocks.
Definition: BasicTypes.h:312
The special "undiscovered" vertex.
Definition: BasicTypes.h:46
bool findingErrorFunctions
Create functions from error handling and exception information.
Definition: BasicTypes.h:364
std::vector< std::string > envInsert
List of environment variable names and values to be inserted before launching a "run:" specimen...
Definition: BasicTypes.h:236
The value is an assumption without any proof.
Definition: BasicTypes.h:80
Settings that control the engine partitioning.
Definition: BasicTypes.h:338
Confidence
How sure are we of something.
Definition: BasicTypes.h:79
bool doingPostAnalysis
Perform enabled post-partitioning analyses?
Definition: BasicTypes.h:369
size_t findingIntraFunctionCode
Suck up unused addresses as intra-function code (number of passes).
Definition: BasicTypes.h:359
Do not make any global changes to the memory map.
Definition: BasicTypes.h:192
bool findingThunks
Look for common thunk patterns in undiscovered areas.
Definition: BasicTypes.h:378
bool findingImportFunctions
Create functions at import addresses.
Definition: BasicTypes.h:365
A basic block or placeholder for a basic block.
Definition: BasicTypes.h:45
SemanticMemoryParadigm
Organization of semantic memory.
Definition: BasicTypes.h:85
Edge is a function return from the call site.
Definition: BasicTypes.h:61
Normal control flow edge, nothing special.
Definition: BasicTypes.h:56
MemoryDataAdjustment memoryDataAdjustment
How to globally adjust memory segment access bits for data areas.
Definition: BasicTypes.h:210
FunctionReturnAnalysis
Controls whether the function may-return analysis runs.
Definition: BasicTypes.h:291
size_t deExecuteZerosLeaveAtFront
Number of bytes at the beginning of each zero area to leave unaffected.
Definition: BasicTypes.h:206
The value was somehow proved.
Definition: BasicTypes.h:81
bool findingEntryFunctions
Create functions at the program entry point(s).
Definition: BasicTypes.h:363
bool findingDeadCode
Look for unreachable basic blocks?
Definition: BasicTypes.h:357
SemanticMemoryParadigm semanticMemoryParadigm
Container used for semantic memory states.
Definition: BasicTypes.h:380
bool checkingCallBranch
Check for situations where CALL is used as a branch.
Definition: BasicTypes.h:310
bool allowEmptyBasicBlocks
Whether to allow a basic block to be empty.
Definition: BasicTypes.h:114
bool usingSemantics
Whether instruction semantics are used.
Definition: BasicTypes.h:306
Treat all memory as if it were constant.
Definition: BasicTypes.h:184
bool findingSymbolFunctions
Create functions according to symbol tables.
Definition: BasicTypes.h:367
bool exitOnError
If true, emit error message and exit non-zero, else throw.
Definition: BasicTypes.h:475
std::vector< boost::regex > envErasePatterns
List of regular expressions for removing environment variables before launching a "run:" specimen...
Definition: BasicTypes.h:233
bool basicBlockSemanticsAutoDrop
Conserve memory by dropping semantics for attached basic blocks.
Definition: BasicTypes.h:311
Assume a function cannot return if the may-return analysis cannot decide whether it may return...
Definition: BasicTypes.h:294
bool namingConstants
Give names to constants by calling Modules::nameConstants.
Definition: BasicTypes.h:381
size_t deExecuteZerosThreshold
Size threshold for removing execute permission from zero data.
Definition: BasicTypes.h:199
std::vector< std::string > envEraseNames
List of environment variable names that should be removed before launching a "run:" specimen...
Definition: BasicTypes.h:231
bool findingCodeFunctionPointers
Look for function pointers in instructions.
Definition: BasicTypes.h:377
Special vertex destination for non-existing basic blocks.
Definition: BasicTypes.h:48
bool findingIntraFunctionData
Suck up unused addresses as intra-function data.
Definition: BasicTypes.h:360
Assume that a function cannot return without ever running the may-return analysis.
Definition: BasicTypes.h:298
bool allowFunctionWithNoBasicBlocks
Whether to allow functions with no basic blocks.
Definition: BasicTypes.h:106
AddressInterval interruptVector
Table of interrupt handling functions.
Definition: BasicTypes.h:368
bool findingFunctionPadding
Look for padding before each function entry point?
Definition: BasicTypes.h:356
std::string linker
Command to run to link object and archives.
Definition: BasicTypes.h:222
bool findingFunctionCallFunctions
Create functions from function calls.
Definition: BasicTypes.h:362
size_t maxBasicBlockSize
Maximum basic block size.
Definition: BasicTypes.h:354
boost::filesystem::path syscallHeader
Name of header file containing system call numbers.
Definition: BasicTypes.h:384