ROSE  0.9.10.200
BinaryFeasiblePath.h
1 #ifndef ROSE_BinaryAnalysis_FeasiblePath_H
2 #define ROSE_BinaryAnalysis_FeasiblePath_H
3 
4 #include <BaseSemantics2.h>
5 #include <BinarySmtSolver.h>
6 #include <BinarySymbolicExprParser.h>
7 #include <Partitioner2/CfgPath.h>
8 #include <Sawyer/CommandLine.h>
9 #include <Sawyer/Message.h>
10 #include <boost/filesystem/path.hpp>
11 
12 namespace Rose {
13 namespace BinaryAnalysis {
14 
18 class FeasiblePath {
20  // Types and public data members
22 public:
24  enum SearchMode {
28  };
29 
34  };
35 
41  };
42 
44  enum IoMode { READ, WRITE };
45 
47  enum MayOrMust { MAY, MUST };
48 
55  struct Expression {
57  std::string parsable;
60  Expression() {}
61  /*implicit*/ Expression(const std::string &parsable): parsable(parsable) {}
62  /*implicit*/ Expression(const SymbolicExpr::Ptr &expr): expr(expr) {}
63 
64  void print(std::ostream&) const;
65  };
66 
68  struct Settings {
69  // Path feasibility
72  size_t maxVertexVisit;
73  size_t maxPathLength;
74  size_t maxCallDepth;
76  std::vector<Expression> assertions;
77  std::vector<std::string> assertionLocations;
78  std::vector<rose_addr_t> summarizeFunctions;
80  std::string solverName;
87  // Null dereferences
88  struct NullDeref {
89  bool check;
91  bool constOnly;
93  NullDeref()
94  : check(false), mode(MUST), constOnly(false) {}
95  } nullDeref;
97  std::string exprParserDoc;
101  : searchMode(SEARCH_SINGLE_DFS), maxVertexVisit((size_t)-1), maxPathLength(200), maxCallDepth((size_t)-1),
102  maxRecursionDepth((size_t)-1), nonAddressIsFeasible(true), solverName("best"),
103  memoryParadigm(LIST_BASED_MEMORY), processFinalVertex(false), ignoreSemanticFailure(false),
104  kCycleCoefficient(0.0), edgeVisitOrder(VISIT_NATURAL) {}
105  };
106 
109 
118 
120  struct VarDetail {
121  std::string registerName;
122  std::string firstAccessMode;
126  size_t memSize;
127  size_t memByteNumber;
130  VarDetail(): firstAccessInsn(NULL), memSize(0), memByteNumber(0) {}
131  std::string toString() const;
132  };
133 
136 
141  public:
142  enum Action {
145  };
146 
147  virtual ~PathProcessor() {}
148 
163  virtual Action found(const FeasiblePath &analyzer, const Partitioner2::CfgPath &path,
165  const SmtSolverPtr &solver) { return CONTINUE; }
166 
178  virtual void nullDeref(const FeasiblePath &analyzer, const Partitioner2::CfgPath &path,
180 
185  virtual void memoryIo(const FeasiblePath &analyzer, IoMode ioMode,
189  };
190 
195  rose_addr_t address;
196  int64_t stackDelta;
197  std::string name;
200  FunctionSummary(): stackDelta(SgAsmInstruction::INVALID_STACK_DELTA) {}
201 
203  FunctionSummary(const Partitioner2::ControlFlowGraph::ConstVertexIterator &cfgFuncVertex, uint64_t stackDelta);
204  };
205 
208 
213  public:
216  protected:
217  FunctionSummarizer() {}
218  public:
220  virtual void init(const FeasiblePath &analysis, FunctionSummary &summary /*in,out*/,
221  const Partitioner2::Function::Ptr &function,
222  Partitioner2::ControlFlowGraph::ConstVertexIterator cfgCallTarget) = 0;
223 
228  virtual bool process(const FeasiblePath &analysis, const FunctionSummary &summary,
230 
236  returnValue(const FeasiblePath &analysis, const FunctionSummary &summary,
238  };
239 
241  // Private data members
243 private:
244  const Partitioner2::Partitioner *partitioner_; // binary analysis context
245  RegisterDictionary *registers_; // registers augmented with "path" pseudo-register
246  RegisterDescriptor REG_RETURN_; // FIXME[Robb P Matzke 2016-10-11]: see source
247  Settings settings_;
248  FunctionSummaries functionSummaries_;
249  Partitioner2::CfgVertexMap vmap_; // relates CFG vertices to path vertices
250  Partitioner2::ControlFlowGraph paths_; // all possible paths, feasible or otherwise
251  Partitioner2::CfgConstVertexSet pathsBeginVertices_;// vertices of paths_ where searching starts
252  Partitioner2::CfgConstVertexSet pathsEndVertices_; // vertices of paths_ where searching stops
253  Partitioner2::CfgConstEdgeSet cfgAvoidEdges_; // CFG edges to avoid
254  Partitioner2::CfgConstVertexSet cfgEndAvoidVertices_;// CFG end-of-path and other avoidance vertices
255  FunctionSummarizer::Ptr functionSummarizer_; // user-defined function for handling function summaries
256  static Sawyer::Attribute::Id POST_STATE; // stores semantic state after executing the insns for a vertex
257  static Sawyer::Attribute::Id POST_INSN_LENGTH; // path length in instructions at end of vertex
258  static Sawyer::Attribute::Id EFFECTIVE_K; // (double) effective maximimum path length
259 
260 
262  // Construction, destruction
264 public:
267  : registers_(NULL) {}
268 
269  virtual ~FeasiblePath() {}
270 
272  void reset() {
273  partitioner_ = NULL;
274  registers_ = NULL;
275  REG_PATH = REG_RETURN_ = RegisterDescriptor();
276  functionSummaries_.clear();
277  vmap_.clear();
278  paths_.clear();
279  pathsBeginVertices_.clear();
280  pathsEndVertices_.clear();
281  cfgAvoidEdges_.clear();
282  cfgEndAvoidVertices_.clear();
283  }
284 
286  static void initDiagnostics();
287 
288 
290  // Settings affecting behavior
292 public:
296  const Settings& settings() const { return settings_; }
297  Settings& settings() { return settings_; }
298  void settings(const Settings &s) { settings_ = s; }
306 
308  static std::string expressionDocumentation();
309 
310 
312  // Overridable processing functions
314 public:
321  buildVirtualCpu(const Partitioner2::Partitioner&, const Partitioner2::CfgPath*, PathProcessor*, const SmtSolver::Ptr&);
322 
329  virtual void
331  const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathsBeginVertex);
332 
337  virtual void
339  const InstructionSemantics2::BaseSemantics::DispatcherPtr &cpu, size_t pathInsnIndex);
340 
344  virtual void
345  processIndeterminateBlock(const Partitioner2::ControlFlowGraph::ConstVertexIterator &vertex,
347  size_t pathInsnIndex);
348 
352  virtual void
353  processFunctionSummary(const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathsVertex,
355  size_t pathInsnIndex);
356 
360  virtual void
362  const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathsVertex,
363  size_t &pathInsnIndex /*in,out*/);
364 
366  virtual bool
367  shouldSummarizeCall(const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathVertex,
369  const Partitioner2::ControlFlowGraph::ConstVertexIterator &cfgCallTarget);
370 
372  virtual bool
373  shouldInline(const Partitioner2::CfgPath &path, const Partitioner2::ControlFlowGraph::ConstVertexIterator &cfgCallTarget);
374 
383  FunctionSummarizer::Ptr functionSummarizer() const { return functionSummarizer_; }
384  void functionSummarizer(const FunctionSummarizer::Ptr &f) { functionSummarizer_ = f; }
387  // Utilities
390 public:
392  Partitioner2::ControlFlowGraph::ConstVertexIterator
393  pathToCfg(const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathVertex) const;
394 
398 
401 
403  bool isFunctionCall(const Partitioner2::ControlFlowGraph::ConstVertexIterator&) const;
404 
406  void printPathVertex(std::ostream &out, const Partitioner2::ControlFlowGraph::Vertex &pathVertex,
407  size_t &insnIdx /*in,out*/) const;
408 
412  void printPath(std::ostream &out, const Partitioner2::CfgPath&) const;
413 
418  const Partitioner2::ControlFlowGraph::ConstVertexIterator &beginVertex,
419  const Partitioner2::CfgConstVertexSet &endVertices);
420 
422  // Functions for describing the search space
424 public:
425 
432  void
434  const Partitioner2::CfgConstVertexSet &cfgBeginVertices,
435  const Partitioner2::CfgConstVertexSet &cfgEndVertices,
438  void
439  setSearchBoundary(const Partitioner2::Partitioner &partitioner,
440  const Partitioner2::ControlFlowGraph::ConstVertexIterator &cfgBeginVertex,
441  const Partitioner2::ControlFlowGraph::ConstVertexIterator &cfgEndVertex,
447  // Functions for searching for paths
450 public:
455  void depthFirstSearch(PathProcessor &pathProcessor);
456 
457 
459  // Functions for getting the results
461 public:
466  const Partitioner2::Partitioner& partitioner() const;
467 
471  const FunctionSummaries& functionSummaries() const {
472  return functionSummaries_;
473  }
474 
479  const FunctionSummary& functionSummary(rose_addr_t entryVa) const;
480 
482  const VarDetail& varDetail(const InstructionSemantics2::BaseSemantics::StatePtr&, const std::string &varName) const;
483 
485  const VarDetails& varDetails(const InstructionSemantics2::BaseSemantics::StatePtr&) const;
486 
489 
495  double pathEffectiveK(const Partitioner2::CfgPath&) const;
496 
503  static size_t pathLength(const Partitioner2::CfgPath&);
504 
506  // Private supporting functions
508 private:
509  static rose_addr_t virtualAddress(const Partitioner2::ControlFlowGraph::ConstVertexIterator &vertex);
510 
511  void insertCallSummary(const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathsCallSite,
513  const Partitioner2::ControlFlowGraph::ConstEdgeIterator &cfgCallEdge);
514 
515  boost::filesystem::path emitPathGraph(size_t callId, size_t graphId); // emit paths graph to "rose-debug" directory
516 
517  // Pop an edge (or more) from the path and follow some other edge. Also, adjust the SMT solver's stack in a similar
518  // way. The SMT solver will have an initial state, plus one pushed state per edge of the path.
519  void backtrack(Partitioner2::CfgPath &path /*in,out*/, const SmtSolver::Ptr&);
520 
521  // Process one edge of a path to find any path constraints. When called, the cpu's current state should be the virtual
522  // machine state at it exists just prior to executing the target vertex of the specified edge.
523  //
524  // Returns a null pointer if the edge's assertion is trivially unsatisfiable, such as when the edge points to a basic block
525  // whose address doesn't match the contents of the instruction pointer register after executing the edge's source
526  // block. Otherwise, returns a symbolic expression which must be tree if the edge is feasible. For trivially feasible
527  // edges, the return value is the constant 1 (one bit wide; i.e., true).
528  SymbolicExpr::Ptr pathEdgeConstraint(const Partitioner2::ControlFlowGraph::ConstEdgeIterator &pathEdge,
530 
531  // Parse the expression if it's a parsable string, otherwise return the expression as is. */
532  Expression parseExpression(Expression, const std::string &where, SymbolicExprParser&) const;
533 
534  SymbolicExpr::Ptr expandExpression(const Expression&, SymbolicExprParser&);
535 
536  // Based on the last vertex of the path, insert user-specified assertions into the SMT solver.
537  void insertAssertions(const SmtSolver::Ptr&, const Partitioner2::CfgPath&,
538  const std::vector<Expression> &assertions, bool atEndOfPath, SymbolicExprParser&);
539 
540  // Size of vertex. How much of "k" does this vertex consume?
541  static size_t vertexSize(const Partitioner2::ControlFlowGraph::ConstVertexIterator&);
542 
543  // Insert the edge assertion and any applicable user assertions (after delayed expansion of the expressions' register
544  // and memory references), and run the solver, returning its result.
546  solvePathConstraints(SmtSolver::Ptr&, const Partitioner2::CfgPath&, const SymbolicExpr::Ptr &edgeAssertion,
547  const std::vector<Expression> &userAssertions, bool atEndOfPath, SymbolicExprParser&);
548 };
549 
550 } // namespace
551 } // namespace
552 
553 std::ostream& operator<<(std::ostream&, const Rose::BinaryAnalysis::FeasiblePath::Expression&);
554 
555 // Convert string to feasible path expression during command-line parsing
556 namespace Sawyer {
557  namespace CommandLine {
558  template<>
559  struct LexicalCast<Rose::BinaryAnalysis::FeasiblePath::Expression> {
560  static Rose::BinaryAnalysis::FeasiblePath::Expression convert(const std::string &src) {
562  }
563  };
564  }
565 }
566 
567 #endif
std::string firstAccessMode
How was variable first accessed ("read" or "write").
std::vector< rose_addr_t > summarizeFunctions
Functions to always summarize.
Sawyer::Container::Map< rose_addr_t, FunctionSummary > FunctionSummaries
Summaries for multiple functions.
MayOrMust mode
Check for addrs that may or must be null.
EdgeVisitOrder edgeVisitOrder
Order in which to visit edges.
virtual InstructionSemantics2::SymbolicSemantics::SValuePtr returnValue(const FeasiblePath &analysis, const FunctionSummary &summary, const InstructionSemantics2::SymbolicSemantics::RiscOperatorsPtr &ops)=0
Return value for function.
static bool isAnyEndpointReachable(const Partitioner2::ControlFlowGraph &cfg, const Partitioner2::ControlFlowGraph::ConstVertexIterator &beginVertex, const Partitioner2::CfgConstVertexSet &endVertices)
Determine whether any ending vertex is reachable.
rose_addr_t address
Address of summarized function.
SemanticMemoryParadigm memoryParadigm
Type of memory state when there's a choice to be made.
double pathEffectiveK(const Partitioner2::CfgPath &) const
Effective maximum path length.
void printPathVertex(std::ostream &out, const Partitioner2::ControlFlowGraph::Vertex &pathVertex, size_t &insnIdx) const
Print one vertex of a path for debugging.
std::string parsable
String to be parsed as an expression.
Information stored per V_USER_DEFINED path vertex.
Sawyer::Container::Map< std::string, FeasiblePath::VarDetail > VarDetails
Variable detail by name.
void setSearchBoundary(const Partitioner2::Partitioner &partitioner, const Partitioner2::CfgConstVertexSet &cfgBeginVertices, const Partitioner2::CfgConstVertexSet &cfgEndVertices, const Partitioner2::CfgConstVertexSet &cfgAvoidVertices=Partitioner2::CfgConstVertexSet(), const Partitioner2::CfgConstEdgeSet &cfgAvoidEdges=Partitioner2::CfgConstEdgeSet())
Specify search boundary.
void depthFirstSearch(PathProcessor &pathProcessor)
Find all feasible paths.
virtual void processIndeterminateBlock(const Partitioner2::ControlFlowGraph::ConstVertexIterator &vertex, const InstructionSemantics2::BaseSemantics::DispatcherPtr &cpu, size_t pathInsnIndex)
Process an indeterminate block.
Base class for machine instructions.
Sawyer::Optional< rose_addr_t > returnFrom
This variable is the return value from the specified function.
Collection of streams.
Definition: Message.h:1579
boost::shared_ptr< class RiscOperators > RiscOperatorsPtr
Shared-ownership pointer to symbolic RISC operations.
FeasiblePath()
Constructs a new feasible path analyzer.
SearchMode
How to search for paths.
virtual bool process(const FeasiblePath &analysis, const FunctionSummary &summary, const InstructionSemantics2::SymbolicSemantics::RiscOperatorsPtr &ops)=0
Invoked when the analysis traverses the summary.
void clear()
Erase all mappings.
Definition: BiMap.h:63
std::set< ControlFlowGraph::ConstVertexIterator > CfgConstVertexSet
Set of CFG vertex pointers.
virtual void processFunctionSummary(const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathsVertex, const InstructionSemantics2::BaseSemantics::DispatcherPtr &cpu, size_t pathInsnIndex)
Process a function summary vertex.
RegisterDescriptor REG_PATH
Descriptor of path pseudo-registers.
std::string name
Name of summarized function.
Sawyer::Optional< size_t > firstAccessIdx
Instruction position in path where this var was first read.
A collection of related switch declarations.
Main namespace for the ROSE library.
Parses symbolic expressions from text.
Describes (part of) a physical CPU register.
Satisfiable
Satisfiability constants.
size_t maxCallDepth
Max length of path in terms of function calls.
struct Rose::BinaryAnalysis::FeasiblePath::Settings::NullDeref nullDeref
Settings for null-dereference analysis.
Blast everything at once to the SMT solver.
Name space for the entire library.
std::vector< std::string > assertionLocations
Locations at which "constraints" are checked.
const FunctionSummaries & functionSummaries() const
Function summary information.
Partitioner2::CfgConstVertexSet cfgToPaths(const Partitioner2::CfgConstVertexSet &) const
Convert CFG vertices to path vertices.
virtual bool shouldInline(const Partitioner2::CfgPath &path, const Partitioner2::ControlFlowGraph::ConstVertexIterator &cfgCallTarget)
Determines whether a function call should be inlined.
virtual void memoryIo(const FeasiblePath &analyzer, IoMode ioMode, const InstructionSemantics2::BaseSemantics::SValuePtr &addr, const InstructionSemantics2::BaseSemantics::SValuePtr &value, const InstructionSemantics2::BaseSemantics::RiscOperatorsPtr &ops)
Function invoked every time a memory reference occurs.
static std::string expressionDocumentation()
Documentation for the symbolic expression parser.
void settings(const Settings &s)
Property: Settings used by this analysis.
boost::shared_ptr< class State > StatePtr
Shared-ownership pointer to a semantic state.
size_t maxPathLength
Limit path length in terms of number of instructions.
bool constOnly
If true, check only constants or sets of constants.
Base class for callbacks for function summaries.
boost::shared_ptr< class Dispatcher > DispatcherPtr
Shared-ownership pointer to a semantics instruction dispatcher.
AddressIntervalSet location
Location where constraint applies.
Visit edges in reverse of the natural order.
static InstructionSemantics2::BaseSemantics::StatePtr pathPostState(const Partitioner2::CfgPath &, size_t vertexIdx)
Get the state at the end of the specified vertex.
FunctionSummary()
Construct empty function summary.
bool nonAddressIsFeasible
Indeterminate/undiscovered vertices are feasible?
size_t maxVertexVisit
Max times to visit a particular vertex in one path.
size_t maxRecursionDepth
Max path length in terms of recursive function calls.
boost::shared_ptr< class RiscOperators > RiscOperatorsPtr
Shared-ownership pointer to a RISC operators object.
void reset()
Reset to initial state without changing settings.
A path through a control flow graph.
Definition: CfgPath.h:15
std::set< ControlFlowGraph::ConstEdgeIterator > CfgConstEdgeSet
Set of CFG edge pointers.
Information about a variable seen on a path.
virtual Action found(const FeasiblePath &analyzer, const Partitioner2::CfgPath &path, const InstructionSemantics2::BaseSemantics::DispatcherPtr &cpu, const SmtSolverPtr &solver)
Function invoked whenever a complete path is found.
FunctionSummarizer::Ptr functionSummarizer() const
Property: Function summary handling.
virtual bool shouldSummarizeCall(const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathVertex, const Partitioner2::ControlFlowGraph &cfg, const Partitioner2::ControlFlowGraph::ConstVertexIterator &cfgCallTarget)
Determines whether a function call should be summarized instead of inlined.
bool isFunctionCall(const Partitioner2::ControlFlowGraph::ConstVertexIterator &) const
True if vertex is a function call.
SymbolicExpr::Ptr expr
Symbolic expression.
bool ignoreSemanticFailure
Whether to ignore instructions with no semantic info.
Defines registers available for a particular architecture.
Definition: Registers.h:32
SearchMode searchMode
Method to use when searching for feasible paths.
size_t Id
Attribute identification.
Definition: Attribute.h:140
void clear()
Remove all vertices and edges.
Definition: Graph.h:1998
bool pathEndsWithFunctionCall(const Partitioner2::CfgPath &) const
True if path ends with a function call.
void printPath(std::ostream &out, const Partitioner2::CfgPath &) const
Print the path to the specified output stream.
virtual InstructionSemantics2::BaseSemantics::DispatcherPtr buildVirtualCpu(const Partitioner2::Partitioner &, const Partitioner2::CfgPath *, PathProcessor *, const SmtSolver::Ptr &)
Create the virtual CPU.
const Settings & settings() const
Property: Settings used by this analysis.
size_t memByteNumber
Byte number for memory access.
int64_t stackDelta
Stack delta for summarized function.
Base class for reference counted objects.
Definition: SharedObject.h:64
double kCycleCoefficient
Coefficient for adjusting maxPathLengh during CFG cycles.
static Sawyer::CommandLine::SwitchGroup commandLineSwitches(Settings &settings)
Describe command-line switches.
virtual void nullDeref(const FeasiblePath &analyzer, const Partitioner2::CfgPath &path, IoMode ioMode, const InstructionSemantics2::BaseSemantics::SValuePtr &addr, SgAsmInstruction *)
Function invoked whenever a null pointer dereference is detected.
Sawyer::SharedPointer< FunctionSummarizer > Ptr
Reference counting pointer.
static Sawyer::Message::Facility mlog
Diagnostic output.
virtual void processVertex(const InstructionSemantics2::BaseSemantics::DispatcherPtr &cpu, const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathsVertex, size_t &pathInsnIndex)
Process one vertex.
const VarDetails & varDetails(const InstructionSemantics2::BaseSemantics::StatePtr &) const
Details about all variables by name.
static size_t pathLength(const Partitioner2::CfgPath &)
Total length of path.
Partitioner2::ControlFlowGraph::ConstVertexIterator pathToCfg(const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathVertex) const
Convert path vertex to a CFG vertex.
Visit edges in their natural, forward order.
const FunctionSummary & functionSummary(rose_addr_t entryVa) const
Function summary information.
const Partitioner2::Partitioner & partitioner() const
Property: Partitioner currently in use.
const VarDetail & varDetail(const InstructionSemantics2::BaseSemantics::StatePtr &, const std::string &varName) const
Details about a variable.
virtual void processBasicBlock(const Partitioner2::BasicBlock::Ptr &bblock, const InstructionSemantics2::BaseSemantics::DispatcherPtr &cpu, size_t pathInsnIndex)
Process instructions for one basic block on the specified virtual CPU.
SymbolicExpr::Ptr memAddress
Address where variable is located.
Partitions instructions into basic blocks and functions.
Definition: Partitioner.h:293
std::string solverName
Type of SMT solver.
EdgeVisitOrder
Edge visitation order.
void functionSummarizer(const FunctionSummarizer::Ptr &f)
Property: Function summary handling.
Settings & settings()
Property: Settings used by this analysis.
size_t memSize
Size of total memory access in bytes.
Container associating values with keys.
Definition: Sawyer/Map.h:66
std::string exprParserDoc
String documenting how expressions are parsed, empty for default.
bool processFinalVertex
Whether to process the last vertex of the path.
SgAsmInstruction * firstAccessInsn
Instruction address where this var was first read.
Settings that control this analysis.
static void initDiagnostics()
Initialize diagnostic output.
bool check
If true, look for null dereferences along the paths.
virtual void setInitialState(const InstructionSemantics2::BaseSemantics::DispatcherPtr &cpu, const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathsBeginVertex)
Initialize state for first vertex of path.
Sawyer::Optional< rose_addr_t > initialStackPtr
Concrete value to use for stack pointer register initial value.
SemanticMemoryParadigm
Organization of semantic memory.
std::vector< Expression > assertions
Constraints to be satisfied at some point along the path.
virtual void init(const FeasiblePath &analysis, FunctionSummary &summary, const Partitioner2::Function::Ptr &function, Partitioner2::ControlFlowGraph::ConstVertexIterator cfgCallTarget)=0
Invoked when a new summary is created.