1 #ifndef ROSE_BinaryAnalysis_FeasiblePath_H
2 #define ROSE_BinaryAnalysis_FeasiblePath_H
4 #include <BaseSemantics2.h>
5 #include <BinarySmtSolver.h>
6 #include <BinarySymbolicExprParser.h>
7 #include <Partitioner2/CfgPath.h>
8 #include <Sawyer/CommandLine.h>
9 #include <Sawyer/Message.h>
10 #include <boost/filesystem/path.hpp>
12 namespace Rose {
13 namespace BinaryAnalysis {
18 class FeasiblePath {
20  // Types and public data members
22 public:
29  };
32  enum IoMode { READ, WRITE };
35  enum MayOrMust { MAY, MUST };
38  struct Settings {
39  // Path feasibility
40  SearchMode searchMode;
43  size_t maxPathLength;
44  size_t maxCallDepth;
46  std::vector<SymbolicExpr::Ptr> postConditions;
47  std::vector<std::string> postConditionStrings;
48  std::vector<rose_addr_t> summarizeFunctions;
50  std::string solverName;
54  // Null dereferences
55  struct NullDeref {
56  bool check;
58  bool constOnly;
60  NullDeref()
61  : check(false), mode(MUST), constOnly(false) {}
62  } nullDeref;
66  : searchMode(SEARCH_SINGLE_DFS), vertexVisitLimit((size_t)-1), maxPathLength((size_t)-1), maxCallDepth((size_t)-1),
67  maxRecursionDepth((size_t)-1), nonAddressIsFeasible(true), solverName("best"),
68  memoryParadigm(LIST_BASED_MEMORY), processFinalVertex(false) {}
69  };
84  struct VarDetail {
85  std::string registerName;
86  std::string firstAccessMode;
90  size_t memSize;
91  size_t memByteNumber;
94  VarDetail(): firstAccessInsn(NULL), memSize(0), memByteNumber(0) {}
95  std::string toString() const;
96  };
105  public:
106  enum Action {
109  };
111  virtual ~PathProcessor() {}
128  virtual Action found(const FeasiblePath &analyzer, const Partitioner2::CfgPath &path,
130  const SmtSolverPtr &solver) { return CONTINUE; }
143  virtual void nullDeref(const FeasiblePath &analyzer, const Partitioner2::CfgPath &path,
150  virtual void memoryIo(const FeasiblePath &analyzer, IoMode ioMode,
154  };
160  rose_addr_t address;
161  int64_t stackDelta;
162  std::string name;
165  FunctionSummary(): stackDelta(SgAsmInstruction::INVALID_STACK_DELTA) {}
168  FunctionSummary(const Partitioner2::ControlFlowGraph::ConstVertexIterator &cfgFuncVertex, uint64_t stackDelta);
169  };
178  public:
181  protected:
182  FunctionSummarizer() {}
183  public:
185  virtual void init(const FeasiblePath &analysis, FunctionSummary &summary /*in,out*/,
186  const Partitioner2::Function::Ptr &function,
187  Partitioner2::ControlFlowGraph::ConstVertexIterator cfgCallTarget) = 0;
193  virtual bool process(const FeasiblePath &analysis, const FunctionSummary &summary,
201  returnValue(const FeasiblePath &analysis, const FunctionSummary &summary,
203  };
206  // Private data members
208 private:
209  const Partitioner2::Partitioner *partitioner_; // binary analysis context
210  RegisterDictionary *registers_; // registers augmented with "path" pseudo-register
211  RegisterDescriptor REG_RETURN_; // FIXME[Robb P Matzke 2016-10-11]: see source
212  Settings settings_;
213  FunctionSummaries functionSummaries_;
214  Partitioner2::CfgVertexMap vmap_; // relates CFG vertices to path vertices
215  Partitioner2::ControlFlowGraph paths_; // all possible paths, feasible or otherwise
216  Partitioner2::CfgConstVertexSet pathsBeginVertices_;// vertices of paths_ where searching starts
217  Partitioner2::CfgConstVertexSet pathsEndVertices_; // vertices of paths_ where searching stops
218  Partitioner2::CfgConstEdgeSet cfgAvoidEdges_; // CFG edges to avoid
219  Partitioner2::CfgConstVertexSet cfgEndAvoidVertices_;// CFG end-of-path and other avoidance vertices
220  FunctionSummarizer::Ptr functionSummarizer_; // user-defined function for handling function summaries
221  static Sawyer::Attribute::Id POST_STATE; // stores semantic state after executing the insns for a vertex
222  static Sawyer::Attribute::Id POST_INSN_LENGTH; // path length in instructions at end of vertex
226  // Construction, destruction
228 public:
231  : registers_(NULL) {}
233  virtual ~FeasiblePath() {}
236  void reset() {
237  partitioner_ = NULL;
238  registers_ = NULL;
239  REG_PATH = REG_RETURN_ = RegisterDescriptor();
240  functionSummaries_.clear();
241  vmap_.clear();
242  paths_.clear();
243  pathsBeginVertices_.clear();
244  pathsEndVertices_.clear();
245  cfgAvoidEdges_.clear();
246  cfgEndAvoidVertices_.clear();
247  }
250  static void initDiagnostics();
254  // Settings affecting behavior
256 public:
260  const Settings& settings() const { return settings_; }
261  Settings& settings() { return settings_; }
262  void settings(const Settings &s) { settings_ = s; }
273  // Overridable processing functions
275 public:
282  buildVirtualCpu(const Partitioner2::Partitioner&, const Partitioner2::CfgPath*, PathProcessor*, const SmtSolver::Ptr&);
290  virtual void
292  const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathsBeginVertex);
298  virtual void
300  const InstructionSemantics2::BaseSemantics::DispatcherPtr &cpu, size_t pathInsnIndex);
305  virtual void
306  processIndeterminateBlock(const Partitioner2::ControlFlowGraph::ConstVertexIterator &vertex,
308  size_t pathInsnIndex);
313  virtual void
314  processFunctionSummary(const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathsVertex,
316  size_t pathInsnIndex);
321  virtual void
323  const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathsVertex,
324  size_t &pathInsnIndex /*in,out*/);
327  virtual bool
328  shouldSummarizeCall(const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathVertex,
330  const Partitioner2::ControlFlowGraph::ConstVertexIterator &cfgCallTarget);
333  virtual bool
334  shouldInline(const Partitioner2::CfgPath &path, const Partitioner2::ControlFlowGraph::ConstVertexIterator &cfgCallTarget);
344  FunctionSummarizer::Ptr functionSummarizer() const { return functionSummarizer_; }
345  void functionSummarizer(const FunctionSummarizer::Ptr &f) { functionSummarizer_ = f; }
348  // Utilities
351 public:
353  Partitioner2::ControlFlowGraph::ConstVertexIterator
354  pathToCfg(const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathVertex) const;
364  bool isFunctionCall(const Partitioner2::ControlFlowGraph::ConstVertexIterator&) const;
367  void printPathVertex(std::ostream &out, const Partitioner2::ControlFlowGraph::Vertex &pathVertex,
368  size_t &insnIdx /*in,out*/) const;
373  void printPath(std::ostream &out, const Partitioner2::CfgPath&) const;
379  const Partitioner2::ControlFlowGraph::ConstVertexIterator &beginVertex,
380  const Partitioner2::CfgConstVertexSet &endVertices);
383  // Functions for describing the search space
385 public:
393  void
395  const Partitioner2::CfgConstVertexSet &cfgBeginVertices,
396  const Partitioner2::CfgConstVertexSet &cfgEndVertices,
399  void
400  setSearchBoundary(const Partitioner2::Partitioner &partitioner,
401  const Partitioner2::ControlFlowGraph::ConstVertexIterator &cfgBeginVertex,
402  const Partitioner2::ControlFlowGraph::ConstVertexIterator &cfgEndVertex,
408  // Functions for searching for paths
411 public:
416  void depthFirstSearch(PathProcessor &pathProcessor);
420  // Functions for getting the results
422 public:
427  const Partitioner2::Partitioner& partitioner() const;
432  const FunctionSummaries& functionSummaries() const {
433  return functionSummaries_;
434  }
440  const FunctionSummary& functionSummary(rose_addr_t entryVa) const;
443  const VarDetail& varDetail(const InstructionSemantics2::BaseSemantics::StatePtr &state, const std::string &varName) const;
446  const VarDetails& varDetails(const InstructionSemantics2::BaseSemantics::StatePtr &state) const;
450  // Private supporting functions
452 private:
453  static rose_addr_t virtualAddress(const Partitioner2::ControlFlowGraph::ConstVertexIterator &vertex);
455  void insertCallSummary(const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathsCallSite,
457  const Partitioner2::ControlFlowGraph::ConstEdgeIterator &cfgCallEdge);
459  boost::filesystem::path emitPathGraph(size_t callId, size_t graphId); // emit paths graph to "rose-debug" directory
461  // Pop an edge (or more) from the path and follow some other edge. Also, adjust the SMT solver's stack in a similar
462  // way. The SMT solver will have an initial state, plus one pushed state per edge of the path.
463  void backtrack(Partitioner2::CfgPath &path /*in,out*/, const SmtSolver::Ptr&);
465  // Process one edge of a path to find any path constraints. When called, the cpu's current state should be the virtual
466  // machine state at it exists just prior to executing the target vertex of the specified edge.
467  //
468  // Returns a null pointer if the edge's condition is trivially unsatisfiable, such as when the edge points to a basic block
469  // whose address doesn't match the contents of the instruction pointer register after executing the edge's source
470  // block. Otherwise, returns a symbolic expression which must be tree if the edge is feasible. For trivially feasible
471  // edges, the return value is the constant 1 (one bit wide; i.e., true).
472  SymbolicExpr::Ptr pathEdgeConstraint(const Partitioner2::ControlFlowGraph::ConstEdgeIterator &pathEdge,
474 };
476 } // namespace
477 } // namespace
479 #endif
std::string firstAccessMode
How was variable first accessed ("read" or "write").
std::vector< rose_addr_t > summarizeFunctions
Functions to always summarize.
Sawyer::Container::Map< rose_addr_t, FunctionSummary > FunctionSummaries
Summaries for multiple functions.
MayOrMust mode
Check for addrs that may or must be null.
size_t vertexVisitLimit
Max times to visit a particular vertex in one path.
virtual InstructionSemantics2::SymbolicSemantics::SValuePtr returnValue(const FeasiblePath &analysis, const FunctionSummary &summary, const InstructionSemantics2::SymbolicSemantics::RiscOperatorsPtr &ops)=0
Return value for function.
static bool isAnyEndpointReachable(const Partitioner2::ControlFlowGraph &cfg, const Partitioner2::ControlFlowGraph::ConstVertexIterator &beginVertex, const Partitioner2::CfgConstVertexSet &endVertices)
Determine whether any ending vertex is reachable.
rose_addr_t address
Address of summarized function.
SemanticMemoryParadigm memoryParadigm
Type of memory state when there's a choice to be made.
void printPathVertex(std::ostream &out, const Partitioner2::ControlFlowGraph::Vertex &pathVertex, size_t &insnIdx) const
Print one vertex of a path for debugging.
Information stored per V_USER_DEFINED path vertex.
Sawyer::Container::Map< std::string, FeasiblePath::VarDetail > VarDetails
Variable detail by name.
void setSearchBoundary(const Partitioner2::Partitioner &partitioner, const Partitioner2::CfgConstVertexSet &cfgBeginVertices, const Partitioner2::CfgConstVertexSet &cfgEndVertices, const Partitioner2::CfgConstVertexSet &cfgAvoidVertices=Partitioner2::CfgConstVertexSet(), const Partitioner2::CfgConstEdgeSet &cfgAvoidEdges=Partitioner2::CfgConstEdgeSet())
Specify search boundary.
void depthFirstSearch(PathProcessor &pathProcessor)
Find all feasible paths.
virtual void processIndeterminateBlock(const Partitioner2::ControlFlowGraph::ConstVertexIterator &vertex, const InstructionSemantics2::BaseSemantics::DispatcherPtr &cpu, size_t pathInsnIndex)
Process an indeterminate block.
Base class for machine instructions.
Sawyer::Optional< rose_addr_t > returnFrom
This variable is the return value from the specified function.
Collection of streams.
Definition: Message.h:1579
boost::shared_ptr< class RiscOperators > RiscOperatorsPtr
Shared-ownership pointer to symbolic RISC operations.
Constructs a new feasible path analyzer.
virtual bool process(const FeasiblePath &analysis, const FunctionSummary &summary, const InstructionSemantics2::SymbolicSemantics::RiscOperatorsPtr &ops)=0
Invoked when the analysis traverses the summary.
void clear()
Erase all mappings.
Definition: BiMap.h:63
std::set< ControlFlowGraph::ConstVertexIterator > CfgConstVertexSet
Set of CFG vertex pointers.
virtual void processFunctionSummary(const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathsVertex, const InstructionSemantics2::BaseSemantics::DispatcherPtr &cpu, size_t pathInsnIndex)
Process a function summary vertex.
RegisterDescriptor REG_PATH
Descriptor of path pseudo-registers.
std::string name
Name of summarized function.
Sawyer::Optional< size_t > firstAccessIdx
Instruction position in path where this var was first read.
A collection of related switch declarations.
const VarDetails & varDetails(const InstructionSemantics2::BaseSemantics::StatePtr &state) const
Details about all variables by name.
Main namespace for the ROSE library.
Describes (part of) a physical CPU register.
size_t maxCallDepth
Max length of path in terms of function calls.
struct Rose::BinaryAnalysis::FeasiblePath::Settings::NullDeref nullDeref
Settings for null-dereference analysis.
const FunctionSummaries & functionSummaries() const
Function summary information.
Partitioner2::CfgConstVertexSet cfgToPaths(const Partitioner2::CfgConstVertexSet &) const
Convert CFG vertices to path vertices.
virtual bool shouldInline(const Partitioner2::CfgPath &path, const Partitioner2::ControlFlowGraph::ConstVertexIterator &cfgCallTarget)
Determines whether a function call should be inlined.
virtual void memoryIo(const FeasiblePath &analyzer, IoMode ioMode, const InstructionSemantics2::BaseSemantics::SValuePtr &addr, const InstructionSemantics2::BaseSemantics::SValuePtr &value, const InstructionSemantics2::BaseSemantics::RiscOperatorsPtr &ops)
Function invoked every time a memory reference occurs.
void settings(const Settings &s)
Property: Settings used by this analysis.
boost::shared_ptr< class State > StatePtr
Shared-ownership pointer to a semantic state.
size_t maxPathLength
Limit path length in terms of number of instructions.
bool constOnly
If true, check only constants or sets of constants.
Base class for callbacks for function summaries.
const VarDetail & varDetail(const InstructionSemantics2::BaseSemantics::StatePtr &state, const std::string &varName) const
Details about a variable.
boost::shared_ptr< class Dispatcher > DispatcherPtr
Shared-ownership pointer to a semantics instruction dispatcher.
std::vector< std::string > postConditionStrings
Additional post conditions as strings to be parsed.
Construct empty function summary.
bool nonAddressIsFeasible
Indeterminate/undiscovered vertices are feasible?
size_t maxRecursionDepth
Max path length in terms of recursive function calls.
boost::shared_ptr< class RiscOperators > RiscOperatorsPtr
Shared-ownership pointer to a RISC operators object.
void reset()
Reset to initial state without changing settings.
A path through a control flow graph.
Definition: CfgPath.h:15
std::set< ControlFlowGraph::ConstEdgeIterator > CfgConstEdgeSet
Set of CFG edge pointers.
Information about a variable seen on a path.
virtual Action found(const FeasiblePath &analyzer, const Partitioner2::CfgPath &path, const InstructionSemantics2::BaseSemantics::DispatcherPtr &cpu, const SmtSolverPtr &solver)
Function invoked whenever a complete path is found.
FunctionSummarizer::Ptr functionSummarizer() const
Property: Function summary handling.
virtual bool shouldSummarizeCall(const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathVertex, const Partitioner2::ControlFlowGraph &cfg, const Partitioner2::ControlFlowGraph::ConstVertexIterator &cfgCallTarget)
Determines whether a function call should be summarized instead of inlined.
bool isFunctionCall(const Partitioner2::ControlFlowGraph::ConstVertexIterator &) const
True if vertex is a function call.
Defines registers available for a particular architecture.
Definition: Registers.h:32
SearchMode searchMode
Method to use when searching for feasible paths.
size_t Id
Attribute identification.
Definition: Attribute.h:140
void clear()
Remove all vertices and edges.
Definition: Graph.h:1998
bool pathEndsWithFunctionCall(const Partitioner2::CfgPath &) const
True if path ends with a function call.
void printPath(std::ostream &out, const Partitioner2::CfgPath &) const
Print the path to the specified output stream.
virtual InstructionSemantics2::BaseSemantics::DispatcherPtr buildVirtualCpu(const Partitioner2::Partitioner &, const Partitioner2::CfgPath *, PathProcessor *, const SmtSolver::Ptr &)
Create the virtual CPU.
const Settings & settings() const
Property: Settings used by this analysis.
size_t memByteNumber
Byte number for memory access.
int64_t stackDelta
Stack delta for summarized function.
Base class for reference counted objects.
Definition: SharedObject.h:22
static Sawyer::CommandLine::SwitchGroup commandLineSwitches(Settings &settings)
Describe command-line switches.
virtual void nullDeref(const FeasiblePath &analyzer, const Partitioner2::CfgPath &path, IoMode ioMode, const InstructionSemantics2::BaseSemantics::SValuePtr &addr, SgAsmInstruction *)
Function invoked whenever a null pointer dereference is detected.
Sawyer::SharedPointer< FunctionSummarizer > Ptr
Reference counting pointer.
static Sawyer::Message::Facility mlog
Diagnostic output.
virtual void processVertex(const InstructionSemantics2::BaseSemantics::DispatcherPtr &cpu, const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathsVertex, size_t &pathInsnIndex)
Process one vertex.
Partitioner2::ControlFlowGraph::ConstVertexIterator pathToCfg(const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathVertex) const
Convert path vertex to a CFG vertex.
const FunctionSummary & functionSummary(rose_addr_t entryVa) const
Function summary information.
std::vector< SymbolicExpr::Ptr > postConditions
Additional constraints to be satisifed at the end of a path.
const Partitioner2::Partitioner & partitioner() const
Property: Partitioner currently in use.
virtual void processBasicBlock(const Partitioner2::BasicBlock::Ptr &bblock, const InstructionSemantics2::BaseSemantics::DispatcherPtr &cpu, size_t pathInsnIndex)
Process instructions for one basic block on the specified virtual CPU.
SymbolicExpr::Ptr memAddress
Address where variable is located.
Partitions instructions into basic blocks and functions.
Definition: Partitioner.h:293
std::string solverName
Type of SMT solver.
void functionSummarizer(const FunctionSummarizer::Ptr &f)
Property: Function summary handling.
Settings & settings()
Property: Settings used by this analysis.
size_t memSize
Size of total memory access in bytes.
Container associating values with keys.
Definition: Sawyer/Map.h:66
bool processFinalVertex
Whether to process the last vertex of the path.
SgAsmInstruction * firstAccessInsn
Instruction address where this var was first read.
Settings that control this analysis.
static void initDiagnostics()
Initialize diagnostic output.
bool check
If true, look for null dereferences along the paths.
virtual void setInitialState(const InstructionSemantics2::BaseSemantics::DispatcherPtr &cpu, const Partitioner2::ControlFlowGraph::ConstVertexIterator &pathsBeginVertex)
Initialize state for first vertex of path.
Sawyer::Optional< rose_addr_t > initialStackPtr
Concrete value to use for stack pointer register initial value.
Organization of semantic memory.
virtual void init(const FeasiblePath &analysis, FunctionSummary &summary, const Partitioner2::Function::Ptr &function, Partitioner2::ControlFlowGraph::ConstVertexIterator cfgCallTarget)=0
Invoked when a new summary is created.