ROSE  0.9.9.139
BinaryControlFlow.h
1 #ifndef ROSE_BinaryAnalysis_ControlFlow_H
2 #define ROSE_BinaryAnalysis_ControlFlow_H
3 
4 #include "Map.h"
5 #include "WorkLists.h"
6 #include "sageBuilderAsm.h"
7 
8 #include <boost/foreach.hpp>
9 #include <boost/graph/adjacency_list.hpp>
10 #include <boost/graph/reverse_graph.hpp>
11 #include <boost/graph/depth_first_search.hpp>
12 #include <Sawyer/GraphBoost.h>
13 
14 class SgNode;
15 class SgAsmBlock;
16 
17 namespace Rose {
18 
25 namespace BinaryAnalysis {
26 
137 class ControlFlow {
138 public:
139  ControlFlow()
140  : vertex_filter(NULL), edge_filter(NULL)
141  {}
142 
143 
162  typedef boost::adjacency_list<boost::setS, /* edges of each vertex in std::list */
163  boost::vecS, /* vertices in std::vector */
164  boost::bidirectionalS,
165  boost::property<boost::vertex_name_t, SgAsmBlock*> > BlockGraph;
166 
185  typedef boost::adjacency_list<boost::setS,
186  boost::vecS,
187  boost::bidirectionalS,
188  boost::property<boost::vertex_name_t, SgAsmInstruction*> > InsnGraph;
189 
192  typedef BlockGraph Graph;
193 
194 
195  /**********************************************************************************************************************
196  * Filters
197  **********************************************************************************************************************/
198 public:
199 
204  class VertexFilter {
205  public:
206  virtual ~VertexFilter() {}
207  virtual bool operator()(ControlFlow*, SgAsmNode*) = 0;
208  };
209 
214  class EdgeFilter {
215  public:
216  virtual ~EdgeFilter() {}
217  virtual bool operator()(ControlFlow*, SgAsmNode *source, SgAsmNode *target) = 0;
218  };
219 
226  void set_vertex_filter(VertexFilter *filter) { vertex_filter = filter; }
227  VertexFilter *get_vertex_filter() const { return vertex_filter; }
236  void set_edge_filter(EdgeFilter *filter) { edge_filter = filter; }
237  EdgeFilter *get_edge_filter() const { return edge_filter; }
245  bool is_vertex_filtered(SgAsmNode *bb_or_insn, VertexFilter *filter) { return filter && !(*filter)(this, bb_or_insn); }
246  bool is_vertex_filtered(SgAsmNode *bb_or_insn) { return is_vertex_filtered(bb_or_insn, vertex_filter); }
255  bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst, EdgeFilter *filter) {
256  return filter && !(*filter)(this, src, dst);
257  }
259  return is_edge_filtered(src, dst, edge_filter);
260  }
263 protected:
264  VertexFilter *vertex_filter;
265  EdgeFilter *edge_filter;
266 
267  /**********************************************************************************************************************
268  * Methods that modify the AST
269  **********************************************************************************************************************/
270 public:
271 
278  void clear_ast(SgNode *ast);
279 
294  template<class ControlFlowGraph>
295  void apply_to_ast(const ControlFlowGraph&);
296 
310  template<class ControlFlowGraph>
311  void cache_vertex_descriptors(const ControlFlowGraph&);
312 
313  /**********************************************************************************************************************
314  * Graph construction methods
315  **********************************************************************************************************************/
316 public:
317 
343  template<class ControlFlowGraph>
344  ControlFlowGraph build_block_cfg_from_ast(SgNode *root);
345 
346  template<class ControlFlowGraph>
347  void build_block_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
348 
349  template<class ControlFlowGraph>
350  ControlFlowGraph build_insn_cfg_from_ast(SgNode *root);
351 
352  template<class ControlFlowGraph>
353  void build_insn_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
358  template<class BlockCFG, class InsnCFG>
359  void explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi/*out*/);
365  template<class InsnCFG>
366  void fixup_fcall_fret(InsnCFG &cfg/*in,out*/, bool preserve_call_fallthrough_edges);
367 
377  template<class ControlFlowGraph>
378  ControlFlowGraph build_cg_from_ast(SgNode *root);
379 
380  template<class ControlFlowGraph>
381  void build_cg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
394  template<class ControlFlowGraph>
395  ControlFlowGraph copy(const ControlFlowGraph &src);
396 
397  template<class ControlFlowGraph>
398  void copy(const ControlFlowGraph &src, ControlFlowGraph &dst/*out*/);
401  /***********************************************************************************************************************
402  * Graph output
403  ***********************************************************************************************************************/
404 
406  template<class CFG>
408  std::vector<typename boost::graph_traits<CFG>::vertex_descriptor> vertices;
409  std::vector<typename boost::graph_traits<CFG>::edge_descriptor> edges;
410  };
411 
413  template<class CFG>
415  void operator()(std::ostream &o, typename boost::graph_traits<CFG>::vertex_descriptor vertex) const {}
416  };
417 
419  template<class CFG>
421  void operator()(std::ostream &o, typename boost::graph_traits<CFG>::edge_descriptor vertex) const {}
422  };
423 
426  template<typename CFG, class VertexPropertyWriter, class EdgePropertyWriter>
427  void write_graphviz(std::ostream&, const CFG&, const VertexPropertyWriter&, const EdgePropertyWriter&);
428 
429  template<typename CFG>
430  void write_graphviz(std::ostream &out, const CFG &cfg) {
432  }
433 
434  template<typename CFG, class VertexPropertyWriter>
435  void write_graphviz(std::ostream &out, const CFG &cfg, const VertexPropertyWriter &vpw) {
437  }
440  /**********************************************************************************************************************
441  * Miscellaneous members
442  **********************************************************************************************************************/
443 
444 private:
445  /* Visitor used by flow_order(). Declaring this in function scope results in boost errors (boost-1.42, 2011-05). */
446  template<class ControlFlowGraph>
447  struct FlowOrder: public boost::default_dfs_visitor {
448  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
449  typedef std::vector<Vertex> VertexList;
450  typedef std::vector<size_t> ReverseVertexList;
451  VertexList *forward_order;
452  FlowOrder(VertexList *forward_order): forward_order(forward_order) {}
453  void compute(const ControlFlowGraph &g, Vertex v0, ReverseVertexList *reverse_order);
454  void finish_vertex(Vertex v, ControlFlowGraph g);
455  };
456 
457  /* Helper class for build_block_cfg_from_ast(). Adds vertices to its 'cfg' member. Vertices are any SgAsmBlock that
458  * contains at least one SgAsmInstruction. */
459  template<class ControlFlowGraph>
460  class VertexInserter: public AstSimpleProcessing {
461  public:
462  ControlFlow *analyzer;
463  ControlFlowGraph &cfg;
464  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
465  typedef Map<SgAsmBlock*, Vertex> BlockVertexMap;
466  BlockVertexMap &bv_map;
467  VertexInserter(ControlFlow *analyzer, ControlFlowGraph &cfg, BlockVertexMap &bv_map)
468  : analyzer(analyzer), cfg(cfg), bv_map(bv_map)
469  {}
470  // Add basic block to graph if it hasn't been added already.
471  void conditionally_add_vertex(SgAsmBlock *block);
472 
473  void visit(SgNode *node) {
474  if (isSgAsmFunction(node)) {
475  // Add the function entry block before the other blocks of the function. This ensures that the entry block
476  // of a function has a lower vertex number than the other blocks of the function (the traversal is not
477  // guaranteed to visit the function basic blocks in that order).
478  conditionally_add_vertex(isSgAsmFunction(node)->get_entry_block());
479  } else {
480  conditionally_add_vertex(isSgAsmBlock(node));
481  }
482  }
483  };
484 
485 public:
518  template<class ControlFlowGraph>
519  std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
520  flow_order(const ControlFlowGraph&,
521  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start,
522  std::vector<size_t> *reverse_order=NULL);
523 
524 private:
525  /* Visitor used by return_blocks(). Declaring this in function scope results in boost errors (boost-1.42, 2011-05). */
526  template<class ControlFlowGraph>
527  struct ReturnBlocks: public boost::default_dfs_visitor {
528  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
529  typedef std::vector<Vertex> Vector;
530  Vector &blocks;
531  ReturnBlocks(Vector &blocks): blocks(blocks) {}
532  void finish_vertex(Vertex v, ControlFlowGraph g);
533  };
534 
535 public:
543  template<class ControlFlowGraph>
544  std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
545  return_blocks(const ControlFlowGraph &cfg,
546  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start);
547 };
548 
549 
550 /*******************************************************************************************************************************
551  * Functions
552  *******************************************************************************************************************************/
553 
556 template<class V, class E>
558 get_ast_node(const Sawyer::Container::Graph<V, E> &cfg, size_t vertexId) {
559  typedef typename Sawyer::Container::Graph<V, E> CFG;
560  typename CFG::ConstVertexValueIterator iter = cfg.findVertex(vertexId);
561  ASSERT_forbid2(iter==cfg.vertices().end(), "invalid vertex ID " + StringUtility::numberToString(vertexId));
562  return *iter;
563 }
564 
567 template<class V, class E, class AstNode>
568 void
569 put_ast_node(Sawyer::Container::Graph<V, E> &cfg, size_t vertexId, AstNode *astNode) {
570  typedef typename Sawyer::Container::Graph<V, E> CFG;
571  typename CFG::VertexValueIterator iter = cfg.findVertex(vertexId);
572  ASSERT_forbid2(iter==cfg.vertices().end(), "invalid vertex ID " + StringUtility::numberToString(vertexId));
573  *iter = astNode;
574 }
575 
576 // Sorry about this mess! The goal is to match only boost::adjacency_list graphs.
577 template<class A, class B, class C, class D, class E, class F, class G>
578 typename boost::property_traits<typename boost::property_map<boost::adjacency_list<A, B, C, D, E, F, G>,
579  boost::vertex_name_t>::type>::value_type
580 get_ast_node(const boost::adjacency_list<A, B, C, D, E, F, G> &cfg,
581  typename boost::graph_traits<boost::adjacency_list<A, B, C, D, E, F, G> >::vertex_descriptor vertex) {
582  return boost::get(boost::vertex_name, cfg, vertex);
583 }
584 
585 // Sorry about this mess! The goal is to match only boost::adjacency_list graphs.
586 template<class A, class B, class C, class D, class E, class F, class G>
587 void
588 put_ast_node(boost::adjacency_list<A, B, C, D, E, F, G> &cfg,
589  typename boost::graph_traits<boost::adjacency_list<A, B, C, D, E, F, G> >::vertex_descriptor vertex,
590  typename boost::property_traits<
591  typename boost::property_map<boost::adjacency_list<A, B, C, D, E, F, G>, boost::vertex_name_t>::type
592  >::value_type ast_node) {
593  boost::put(boost::vertex_name, cfg, vertex, ast_node);
594 }
595 
596 /******************************************************************************************************************************
597  * Function template definitions
598  ******************************************************************************************************************************/
599 
600 template<class ControlFlowGraph>
601 void
602 ControlFlow::apply_to_ast(const ControlFlowGraph &cfg)
603 {
604  typename boost::graph_traits<ControlFlowGraph>::vertex_iterator vi, vi_end;
605  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
606  SgAsmBlock *block = get_ast_node(cfg, *vi); // FIXME: Instruction CFGs not supported yet
607  if (!block || is_vertex_filtered(block))
608  continue;
609 
610  /* Delete old targets */
611  const SgAsmIntegerValuePtrList &targets = block->get_successors();
612  for (SgAsmIntegerValuePtrList::const_iterator ti=targets.begin(); ti!=targets.end(); ++ti)
613  delete *ti;
614 
615  /* Add new targets */
616  block->set_successors_complete(true);
617  block->get_successors().clear();
618  typename boost::graph_traits<ControlFlowGraph>::out_edge_iterator ei, ei_end;
619  for (boost::tie(ei, ei_end)=boost::out_edges(*vi, cfg); ei!=ei_end; ++ei) {
620  SgAsmBlock *target_block = get_ast_node(cfg, boost::target(*ei, cfg));
621  if (target_block && !is_edge_filtered(block, target_block)) {
622  SgAsmIntegerValueExpression *target = SageBuilderAsm::buildValueU64(target_block->get_address());
623  target->makeRelativeTo(target_block);
624  target->set_parent(block);
625  block->get_successors().push_back(target);
626  }
627  }
628  }
629 }
630 
631 template<class ControlFlowGraph>
632 void
633 ControlFlow::cache_vertex_descriptors(const ControlFlowGraph &cfg)
634 {
635  typename boost::graph_traits<ControlFlowGraph>::vertex_iterator vi, vi_end;
636  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
637  SgAsmBlock *block = get_ast_node(cfg, *vi); // FIXME: Instruction CFGs not supported yet
638  if (block && !is_vertex_filtered(block))
639  block->set_cached_vertex(*vi);
640  }
641 }
642 
643 template<class ControlFlowGraph>
644 void
645 ControlFlow::VertexInserter<ControlFlowGraph>::conditionally_add_vertex(SgAsmBlock *block)
646 {
647  if (block && block->has_instructions() && !analyzer->is_vertex_filtered(block) && !bv_map.exists(block)) {
648  Vertex vertex = boost::add_vertex(cfg);
649  bv_map[block] = vertex;
650  put_ast_node(cfg, vertex, block);
651  }
652 }
653 
654 template<class ControlFlowGraph>
655 void
656 ControlFlow::build_block_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg)
657 {
658  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
659  Vertex NO_VERTEX = boost::graph_traits<ControlFlowGraph>::null_vertex();
660  typedef Map<SgAsmBlock*, Vertex> BlockVertexMap;
661  BlockVertexMap bv_map;
662 
663  // Add the vertices
664  cfg.clear();
665  VertexInserter<ControlFlowGraph>(this, cfg, bv_map).traverse(root, preorder);
666 
667  // Mapping from block entry address to CFG vertex
668  Map<rose_addr_t, Vertex> addrToVertex;
669  for (typename BlockVertexMap::iterator bvi=bv_map.begin(); bvi!=bv_map.end(); ++bvi)
670  addrToVertex[bvi->first->get_address()] = bvi->second;
671 
672  // Add the edges
673  BOOST_FOREACH (Vertex sourceVertex, boost::vertices(cfg)) {
674  SgAsmBlock *sourceBlock = get_ast_node(cfg, sourceVertex);
675  BOOST_FOREACH (SgAsmIntegerValueExpression *integerValue, sourceBlock->get_successors()) {
676  Vertex targetVertex = addrToVertex.get_value_or(integerValue->get_absoluteValue(), NO_VERTEX);
677  if (targetVertex!=NO_VERTEX) {
678  SgAsmBlock *targetBlock = get_ast_node(cfg, targetVertex);
679  assert(targetBlock!=NULL); // since we have a vertex, there must be an SgAsmBlock!
680  if (!is_edge_filtered(sourceBlock, targetBlock))
681  boost::add_edge(sourceVertex, targetVertex, cfg);
682  }
683  }
684  }
685 }
686 
687 template<class ControlFlowGraph>
688 void
689 ControlFlow::build_insn_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg)
690 {
691  BlockGraph cfgb;
692  build_block_cfg_from_ast(root, cfgb);
693  explode_blocks(cfgb, cfg);
694  bool preserve_call_fallthrough_edges = false;
695  fixup_fcall_fret(cfg, preserve_call_fallthrough_edges);
696 }
697 
698 template<class ControlFlowGraph>
699 void
700 ControlFlow::build_cg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/)
701 {
702  struct T1: public EdgeFilter {
703  EdgeFilter *parent;
704  T1(EdgeFilter *parent): parent(parent) {}
705  bool operator()(ControlFlow *analyzer, SgAsmNode *src, SgAsmNode *dst) {
706  SgAsmFunction *src_func = SageInterface::getEnclosingNode<SgAsmFunction>(src, true);
707  SgAsmBlock *dst_block = SageInterface::getEnclosingNode<SgAsmBlock>(dst, true);
708  SgAsmFunction *dst_func = SageInterface::getEnclosingNode<SgAsmFunction>(dst_block);
709  if (!src_func || !dst_func || dst_block!=dst_func->get_entry_block()) {
710  return false;
711  } else if (src_func!=dst_func) {
712  // inter-function call, not a return edge
713  } else {
714  // FIXME: this might not actually be a recursive call [Robb P. Matzke 2013-09-05]
715  }
716  return parent ? (*parent)(analyzer, src, dst) : true;
717  }
718  };
719 
720  EdgeFilter *parent = get_edge_filter();
721  T1 edge_filter(parent);
722  try {
723  set_edge_filter(&edge_filter);
724  build_block_cfg_from_ast(root, cfg);
725  } catch (...) {
726  set_edge_filter(parent);
727  throw;
728  }
729  set_edge_filter(parent);
730 }
731 
732 template<class ControlFlowGraph>
733 void
734 ControlFlow::copy(const ControlFlowGraph &src, ControlFlowGraph &dst/*out*/)
735 {
736  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
737  Vertex NO_VERTEX = boost::graph_traits<ControlFlowGraph>::null_vertex();
738 
739  dst.clear();
740  std::vector<Vertex> src_to_dst(boost::num_vertices(src), NO_VERTEX);
741 
742  typename boost::graph_traits<const ControlFlowGraph>::vertex_iterator vi, vi_end;
743  for (boost::tie(vi, vi_end)=boost::vertices(src); vi!=vi_end; ++vi) {
744  SgAsmNode *node = get_ast_node(src, *vi);
745  if (!is_vertex_filtered(node)) {
746  src_to_dst[*vi] = boost::add_vertex(dst);
747  put_ast_node(dst, src_to_dst[*vi], get_ast_node(src, *vi));
748  }
749  }
750 
751  typename boost::graph_traits<const ControlFlowGraph>::edge_iterator ei, ei_end;
752  for (boost::tie(ei, ei_end)=boost::edges(src); ei!=ei_end; ++ei) {
753  if (NO_VERTEX!=src_to_dst[boost::source(*ei, src)] && NO_VERTEX!=src_to_dst[boost::target(*ei, src)]) {
754  SgAsmNode *node1 = get_ast_node(src, boost::source(*ei, src));
755  SgAsmNode *node2 = get_ast_node(src, boost::target(*ei, src));
756  if (!is_edge_filtered(node1, node2))
757  boost::add_edge(src_to_dst[boost::source(*ei, src)], src_to_dst[boost::target(*ei, src)], dst);
758  }
759  }
760 }
761 
762 template<class ControlFlowGraph>
763 ControlFlowGraph
764 ControlFlow::copy(const ControlFlowGraph &src)
765 {
766  ControlFlowGraph dst;
767  copy(src, dst);
768  return dst;
769 }
770 
771 template<class BlockCFG, class InsnCFG>
772 void
773 ControlFlow::explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi/*out*/)
774 {
775  // BlockCFG is the basic-block binary control flow graph
776  typedef typename boost::graph_traits<const BlockCFG>::vertex_descriptor BlockCFG_Vertex;
777  typedef typename boost::graph_traits<const BlockCFG>::vertex_iterator BlockCFG_VertexIterator;
778  typedef typename boost::graph_traits<const BlockCFG>::edge_iterator BlockCFG_EdgeIterator;
779 
780  // InsnCFG is the instruction binary control flow graph--it points to instructions rather than basic blocks, and changes
781  // some edges regarding function calls.
782  typedef typename boost::graph_traits<InsnCFG>::vertex_descriptor InsnCFG_Vertex;
783  typedef std::pair<InsnCFG_Vertex, InsnCFG_Vertex> InsnCFG_VertexPair;
784 
785  // Expand the cfgb basic blocks to create a cfgi that has instructions instead of blocks, and add the intra-block edges
786  cfgi.clear();
787  Map<BlockCFG_Vertex, InsnCFG_VertexPair> vertex_translation; // enter and leave instructions for each of the blocks in cfgb
788  {
789  BlockCFG_VertexIterator vi, vi_end;
790  for (boost::tie(vi, vi_end)=boost::vertices(cfgb); vi!=vi_end; ++vi) {
791  SgAsmBlock *blk = get_ast_node(cfgb, *vi);
792  const SgAsmStatementPtrList &insns = blk->get_statementList();
793  assert(!insns.empty());
794  InsnCFG_Vertex enter_vertex = boost::graph_traits<InsnCFG>::null_vertex();
795  InsnCFG_Vertex prev_vertex = boost::graph_traits<InsnCFG>::null_vertex();
796  for (SgAsmStatementPtrList::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
797  SgAsmInstruction *insn = isSgAsmInstruction(*ii);
798  assert(insn!=NULL); // basic blocks contain only instructions, no other type of asm statement
799  InsnCFG_Vertex vertex = boost::add_vertex(cfgi);
800  put_ast_node(cfgi, vertex, insn);
801  if (ii==insns.begin()) {
802  enter_vertex = vertex;
803  } else {
804  boost::add_edge(prev_vertex, vertex, cfgi);
805  }
806  prev_vertex = vertex;
807  }
808  assert(prev_vertex!=boost::graph_traits<InsnCFG>::null_vertex()); // basic block had no instructions but was in CFG!
809  vertex_translation[*vi] = InsnCFG_VertexPair(enter_vertex, prev_vertex);
810  }
811  }
812 
813  // Insert the edges from cfgb. The corresponding edge in cfgi must emanate from the final instruction of the source basic
814  // block and enter at the first instruction of the target basic block.
815  {
816  BlockCFG_EdgeIterator ei, ei_end;
817  for (boost::tie(ei, ei_end)=boost::edges(cfgb); ei!=ei_end; ++ei) {
818  InsnCFG_Vertex src_leave_vertex = vertex_translation.get_one(boost::source(*ei, cfgb)).second;
819  InsnCFG_Vertex dst_enter_vertex = vertex_translation.get_one(boost::target(*ei, cfgb)).first;
820  assert(src_leave_vertex!=boost::graph_traits<InsnCFG>::null_vertex());
821  assert(dst_enter_vertex!=boost::graph_traits<InsnCFG>::null_vertex());
822  boost::add_edge(src_leave_vertex, dst_enter_vertex, cfgi);
823  }
824  }
825 }
826 
827 template<class InsnCFG>
828 void
829 ControlFlow::fixup_fcall_fret(InsnCFG &cfg, bool preserve_call_fallthrough_edges)
830 {
831  typedef typename boost::graph_traits<InsnCFG>::vertex_descriptor CFG_Vertex;
832  typedef typename boost::graph_traits<InsnCFG>::vertex_iterator CFG_VertexIterator;
833  typedef typename boost::graph_traits<InsnCFG>::in_edge_iterator CFG_InEdgeIterator;
834  typedef std::pair<CFG_Vertex, CFG_Vertex> CFG_VertexPair;
835  typedef Map<SgAsmInstruction*, CFG_Vertex> InsnToVertex;
836  CFG_Vertex NO_VERTEX = boost::graph_traits<InsnCFG>::null_vertex();
837 
838  // Build mappings needed later and find the function return points. We just look for the x86
839  // RET instruction for now and assume that each one we find is a return if it has no control flow successors. They have no
840  // successors at this point because CFG1 didn't have any.
841  InstructionMap insns;
842  InsnToVertex insn_to_vertex;
843  std::vector<bool> isret(boost::num_vertices(cfg), false);
844  {
845  CFG_VertexIterator vi, vi_end;
846  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
847  SgAsmInstruction *insn = get_ast_node(cfg, *vi);
848  insns[insn->get_address()] = insn;
849  insn_to_vertex[insn] = *vi;
850 
851  if (0==boost::out_degree(*vi, cfg)) {
852  // FIXME: Architecture-specific code here
853  if (SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(insn)) {
854  isret[*vi] = x86_ret==insn_x86->get_kind();
855  }
856  }
857  }
858  }
859 
860  // Return the entry vertex for a function that owns the indicated instruction
861  struct FunctionEntryVertex {
862  const InsnToVertex &insn_to_vertex;
863  const InstructionMap &imap;
864  FunctionEntryVertex(const InsnToVertex &insn_to_vertex, const InstructionMap &imap)
865  : insn_to_vertex(insn_to_vertex), imap(imap) {}
866  CFG_Vertex operator()(SgAsmInstruction *insn) {
867  SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn, true);
868  SgAsmInstruction *entry_insn = imap.get_one(func->get_entry_va());
869  CFG_Vertex entry_vertex = insn_to_vertex.get_one(entry_insn);
870  return entry_vertex;
871  }
872  } function_entry_vertex(insn_to_vertex, insns);
873 
874  // Process each return site in order to add edges from the return site to the vertex representing the return address
875  std::vector<CFG_VertexPair> edges_to_insert, edges_to_erase;
876  {
877  CFG_VertexIterator vi, vi_end;
878  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
879  CFG_Vertex returner_vertex = *vi;
880  if (!isret[returner_vertex])
881  continue;
882  SgAsmInstruction *returner_insn = get_ast_node(cfg, returner_vertex);
883 
884  // Find all of the true call sites for the function that owns the returner instruction (e.g., RET) by recursively
885  // following inter-function CFG edges until we find the true calls (those edges that follow CALL semantics).
886  // Inter-function CFG edges can represent true calls or simply inter-function branches such as thunks. We have to
887  // gather up the information without adding it to the CFG yet (can't add while we're iterating)
888  std::vector<bool> seen(boost::num_vertices(cfg), false);
889  WorkList<CFG_Vertex> worklist; // targets of inter-function CFG edges; function callees
890  worklist.push(function_entry_vertex(returner_insn));
891  while (!worklist.empty()) {
892  CFG_Vertex callee_vertex = worklist.shift();
893  CFG_InEdgeIterator ei, ei_end;
894  for (boost::tie(ei, ei_end)=boost::in_edges(callee_vertex, cfg); ei!=ei_end; ++ei) {
895  CFG_Vertex caller_vertex = boost::source(*ei, cfg); // caller is a inter-function call or branch site
896  if (!seen[caller_vertex]) {
897  seen[caller_vertex] = true;
898  SgAsmInstruction *caller_insn = get_ast_node(cfg, caller_vertex);
899  SgAsmBlock *caller_block = SageInterface::getEnclosingNode<SgAsmBlock>(caller_insn);
900  assert(caller_block!=NULL);
901  rose_addr_t target_va, returnee_va; // returnee_va is usually the call's fall-through address
902  if (caller_block->is_function_call(target_va/*out*/, returnee_va/*out*/)) {
903  // This is a true call, so we need to add a return edge from the return instruction (the
904  // "returner") to what is probably the fall-through address of the call site (the returnee).
905  SgAsmInstruction *returnee_insn = insns.get_value_or(returnee_va, NULL);
906  CFG_Vertex returnee_vertex = insn_to_vertex.get_value_or(returnee_insn, NO_VERTEX);
907  if (returnee_vertex!=NO_VERTEX) {
908  edges_to_insert.push_back(CFG_VertexPair(returner_vertex, returnee_vertex));
909  edges_to_erase.push_back(CFG_VertexPair(caller_vertex, returnee_vertex));
910  }
911  } else {
912  // This is a non-call inter-function edge; probably a thunk. We need to find its call sites and add
913  // the returnee addresses (call fall throughs) to the returnee addresses of the RET we're
914  // processing.
915  worklist.push(function_entry_vertex(caller_insn));
916  }
917  }
918  }
919  }
920  }
921  }
922 
923  // Erase and insert edges now that we're done iterating.
924  if (!preserve_call_fallthrough_edges) {
925  for (size_t i=0; i<edges_to_erase.size(); ++i)
926  boost::remove_edge(edges_to_erase[i].first, edges_to_erase[i].second, cfg);
927  }
928  for (size_t i=0; i<edges_to_insert.size(); ++i)
929  boost::add_edge(edges_to_insert[i].first, edges_to_insert[i].second, cfg);
930 }
931 
932 template<class ControlFlowGraph>
933 void
934 ControlFlow::FlowOrder<ControlFlowGraph>::compute(const ControlFlowGraph &g, Vertex v0,
935  ReverseVertexList *reverse_order) {
936  forward_order->clear();
937  std::vector<boost::default_color_type> colors(boost::num_vertices(g), boost::white_color);
938  boost::depth_first_visit(g, v0, *this, &(colors[0]));
939  assert(!forward_order->empty()); /* it should at least contain v0 */
940  std::reverse(forward_order->begin(), forward_order->end());
941  if (reverse_order) {
942  reverse_order->clear();
943  reverse_order->resize(boost::num_vertices(g), (size_t)(-1));
944  for (size_t i=0; i<forward_order->size(); i++)
945  (*reverse_order)[(*forward_order)[i]] = i;
946  }
947 }
948 
949 template<class ControlFlowGraph>
950 void
951 ControlFlow::FlowOrder<ControlFlowGraph>::finish_vertex(Vertex v, ControlFlowGraph g) {
952  forward_order->push_back(v);
953 }
954 
955 template<class ControlFlowGraph>
956 std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
957 ControlFlow::flow_order(const ControlFlowGraph &cfg,
958  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start,
959  std::vector<size_t> *reverse_order/*=NULL*/)
960 {
961  std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor> forward_order;
962  FlowOrder<ControlFlowGraph>(&forward_order).compute(cfg, start, reverse_order);
963  return forward_order;
964 }
965 
966 template<class ControlFlowGraph>
967 void
968 ControlFlow::ReturnBlocks<ControlFlowGraph>::finish_vertex(Vertex v, ControlFlowGraph g)
969 {
970  typename boost::graph_traits<ControlFlowGraph>::out_edge_iterator ei, ei_end;
971  boost::tie(ei, ei_end) = boost::out_edges(v, g);
972  if (ei==ei_end)
973  blocks.push_back(v);
974 }
975 
976 template<class ControlFlowGraph>
977 std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
978 ControlFlow::return_blocks(const ControlFlowGraph &cfg,
979  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start)
980 {
981  typename ReturnBlocks<ControlFlowGraph>::Vector result;
982  ReturnBlocks<ControlFlowGraph> visitor(result);
983  std::vector<boost::default_color_type> colors(boost::num_vertices(cfg), boost::white_color);
984  boost::depth_first_visit(cfg, start, visitor, &(colors[0]));
985  return result;
986 }
987 
988 template<class ControlFlowGraph>
989 ControlFlowGraph
991 {
992  ControlFlowGraph cfg;
993  build_block_cfg_from_ast(root, cfg);
994  return cfg;
995 }
996 
997 template<class ControlFlowGraph>
998 ControlFlowGraph
1000 {
1001  ControlFlowGraph cfg;
1002  build_insn_cfg_from_ast(root, cfg);
1003  return cfg;
1004 }
1005 
1006 template<class ControlFlowGraph>
1007 ControlFlowGraph
1009 {
1010  ControlFlowGraph cfg;
1011  build_cg_from_ast(root, cfg);
1012  return cfg;
1013 }
1014 
1016 template<typename CFG, class VertexPropertyWriter, class EdgePropertyWriter>
1017 void
1018 ControlFlow::write_graphviz(std::ostream &out, const CFG &cfg,
1019  const VertexPropertyWriter &vpw, const EdgePropertyWriter &epw)
1020 {
1021  // typedef typename boost::graph_traits<CFG>::vertex_descriptor CFG_Vertex;
1022  typedef typename boost::graph_traits<CFG>::edge_descriptor CFG_Edge;
1023  typedef typename boost::graph_traits<CFG>::vertex_iterator CFG_VertexIterator;
1024  typedef typename boost::graph_traits<CFG>::out_edge_iterator CFG_OutEdgeIterator;
1025 
1026  // Partition the graph into functions and inter-function edges
1028  Functions funcs;
1029  std::vector<CFG_Edge> interfunc_edges;
1030  CFG_VertexIterator vi, vi_end;
1031  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
1032  SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(get_ast_node(cfg, *vi), true);
1033  FunctionSubgraphInfo<CFG> &f = funcs[func];
1034  f.vertices.push_back(*vi);
1035  CFG_OutEdgeIterator ei, ei_end;
1036  for (boost::tie(ei, ei_end)=boost::out_edges(*vi, cfg); ei!=ei_end; ++ei) {
1037  SgNode *tgt_node = get_ast_node(cfg, boost::target(*ei, cfg));
1038  SgAsmFunction *tgt_func = SageInterface::getEnclosingNode<SgAsmFunction>(tgt_node, true);
1039  if (tgt_func==func) {
1040  f.edges.push_back(*ei);
1041  } else {
1042  interfunc_edges.push_back(*ei);
1043  }
1044  }
1045  }
1046 
1047  // Output subgraph info, each function in its own cluster
1048  out <<"digraph G {\n";
1049  for (typename Functions::iterator fi=funcs.begin(); fi!=funcs.end(); ++fi) {
1050  FunctionSubgraphInfo<CFG> &f = fi->second;
1051  if (!f.vertices.empty() || !f.edges.empty()) {
1052  SgNode *node = get_ast_node(cfg, f.vertices.front());
1053  SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(node, true);
1054  char cluster_name[64];
1055  sprintf(cluster_name, "cluster_F%" PRIx64, func->get_entry_va());
1056  out <<" subgraph " <<cluster_name <<" {\n"
1057  <<" style=filled;\n"
1058  <<" color=lightgrey;\n"
1059  <<" label=\"Function " <<StringUtility::addrToString(func->get_entry_va())
1060  <<(func->get_name().empty()?std::string(""):(" <"+func->get_name()+">")) <<"\";\n";
1061  for (size_t i=0; i<f.vertices.size(); ++i) {
1062  out <<" " <<f.vertices[i];
1063  vpw(out, f.vertices[i]);
1064  out <<";\n";
1065  }
1066  for (size_t i=0; i<f.edges.size(); ++i) {
1067  out <<" " <<boost::source(f.edges[i], cfg) <<"->" <<boost::target(f.edges[i], cfg);
1068  epw(out, f.edges[i]);
1069  out <<";\n";
1070  }
1071  out <<" }\n"; // subgraph
1072  }
1073  }
1074 
1075  // Inter-function edges
1076  for (size_t i=0; i<interfunc_edges.size(); ++i) {
1077  out <<" " <<boost::source(interfunc_edges[i], cfg) <<"->" <<boost::target(interfunc_edges[i], cfg);
1078  epw(out, interfunc_edges[i]);
1079  out <<";\n";
1080  }
1081  out <<"}\n"; // digraph
1082 }
1083 
1084 } // namespace
1085 } // namespace
1086 
1087 #endif
BlockGraph Graph
Default control flow graph.
void put_ast_node(Sawyer::Container::Graph< V, E > &cfg, size_t vertexId, AstNode *astNode)
Set the AST node associated with a vertex.
void write_graphviz(std::ostream &out, const CFG &cfg)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
Graph containing user-defined vertices and edges.
Definition: Graph.h:625
ROSE_UTIL_API std::string numberToString(long long)
Convert an integer to a string.
bool is_function_call(rose_addr_t &target_va, rose_addr_t &return_va)
Returns true if basic block appears to be a function call.
Instruction basic block.
Base class for all binary analysis IR nodes.
Class for traversing the AST.
void apply_to_ast(const ControlFlowGraph &)
Applies graph to AST.
void set_cached_vertex(size_t)
Property: Cached vertex for control flow graphs.
Base class for machine instructions.
bool is_vertex_filtered(SgAsmNode *bb_or_insn, VertexFilter *filter)
Determines if a vertex is filtered out.
SgAsmBlock * get_entry_block() const
Function entry basic block.
boost::adjacency_list< boost::setS, boost::vecS, boost::bidirectionalS, boost::property< boost::vertex_name_t, SgAsmInstruction * > > InsnGraph
Default instruction-based control flow graph.
void set_successors_complete(bool)
Property: Whether the successors list is complete.
void explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi)
Create an instruction control flow graph from a basic block control flow graph.
const T & get_value_or(const Key &key, const T &dflt) const
Convenience for getting a value from an Option.
Definition: Map.h:75
void set_parent(SgNode *parent)
All nodes in the AST contain a reference to a parent node.
List of things to work on.
Definition: WorkLists.h:58
ControlFlowGraph build_insn_cfg_from_ast(SgNode *root)
Builds a control flow graph for part of an AST.
Represents a synthesized function.
Sawyer::Container::Graph< V, E >::VertexValue get_ast_node(const Sawyer::Container::Graph< V, E > &cfg, size_t vertexId)
Return the AST node associated with a vertex.
ControlFlowGraph build_block_cfg_from_ast(SgNode *root)
Builds a control flow graph for part of an AST.
std::vector< typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor > flow_order(const ControlFlowGraph &, typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor start, std::vector< size_t > *reverse_order=NULL)
Orders nodes by depth first search reverse post order.
void write_graphviz(std::ostream &out, const CFG &cfg, const VertexPropertyWriter &vpw)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
ControlFlowGraph copy(const ControlFlowGraph &src)
Copies a graph while filtering.
boost::iterator_range< VertexIterator > vertices()
Iterators for all vertices.
Definition: Graph.h:1454
Main namespace for the ROSE library.
void fixup_fcall_fret(InsnCFG &cfg, bool preserve_call_fallthrough_edges)
Fix up a CFG by changing function call and return edges.
void clear_ast(SgNode *ast)
Clears successor information from the AST.
T shift()
Remove and return the item from the front of the work list.
Definition: WorkLists.h:236
void set_edge_filter(EdgeFilter *filter)
Manipulate the edge filter.
bool push(const T &, boost::tribool check_uniqueness=boost::logic::indeterminate)
Add an item to the back of the work list.
Definition: WorkLists.h:202
Base class for integer values.
This class represents the base class for all IR nodes within Sage III.
Definition: Cxx_Grammar.h:8322
ROSE_UTIL_API std::string addrToString(uint64_t value, size_t nbits=0)
Convert a virtual address to a string.
bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst)
Determines if an edge is filtered out.
ControlFlowGraph build_cg_from_ast(SgNode *root)
Builds a control flow graph with only function call edges.
const SgAsmIntegerValuePtrList & get_successors() const
Property: Control flow successors.
void makeRelativeTo(SgNode *baseNode)
Makes the value of this integer relative to some other addressable node.
bool has_instructions() const
Determines if a block contains instructions.
boost::adjacency_list< boost::setS, boost::vecS, boost::bidirectionalS, boost::property< boost::vertex_name_t, SgAsmBlock * > > BlockGraph
Default basic block control flow graph type.
VertexIterator findVertex(size_t id)
Finds the vertex with specified ID number.
Definition: Graph.h:1501
VertexFilter * get_vertex_filter() const
Manipulate the vertex filter.
Represents one Intel x86 machine instruction.
Extends std::map with methods that return optional values.
Definition: Map.h:10
rose_addr_t get_entry_va() const
Property: Primary entry address.
bool empty() const
Returns true if this work list is empty.
Definition: WorkLists.h:66
const SgAsmStatementPtrList & get_statementList() const
Property: Statements of which this block is composed.
std::vector< typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor > return_blocks(const ControlFlowGraph &cfg, typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor start)
Returns list of function return blocks.
bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst, EdgeFilter *filter)
Determines if an edge is filtered out.
Binary control flow analysis.
void write_graphviz(std::ostream &, const CFG &, const VertexPropertyWriter &, const EdgePropertyWriter &)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
V VertexValue
User-level data associated with vertices.
Definition: Graph.h:627
const T & get_one(const Key &key) const
Look up one value or throw an exception.
Definition: Map.h:58
uint64_t get_absoluteValue(size_t nbits=0) const
Returns the current absolute value zero filled to 64 bits.
bool is_vertex_filtered(SgAsmNode *bb_or_insn)
Determines if a vertex is filtered out.
void cache_vertex_descriptors(const ControlFlowGraph &)
Cache basic block vertex descriptors in AST.
List of vertices and intra-function edges for one function.
EdgeFilter * get_edge_filter() const
Manipulate the edge filter.
void set_vertex_filter(VertexFilter *filter)
Manipulate the vertex filter.