ROSE  0.11.51.0
ControlFlow.h
1 #ifndef ROSE_BinaryAnalysis_ControlFlow_H
2 #define ROSE_BinaryAnalysis_ControlFlow_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
5 
6 #include "Map.h"
7 #include "WorkLists.h"
8 #include "SageBuilderAsm.h"
9 
10 #include <boost/foreach.hpp>
11 #include <boost/graph/adjacency_list.hpp>
12 #include <boost/graph/reverse_graph.hpp>
13 #include <boost/graph/depth_first_search.hpp>
14 #include <Sawyer/GraphBoost.h>
15 
16 class SgNode;
17 class SgAsmBlock;
18 
19 namespace Rose {
20 
27 namespace BinaryAnalysis {
28 
139 class ControlFlow {
140 public:
141  ControlFlow()
142  : vertex_filter(NULL), edge_filter(NULL)
143  {}
144 
145 
164  typedef boost::adjacency_list<boost::setS, /* edges of each vertex in std::list */
165  boost::vecS, /* vertices in std::vector */
166  boost::bidirectionalS,
167  boost::property<boost::vertex_name_t, SgAsmBlock*> > BlockGraph;
168 
187  typedef boost::adjacency_list<boost::setS,
188  boost::vecS,
189  boost::bidirectionalS,
190  boost::property<boost::vertex_name_t, SgAsmInstruction*> > InsnGraph;
191 
194  typedef BlockGraph Graph;
195 
196 
197  /**********************************************************************************************************************
198  * Filters
199  **********************************************************************************************************************/
200 public:
201 
206  class VertexFilter {
207  public:
208  virtual ~VertexFilter() {}
209  virtual bool operator()(ControlFlow*, SgAsmNode*) = 0;
210  };
211 
216  class EdgeFilter {
217  public:
218  virtual ~EdgeFilter() {}
219  virtual bool operator()(ControlFlow*, SgAsmNode *source, SgAsmNode *target) = 0;
220  };
221 
228  void set_vertex_filter(VertexFilter *filter) { vertex_filter = filter; }
229  VertexFilter *get_vertex_filter() const { return vertex_filter; }
238  void set_edge_filter(EdgeFilter *filter) { edge_filter = filter; }
239  EdgeFilter *get_edge_filter() const { return edge_filter; }
247  bool is_vertex_filtered(SgAsmNode *bb_or_insn, VertexFilter *filter) { return filter && !(*filter)(this, bb_or_insn); }
248  bool is_vertex_filtered(SgAsmNode *bb_or_insn) { return is_vertex_filtered(bb_or_insn, vertex_filter); }
257  bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst, EdgeFilter *filter) {
258  return filter && !(*filter)(this, src, dst);
259  }
261  return is_edge_filtered(src, dst, edge_filter);
262  }
265 protected:
266  VertexFilter *vertex_filter;
267  EdgeFilter *edge_filter;
268 
269  /**********************************************************************************************************************
270  * Methods that modify the AST
271  **********************************************************************************************************************/
272 public:
273 
280  void clear_ast(SgNode *ast);
281 
296  template<class ControlFlowGraph>
297  void apply_to_ast(const ControlFlowGraph&);
298 
312  template<class ControlFlowGraph>
313  void cache_vertex_descriptors(const ControlFlowGraph&);
314 
315  /**********************************************************************************************************************
316  * Graph construction methods
317  **********************************************************************************************************************/
318 public:
319 
345  template<class ControlFlowGraph>
346  ControlFlowGraph build_block_cfg_from_ast(SgNode *root);
347 
348  template<class ControlFlowGraph>
349  void build_block_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
350 
351  template<class ControlFlowGraph>
352  ControlFlowGraph build_insn_cfg_from_ast(SgNode *root);
353 
354  template<class ControlFlowGraph>
355  void build_insn_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
360  template<class BlockCFG, class InsnCFG>
361  void explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi/*out*/);
367  template<class InsnCFG>
368  void fixup_fcall_fret(InsnCFG &cfg/*in,out*/, bool preserve_call_fallthrough_edges);
369 
379  template<class ControlFlowGraph>
380  ControlFlowGraph build_cg_from_ast(SgNode *root);
381 
382  template<class ControlFlowGraph>
383  void build_cg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
396  template<class ControlFlowGraph>
397  ControlFlowGraph copy(const ControlFlowGraph &src);
398 
399  template<class ControlFlowGraph>
400  void copy(const ControlFlowGraph &src, ControlFlowGraph &dst/*out*/);
403  /***********************************************************************************************************************
404  * Graph output
405  ***********************************************************************************************************************/
406 
408  template<class CFG>
410  std::vector<typename boost::graph_traits<CFG>::vertex_descriptor> vertices;
411  std::vector<typename boost::graph_traits<CFG>::edge_descriptor> edges;
412  };
413 
415  template<class CFG>
417  void operator()(std::ostream &o, typename boost::graph_traits<CFG>::vertex_descriptor vertex) const {}
418  };
419 
421  template<class CFG>
423  void operator()(std::ostream &o, typename boost::graph_traits<CFG>::edge_descriptor vertex) const {}
424  };
425 
428  template<typename CFG, class VertexPropertyWriter, class EdgePropertyWriter>
429  void write_graphviz(std::ostream&, const CFG&, const VertexPropertyWriter&, const EdgePropertyWriter&);
430 
431  template<typename CFG>
432  void write_graphviz(std::ostream &out, const CFG &cfg) {
434  }
435 
436  template<typename CFG, class VertexPropertyWriter>
437  void write_graphviz(std::ostream &out, const CFG &cfg, const VertexPropertyWriter &vpw) {
439  }
442  /**********************************************************************************************************************
443  * Miscellaneous members
444  **********************************************************************************************************************/
445 
446 private:
447  /* Visitor used by flow_order(). Declaring this in function scope results in boost errors (boost-1.42, 2011-05). */
448  template<class ControlFlowGraph>
449  struct FlowOrder: public boost::default_dfs_visitor {
450  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
451  typedef std::vector<Vertex> VertexList;
452  typedef std::vector<size_t> ReverseVertexList;
453  VertexList *forward_order;
454  FlowOrder(VertexList *forward_order): forward_order(forward_order) {}
455  void compute(const ControlFlowGraph &g, Vertex v0, ReverseVertexList *reverse_order);
456  void finish_vertex(Vertex v, ControlFlowGraph g);
457  };
458 
459  /* Helper class for build_block_cfg_from_ast(). Adds vertices to its 'cfg' member. Vertices are any SgAsmBlock that
460  * contains at least one SgAsmInstruction. */
461  template<class ControlFlowGraph>
462  class VertexInserter: public AstSimpleProcessing {
463  public:
464  ControlFlow *analyzer;
465  ControlFlowGraph &cfg;
466  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
467  typedef Map<SgAsmBlock*, Vertex> BlockVertexMap;
468  BlockVertexMap &bv_map;
469  VertexInserter(ControlFlow *analyzer, ControlFlowGraph &cfg, BlockVertexMap &bv_map)
470  : analyzer(analyzer), cfg(cfg), bv_map(bv_map)
471  {}
472  // Add basic block to graph if it hasn't been added already.
473  void conditionally_add_vertex(SgAsmBlock *block);
474 
475  void visit(SgNode *node) {
476  if (isSgAsmFunction(node)) {
477  // Add the function entry block before the other blocks of the function. This ensures that the entry block
478  // of a function has a lower vertex number than the other blocks of the function (the traversal is not
479  // guaranteed to visit the function basic blocks in that order).
480  conditionally_add_vertex(isSgAsmFunction(node)->get_entry_block());
481  } else {
482  conditionally_add_vertex(isSgAsmBlock(node));
483  }
484  }
485  };
486 
487 public:
520  template<class ControlFlowGraph>
521  std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
522  flow_order(const ControlFlowGraph&,
523  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start,
524  std::vector<size_t> *reverse_order=NULL);
525 
526 private:
527  /* Visitor used by return_blocks(). Declaring this in function scope results in boost errors (boost-1.42, 2011-05). */
528  template<class ControlFlowGraph>
529  struct ReturnBlocks: public boost::default_dfs_visitor {
530  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
531  typedef std::vector<Vertex> Vector;
532  Vector &blocks;
533  ReturnBlocks(Vector &blocks): blocks(blocks) {}
534  void finish_vertex(Vertex v, ControlFlowGraph g);
535  };
536 
537 public:
545  template<class ControlFlowGraph>
546  std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
547  return_blocks(const ControlFlowGraph &cfg,
548  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start);
549 };
550 
551 
552 /*******************************************************************************************************************************
553  * Functions
554  *******************************************************************************************************************************/
555 
558 template<class V, class E>
560 get_ast_node(const Sawyer::Container::Graph<V, E> &cfg, size_t vertexId) {
561  typedef typename Sawyer::Container::Graph<V, E> CFG;
562  typename CFG::ConstVertexValueIterator iter = cfg.findVertex(vertexId);
563  ASSERT_forbid2(iter==cfg.vertices().end(), "invalid vertex ID " + StringUtility::numberToString(vertexId));
564  return *iter;
565 }
566 
569 template<class V, class E, class AstNode>
570 void
571 put_ast_node(Sawyer::Container::Graph<V, E> &cfg, size_t vertexId, AstNode *astNode) {
572  typedef typename Sawyer::Container::Graph<V, E> CFG;
573  typename CFG::VertexValueIterator iter = cfg.findVertex(vertexId);
574  ASSERT_forbid2(iter==cfg.vertices().end(), "invalid vertex ID " + StringUtility::numberToString(vertexId));
575  *iter = astNode;
576 }
577 
578 // Sorry about this mess! The goal is to match only boost::adjacency_list graphs.
579 template<class A, class B, class C, class D, class E, class F, class G>
580 typename boost::property_traits<typename boost::property_map<boost::adjacency_list<A, B, C, D, E, F, G>,
581  boost::vertex_name_t>::type>::value_type
582 get_ast_node(const boost::adjacency_list<A, B, C, D, E, F, G> &cfg,
583  typename boost::graph_traits<boost::adjacency_list<A, B, C, D, E, F, G> >::vertex_descriptor vertex) {
584  return boost::get(boost::vertex_name, cfg, vertex);
585 }
586 
587 // Sorry about this mess! The goal is to match only boost::adjacency_list graphs.
588 template<class A, class B, class C, class D, class E, class F, class G>
589 void
590 put_ast_node(boost::adjacency_list<A, B, C, D, E, F, G> &cfg,
591  typename boost::graph_traits<boost::adjacency_list<A, B, C, D, E, F, G> >::vertex_descriptor vertex,
592  typename boost::property_traits<
593  typename boost::property_map<boost::adjacency_list<A, B, C, D, E, F, G>, boost::vertex_name_t>::type
594  >::value_type ast_node) {
595  boost::put(boost::vertex_name, cfg, vertex, ast_node);
596 }
597 
598 /******************************************************************************************************************************
599  * Function template definitions
600  ******************************************************************************************************************************/
601 
602 template<class ControlFlowGraph>
603 void
604 ControlFlow::apply_to_ast(const ControlFlowGraph &cfg)
605 {
606  typename boost::graph_traits<ControlFlowGraph>::vertex_iterator vi, vi_end;
607  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
608  SgAsmBlock *block = get_ast_node(cfg, *vi); // FIXME: Instruction CFGs not supported yet
609  if (!block || is_vertex_filtered(block))
610  continue;
611 
612  /* Delete old targets */
613  const SgAsmIntegerValuePtrList &targets = block->get_successors();
614  for (SgAsmIntegerValuePtrList::const_iterator ti=targets.begin(); ti!=targets.end(); ++ti)
615  delete *ti;
616 
617  /* Add new targets */
618  block->set_successors_complete(true);
619  block->get_successors().clear();
620  typename boost::graph_traits<ControlFlowGraph>::out_edge_iterator ei, ei_end;
621  for (boost::tie(ei, ei_end)=boost::out_edges(*vi, cfg); ei!=ei_end; ++ei) {
622  SgAsmBlock *target_block = get_ast_node(cfg, boost::target(*ei, cfg));
623  if (target_block && !is_edge_filtered(block, target_block)) {
624  SgAsmIntegerValueExpression *target = SageBuilderAsm::buildValueU64(target_block->get_address());
625  target->makeRelativeTo(target_block);
626  target->set_parent(block);
627  block->get_successors().push_back(target);
628  }
629  }
630  }
631 }
632 
633 template<class ControlFlowGraph>
634 void
635 ControlFlow::cache_vertex_descriptors(const ControlFlowGraph &cfg)
636 {
637  typename boost::graph_traits<ControlFlowGraph>::vertex_iterator vi, vi_end;
638  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
639  SgAsmBlock *block = get_ast_node(cfg, *vi); // FIXME: Instruction CFGs not supported yet
640  if (block && !is_vertex_filtered(block))
641  block->set_cached_vertex(*vi);
642  }
643 }
644 
645 template<class ControlFlowGraph>
646 void
647 ControlFlow::VertexInserter<ControlFlowGraph>::conditionally_add_vertex(SgAsmBlock *block)
648 {
649  if (block && block->has_instructions() && !analyzer->is_vertex_filtered(block) && !bv_map.exists(block)) {
650  Vertex vertex = boost::add_vertex(cfg);
651  bv_map[block] = vertex;
652  put_ast_node(cfg, vertex, block);
653  }
654 }
655 
656 template<class ControlFlowGraph>
657 void
658 ControlFlow::build_block_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg)
659 {
660  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
661  Vertex NO_VERTEX = boost::graph_traits<ControlFlowGraph>::null_vertex();
662  typedef Map<SgAsmBlock*, Vertex> BlockVertexMap;
663  BlockVertexMap bv_map;
664 
665  // Add the vertices
666  cfg.clear();
667  VertexInserter<ControlFlowGraph>(this, cfg, bv_map).traverse(root, preorder);
668 
669  // Mapping from block entry address to CFG vertex
670  Map<rose_addr_t, Vertex> addrToVertex;
671  for (typename BlockVertexMap::iterator bvi=bv_map.begin(); bvi!=bv_map.end(); ++bvi)
672  addrToVertex[bvi->first->get_address()] = bvi->second;
673 
674  // Add the edges
675  BOOST_FOREACH (Vertex sourceVertex, boost::vertices(cfg)) {
676  SgAsmBlock *sourceBlock = get_ast_node(cfg, sourceVertex);
677  BOOST_FOREACH (SgAsmIntegerValueExpression *integerValue, sourceBlock->get_successors()) {
678  Vertex targetVertex = addrToVertex.get_value_or(integerValue->get_absoluteValue(), NO_VERTEX);
679  if (targetVertex!=NO_VERTEX) {
680  SgAsmBlock *targetBlock = get_ast_node(cfg, targetVertex);
681  assert(targetBlock!=NULL); // since we have a vertex, there must be an SgAsmBlock!
682  if (!is_edge_filtered(sourceBlock, targetBlock))
683  boost::add_edge(sourceVertex, targetVertex, cfg);
684  }
685  }
686  }
687 }
688 
689 template<class ControlFlowGraph>
690 void
691 ControlFlow::build_insn_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg)
692 {
693  BlockGraph cfgb;
694  build_block_cfg_from_ast(root, cfgb);
695  explode_blocks(cfgb, cfg);
696  bool preserve_call_fallthrough_edges = false;
697  fixup_fcall_fret(cfg, preserve_call_fallthrough_edges);
698 }
699 
700 template<class ControlFlowGraph>
701 void
702 ControlFlow::build_cg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/)
703 {
704  struct T1: public EdgeFilter {
705  EdgeFilter *parent;
706  T1(EdgeFilter *parent): parent(parent) {}
707  bool operator()(ControlFlow *analyzer, SgAsmNode *src, SgAsmNode *dst) {
708  SgAsmFunction *src_func = SageInterface::getEnclosingNode<SgAsmFunction>(src, true);
709  SgAsmBlock *dst_block = SageInterface::getEnclosingNode<SgAsmBlock>(dst, true);
710  SgAsmFunction *dst_func = SageInterface::getEnclosingNode<SgAsmFunction>(dst_block);
711  if (!src_func || !dst_func || dst_block!=dst_func->get_entry_block()) {
712  return false;
713  } else if (src_func!=dst_func) {
714  // inter-function call, not a return edge
715  } else {
716  // FIXME: this might not actually be a recursive call [Robb P. Matzke 2013-09-05]
717  }
718  return parent ? (*parent)(analyzer, src, dst) : true;
719  }
720  };
721 
722  EdgeFilter *parent = get_edge_filter();
723  T1 edge_filter(parent);
724  try {
725  set_edge_filter(&edge_filter);
726  build_block_cfg_from_ast(root, cfg);
727  } catch (...) {
728  set_edge_filter(parent);
729  throw;
730  }
731  set_edge_filter(parent);
732 }
733 
734 template<class ControlFlowGraph>
735 void
736 ControlFlow::copy(const ControlFlowGraph &src, ControlFlowGraph &dst/*out*/)
737 {
738  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
739  Vertex NO_VERTEX = boost::graph_traits<ControlFlowGraph>::null_vertex();
740 
741  dst.clear();
742  std::vector<Vertex> src_to_dst(boost::num_vertices(src), NO_VERTEX);
743 
744  typename boost::graph_traits<const ControlFlowGraph>::vertex_iterator vi, vi_end;
745  for (boost::tie(vi, vi_end)=boost::vertices(src); vi!=vi_end; ++vi) {
746  SgAsmNode *node = get_ast_node(src, *vi);
747  if (!is_vertex_filtered(node)) {
748  src_to_dst[*vi] = boost::add_vertex(dst);
749  put_ast_node(dst, src_to_dst[*vi], get_ast_node(src, *vi));
750  }
751  }
752 
753  typename boost::graph_traits<const ControlFlowGraph>::edge_iterator ei, ei_end;
754  for (boost::tie(ei, ei_end)=boost::edges(src); ei!=ei_end; ++ei) {
755  if (NO_VERTEX!=src_to_dst[boost::source(*ei, src)] && NO_VERTEX!=src_to_dst[boost::target(*ei, src)]) {
756  SgAsmNode *node1 = get_ast_node(src, boost::source(*ei, src));
757  SgAsmNode *node2 = get_ast_node(src, boost::target(*ei, src));
758  if (!is_edge_filtered(node1, node2))
759  boost::add_edge(src_to_dst[boost::source(*ei, src)], src_to_dst[boost::target(*ei, src)], dst);
760  }
761  }
762 }
763 
764 template<class ControlFlowGraph>
765 ControlFlowGraph
766 ControlFlow::copy(const ControlFlowGraph &src)
767 {
768  ControlFlowGraph dst;
769  copy(src, dst);
770  return dst;
771 }
772 
773 template<class BlockCFG, class InsnCFG>
774 void
775 ControlFlow::explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi/*out*/)
776 {
777  // BlockCFG is the basic-block binary control flow graph
778  typedef typename boost::graph_traits<const BlockCFG>::vertex_descriptor BlockCFG_Vertex;
779  typedef typename boost::graph_traits<const BlockCFG>::vertex_iterator BlockCFG_VertexIterator;
780  typedef typename boost::graph_traits<const BlockCFG>::edge_iterator BlockCFG_EdgeIterator;
781 
782  // InsnCFG is the instruction binary control flow graph--it points to instructions rather than basic blocks, and changes
783  // some edges regarding function calls.
784  typedef typename boost::graph_traits<InsnCFG>::vertex_descriptor InsnCFG_Vertex;
785  typedef std::pair<InsnCFG_Vertex, InsnCFG_Vertex> InsnCFG_VertexPair;
786 
787  // Expand the cfgb basic blocks to create a cfgi that has instructions instead of blocks, and add the intra-block edges
788  cfgi.clear();
789  Map<BlockCFG_Vertex, InsnCFG_VertexPair> vertex_translation; // enter and leave instructions for each of the blocks in cfgb
790  {
791  BlockCFG_VertexIterator vi, vi_end;
792  for (boost::tie(vi, vi_end)=boost::vertices(cfgb); vi!=vi_end; ++vi) {
793  SgAsmBlock *blk = get_ast_node(cfgb, *vi);
794  const SgAsmStatementPtrList &insns = blk->get_statementList();
795  assert(!insns.empty());
796  InsnCFG_Vertex enter_vertex = boost::graph_traits<InsnCFG>::null_vertex();
797  InsnCFG_Vertex prev_vertex = boost::graph_traits<InsnCFG>::null_vertex();
798  for (SgAsmStatementPtrList::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
799  SgAsmInstruction *insn = isSgAsmInstruction(*ii);
800  assert(insn!=NULL); // basic blocks contain only instructions, no other type of asm statement
801  InsnCFG_Vertex vertex = boost::add_vertex(cfgi);
802  put_ast_node(cfgi, vertex, insn);
803  if (ii==insns.begin()) {
804  enter_vertex = vertex;
805  } else {
806  boost::add_edge(prev_vertex, vertex, cfgi);
807  }
808  prev_vertex = vertex;
809  }
810  assert(prev_vertex!=boost::graph_traits<InsnCFG>::null_vertex()); // basic block had no instructions but was in CFG!
811  vertex_translation[*vi] = InsnCFG_VertexPair(enter_vertex, prev_vertex);
812  }
813  }
814 
815  // Insert the edges from cfgb. The corresponding edge in cfgi must emanate from the final instruction of the source basic
816  // block and enter at the first instruction of the target basic block.
817  {
818  BlockCFG_EdgeIterator ei, ei_end;
819  for (boost::tie(ei, ei_end)=boost::edges(cfgb); ei!=ei_end; ++ei) {
820  InsnCFG_Vertex src_leave_vertex = vertex_translation.get_one(boost::source(*ei, cfgb)).second;
821  InsnCFG_Vertex dst_enter_vertex = vertex_translation.get_one(boost::target(*ei, cfgb)).first;
822  assert(src_leave_vertex!=boost::graph_traits<InsnCFG>::null_vertex());
823  assert(dst_enter_vertex!=boost::graph_traits<InsnCFG>::null_vertex());
824  boost::add_edge(src_leave_vertex, dst_enter_vertex, cfgi);
825  }
826  }
827 }
828 
829 template<class InsnCFG>
830 void
831 ControlFlow::fixup_fcall_fret(InsnCFG &cfg, bool preserve_call_fallthrough_edges)
832 {
833  typedef typename boost::graph_traits<InsnCFG>::vertex_descriptor CFG_Vertex;
834  typedef typename boost::graph_traits<InsnCFG>::vertex_iterator CFG_VertexIterator;
835  typedef typename boost::graph_traits<InsnCFG>::in_edge_iterator CFG_InEdgeIterator;
836  typedef std::pair<CFG_Vertex, CFG_Vertex> CFG_VertexPair;
837  typedef Map<SgAsmInstruction*, CFG_Vertex> InsnToVertex;
838  CFG_Vertex NO_VERTEX = boost::graph_traits<InsnCFG>::null_vertex();
839 
840  // Build mappings needed later and find the function return points. We just look for the x86
841  // RET instruction for now and assume that each one we find is a return if it has no control flow successors. They have no
842  // successors at this point because CFG1 didn't have any.
843  InstructionMap insns;
844  InsnToVertex insn_to_vertex;
845  std::vector<bool> isret(boost::num_vertices(cfg), false);
846  {
847  CFG_VertexIterator vi, vi_end;
848  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
849  SgAsmInstruction *insn = get_ast_node(cfg, *vi);
850  insns[insn->get_address()] = insn;
851  insn_to_vertex[insn] = *vi;
852 
853  if (0==boost::out_degree(*vi, cfg)) {
854  // FIXME: Architecture-specific code here
855  if (SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(insn)) {
856  isret[*vi] = x86_ret==insn_x86->get_kind();
857  }
858  }
859  }
860  }
861 
862  // Return the entry vertex for a function that owns the indicated instruction
863  struct FunctionEntryVertex {
864  const InsnToVertex &insn_to_vertex;
865  const InstructionMap &imap;
866  FunctionEntryVertex(const InsnToVertex &insn_to_vertex, const InstructionMap &imap)
867  : insn_to_vertex(insn_to_vertex), imap(imap) {}
868  CFG_Vertex operator()(SgAsmInstruction *insn) {
869  SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn, true);
870  SgAsmInstruction *entry_insn = imap.get_one(func->get_entry_va());
871  CFG_Vertex entry_vertex = insn_to_vertex.get_one(entry_insn);
872  return entry_vertex;
873  }
874  } function_entry_vertex(insn_to_vertex, insns);
875 
876  // Process each return site in order to add edges from the return site to the vertex representing the return address
877  std::vector<CFG_VertexPair> edges_to_insert, edges_to_erase;
878  {
879  CFG_VertexIterator vi, vi_end;
880  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
881  CFG_Vertex returner_vertex = *vi;
882  if (!isret[returner_vertex])
883  continue;
884  SgAsmInstruction *returner_insn = get_ast_node(cfg, returner_vertex);
885 
886  // Find all of the true call sites for the function that owns the returner instruction (e.g., RET) by recursively
887  // following inter-function CFG edges until we find the true calls (those edges that follow CALL semantics).
888  // Inter-function CFG edges can represent true calls or simply inter-function branches such as thunks. We have to
889  // gather up the information without adding it to the CFG yet (can't add while we're iterating)
890  std::vector<bool> seen(boost::num_vertices(cfg), false);
891  WorkList<CFG_Vertex> worklist; // targets of inter-function CFG edges; function callees
892  worklist.push(function_entry_vertex(returner_insn));
893  while (!worklist.empty()) {
894  CFG_Vertex callee_vertex = worklist.shift();
895  CFG_InEdgeIterator ei, ei_end;
896  for (boost::tie(ei, ei_end)=boost::in_edges(callee_vertex, cfg); ei!=ei_end; ++ei) {
897  CFG_Vertex caller_vertex = boost::source(*ei, cfg); // caller is a inter-function call or branch site
898  if (!seen[caller_vertex]) {
899  seen[caller_vertex] = true;
900  SgAsmInstruction *caller_insn = get_ast_node(cfg, caller_vertex);
901  SgAsmBlock *caller_block = SageInterface::getEnclosingNode<SgAsmBlock>(caller_insn);
902  assert(caller_block!=NULL);
903  rose_addr_t target_va, returnee_va; // returnee_va is usually the call's fall-through address
904  if (caller_block->is_function_call(target_va/*out*/, returnee_va/*out*/)) {
905  // This is a true call, so we need to add a return edge from the return instruction (the
906  // "returner") to what is probably the fall-through address of the call site (the returnee).
907  SgAsmInstruction *returnee_insn = insns.get_value_or(returnee_va, NULL);
908  CFG_Vertex returnee_vertex = insn_to_vertex.get_value_or(returnee_insn, NO_VERTEX);
909  if (returnee_vertex!=NO_VERTEX) {
910  edges_to_insert.push_back(CFG_VertexPair(returner_vertex, returnee_vertex));
911  edges_to_erase.push_back(CFG_VertexPair(caller_vertex, returnee_vertex));
912  }
913  } else {
914  // This is a non-call inter-function edge; probably a thunk. We need to find its call sites and add
915  // the returnee addresses (call fall throughs) to the returnee addresses of the RET we're
916  // processing.
917  worklist.push(function_entry_vertex(caller_insn));
918  }
919  }
920  }
921  }
922  }
923  }
924 
925  // Erase and insert edges now that we're done iterating.
926  if (!preserve_call_fallthrough_edges) {
927  for (size_t i=0; i<edges_to_erase.size(); ++i)
928  boost::remove_edge(edges_to_erase[i].first, edges_to_erase[i].second, cfg);
929  }
930  for (size_t i=0; i<edges_to_insert.size(); ++i)
931  boost::add_edge(edges_to_insert[i].first, edges_to_insert[i].second, cfg);
932 }
933 
934 template<class ControlFlowGraph>
935 void
936 ControlFlow::FlowOrder<ControlFlowGraph>::compute(const ControlFlowGraph &g, Vertex v0,
937  ReverseVertexList *reverse_order) {
938  forward_order->clear();
939  std::vector<boost::default_color_type> colors(boost::num_vertices(g), boost::white_color);
940  boost::depth_first_visit(g, v0, *this, &(colors[0]));
941  assert(!forward_order->empty()); /* it should at least contain v0 */
942  std::reverse(forward_order->begin(), forward_order->end());
943  if (reverse_order) {
944  reverse_order->clear();
945  reverse_order->resize(boost::num_vertices(g), INVALID_INDEX);
946  for (size_t i=0; i<forward_order->size(); i++)
947  (*reverse_order)[(*forward_order)[i]] = i;
948  }
949 }
950 
951 template<class ControlFlowGraph>
952 void
953 ControlFlow::FlowOrder<ControlFlowGraph>::finish_vertex(Vertex v, ControlFlowGraph g) {
954  forward_order->push_back(v);
955 }
956 
957 template<class ControlFlowGraph>
958 std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
959 ControlFlow::flow_order(const ControlFlowGraph &cfg,
960  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start,
961  std::vector<size_t> *reverse_order/*=NULL*/)
962 {
963  std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor> forward_order;
964  FlowOrder<ControlFlowGraph>(&forward_order).compute(cfg, start, reverse_order);
965  return forward_order;
966 }
967 
968 template<class ControlFlowGraph>
969 void
970 ControlFlow::ReturnBlocks<ControlFlowGraph>::finish_vertex(Vertex v, ControlFlowGraph g)
971 {
972  typename boost::graph_traits<ControlFlowGraph>::out_edge_iterator ei, ei_end;
973  boost::tie(ei, ei_end) = boost::out_edges(v, g);
974  if (ei==ei_end)
975  blocks.push_back(v);
976 }
977 
978 template<class ControlFlowGraph>
979 std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
980 ControlFlow::return_blocks(const ControlFlowGraph &cfg,
981  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start)
982 {
983  typename ReturnBlocks<ControlFlowGraph>::Vector result;
984  ReturnBlocks<ControlFlowGraph> visitor(result);
985  std::vector<boost::default_color_type> colors(boost::num_vertices(cfg), boost::white_color);
986  boost::depth_first_visit(cfg, start, visitor, &(colors[0]));
987  return result;
988 }
989 
990 template<class ControlFlowGraph>
991 ControlFlowGraph
993 {
994  ControlFlowGraph cfg;
995  build_block_cfg_from_ast(root, cfg);
996  return cfg;
997 }
998 
999 template<class ControlFlowGraph>
1000 ControlFlowGraph
1002 {
1003  ControlFlowGraph cfg;
1004  build_insn_cfg_from_ast(root, cfg);
1005  return cfg;
1006 }
1007 
1008 template<class ControlFlowGraph>
1009 ControlFlowGraph
1011 {
1012  ControlFlowGraph cfg;
1013  build_cg_from_ast(root, cfg);
1014  return cfg;
1015 }
1016 
1018 template<typename CFG, class VertexPropertyWriter, class EdgePropertyWriter>
1019 void
1020 ControlFlow::write_graphviz(std::ostream &out, const CFG &cfg,
1021  const VertexPropertyWriter &vpw, const EdgePropertyWriter &epw)
1022 {
1023  // typedef typename boost::graph_traits<CFG>::vertex_descriptor CFG_Vertex;
1024  typedef typename boost::graph_traits<CFG>::edge_descriptor CFG_Edge;
1025  typedef typename boost::graph_traits<CFG>::vertex_iterator CFG_VertexIterator;
1026  typedef typename boost::graph_traits<CFG>::out_edge_iterator CFG_OutEdgeIterator;
1027 
1028  // Partition the graph into functions and inter-function edges
1030  Functions funcs;
1031  std::vector<CFG_Edge> interfunc_edges;
1032  CFG_VertexIterator vi, vi_end;
1033  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
1034  SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(get_ast_node(cfg, *vi), true);
1035  FunctionSubgraphInfo<CFG> &f = funcs[func];
1036  f.vertices.push_back(*vi);
1037  CFG_OutEdgeIterator ei, ei_end;
1038  for (boost::tie(ei, ei_end)=boost::out_edges(*vi, cfg); ei!=ei_end; ++ei) {
1039  SgNode *tgt_node = get_ast_node(cfg, boost::target(*ei, cfg));
1040  SgAsmFunction *tgt_func = SageInterface::getEnclosingNode<SgAsmFunction>(tgt_node, true);
1041  if (tgt_func==func) {
1042  f.edges.push_back(*ei);
1043  } else {
1044  interfunc_edges.push_back(*ei);
1045  }
1046  }
1047  }
1048 
1049  // Output subgraph info, each function in its own cluster
1050  out <<"digraph G {\n";
1051  for (typename Functions::iterator fi=funcs.begin(); fi!=funcs.end(); ++fi) {
1052  FunctionSubgraphInfo<CFG> &f = fi->second;
1053  if (!f.vertices.empty() || !f.edges.empty()) {
1054  SgNode *node = get_ast_node(cfg, f.vertices.front());
1055  SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(node, true);
1056  char cluster_name[64];
1057  sprintf(cluster_name, "cluster_F%" PRIx64, func->get_entry_va());
1058  out <<" subgraph " <<cluster_name <<" {\n"
1059  <<" style=filled;\n"
1060  <<" color=lightgrey;\n"
1061  <<" label=\"Function " <<StringUtility::addrToString(func->get_entry_va())
1062  <<(func->get_name().empty()?std::string(""):(" <"+func->get_name()+">")) <<"\";\n";
1063  for (size_t i=0; i<f.vertices.size(); ++i) {
1064  out <<" " <<f.vertices[i];
1065  vpw(out, f.vertices[i]);
1066  out <<";\n";
1067  }
1068  for (size_t i=0; i<f.edges.size(); ++i) {
1069  out <<" " <<boost::source(f.edges[i], cfg) <<"->" <<boost::target(f.edges[i], cfg);
1070  epw(out, f.edges[i]);
1071  out <<";\n";
1072  }
1073  out <<" }\n"; // subgraph
1074  }
1075  }
1076 
1077  // Inter-function edges
1078  for (size_t i=0; i<interfunc_edges.size(); ++i) {
1079  out <<" " <<boost::source(interfunc_edges[i], cfg) <<"->" <<boost::target(interfunc_edges[i], cfg);
1080  epw(out, interfunc_edges[i]);
1081  out <<";\n";
1082  }
1083  out <<"}\n"; // digraph
1084 }
1085 
1086 } // namespace
1087 } // namespace
1088 
1089 #endif
1090 #endif
BlockGraph Graph
Default control flow graph.
Definition: ControlFlow.h:194
Default vertex property writer is a no-op.
Definition: ControlFlow.h:416
void put_ast_node(Sawyer::Container::Graph< V, E > &cfg, size_t vertexId, AstNode *astNode)
Set the AST node associated with a vertex.
Definition: ControlFlow.h:571
void write_graphviz(std::ostream &out, const CFG &cfg)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
Definition: ControlFlow.h:432
Graph containing user-defined vertices and edges.
Definition: Graph.h:625
ROSE_UTIL_API std::string numberToString(long long)
Convert an integer to a string.
bool is_function_call(rose_addr_t &target_va, rose_addr_t &return_va)
Returns true if basic block appears to be a function call.
Instruction basic block.
Base class for all binary analysis IR nodes.
Class for traversing the AST.
void apply_to_ast(const ControlFlowGraph &)
Applies graph to AST.
Definition: ControlFlow.h:604
void set_cached_vertex(size_t)
Property: Cached vertex for control flow graphs.
Base class for machine instructions.
bool is_vertex_filtered(SgAsmNode *bb_or_insn, VertexFilter *filter)
Determines if a vertex is filtered out.
Definition: ControlFlow.h:247
SgAsmBlock * get_entry_block() const
Function entry basic block.
boost::adjacency_list< boost::setS, boost::vecS, boost::bidirectionalS, boost::property< boost::vertex_name_t, SgAsmInstruction * > > InsnGraph
Default instruction-based control flow graph.
Definition: ControlFlow.h:190
void set_successors_complete(bool)
Property: Whether the successors list is complete.
void explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi)
Create an instruction control flow graph from a basic block control flow graph.
Definition: ControlFlow.h:775
const T & get_value_or(const Key &key, const T &dflt) const
Convenience for getting a value from an Option.
Definition: Map.h:75
void set_parent(SgNode *parent)
All nodes in the AST contain a reference to a parent node.
List of things to work on.
Definition: WorkLists.h:60
ControlFlowGraph build_insn_cfg_from_ast(SgNode *root)
Builds a control flow graph for part of an AST.
Definition: ControlFlow.h:1001
Represents a synthesized function.
Sawyer::Container::Graph< V, E >::VertexValue get_ast_node(const Sawyer::Container::Graph< V, E > &cfg, size_t vertexId)
Return the AST node associated with a vertex.
Definition: ControlFlow.h:560
ControlFlowGraph build_block_cfg_from_ast(SgNode *root)
Builds a control flow graph for part of an AST.
Definition: ControlFlow.h:992
std::vector< typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor > flow_order(const ControlFlowGraph &, typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor start, std::vector< size_t > *reverse_order=NULL)
Orders nodes by depth first search reverse post order.
Definition: ControlFlow.h:959
void write_graphviz(std::ostream &out, const CFG &cfg, const VertexPropertyWriter &vpw)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
Definition: ControlFlow.h:437
ControlFlowGraph copy(const ControlFlowGraph &src)
Copies a graph while filtering.
Definition: ControlFlow.h:766
boost::iterator_range< VertexIterator > vertices()
Iterators for all vertices.
Definition: Graph.h:1442
Main namespace for the ROSE library.
void fixup_fcall_fret(InsnCFG &cfg, bool preserve_call_fallthrough_edges)
Fix up a CFG by changing function call and return edges.
Definition: ControlFlow.h:831
void clear_ast(SgNode *ast)
Clears successor information from the AST.
T shift()
Remove and return the item from the front of the work list.
Definition: WorkLists.h:232
void set_edge_filter(EdgeFilter *filter)
Manipulate the edge filter.
Definition: ControlFlow.h:238
bool push(const T &, boost::tribool check_uniqueness=boost::logic::indeterminate)
Add an item to the back of the work list.
Definition: WorkLists.h:198
Base class for integer values.
This class represents the base class for all IR nodes within Sage III.
Definition: Cxx_Grammar.h:9555
ROSE_UTIL_API std::string addrToString(uint64_t value, size_t nbits=0)
Convert a virtual address to a string.
bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst)
Determines if an edge is filtered out.
Definition: ControlFlow.h:260
ControlFlowGraph build_cg_from_ast(SgNode *root)
Builds a control flow graph with only function call edges.
Definition: ControlFlow.h:1010
const SgAsmIntegerValuePtrList & get_successors() const
Property: Control flow successors.
void makeRelativeTo(SgNode *baseNode)
Makes the value of this integer relative to some other addressable node.
bool has_instructions() const
Determins if a block contains instructions.
boost::adjacency_list< boost::setS, boost::vecS, boost::bidirectionalS, boost::property< boost::vertex_name_t, SgAsmBlock * > > BlockGraph
Default basic block control flow graph type.
Definition: ControlFlow.h:167
VertexIterator findVertex(size_t id)
Finds the vertex with specified ID number.
Definition: Graph.h:1489
VertexFilter * get_vertex_filter() const
Manipulate the vertex filter.
Definition: ControlFlow.h:229
Represents one Intel x86 machine instruction.
Extends std::map with methods that return optional values.
Definition: Map.h:10
rose_addr_t get_entry_va() const
Property: Primary entry address.
bool empty() const
Returns true if this work list is empty.
Definition: WorkLists.h:68
const SgAsmStatementPtrList & get_statementList() const
Property: Statements of which this block is composed.
std::vector< typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor > return_blocks(const ControlFlowGraph &cfg, typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor start)
Returns list of function return blocks.
Definition: ControlFlow.h:980
bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst, EdgeFilter *filter)
Determines if an edge is filtered out.
Definition: ControlFlow.h:257
Binary control flow analysis.
Definition: ControlFlow.h:139
void write_graphviz(std::ostream &, const CFG &, const VertexPropertyWriter &, const EdgePropertyWriter &)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
Definition: ControlFlow.h:1020
Default edge property writer is a no-op.
Definition: ControlFlow.h:422
V VertexValue
User-level data associated with vertices.
Definition: Graph.h:627
const T & get_one(const Key &key) const
Look up one value or throw an exception.
Definition: Map.h:58
rose_addr_t get_address() const
Property: Starting virtual address.
uint64_t get_absoluteValue(size_t nbits=0) const
Returns the current absolute value zero filled to 64 bits.
bool is_vertex_filtered(SgAsmNode *bb_or_insn)
Determines if a vertex is filtered out.
Definition: ControlFlow.h:248
const size_t INVALID_INDEX(-1)
Invalid array index.
void cache_vertex_descriptors(const ControlFlowGraph &)
Cache basic block vertex descriptors in AST.
Definition: ControlFlow.h:635
List of vertices and intra-function edges for one function.
Definition: ControlFlow.h:409
EdgeFilter * get_edge_filter() const
Manipulate the edge filter.
Definition: ControlFlow.h:239
void set_vertex_filter(VertexFilter *filter)
Manipulate the vertex filter.
Definition: ControlFlow.h:228