ROSE  0.11.145.0
ControlFlow.h
1 #ifndef ROSE_BinaryAnalysis_ControlFlow_H
2 #define ROSE_BinaryAnalysis_ControlFlow_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
5 
6 #include "Map.h"
7 #include "WorkLists.h"
8 #include "SageBuilderAsm.h"
9 
10 #include <boost/foreach.hpp> // needed for iteration over boost::vertices
11 #include <boost/graph/adjacency_list.hpp>
12 #include <boost/graph/reverse_graph.hpp>
13 #include <boost/graph/depth_first_search.hpp>
14 #include <Sawyer/GraphBoost.h>
15 
16 class SgNode;
17 class SgAsmBlock;
18 
19 namespace Rose {
20 namespace BinaryAnalysis {
21 
132 class ControlFlow {
133 public:
134  ControlFlow()
135  : vertex_filter(NULL), edge_filter(NULL)
136  {}
137 
138 
157  typedef boost::adjacency_list<boost::setS, /* edges of each vertex in std::list */
158  boost::vecS, /* vertices in std::vector */
159  boost::bidirectionalS,
160  boost::property<boost::vertex_name_t, SgAsmBlock*> > BlockGraph;
161 
180  typedef boost::adjacency_list<boost::setS,
181  boost::vecS,
182  boost::bidirectionalS,
183  boost::property<boost::vertex_name_t, SgAsmInstruction*> > InsnGraph;
184 
187  typedef BlockGraph Graph;
188 
189 
190  /**********************************************************************************************************************
191  * Filters
192  **********************************************************************************************************************/
193 public:
194 
199  class VertexFilter {
200  public:
201  virtual ~VertexFilter() {}
202  virtual bool operator()(ControlFlow*, SgAsmNode*) = 0;
203  };
204 
209  class EdgeFilter {
210  public:
211  virtual ~EdgeFilter() {}
212  virtual bool operator()(ControlFlow*, SgAsmNode *source, SgAsmNode *target) = 0;
213  };
214 
221  void set_vertex_filter(VertexFilter *filter) { vertex_filter = filter; }
222  VertexFilter *get_vertex_filter() const { return vertex_filter; }
231  void set_edge_filter(EdgeFilter *filter) { edge_filter = filter; }
232  EdgeFilter *get_edge_filter() const { return edge_filter; }
240  bool is_vertex_filtered(SgAsmNode *bb_or_insn, VertexFilter *filter) { return filter && !(*filter)(this, bb_or_insn); }
241  bool is_vertex_filtered(SgAsmNode *bb_or_insn) { return is_vertex_filtered(bb_or_insn, vertex_filter); }
250  bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst, EdgeFilter *filter) {
251  return filter && !(*filter)(this, src, dst);
252  }
254  return is_edge_filtered(src, dst, edge_filter);
255  }
258 protected:
259  VertexFilter *vertex_filter;
260  EdgeFilter *edge_filter;
261 
262  /**********************************************************************************************************************
263  * Methods that modify the AST
264  **********************************************************************************************************************/
265 public:
266 
273  void clear_ast(SgNode *ast);
274 
289  template<class ControlFlowGraph>
290  void apply_to_ast(const ControlFlowGraph&);
291 
305  template<class ControlFlowGraph>
306  void cache_vertex_descriptors(const ControlFlowGraph&);
307 
308  /**********************************************************************************************************************
309  * Graph construction methods
310  **********************************************************************************************************************/
311 public:
312 
338  template<class ControlFlowGraph>
339  ControlFlowGraph build_block_cfg_from_ast(SgNode *root);
340 
341  template<class ControlFlowGraph>
342  void build_block_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
343 
344  template<class ControlFlowGraph>
345  ControlFlowGraph build_insn_cfg_from_ast(SgNode *root);
346 
347  template<class ControlFlowGraph>
348  void build_insn_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
353  template<class BlockCFG, class InsnCFG>
354  void explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi/*out*/);
360  template<class InsnCFG>
361  void fixup_fcall_fret(InsnCFG &cfg/*in,out*/, bool preserve_call_fallthrough_edges);
362 
372  template<class ControlFlowGraph>
373  ControlFlowGraph build_cg_from_ast(SgNode *root);
374 
375  template<class ControlFlowGraph>
376  void build_cg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
389  template<class ControlFlowGraph>
390  ControlFlowGraph copy(const ControlFlowGraph &src);
391 
392  template<class ControlFlowGraph>
393  void copy(const ControlFlowGraph &src, ControlFlowGraph &dst/*out*/);
396  /***********************************************************************************************************************
397  * Graph output
398  ***********************************************************************************************************************/
399 
401  template<class CFG>
403  std::vector<typename boost::graph_traits<CFG>::vertex_descriptor> vertices;
404  std::vector<typename boost::graph_traits<CFG>::edge_descriptor> edges;
405  };
406 
408  template<class CFG>
410  void operator()(std::ostream &o, typename boost::graph_traits<CFG>::vertex_descriptor vertex) const {}
411  };
412 
414  template<class CFG>
416  void operator()(std::ostream&, typename boost::graph_traits<CFG>::edge_descriptor /*vertex*/) const {}
417  };
418 
421  template<typename CFG, class VertexPropertyWriter, class EdgePropertyWriter>
422  void write_graphviz(std::ostream&, const CFG&, const VertexPropertyWriter&, const EdgePropertyWriter&);
423 
424  template<typename CFG>
425  void write_graphviz(std::ostream &out, const CFG &cfg) {
427  }
428 
429  template<typename CFG, class VertexPropertyWriter>
430  void write_graphviz(std::ostream &out, const CFG &cfg, const VertexPropertyWriter &vpw) {
432  }
435  /**********************************************************************************************************************
436  * Miscellaneous members
437  **********************************************************************************************************************/
438 
439 private:
440  /* Visitor used by flow_order(). Declaring this in function scope results in boost errors (boost-1.42, 2011-05). */
441  template<class ControlFlowGraph>
442  struct FlowOrder: public boost::default_dfs_visitor {
443  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
444  typedef std::vector<Vertex> VertexList;
445  typedef std::vector<size_t> ReverseVertexList;
446  VertexList *forward_order;
447  FlowOrder(VertexList *forward_order): forward_order(forward_order) {}
448  void compute(const ControlFlowGraph &g, Vertex v0, ReverseVertexList *reverse_order);
449  void finish_vertex(Vertex v, ControlFlowGraph g);
450  };
451 
452  /* Helper class for build_block_cfg_from_ast(). Adds vertices to its 'cfg' member. Vertices are any SgAsmBlock that
453  * contains at least one SgAsmInstruction. */
454  template<class ControlFlowGraph>
455  class VertexInserter: public AstSimpleProcessing {
456  public:
457  ControlFlow *analyzer;
458  ControlFlowGraph &cfg;
459  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
460  typedef Map<SgAsmBlock*, Vertex> BlockVertexMap;
461  BlockVertexMap &bv_map;
462  VertexInserter(ControlFlow *analyzer, ControlFlowGraph &cfg, BlockVertexMap &bv_map)
463  : analyzer(analyzer), cfg(cfg), bv_map(bv_map)
464  {}
465  // Add basic block to graph if it hasn't been added already.
466  void conditionally_add_vertex(SgAsmBlock *block);
467 
468  void visit(SgNode *node) {
469  if (isSgAsmFunction(node)) {
470  // Add the function entry block before the other blocks of the function. This ensures that the entry block
471  // of a function has a lower vertex number than the other blocks of the function (the traversal is not
472  // guaranteed to visit the function basic blocks in that order).
473  conditionally_add_vertex(isSgAsmFunction(node)->get_entry_block());
474  } else {
475  conditionally_add_vertex(isSgAsmBlock(node));
476  }
477  }
478  };
479 
480 public:
513  template<class ControlFlowGraph>
514  std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
515  flow_order(const ControlFlowGraph&,
516  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start,
517  std::vector<size_t> *reverse_order=NULL);
518 
519 private:
520  /* Visitor used by return_blocks(). Declaring this in function scope results in boost errors (boost-1.42, 2011-05). */
521  template<class ControlFlowGraph>
522  struct ReturnBlocks: public boost::default_dfs_visitor {
523  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
524  typedef std::vector<Vertex> Vector;
525  Vector &blocks;
526  ReturnBlocks(Vector &blocks): blocks(blocks) {}
527  void finish_vertex(Vertex v, ControlFlowGraph g);
528  };
529 
530 public:
538  template<class ControlFlowGraph>
539  std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
540  return_blocks(const ControlFlowGraph &cfg,
541  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start);
542 };
543 
544 
545 /*******************************************************************************************************************************
546  * Functions
547  *******************************************************************************************************************************/
548 
551 template<class V, class E>
553 get_ast_node(const Sawyer::Container::Graph<V, E> &cfg, size_t vertexId) {
554  typedef typename Sawyer::Container::Graph<V, E> CFG;
555  typename CFG::ConstVertexValueIterator iter = cfg.findVertex(vertexId);
556  ASSERT_forbid2(iter==cfg.vertices().end(), "invalid vertex ID " + StringUtility::numberToString(vertexId));
557  return *iter;
558 }
559 
562 template<class V, class E, class AstNode>
563 void
564 put_ast_node(Sawyer::Container::Graph<V, E> &cfg, size_t vertexId, AstNode *astNode) {
565  typedef typename Sawyer::Container::Graph<V, E> CFG;
566  typename CFG::VertexValueIterator iter = cfg.findVertex(vertexId);
567  ASSERT_forbid2(iter==cfg.vertices().end(), "invalid vertex ID " + StringUtility::numberToString(vertexId));
568  *iter = astNode;
569 }
570 
571 // Sorry about this mess! The goal is to match only boost::adjacency_list graphs.
572 template<class A, class B, class C, class D, class E, class F, class G>
573 typename boost::property_traits<typename boost::property_map<boost::adjacency_list<A, B, C, D, E, F, G>,
574  boost::vertex_name_t>::type>::value_type
575 get_ast_node(const boost::adjacency_list<A, B, C, D, E, F, G> &cfg,
576  typename boost::graph_traits<boost::adjacency_list<A, B, C, D, E, F, G> >::vertex_descriptor vertex) {
577  return boost::get(boost::vertex_name, cfg, vertex);
578 }
579 
580 // Sorry about this mess! The goal is to match only boost::adjacency_list graphs.
581 template<class A, class B, class C, class D, class E, class F, class G>
582 void
583 put_ast_node(boost::adjacency_list<A, B, C, D, E, F, G> &cfg,
584  typename boost::graph_traits<boost::adjacency_list<A, B, C, D, E, F, G> >::vertex_descriptor vertex,
585  typename boost::property_traits<
586  typename boost::property_map<boost::adjacency_list<A, B, C, D, E, F, G>, boost::vertex_name_t>::type
587  >::value_type ast_node) {
588  boost::put(boost::vertex_name, cfg, vertex, ast_node);
589 }
590 
591 /******************************************************************************************************************************
592  * Function template definitions
593  ******************************************************************************************************************************/
594 
595 template<class ControlFlowGraph>
596 void
597 ControlFlow::apply_to_ast(const ControlFlowGraph &cfg)
598 {
599  typename boost::graph_traits<ControlFlowGraph>::vertex_iterator vi, vi_end;
600  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
601  SgAsmBlock *block = get_ast_node(cfg, *vi); // FIXME: Instruction CFGs not supported yet
602  if (!block || is_vertex_filtered(block))
603  continue;
604 
605  /* Delete old targets */
606  const SgAsmIntegerValuePtrList &targets = block->get_successors();
607  for (SgAsmIntegerValuePtrList::const_iterator ti=targets.begin(); ti!=targets.end(); ++ti)
608  delete *ti;
609 
610  /* Add new targets */
611  block->set_successors_complete(true);
612  block->get_successors().clear();
613  typename boost::graph_traits<ControlFlowGraph>::out_edge_iterator ei, ei_end;
614  for (boost::tie(ei, ei_end)=boost::out_edges(*vi, cfg); ei!=ei_end; ++ei) {
615  SgAsmBlock *target_block = get_ast_node(cfg, boost::target(*ei, cfg));
616  if (target_block && !is_edge_filtered(block, target_block)) {
617  SgAsmIntegerValueExpression *target = SageBuilderAsm::buildValueU64(target_block->get_address());
618  target->makeRelativeTo(target_block);
619  target->set_parent(block);
620  block->get_successors().push_back(target);
621  }
622  }
623  }
624 }
625 
626 template<class ControlFlowGraph>
627 void
628 ControlFlow::cache_vertex_descriptors(const ControlFlowGraph &cfg)
629 {
630  typename boost::graph_traits<ControlFlowGraph>::vertex_iterator vi, vi_end;
631  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
632  SgAsmBlock *block = get_ast_node(cfg, *vi); // FIXME: Instruction CFGs not supported yet
633  if (block && !is_vertex_filtered(block))
634  block->set_cached_vertex(*vi);
635  }
636 }
637 
638 template<class ControlFlowGraph>
639 void
640 ControlFlow::VertexInserter<ControlFlowGraph>::conditionally_add_vertex(SgAsmBlock *block)
641 {
642  if (block && block->has_instructions() && !analyzer->is_vertex_filtered(block) && !bv_map.exists(block)) {
643  Vertex vertex = boost::add_vertex(cfg);
644  bv_map[block] = vertex;
645  put_ast_node(cfg, vertex, block);
646  }
647 }
648 
649 template<class ControlFlowGraph>
650 void
651 ControlFlow::build_block_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg)
652 {
653  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
654  Vertex NO_VERTEX = boost::graph_traits<ControlFlowGraph>::null_vertex();
655  typedef Map<SgAsmBlock*, Vertex> BlockVertexMap;
656  BlockVertexMap bv_map;
657 
658  // Add the vertices
659  cfg.clear();
660  VertexInserter<ControlFlowGraph>(this, cfg, bv_map).traverse(root, preorder);
661 
662  // Mapping from block entry address to CFG vertex
663  Map<rose_addr_t, Vertex> addrToVertex;
664  for (typename BlockVertexMap::iterator bvi=bv_map.begin(); bvi!=bv_map.end(); ++bvi)
665  addrToVertex[bvi->first->get_address()] = bvi->second;
666 
667  // Add the edges
668  BOOST_FOREACH (Vertex sourceVertex, boost::vertices(cfg)) {
669  SgAsmBlock *sourceBlock = get_ast_node(cfg, sourceVertex);
670  for (SgAsmIntegerValueExpression *integerValue: sourceBlock->get_successors()) {
671  Vertex targetVertex = addrToVertex.get_value_or(integerValue->get_absoluteValue(), NO_VERTEX);
672  if (targetVertex!=NO_VERTEX) {
673  SgAsmBlock *targetBlock = get_ast_node(cfg, targetVertex);
674  assert(targetBlock!=NULL); // since we have a vertex, there must be an SgAsmBlock!
675  if (!is_edge_filtered(sourceBlock, targetBlock))
676  boost::add_edge(sourceVertex, targetVertex, cfg);
677  }
678  }
679  }
680 }
681 
682 template<class ControlFlowGraph>
683 void
684 ControlFlow::build_insn_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg)
685 {
686  BlockGraph cfgb;
687  build_block_cfg_from_ast(root, cfgb);
688  explode_blocks(cfgb, cfg);
689  bool preserve_call_fallthrough_edges = false;
690  fixup_fcall_fret(cfg, preserve_call_fallthrough_edges);
691 }
692 
693 template<class ControlFlowGraph>
694 void
695 ControlFlow::build_cg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/)
696 {
697  struct T1: public EdgeFilter {
698  EdgeFilter *parent;
699  T1(EdgeFilter *parent): parent(parent) {}
700  bool operator()(ControlFlow *analyzer, SgAsmNode *src, SgAsmNode *dst) {
701  SgAsmFunction *src_func = SageInterface::getEnclosingNode<SgAsmFunction>(src, true);
702  SgAsmBlock *dst_block = SageInterface::getEnclosingNode<SgAsmBlock>(dst, true);
703  SgAsmFunction *dst_func = SageInterface::getEnclosingNode<SgAsmFunction>(dst_block);
704  if (!src_func || !dst_func || dst_block!=dst_func->get_entry_block()) {
705  return false;
706  } else if (src_func!=dst_func) {
707  // inter-function call, not a return edge
708  } else {
709  // FIXME: this might not actually be a recursive call [Robb P. Matzke 2013-09-05]
710  }
711  return parent ? (*parent)(analyzer, src, dst) : true;
712  }
713  };
714 
715  EdgeFilter *parent = get_edge_filter();
716  T1 edge_filter(parent);
717  try {
718  set_edge_filter(&edge_filter);
719  build_block_cfg_from_ast(root, cfg);
720  } catch (...) {
721  set_edge_filter(parent);
722  throw;
723  }
724  set_edge_filter(parent);
725 }
726 
727 template<class ControlFlowGraph>
728 void
729 ControlFlow::copy(const ControlFlowGraph &src, ControlFlowGraph &dst/*out*/)
730 {
731  typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
732  Vertex NO_VERTEX = boost::graph_traits<ControlFlowGraph>::null_vertex();
733 
734  dst.clear();
735  std::vector<Vertex> src_to_dst(boost::num_vertices(src), NO_VERTEX);
736 
737  typename boost::graph_traits<const ControlFlowGraph>::vertex_iterator vi, vi_end;
738  for (boost::tie(vi, vi_end)=boost::vertices(src); vi!=vi_end; ++vi) {
739  SgAsmNode *node = get_ast_node(src, *vi);
740  if (!is_vertex_filtered(node)) {
741  src_to_dst[*vi] = boost::add_vertex(dst);
742  put_ast_node(dst, src_to_dst[*vi], get_ast_node(src, *vi));
743  }
744  }
745 
746  typename boost::graph_traits<const ControlFlowGraph>::edge_iterator ei, ei_end;
747  for (boost::tie(ei, ei_end)=boost::edges(src); ei!=ei_end; ++ei) {
748  if (NO_VERTEX!=src_to_dst[boost::source(*ei, src)] && NO_VERTEX!=src_to_dst[boost::target(*ei, src)]) {
749  SgAsmNode *node1 = get_ast_node(src, boost::source(*ei, src));
750  SgAsmNode *node2 = get_ast_node(src, boost::target(*ei, src));
751  if (!is_edge_filtered(node1, node2))
752  boost::add_edge(src_to_dst[boost::source(*ei, src)], src_to_dst[boost::target(*ei, src)], dst);
753  }
754  }
755 }
756 
757 template<class ControlFlowGraph>
758 ControlFlowGraph
759 ControlFlow::copy(const ControlFlowGraph &src)
760 {
761  ControlFlowGraph dst;
762  copy(src, dst);
763  return dst;
764 }
765 
766 template<class BlockCFG, class InsnCFG>
767 void
768 ControlFlow::explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi/*out*/)
769 {
770  // BlockCFG is the basic-block binary control flow graph
771  typedef typename boost::graph_traits<const BlockCFG>::vertex_descriptor BlockCFG_Vertex;
772  typedef typename boost::graph_traits<const BlockCFG>::vertex_iterator BlockCFG_VertexIterator;
773  typedef typename boost::graph_traits<const BlockCFG>::edge_iterator BlockCFG_EdgeIterator;
774 
775  // InsnCFG is the instruction binary control flow graph--it points to instructions rather than basic blocks, and changes
776  // some edges regarding function calls.
777  typedef typename boost::graph_traits<InsnCFG>::vertex_descriptor InsnCFG_Vertex;
778  typedef std::pair<InsnCFG_Vertex, InsnCFG_Vertex> InsnCFG_VertexPair;
779 
780  // Expand the cfgb basic blocks to create a cfgi that has instructions instead of blocks, and add the intra-block edges
781  cfgi.clear();
782  Map<BlockCFG_Vertex, InsnCFG_VertexPair> vertex_translation; // enter and leave instructions for each of the blocks in cfgb
783  {
784  BlockCFG_VertexIterator vi, vi_end;
785  for (boost::tie(vi, vi_end)=boost::vertices(cfgb); vi!=vi_end; ++vi) {
786  SgAsmBlock *blk = get_ast_node(cfgb, *vi);
787  const SgAsmStatementPtrList &insns = blk->get_statementList();
788  assert(!insns.empty());
789  InsnCFG_Vertex enter_vertex = boost::graph_traits<InsnCFG>::null_vertex();
790  InsnCFG_Vertex prev_vertex = boost::graph_traits<InsnCFG>::null_vertex();
791  for (SgAsmStatementPtrList::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
792  SgAsmInstruction *insn = isSgAsmInstruction(*ii);
793  assert(insn!=NULL); // basic blocks contain only instructions, no other type of asm statement
794  InsnCFG_Vertex vertex = boost::add_vertex(cfgi);
795  put_ast_node(cfgi, vertex, insn);
796  if (ii==insns.begin()) {
797  enter_vertex = vertex;
798  } else {
799  boost::add_edge(prev_vertex, vertex, cfgi);
800  }
801  prev_vertex = vertex;
802  }
803  assert(prev_vertex!=boost::graph_traits<InsnCFG>::null_vertex()); // basic block had no instructions but was in CFG!
804  vertex_translation[*vi] = InsnCFG_VertexPair(enter_vertex, prev_vertex);
805  }
806  }
807 
808  // Insert the edges from cfgb. The corresponding edge in cfgi must emanate from the final instruction of the source basic
809  // block and enter at the first instruction of the target basic block.
810  {
811  BlockCFG_EdgeIterator ei, ei_end;
812  for (boost::tie(ei, ei_end)=boost::edges(cfgb); ei!=ei_end; ++ei) {
813  InsnCFG_Vertex src_leave_vertex = vertex_translation.get_one(boost::source(*ei, cfgb)).second;
814  InsnCFG_Vertex dst_enter_vertex = vertex_translation.get_one(boost::target(*ei, cfgb)).first;
815  assert(src_leave_vertex!=boost::graph_traits<InsnCFG>::null_vertex());
816  assert(dst_enter_vertex!=boost::graph_traits<InsnCFG>::null_vertex());
817  boost::add_edge(src_leave_vertex, dst_enter_vertex, cfgi);
818  }
819  }
820 }
821 
822 template<class InsnCFG>
823 void
824 ControlFlow::fixup_fcall_fret(InsnCFG &cfg, bool preserve_call_fallthrough_edges)
825 {
826  typedef typename boost::graph_traits<InsnCFG>::vertex_descriptor CFG_Vertex;
827  typedef typename boost::graph_traits<InsnCFG>::vertex_iterator CFG_VertexIterator;
828  typedef typename boost::graph_traits<InsnCFG>::in_edge_iterator CFG_InEdgeIterator;
829  typedef std::pair<CFG_Vertex, CFG_Vertex> CFG_VertexPair;
830  typedef Map<SgAsmInstruction*, CFG_Vertex> InsnToVertex;
831  CFG_Vertex NO_VERTEX = boost::graph_traits<InsnCFG>::null_vertex();
832 
833  // Build mappings needed later and find the function return points. We just look for the x86
834  // RET instruction for now and assume that each one we find is a return if it has no control flow successors. They have no
835  // successors at this point because CFG1 didn't have any.
836  InstructionMap insns;
837  InsnToVertex insn_to_vertex;
838  std::vector<bool> isret(boost::num_vertices(cfg), false);
839  {
840  CFG_VertexIterator vi, vi_end;
841  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
842  SgAsmInstruction *insn = get_ast_node(cfg, *vi);
843  insns[insn->get_address()] = insn;
844  insn_to_vertex[insn] = *vi;
845 
846  if (0==boost::out_degree(*vi, cfg)) {
847  // FIXME: Architecture-specific code here
848  if (SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(insn)) {
849  isret[*vi] = x86_ret==insn_x86->get_kind();
850  }
851  }
852  }
853  }
854 
855  // Return the entry vertex for a function that owns the indicated instruction
856  struct FunctionEntryVertex {
857  const InsnToVertex &insn_to_vertex;
858  const InstructionMap &imap;
859  FunctionEntryVertex(const InsnToVertex &insn_to_vertex, const InstructionMap &imap)
860  : insn_to_vertex(insn_to_vertex), imap(imap) {}
861  CFG_Vertex operator()(SgAsmInstruction *insn) {
862  SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn, true);
863  SgAsmInstruction *entry_insn = imap.get_one(func->get_entry_va());
864  CFG_Vertex entry_vertex = insn_to_vertex.get_one(entry_insn);
865  return entry_vertex;
866  }
867  } function_entry_vertex(insn_to_vertex, insns);
868 
869  // Process each return site in order to add edges from the return site to the vertex representing the return address
870  std::vector<CFG_VertexPair> edges_to_insert, edges_to_erase;
871  {
872  CFG_VertexIterator vi, vi_end;
873  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
874  CFG_Vertex returner_vertex = *vi;
875  if (!isret[returner_vertex])
876  continue;
877  SgAsmInstruction *returner_insn = get_ast_node(cfg, returner_vertex);
878 
879  // Find all of the true call sites for the function that owns the returner instruction (e.g., RET) by recursively
880  // following inter-function CFG edges until we find the true calls (those edges that follow CALL semantics).
881  // Inter-function CFG edges can represent true calls or simply inter-function branches such as thunks. We have to
882  // gather up the information without adding it to the CFG yet (can't add while we're iterating)
883  std::vector<bool> seen(boost::num_vertices(cfg), false);
884  WorkList<CFG_Vertex> worklist; // targets of inter-function CFG edges; function callees
885  worklist.push(function_entry_vertex(returner_insn));
886  while (!worklist.empty()) {
887  CFG_Vertex callee_vertex = worklist.shift();
888  CFG_InEdgeIterator ei, ei_end;
889  for (boost::tie(ei, ei_end)=boost::in_edges(callee_vertex, cfg); ei!=ei_end; ++ei) {
890  CFG_Vertex caller_vertex = boost::source(*ei, cfg); // caller is a inter-function call or branch site
891  if (!seen[caller_vertex]) {
892  seen[caller_vertex] = true;
893  SgAsmInstruction *caller_insn = get_ast_node(cfg, caller_vertex);
894  SgAsmBlock *caller_block = SageInterface::getEnclosingNode<SgAsmBlock>(caller_insn);
895  assert(caller_block!=NULL);
896  rose_addr_t target_va, returnee_va; // returnee_va is usually the call's fall-through address
897  if (caller_block->is_function_call(target_va/*out*/, returnee_va/*out*/)) {
898  // This is a true call, so we need to add a return edge from the return instruction (the
899  // "returner") to what is probably the fall-through address of the call site (the returnee).
900  SgAsmInstruction *returnee_insn = insns.get_value_or(returnee_va, NULL);
901  CFG_Vertex returnee_vertex = insn_to_vertex.get_value_or(returnee_insn, NO_VERTEX);
902  if (returnee_vertex!=NO_VERTEX) {
903  edges_to_insert.push_back(CFG_VertexPair(returner_vertex, returnee_vertex));
904  edges_to_erase.push_back(CFG_VertexPair(caller_vertex, returnee_vertex));
905  }
906  } else {
907  // This is a non-call inter-function edge; probably a thunk. We need to find its call sites and add
908  // the returnee addresses (call fall throughs) to the returnee addresses of the RET we're
909  // processing.
910  worklist.push(function_entry_vertex(caller_insn));
911  }
912  }
913  }
914  }
915  }
916  }
917 
918  // Erase and insert edges now that we're done iterating.
919  if (!preserve_call_fallthrough_edges) {
920  for (size_t i=0; i<edges_to_erase.size(); ++i)
921  boost::remove_edge(edges_to_erase[i].first, edges_to_erase[i].second, cfg);
922  }
923  for (size_t i=0; i<edges_to_insert.size(); ++i)
924  boost::add_edge(edges_to_insert[i].first, edges_to_insert[i].second, cfg);
925 }
926 
927 template<class ControlFlowGraph>
928 void
929 ControlFlow::FlowOrder<ControlFlowGraph>::compute(const ControlFlowGraph &g, Vertex v0,
930  ReverseVertexList *reverse_order) {
931  forward_order->clear();
932  std::vector<boost::default_color_type> colors(boost::num_vertices(g), boost::white_color);
933  boost::depth_first_visit(g, v0, *this, &(colors[0]));
934  assert(!forward_order->empty()); /* it should at least contain v0 */
935  std::reverse(forward_order->begin(), forward_order->end());
936  if (reverse_order) {
937  reverse_order->clear();
938  reverse_order->resize(boost::num_vertices(g), INVALID_INDEX);
939  for (size_t i=0; i<forward_order->size(); i++)
940  (*reverse_order)[(*forward_order)[i]] = i;
941  }
942 }
943 
944 template<class ControlFlowGraph>
945 void
946 ControlFlow::FlowOrder<ControlFlowGraph>::finish_vertex(Vertex v, ControlFlowGraph g) {
947  forward_order->push_back(v);
948 }
949 
950 template<class ControlFlowGraph>
951 std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
952 ControlFlow::flow_order(const ControlFlowGraph &cfg,
953  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start,
954  std::vector<size_t> *reverse_order/*=NULL*/)
955 {
956  std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor> forward_order;
957  FlowOrder<ControlFlowGraph>(&forward_order).compute(cfg, start, reverse_order);
958  return forward_order;
959 }
960 
961 template<class ControlFlowGraph>
962 void
963 ControlFlow::ReturnBlocks<ControlFlowGraph>::finish_vertex(Vertex v, ControlFlowGraph g)
964 {
965  typename boost::graph_traits<ControlFlowGraph>::out_edge_iterator ei, ei_end;
966  boost::tie(ei, ei_end) = boost::out_edges(v, g);
967  if (ei==ei_end)
968  blocks.push_back(v);
969 }
970 
971 template<class ControlFlowGraph>
972 std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
973 ControlFlow::return_blocks(const ControlFlowGraph &cfg,
974  typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start)
975 {
976  typename ReturnBlocks<ControlFlowGraph>::Vector result;
977  ReturnBlocks<ControlFlowGraph> visitor(result);
978  std::vector<boost::default_color_type> colors(boost::num_vertices(cfg), boost::white_color);
979  boost::depth_first_visit(cfg, start, visitor, &(colors[0]));
980  return result;
981 }
982 
983 template<class ControlFlowGraph>
984 ControlFlowGraph
986 {
987  ControlFlowGraph cfg;
988  build_block_cfg_from_ast(root, cfg);
989  return cfg;
990 }
991 
992 template<class ControlFlowGraph>
993 ControlFlowGraph
995 {
996  ControlFlowGraph cfg;
997  build_insn_cfg_from_ast(root, cfg);
998  return cfg;
999 }
1000 
1001 template<class ControlFlowGraph>
1002 ControlFlowGraph
1004 {
1005  ControlFlowGraph cfg;
1006  build_cg_from_ast(root, cfg);
1007  return cfg;
1008 }
1009 
1011 template<typename CFG, class VertexPropertyWriter, class EdgePropertyWriter>
1012 void
1013 ControlFlow::write_graphviz(std::ostream &out, const CFG &cfg,
1014  const VertexPropertyWriter &vpw, const EdgePropertyWriter &epw)
1015 {
1016  // typedef typename boost::graph_traits<CFG>::vertex_descriptor CFG_Vertex;
1017  typedef typename boost::graph_traits<CFG>::edge_descriptor CFG_Edge;
1018  typedef typename boost::graph_traits<CFG>::vertex_iterator CFG_VertexIterator;
1019  typedef typename boost::graph_traits<CFG>::out_edge_iterator CFG_OutEdgeIterator;
1020 
1021  // Partition the graph into functions and inter-function edges
1023  Functions funcs;
1024  std::vector<CFG_Edge> interfunc_edges;
1025  CFG_VertexIterator vi, vi_end;
1026  for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
1027  SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(get_ast_node(cfg, *vi), true);
1028  FunctionSubgraphInfo<CFG> &f = funcs[func];
1029  f.vertices.push_back(*vi);
1030  CFG_OutEdgeIterator ei, ei_end;
1031  for (boost::tie(ei, ei_end)=boost::out_edges(*vi, cfg); ei!=ei_end; ++ei) {
1032  SgNode *tgt_node = get_ast_node(cfg, boost::target(*ei, cfg));
1033  SgAsmFunction *tgt_func = SageInterface::getEnclosingNode<SgAsmFunction>(tgt_node, true);
1034  if (tgt_func==func) {
1035  f.edges.push_back(*ei);
1036  } else {
1037  interfunc_edges.push_back(*ei);
1038  }
1039  }
1040  }
1041 
1042  // Output subgraph info, each function in its own cluster
1043  out <<"digraph G {\n";
1044  for (typename Functions::iterator fi=funcs.begin(); fi!=funcs.end(); ++fi) {
1045  FunctionSubgraphInfo<CFG> &f = fi->second;
1046  if (!f.vertices.empty() || !f.edges.empty()) {
1047  SgNode *node = get_ast_node(cfg, f.vertices.front());
1048  SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(node, true);
1049  const size_t maxNameSize = 63;
1050  char cluster_name[maxNameSize+1];
1051  snprintf(cluster_name, maxNameSize, "cluster_F%" PRIx64, func->get_entry_va());
1052  out <<" subgraph " <<cluster_name <<" {\n"
1053  <<" style=filled;\n"
1054  <<" color=lightgrey;\n"
1055  <<" label=\"Function " <<StringUtility::addrToString(func->get_entry_va())
1056  <<(func->get_name().empty()?std::string(""):(" <"+func->get_name()+">")) <<"\";\n";
1057  for (size_t i=0; i<f.vertices.size(); ++i) {
1058  out <<" " <<f.vertices[i];
1059  vpw(out, f.vertices[i]);
1060  out <<";\n";
1061  }
1062  for (size_t i=0; i<f.edges.size(); ++i) {
1063  out <<" " <<boost::source(f.edges[i], cfg) <<"->" <<boost::target(f.edges[i], cfg);
1064  epw(out, f.edges[i]);
1065  out <<";\n";
1066  }
1067  out <<" }\n"; // subgraph
1068  }
1069  }
1070 
1071  // Inter-function edges
1072  for (size_t i=0; i<interfunc_edges.size(); ++i) {
1073  out <<" " <<boost::source(interfunc_edges[i], cfg) <<"->" <<boost::target(interfunc_edges[i], cfg);
1074  epw(out, interfunc_edges[i]);
1075  out <<";\n";
1076  }
1077  out <<"}\n"; // digraph
1078 }
1079 
1080 } // namespace
1081 } // namespace
1082 
1083 #endif
1084 #endif
void set_successors_complete(bool const &)
Property: Whether the successors list is complete.
BlockGraph Graph
Default control flow graph.
Definition: ControlFlow.h:187
Default vertex property writer is a no-op.
Definition: ControlFlow.h:409
void put_ast_node(Sawyer::Container::Graph< V, E > &cfg, size_t vertexId, AstNode *astNode)
Set the AST node associated with a vertex.
Definition: ControlFlow.h:564
void write_graphviz(std::ostream &out, const CFG &cfg)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
Definition: ControlFlow.h:425
Graph containing user-defined vertices and edges.
Definition: Graph.h:625
ROSE_UTIL_API std::string numberToString(long long)
Convert an integer to a string.
bool is_function_call(rose_addr_t &target_va, rose_addr_t &return_va)
Returns true if basic block appears to be a function call.
Instruction basic block.
Base class for all binary analysis IR nodes.
Class for traversing the AST.
void apply_to_ast(const ControlFlowGraph &)
Applies graph to AST.
Definition: ControlFlow.h:597
Base class for machine instructions.
bool is_vertex_filtered(SgAsmNode *bb_or_insn, VertexFilter *filter)
Determines if a vertex is filtered out.
Definition: ControlFlow.h:240
SgAsmBlock * get_entry_block() const
Function entry basic block.
boost::adjacency_list< boost::setS, boost::vecS, boost::bidirectionalS, boost::property< boost::vertex_name_t, SgAsmInstruction * > > InsnGraph
Default instruction-based control flow graph.
Definition: ControlFlow.h:183
void explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi)
Create an instruction control flow graph from a basic block control flow graph.
Definition: ControlFlow.h:768
const T & get_value_or(const Key &key, const T &dflt) const
Convenience for getting a value from an Option.
Definition: Map.h:75
void set_parent(SgNode *parent)
All nodes in the AST contain a reference to a parent node.
List of things to work on.
Definition: WorkLists.h:60
ControlFlowGraph build_insn_cfg_from_ast(SgNode *root)
Builds a control flow graph for part of an AST.
Definition: ControlFlow.h:994
rose_addr_t const & get_address() const
Property: Starting virtual address.
Represents a synthesized function.
Sawyer::Container::Graph< V, E >::VertexValue get_ast_node(const Sawyer::Container::Graph< V, E > &cfg, size_t vertexId)
Return the AST node associated with a vertex.
Definition: ControlFlow.h:553
ControlFlowGraph build_block_cfg_from_ast(SgNode *root)
Builds a control flow graph for part of an AST.
Definition: ControlFlow.h:985
std::vector< typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor > flow_order(const ControlFlowGraph &, typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor start, std::vector< size_t > *reverse_order=NULL)
Orders nodes by depth first search reverse post order.
Definition: ControlFlow.h:952
void write_graphviz(std::ostream &out, const CFG &cfg, const VertexPropertyWriter &vpw)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
Definition: ControlFlow.h:430
ControlFlowGraph copy(const ControlFlowGraph &src)
Copies a graph while filtering.
Definition: ControlFlow.h:759
boost::iterator_range< VertexIterator > vertices()
Iterators for all vertices.
Definition: Graph.h:1469
Main namespace for the ROSE library.
void fixup_fcall_fret(InsnCFG &cfg, bool preserve_call_fallthrough_edges)
Fix up a CFG by changing function call and return edges.
Definition: ControlFlow.h:824
void clear_ast(SgNode *ast)
Clears successor information from the AST.
T shift()
Remove and return the item from the front of the work list.
Definition: WorkLists.h:232
rose_addr_t const & get_entry_va() const
Property: Primary entry address.
void set_edge_filter(EdgeFilter *filter)
Manipulate the edge filter.
Definition: ControlFlow.h:231
bool push(const T &, boost::tribool check_uniqueness=boost::logic::indeterminate)
Add an item to the back of the work list.
Definition: WorkLists.h:198
Base class for integer values.
This class represents the base class for all IR nodes within Sage III.
Definition: Cxx_Grammar.h:9846
ROSE_UTIL_API std::string addrToString(uint64_t value, size_t nbits=0)
Convert a virtual address to a string.
bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst)
Determines if an edge is filtered out.
Definition: ControlFlow.h:253
ControlFlowGraph build_cg_from_ast(SgNode *root)
Builds a control flow graph with only function call edges.
Definition: ControlFlow.h:1003
void set_cached_vertex(size_t const &)
Property: Cached vertex for control flow graphs.
void makeRelativeTo(SgNode *baseNode)
Makes the value of this integer relative to some other addressable node.
bool has_instructions() const
Determins if a block contains instructions.
boost::adjacency_list< boost::setS, boost::vecS, boost::bidirectionalS, boost::property< boost::vertex_name_t, SgAsmBlock * > > BlockGraph
Default basic block control flow graph type.
Definition: ControlFlow.h:160
VertexIterator findVertex(size_t id)
Finds the vertex with specified ID number.
Definition: Graph.h:1516
VertexFilter * get_vertex_filter() const
Manipulate the vertex filter.
Definition: ControlFlow.h:222
Represents one Intel x86 machine instruction.
Extends std::map with methods that return optional values.
Definition: Map.h:10
SgAsmStatementPtrList const & get_statementList() const
Property: Statements of which this block is composed.
bool empty() const
Returns true if this work list is empty.
Definition: WorkLists.h:68
std::vector< typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor > return_blocks(const ControlFlowGraph &cfg, typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor start)
Returns list of function return blocks.
Definition: ControlFlow.h:973
bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst, EdgeFilter *filter)
Determines if an edge is filtered out.
Definition: ControlFlow.h:250
SgAsmIntegerValuePtrList const & get_successors() const
Property: Control flow successors.
Binary control flow analysis.
Definition: ControlFlow.h:132
void write_graphviz(std::ostream &, const CFG &, const VertexPropertyWriter &, const EdgePropertyWriter &)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
Definition: ControlFlow.h:1013
Default edge property writer is a no-op.
Definition: ControlFlow.h:415
V VertexValue
User-level data associated with vertices.
Definition: Graph.h:627
const T & get_one(const Key &key) const
Look up one value or throw an exception.
Definition: Map.h:58
bool is_vertex_filtered(SgAsmNode *bb_or_insn)
Determines if a vertex is filtered out.
Definition: ControlFlow.h:241
void cache_vertex_descriptors(const ControlFlowGraph &)
Cache basic block vertex descriptors in AST.
Definition: ControlFlow.h:628
const size_t INVALID_INDEX(static_cast< size_t >(-1))
Invalid array index.
List of vertices and intra-function edges for one function.
Definition: ControlFlow.h:402
EdgeFilter * get_edge_filter() const
Manipulate the edge filter.
Definition: ControlFlow.h:232
void set_vertex_filter(VertexFilter *filter)
Manipulate the vertex filter.
Definition: ControlFlow.h:221