ROSE 0.11.145.147
ControlFlow.h
1#ifndef ROSE_BinaryAnalysis_ControlFlow_H
2#define ROSE_BinaryAnalysis_ControlFlow_H
3#include <featureTests.h>
4#ifdef ROSE_ENABLE_BINARY_ANALYSIS
5
6#include <Rose/BinaryAnalysis/InstructionMap.h>
7
8#include "Map.h"
9#include "WorkLists.h"
10#include "SageBuilderAsm.h"
11#include <AstSimpleProcessing.h>
12
13#include <boost/foreach.hpp> // needed for iteration over boost::vertices
14#include <boost/graph/adjacency_list.hpp>
15#include <boost/graph/reverse_graph.hpp>
16#include <boost/graph/depth_first_search.hpp>
17#include <Sawyer/GraphBoost.h>
18
19class SgNode;
20class SgAsmBlock;
21
22namespace Rose {
23namespace BinaryAnalysis {
24
136public:
138 : vertex_filter(NULL), edge_filter(NULL)
139 {}
140
141
160 typedef boost::adjacency_list<boost::setS, /* edges of each vertex in std::list */
161 boost::vecS, /* vertices in std::vector */
162 boost::bidirectionalS,
163 boost::property<boost::vertex_name_t, SgAsmBlock*> > BlockGraph;
164
183 typedef boost::adjacency_list<boost::setS,
184 boost::vecS,
185 boost::bidirectionalS,
186 boost::property<boost::vertex_name_t, SgAsmInstruction*> > InsnGraph;
187
191
192
193 /**********************************************************************************************************************
194 * Filters
195 **********************************************************************************************************************/
196public:
197
203 public:
204 virtual ~VertexFilter() {}
205 virtual bool operator()(ControlFlow*, SgAsmNode*) = 0;
206 };
207
213 public:
214 virtual ~EdgeFilter() {}
215 virtual bool operator()(ControlFlow*, SgAsmNode *source, SgAsmNode *target) = 0;
216 };
217
224 void set_vertex_filter(VertexFilter *filter) { vertex_filter = filter; }
225 VertexFilter *get_vertex_filter() const { return vertex_filter; }
234 void set_edge_filter(EdgeFilter *filter) { edge_filter = filter; }
235 EdgeFilter *get_edge_filter() const { return edge_filter; }
243 bool is_vertex_filtered(SgAsmNode *bb_or_insn, VertexFilter *filter) { return filter && !(*filter)(this, bb_or_insn); }
244 bool is_vertex_filtered(SgAsmNode *bb_or_insn) { return is_vertex_filtered(bb_or_insn, vertex_filter); }
253 bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst, EdgeFilter *filter) {
254 return filter && !(*filter)(this, src, dst);
255 }
257 return is_edge_filtered(src, dst, edge_filter);
258 }
261protected:
262 VertexFilter *vertex_filter;
263 EdgeFilter *edge_filter;
264
265 /**********************************************************************************************************************
266 * Methods that modify the AST
267 **********************************************************************************************************************/
268public:
269
276 void clear_ast(SgNode *ast);
277
292 template<class ControlFlowGraph>
293 void apply_to_ast(const ControlFlowGraph&);
294
308 template<class ControlFlowGraph>
309 void cache_vertex_descriptors(const ControlFlowGraph&);
310
311 /**********************************************************************************************************************
312 * Graph construction methods
313 **********************************************************************************************************************/
314public:
315
341 template<class ControlFlowGraph>
342 ControlFlowGraph build_block_cfg_from_ast(SgNode *root);
343
344 template<class ControlFlowGraph>
345 void build_block_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
346
347 template<class ControlFlowGraph>
348 ControlFlowGraph build_insn_cfg_from_ast(SgNode *root);
349
350 template<class ControlFlowGraph>
351 void build_insn_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
356 template<class BlockCFG, class InsnCFG>
357 void explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi/*out*/);
363 template<class InsnCFG>
364 void fixup_fcall_fret(InsnCFG &cfg/*in,out*/, bool preserve_call_fallthrough_edges);
365
375 template<class ControlFlowGraph>
376 ControlFlowGraph build_cg_from_ast(SgNode *root);
377
378 template<class ControlFlowGraph>
379 void build_cg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
392 template<class ControlFlowGraph>
393 ControlFlowGraph copy(const ControlFlowGraph &src);
394
395 template<class ControlFlowGraph>
396 void copy(const ControlFlowGraph &src, ControlFlowGraph &dst/*out*/);
399 /***********************************************************************************************************************
400 * Graph output
401 ***********************************************************************************************************************/
402
404 template<class CFG>
406 std::vector<typename boost::graph_traits<CFG>::vertex_descriptor> vertices;
407 std::vector<typename boost::graph_traits<CFG>::edge_descriptor> edges;
408 };
409
411 template<class CFG>
413 void operator()(std::ostream &, typename boost::graph_traits<CFG>::vertex_descriptor) const {}
414 };
415
417 template<class CFG>
419 void operator()(std::ostream&, typename boost::graph_traits<CFG>::edge_descriptor /*vertex*/) const {}
420 };
421
424 template<typename CFG, class VertexPropertyWriter, class EdgePropertyWriter>
425 void write_graphviz(std::ostream&, const CFG&, const VertexPropertyWriter&, const EdgePropertyWriter&);
426
427 template<typename CFG>
428 void write_graphviz(std::ostream &out, const CFG &cfg) {
430 }
431
432 template<typename CFG, class VertexPropertyWriter>
433 void write_graphviz(std::ostream &out, const CFG &cfg, const VertexPropertyWriter &vpw) {
435 }
438 /**********************************************************************************************************************
439 * Miscellaneous members
440 **********************************************************************************************************************/
441
442private:
443 /* Visitor used by flow_order(). Declaring this in function scope results in boost errors (boost-1.42, 2011-05). */
444 template<class ControlFlowGraph>
445 struct FlowOrder: public boost::default_dfs_visitor {
446 typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
447 typedef std::vector<Vertex> VertexList;
448 typedef std::vector<size_t> ReverseVertexList;
449 VertexList *forward_order;
450 FlowOrder(VertexList *forward_order): forward_order(forward_order) {}
451 void compute(const ControlFlowGraph &g, Vertex v0, ReverseVertexList *reverse_order);
452 void finish_vertex(Vertex v, ControlFlowGraph g);
453 };
454
455 /* Helper class for build_block_cfg_from_ast(). Adds vertices to its 'cfg' member. Vertices are any SgAsmBlock that
456 * contains at least one SgAsmInstruction. */
457 template<class ControlFlowGraph>
458 class VertexInserter: public AstSimpleProcessing {
459 public:
460 ControlFlow *analyzer;
461 ControlFlowGraph &cfg;
462 typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
463 typedef Map<SgAsmBlock*, Vertex> BlockVertexMap;
464 BlockVertexMap &bv_map;
465 VertexInserter(ControlFlow *analyzer, ControlFlowGraph &cfg, BlockVertexMap &bv_map)
466 : analyzer(analyzer), cfg(cfg), bv_map(bv_map)
467 {}
468 // Add basic block to graph if it hasn't been added already.
469 void conditionally_add_vertex(SgAsmBlock *block);
470
471 void visit(SgNode *node) {
472 if (isSgAsmFunction(node)) {
473 // Add the function entry block before the other blocks of the function. This ensures that the entry block
474 // of a function has a lower vertex number than the other blocks of the function (the traversal is not
475 // guaranteed to visit the function basic blocks in that order).
476 conditionally_add_vertex(isSgAsmFunction(node)->get_entryBlock());
477 } else {
478 conditionally_add_vertex(isSgAsmBlock(node));
479 }
480 }
481 };
482
483public:
516 template<class ControlFlowGraph>
517 std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
518 flow_order(const ControlFlowGraph&,
519 typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start,
520 std::vector<size_t> *reverse_order=NULL);
521
522private:
523 /* Visitor used by return_blocks(). Declaring this in function scope results in boost errors (boost-1.42, 2011-05). */
524 template<class ControlFlowGraph>
525 struct ReturnBlocks: public boost::default_dfs_visitor {
526 typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
527 typedef std::vector<Vertex> Vector;
528 Vector &blocks;
529 ReturnBlocks(Vector &blocks): blocks(blocks) {}
530 void finish_vertex(Vertex v, ControlFlowGraph g);
531 };
532
533public:
541 template<class ControlFlowGraph>
542 std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
543 return_blocks(const ControlFlowGraph &cfg,
544 typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start);
545};
546
547
548/*******************************************************************************************************************************
549 * Functions
550 *******************************************************************************************************************************/
551
554template<class V, class E>
556get_ast_node(const Sawyer::Container::Graph<V, E> &cfg, size_t vertexId) {
557 typedef typename Sawyer::Container::Graph<V, E> CFG;
558 typename CFG::ConstVertexValueIterator iter = cfg.findVertex(vertexId);
559 ASSERT_forbid2(iter==cfg.vertices().end(), "invalid vertex ID " + StringUtility::numberToString(vertexId));
560 return *iter;
561}
562
565template<class V, class E, class AstNode>
566void
567put_ast_node(Sawyer::Container::Graph<V, E> &cfg, size_t vertexId, AstNode *astNode) {
568 typedef typename Sawyer::Container::Graph<V, E> CFG;
569 typename CFG::VertexValueIterator iter = cfg.findVertex(vertexId);
570 ASSERT_forbid2(iter==cfg.vertices().end(), "invalid vertex ID " + StringUtility::numberToString(vertexId));
571 *iter = astNode;
572}
573
574// Sorry about this mess! The goal is to match only boost::adjacency_list graphs.
575template<class A, class B, class C, class D, class E, class F, class G>
576typename boost::property_traits<typename boost::property_map<boost::adjacency_list<A, B, C, D, E, F, G>,
577 boost::vertex_name_t>::type>::value_type
578get_ast_node(const boost::adjacency_list<A, B, C, D, E, F, G> &cfg,
579 typename boost::graph_traits<boost::adjacency_list<A, B, C, D, E, F, G> >::vertex_descriptor vertex) {
580 return boost::get(boost::vertex_name, cfg, vertex);
581}
582
583// Sorry about this mess! The goal is to match only boost::adjacency_list graphs.
584template<class A, class B, class C, class D, class E, class F, class G>
585void
586put_ast_node(boost::adjacency_list<A, B, C, D, E, F, G> &cfg,
587 typename boost::graph_traits<boost::adjacency_list<A, B, C, D, E, F, G> >::vertex_descriptor vertex,
588 typename boost::property_traits<
589 typename boost::property_map<boost::adjacency_list<A, B, C, D, E, F, G>, boost::vertex_name_t>::type
590 >::value_type ast_node) {
591 boost::put(boost::vertex_name, cfg, vertex, ast_node);
592}
593
594/******************************************************************************************************************************
595 * Function template definitions
596 ******************************************************************************************************************************/
597
598template<class ControlFlowGraph>
599void
600ControlFlow::apply_to_ast(const ControlFlowGraph &cfg)
601{
602 typename boost::graph_traits<ControlFlowGraph>::vertex_iterator vi, vi_end;
603 for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
604 SgAsmBlock *block = get_ast_node(cfg, *vi); // FIXME: Instruction CFGs not supported yet
605 if (!block || is_vertex_filtered(block))
606 continue;
607
608 /* Delete old targets */
609 const SgAsmIntegerValuePtrList &targets = block->get_successors();
610 for (SgAsmIntegerValuePtrList::const_iterator ti=targets.begin(); ti!=targets.end(); ++ti)
611 delete *ti;
612
613 /* Add new targets */
614 block->set_successorsComplete(true);
615 block->get_successors().clear();
616 typename boost::graph_traits<ControlFlowGraph>::out_edge_iterator ei, ei_end;
617 for (boost::tie(ei, ei_end)=boost::out_edges(*vi, cfg); ei!=ei_end; ++ei) {
618 SgAsmBlock *target_block = get_ast_node(cfg, boost::target(*ei, cfg));
619 if (target_block && !is_edge_filtered(block, target_block)) {
620 SgAsmIntegerValueExpression *target = SageBuilderAsm::buildValueU64(target_block->get_address());
621 target->makeRelativeTo(target_block);
622 target->set_parent(block);
623 block->get_successors().push_back(target);
624 }
625 }
626 }
627}
628
629template<class ControlFlowGraph>
630void
631ControlFlow::cache_vertex_descriptors(const ControlFlowGraph &cfg)
632{
633 typename boost::graph_traits<ControlFlowGraph>::vertex_iterator vi, vi_end;
634 for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
635 SgAsmBlock *block = get_ast_node(cfg, *vi); // FIXME: Instruction CFGs not supported yet
636 if (block && !is_vertex_filtered(block))
637 block->set_cachedVertex(*vi);
638 }
639}
640
641template<class ControlFlowGraph>
642void
643ControlFlow::VertexInserter<ControlFlowGraph>::conditionally_add_vertex(SgAsmBlock *block)
644{
645 if (block && block->hasInstructions() && !analyzer->is_vertex_filtered(block) && !bv_map.exists(block)) {
646 Vertex vertex = boost::add_vertex(cfg);
647 bv_map[block] = vertex;
648 put_ast_node(cfg, vertex, block);
649 }
650}
651
652template<class ControlFlowGraph>
653void
655{
656 typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
657 Vertex NO_VERTEX = boost::graph_traits<ControlFlowGraph>::null_vertex();
658 typedef Map<SgAsmBlock*, Vertex> BlockVertexMap;
659 BlockVertexMap bv_map;
660
661 // Add the vertices
662 cfg.clear();
663 VertexInserter<ControlFlowGraph>(this, cfg, bv_map).traverse(root, preorder);
664
665 // Mapping from block entry address to CFG vertex
666 Map<rose_addr_t, Vertex> addrToVertex;
667 for (typename BlockVertexMap::iterator bvi=bv_map.begin(); bvi!=bv_map.end(); ++bvi)
668 addrToVertex[bvi->first->get_address()] = bvi->second;
669
670 // Add the edges
671 BOOST_FOREACH (Vertex sourceVertex, boost::vertices(cfg)) {
672 SgAsmBlock *sourceBlock = get_ast_node(cfg, sourceVertex);
673 for (SgAsmIntegerValueExpression *integerValue: sourceBlock->get_successors()) {
674 Vertex targetVertex = addrToVertex.get_value_or(integerValue->get_absoluteValue(), NO_VERTEX);
675 if (targetVertex!=NO_VERTEX) {
676 SgAsmBlock *targetBlock = get_ast_node(cfg, targetVertex);
677 assert(targetBlock!=NULL); // since we have a vertex, there must be an SgAsmBlock!
678 if (!is_edge_filtered(sourceBlock, targetBlock))
679 boost::add_edge(sourceVertex, targetVertex, cfg);
680 }
681 }
682 }
683}
684
685template<class ControlFlowGraph>
686void
687ControlFlow::build_insn_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg)
688{
689 BlockGraph cfgb;
690 build_block_cfg_from_ast(root, cfgb);
691 explode_blocks(cfgb, cfg);
692 bool preserve_call_fallthrough_edges = false;
693 fixup_fcall_fret(cfg, preserve_call_fallthrough_edges);
694}
695
696template<class ControlFlowGraph>
697void
698ControlFlow::build_cg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/)
699{
700 struct T1: public EdgeFilter {
701 EdgeFilter *parent;
702 T1(EdgeFilter *parent): parent(parent) {}
703 bool operator()(ControlFlow *analyzer, SgAsmNode *src, SgAsmNode *dst) {
704 SgAsmFunction *src_func = SageInterface::getEnclosingNode<SgAsmFunction>(src, true);
705 SgAsmBlock *dst_block = SageInterface::getEnclosingNode<SgAsmBlock>(dst, true);
706 SgAsmFunction *dst_func = SageInterface::getEnclosingNode<SgAsmFunction>(dst_block);
707 if (!src_func || !dst_func || dst_block!=dst_func->get_entryBlock()) {
708 return false;
709 } else if (src_func!=dst_func) {
710 // inter-function call, not a return edge
711 } else {
712 // FIXME: this might not actually be a recursive call [Robb P. Matzke 2013-09-05]
713 }
714 return parent ? (*parent)(analyzer, src, dst) : true;
715 }
716 };
717
718 EdgeFilter *parent = get_edge_filter();
719 T1 edge_filter(parent);
720 try {
721 set_edge_filter(&edge_filter);
722 build_block_cfg_from_ast(root, cfg);
723 } catch (...) {
724 set_edge_filter(parent);
725 throw;
726 }
727 set_edge_filter(parent);
728}
729
730template<class ControlFlowGraph>
731void
732ControlFlow::copy(const ControlFlowGraph &src, ControlFlowGraph &dst/*out*/)
733{
734 typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
735 Vertex NO_VERTEX = boost::graph_traits<ControlFlowGraph>::null_vertex();
736
737 dst.clear();
738 std::vector<Vertex> src_to_dst(boost::num_vertices(src), NO_VERTEX);
739
740 typename boost::graph_traits<const ControlFlowGraph>::vertex_iterator vi, vi_end;
741 for (boost::tie(vi, vi_end)=boost::vertices(src); vi!=vi_end; ++vi) {
742 SgAsmNode *node = get_ast_node(src, *vi);
743 if (!is_vertex_filtered(node)) {
744 src_to_dst[*vi] = boost::add_vertex(dst);
745 put_ast_node(dst, src_to_dst[*vi], get_ast_node(src, *vi));
746 }
747 }
748
749 typename boost::graph_traits<const ControlFlowGraph>::edge_iterator ei, ei_end;
750 for (boost::tie(ei, ei_end)=boost::edges(src); ei!=ei_end; ++ei) {
751 if (NO_VERTEX!=src_to_dst[boost::source(*ei, src)] && NO_VERTEX!=src_to_dst[boost::target(*ei, src)]) {
752 SgAsmNode *node1 = get_ast_node(src, boost::source(*ei, src));
753 SgAsmNode *node2 = get_ast_node(src, boost::target(*ei, src));
754 if (!is_edge_filtered(node1, node2))
755 boost::add_edge(src_to_dst[boost::source(*ei, src)], src_to_dst[boost::target(*ei, src)], dst);
756 }
757 }
758}
759
760template<class ControlFlowGraph>
761ControlFlowGraph
762ControlFlow::copy(const ControlFlowGraph &src)
763{
764 ControlFlowGraph dst;
765 copy(src, dst);
766 return dst;
767}
768
769template<class BlockCFG, class InsnCFG>
770void
771ControlFlow::explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi/*out*/)
772{
773 // BlockCFG is the basic-block binary control flow graph
774 typedef typename boost::graph_traits<const BlockCFG>::vertex_descriptor BlockCFG_Vertex;
775 typedef typename boost::graph_traits<const BlockCFG>::vertex_iterator BlockCFG_VertexIterator;
776 typedef typename boost::graph_traits<const BlockCFG>::edge_iterator BlockCFG_EdgeIterator;
777
778 // InsnCFG is the instruction binary control flow graph--it points to instructions rather than basic blocks, and changes
779 // some edges regarding function calls.
780 typedef typename boost::graph_traits<InsnCFG>::vertex_descriptor InsnCFG_Vertex;
781 typedef std::pair<InsnCFG_Vertex, InsnCFG_Vertex> InsnCFG_VertexPair;
782
783 // Expand the cfgb basic blocks to create a cfgi that has instructions instead of blocks, and add the intra-block edges
784 cfgi.clear();
785 Map<BlockCFG_Vertex, InsnCFG_VertexPair> vertex_translation; // enter and leave instructions for each of the blocks in cfgb
786 {
787 BlockCFG_VertexIterator vi, vi_end;
788 for (boost::tie(vi, vi_end)=boost::vertices(cfgb); vi!=vi_end; ++vi) {
789 SgAsmBlock *blk = get_ast_node(cfgb, *vi);
790 const SgAsmStatementPtrList &insns = blk->get_statementList();
791 assert(!insns.empty());
792 InsnCFG_Vertex enter_vertex = boost::graph_traits<InsnCFG>::null_vertex();
793 InsnCFG_Vertex prev_vertex = boost::graph_traits<InsnCFG>::null_vertex();
794 for (SgAsmStatementPtrList::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
795 SgAsmInstruction *insn = isSgAsmInstruction(*ii);
796 assert(insn!=NULL); // basic blocks contain only instructions, no other type of asm statement
797 InsnCFG_Vertex vertex = boost::add_vertex(cfgi);
798 put_ast_node(cfgi, vertex, insn);
799 if (ii==insns.begin()) {
800 enter_vertex = vertex;
801 } else {
802 boost::add_edge(prev_vertex, vertex, cfgi);
803 }
804 prev_vertex = vertex;
805 }
806 assert(prev_vertex!=boost::graph_traits<InsnCFG>::null_vertex()); // basic block had no instructions but was in CFG!
807 vertex_translation[*vi] = InsnCFG_VertexPair(enter_vertex, prev_vertex);
808 }
809 }
810
811 // Insert the edges from cfgb. The corresponding edge in cfgi must emanate from the final instruction of the source basic
812 // block and enter at the first instruction of the target basic block.
813 {
814 BlockCFG_EdgeIterator ei, ei_end;
815 for (boost::tie(ei, ei_end)=boost::edges(cfgb); ei!=ei_end; ++ei) {
816 InsnCFG_Vertex src_leave_vertex = vertex_translation.get_one(boost::source(*ei, cfgb)).second;
817 InsnCFG_Vertex dst_enter_vertex = vertex_translation.get_one(boost::target(*ei, cfgb)).first;
818 assert(src_leave_vertex!=boost::graph_traits<InsnCFG>::null_vertex());
819 assert(dst_enter_vertex!=boost::graph_traits<InsnCFG>::null_vertex());
820 boost::add_edge(src_leave_vertex, dst_enter_vertex, cfgi);
821 }
822 }
823}
824
825template<class InsnCFG>
826void
827ControlFlow::fixup_fcall_fret(InsnCFG &cfg, bool preserve_call_fallthrough_edges)
828{
829 typedef typename boost::graph_traits<InsnCFG>::vertex_descriptor CFG_Vertex;
830 typedef typename boost::graph_traits<InsnCFG>::vertex_iterator CFG_VertexIterator;
831 typedef typename boost::graph_traits<InsnCFG>::in_edge_iterator CFG_InEdgeIterator;
832 typedef std::pair<CFG_Vertex, CFG_Vertex> CFG_VertexPair;
833 typedef Map<SgAsmInstruction*, CFG_Vertex> InsnToVertex;
834 CFG_Vertex NO_VERTEX = boost::graph_traits<InsnCFG>::null_vertex();
835
836 // Build mappings needed later and find the function return points. We just look for the x86
837 // RET instruction for now and assume that each one we find is a return if it has no control flow successors. They have no
838 // successors at this point because CFG1 didn't have any.
839 InstructionMap insns;
840 InsnToVertex insn_to_vertex;
841 std::vector<bool> isret(boost::num_vertices(cfg), false);
842 {
843 CFG_VertexIterator vi, vi_end;
844 for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
845 SgAsmInstruction *insn = get_ast_node(cfg, *vi);
846 insns[insn->get_address()] = insn;
847 insn_to_vertex[insn] = *vi;
848
849 if (0==boost::out_degree(*vi, cfg)) {
850 // FIXME: Architecture-specific code here
851 if (SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(insn)) {
852 isret[*vi] = x86_ret==insn_x86->get_kind();
853 }
854 }
855 }
856 }
857
858 // Return the entry vertex for a function that owns the indicated instruction
859 struct FunctionEntryVertex {
860 const InsnToVertex &insn_to_vertex;
861 const InstructionMap &imap;
862 FunctionEntryVertex(const InsnToVertex &insn_to_vertex, const InstructionMap &imap)
863 : insn_to_vertex(insn_to_vertex), imap(imap) {}
864 CFG_Vertex operator()(SgAsmInstruction *insn) {
865 SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn, true);
866 SgAsmInstruction *entry_insn = imap.get_one(func->get_entryVa());
867 CFG_Vertex entry_vertex = insn_to_vertex.get_one(entry_insn);
868 return entry_vertex;
869 }
870 } function_entry_vertex(insn_to_vertex, insns);
871
872 // Process each return site in order to add edges from the return site to the vertex representing the return address
873 std::vector<CFG_VertexPair> edges_to_insert, edges_to_erase;
874 {
875 CFG_VertexIterator vi, vi_end;
876 for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
877 CFG_Vertex returner_vertex = *vi;
878 if (!isret[returner_vertex])
879 continue;
880 SgAsmInstruction *returner_insn = get_ast_node(cfg, returner_vertex);
881
882 // Find all of the true call sites for the function that owns the returner instruction (e.g., RET) by recursively
883 // following inter-function CFG edges until we find the true calls (those edges that follow CALL semantics).
884 // Inter-function CFG edges can represent true calls or simply inter-function branches such as thunks. We have to
885 // gather up the information without adding it to the CFG yet (can't add while we're iterating)
886 std::vector<bool> seen(boost::num_vertices(cfg), false);
887 WorkList<CFG_Vertex> worklist; // targets of inter-function CFG edges; function callees
888 worklist.push(function_entry_vertex(returner_insn));
889 while (!worklist.empty()) {
890 CFG_Vertex callee_vertex = worklist.shift();
891 CFG_InEdgeIterator ei, ei_end;
892 for (boost::tie(ei, ei_end)=boost::in_edges(callee_vertex, cfg); ei!=ei_end; ++ei) {
893 CFG_Vertex caller_vertex = boost::source(*ei, cfg); // caller is a inter-function call or branch site
894 if (!seen[caller_vertex]) {
895 seen[caller_vertex] = true;
896 SgAsmInstruction *caller_insn = get_ast_node(cfg, caller_vertex);
897 SgAsmBlock *caller_block = SageInterface::getEnclosingNode<SgAsmBlock>(caller_insn);
898 assert(caller_block!=NULL);
899 rose_addr_t target_va, returnee_va; // returnee_va is usually the call's fall-through address
900 if (caller_block->isFunctionCall(target_va/*out*/, returnee_va/*out*/)) {
901 // This is a true call, so we need to add a return edge from the return instruction (the
902 // "returner") to what is probably the fall-through address of the call site (the returnee).
903 SgAsmInstruction *returnee_insn = insns.get_value_or(returnee_va, NULL);
904 CFG_Vertex returnee_vertex = insn_to_vertex.get_value_or(returnee_insn, NO_VERTEX);
905 if (returnee_vertex!=NO_VERTEX) {
906 edges_to_insert.push_back(CFG_VertexPair(returner_vertex, returnee_vertex));
907 edges_to_erase.push_back(CFG_VertexPair(caller_vertex, returnee_vertex));
908 }
909 } else {
910 // This is a non-call inter-function edge; probably a thunk. We need to find its call sites and add
911 // the returnee addresses (call fall throughs) to the returnee addresses of the RET we're
912 // processing.
913 worklist.push(function_entry_vertex(caller_insn));
914 }
915 }
916 }
917 }
918 }
919 }
920
921 // Erase and insert edges now that we're done iterating.
922 if (!preserve_call_fallthrough_edges) {
923 for (size_t i=0; i<edges_to_erase.size(); ++i)
924 boost::remove_edge(edges_to_erase[i].first, edges_to_erase[i].second, cfg);
925 }
926 for (size_t i=0; i<edges_to_insert.size(); ++i)
927 boost::add_edge(edges_to_insert[i].first, edges_to_insert[i].second, cfg);
928}
929
930template<class ControlFlowGraph>
931void
932ControlFlow::FlowOrder<ControlFlowGraph>::compute(const ControlFlowGraph &g, Vertex v0,
933 ReverseVertexList *reverse_order) {
934 forward_order->clear();
935 std::vector<boost::default_color_type> colors(boost::num_vertices(g), boost::white_color);
936 boost::depth_first_visit(g, v0, *this, &(colors[0]));
937 assert(!forward_order->empty()); /* it should at least contain v0 */
938 std::reverse(forward_order->begin(), forward_order->end());
939 if (reverse_order) {
940 reverse_order->clear();
941 reverse_order->resize(boost::num_vertices(g), INVALID_INDEX);
942 for (size_t i=0; i<forward_order->size(); i++)
943 (*reverse_order)[(*forward_order)[i]] = i;
944 }
945}
946
947template<class ControlFlowGraph>
948void
949ControlFlow::FlowOrder<ControlFlowGraph>::finish_vertex(Vertex v, ControlFlowGraph) {
950 forward_order->push_back(v);
951}
952
953template<class ControlFlowGraph>
954std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
955ControlFlow::flow_order(const ControlFlowGraph &cfg,
956 typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start,
957 std::vector<size_t> *reverse_order/*=NULL*/)
958{
959 std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor> forward_order;
960 FlowOrder<ControlFlowGraph>(&forward_order).compute(cfg, start, reverse_order);
961 return forward_order;
962}
963
964template<class ControlFlowGraph>
965void
966ControlFlow::ReturnBlocks<ControlFlowGraph>::finish_vertex(Vertex v, ControlFlowGraph g)
967{
968 typename boost::graph_traits<ControlFlowGraph>::out_edge_iterator ei, ei_end;
969 boost::tie(ei, ei_end) = boost::out_edges(v, g);
970 if (ei==ei_end)
971 blocks.push_back(v);
972}
973
974template<class ControlFlowGraph>
975std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
976ControlFlow::return_blocks(const ControlFlowGraph &cfg,
977 typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start)
978{
979 typename ReturnBlocks<ControlFlowGraph>::Vector result;
980 ReturnBlocks<ControlFlowGraph> visitor(result);
981 std::vector<boost::default_color_type> colors(boost::num_vertices(cfg), boost::white_color);
982 boost::depth_first_visit(cfg, start, visitor, &(colors[0]));
983 return result;
984}
985
986template<class ControlFlowGraph>
987ControlFlowGraph
989{
990 ControlFlowGraph cfg;
991 build_block_cfg_from_ast(root, cfg);
992 return cfg;
993}
994
995template<class ControlFlowGraph>
996ControlFlowGraph
998{
999 ControlFlowGraph cfg;
1000 build_insn_cfg_from_ast(root, cfg);
1001 return cfg;
1002}
1003
1004template<class ControlFlowGraph>
1005ControlFlowGraph
1007{
1008 ControlFlowGraph cfg;
1009 build_cg_from_ast(root, cfg);
1010 return cfg;
1011}
1012
1014template<typename CFG, class VertexPropertyWriter, class EdgePropertyWriter>
1015void
1016ControlFlow::write_graphviz(std::ostream &out, const CFG &cfg,
1017 const VertexPropertyWriter &vpw, const EdgePropertyWriter &epw)
1018{
1019 // typedef typename boost::graph_traits<CFG>::vertex_descriptor CFG_Vertex;
1020 typedef typename boost::graph_traits<CFG>::edge_descriptor CFG_Edge;
1021 typedef typename boost::graph_traits<CFG>::vertex_iterator CFG_VertexIterator;
1022 typedef typename boost::graph_traits<CFG>::out_edge_iterator CFG_OutEdgeIterator;
1023
1024 // Partition the graph into functions and inter-function edges
1026 Functions funcs;
1027 std::vector<CFG_Edge> interfunc_edges;
1028 CFG_VertexIterator vi, vi_end;
1029 for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
1030 SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(get_ast_node(cfg, *vi), true);
1031 FunctionSubgraphInfo<CFG> &f = funcs[func];
1032 f.vertices.push_back(*vi);
1033 CFG_OutEdgeIterator ei, ei_end;
1034 for (boost::tie(ei, ei_end)=boost::out_edges(*vi, cfg); ei!=ei_end; ++ei) {
1035 SgNode *tgt_node = get_ast_node(cfg, boost::target(*ei, cfg));
1036 SgAsmFunction *tgt_func = SageInterface::getEnclosingNode<SgAsmFunction>(tgt_node, true);
1037 if (tgt_func==func) {
1038 f.edges.push_back(*ei);
1039 } else {
1040 interfunc_edges.push_back(*ei);
1041 }
1042 }
1043 }
1044
1045 // Output subgraph info, each function in its own cluster
1046 out <<"digraph G {\n";
1047 for (typename Functions::iterator fi=funcs.begin(); fi!=funcs.end(); ++fi) {
1048 FunctionSubgraphInfo<CFG> &f = fi->second;
1049 if (!f.vertices.empty() || !f.edges.empty()) {
1050 SgNode *node = get_ast_node(cfg, f.vertices.front());
1051 SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(node, true);
1052 const size_t maxNameSize = 63;
1053 char cluster_name[maxNameSize+1];
1054 snprintf(cluster_name, maxNameSize, "cluster_F%" PRIx64, func->get_entryVa());
1055 out <<" subgraph " <<cluster_name <<" {\n"
1056 <<" style=filled;\n"
1057 <<" color=lightgrey;\n"
1058 <<" label=\"Function " <<StringUtility::addrToString(func->get_entryVa())
1059 <<(func->get_name().empty()?std::string(""):(" <"+func->get_name()+">")) <<"\";\n";
1060 for (size_t i=0; i<f.vertices.size(); ++i) {
1061 out <<" " <<f.vertices[i];
1062 vpw(out, f.vertices[i]);
1063 out <<";\n";
1064 }
1065 for (size_t i=0; i<f.edges.size(); ++i) {
1066 out <<" " <<boost::source(f.edges[i], cfg) <<"->" <<boost::target(f.edges[i], cfg);
1067 epw(out, f.edges[i]);
1068 out <<";\n";
1069 }
1070 out <<" }\n"; // subgraph
1071 }
1072 }
1073
1074 // Inter-function edges
1075 for (size_t i=0; i<interfunc_edges.size(); ++i) {
1076 out <<" " <<boost::source(interfunc_edges[i], cfg) <<"->" <<boost::target(interfunc_edges[i], cfg);
1077 epw(out, interfunc_edges[i]);
1078 out <<";\n";
1079 }
1080 out <<"}\n"; // digraph
1081}
1082
1083} // namespace
1084} // namespace
1085
1086#endif
1087#endif
Class for traversing the AST.
Extends std::map with methods that return optional values.
Definition util/Map.h:13
const T & get_value_or(const Key &key, const T &dflt) const
Convenience for getting a value from an Option.
Definition util/Map.h:78
const T & get_one(const Key &key) const
Look up one value or throw an exception.
Definition util/Map.h:61
Binary control flow analysis.
void fixup_fcall_fret(InsnCFG &cfg, bool preserve_call_fallthrough_edges)
Fix up a CFG by changing function call and return edges.
void set_vertex_filter(VertexFilter *filter)
Manipulate the vertex filter.
ControlFlowGraph build_insn_cfg_from_ast(SgNode *root)
Builds a control flow graph for part of an AST.
void clear_ast(SgNode *ast)
Clears successor information from the AST.
void apply_to_ast(const ControlFlowGraph &)
Applies graph to AST.
bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst, EdgeFilter *filter)
Determines if an edge is filtered out.
std::vector< typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor > flow_order(const ControlFlowGraph &, typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor start, std::vector< size_t > *reverse_order=NULL)
Orders nodes by depth first search reverse post order.
ControlFlowGraph copy(const ControlFlowGraph &src)
Copies a graph while filtering.
void explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi)
Create an instruction control flow graph from a basic block control flow graph.
void set_edge_filter(EdgeFilter *filter)
Manipulate the edge filter.
void write_graphviz(std::ostream &, const CFG &, const VertexPropertyWriter &, const EdgePropertyWriter &)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst)
Determines if an edge is filtered out.
VertexFilter * get_vertex_filter() const
Manipulate the vertex filter.
void write_graphviz(std::ostream &out, const CFG &cfg, const VertexPropertyWriter &vpw)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
boost::adjacency_list< boost::setS, boost::vecS, boost::bidirectionalS, boost::property< boost::vertex_name_t, SgAsmInstruction * > > InsnGraph
Default instruction-based control flow graph.
EdgeFilter * get_edge_filter() const
Manipulate the edge filter.
ControlFlowGraph build_block_cfg_from_ast(SgNode *root)
Builds a control flow graph for part of an AST.
bool is_vertex_filtered(SgAsmNode *bb_or_insn)
Determines if a vertex is filtered out.
boost::adjacency_list< boost::setS, boost::vecS, boost::bidirectionalS, boost::property< boost::vertex_name_t, SgAsmBlock * > > BlockGraph
Default basic block control flow graph type.
void cache_vertex_descriptors(const ControlFlowGraph &)
Cache basic block vertex descriptors in AST.
bool is_vertex_filtered(SgAsmNode *bb_or_insn, VertexFilter *filter)
Determines if a vertex is filtered out.
BlockGraph Graph
Default control flow graph.
ControlFlowGraph build_cg_from_ast(SgNode *root)
Builds a control flow graph with only function call edges.
void write_graphviz(std::ostream &out, const CFG &cfg)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
std::vector< typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor > return_blocks(const ControlFlowGraph &cfg, typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor start)
Returns list of function return blocks.
Graph containing user-defined vertices and edges.
Definition Graph.h:634
V VertexValue
User-level data associated with vertices.
Definition Graph.h:636
Instruction basic block.
void set_cachedVertex(size_t const &)
Property: Cached vertex for control flow graphs.
bool isFunctionCall(rose_addr_t &target_va, rose_addr_t &return_va)
Returns true if basic block appears to be a function call.
SgAsmIntegerValuePtrList const & get_successors() const
Property: Control flow successors.
void set_successorsComplete(bool const &)
Property: Whether the successors list is complete.
SgAsmStatementPtrList const & get_statementList() const
Property: Statements of which this block is composed.
bool hasInstructions() const
Determines if a block contains instructions.
Represents a synthesized function.
rose_addr_t const & get_entryVa() const
Property: Primary entry address.
SgAsmBlock * get_entryBlock() const
Function entry basic block.
std::string const & get_name() const
Property: Name.
Base class for machine instructions.
Base class for integer values.
Base class for all binary analysis IR nodes.
rose_addr_t const & get_address() const
Property: Starting virtual address.
Represents one Intel x86 machine instruction.
This class represents the base class for all IR nodes within Sage III.
List of things to work on.
Definition WorkLists.h:64
bool push(const T &, boost::tribool check_uniqueness=boost::logic::indeterminate)
Add an item to the back of the work list.
Definition WorkLists.h:202
bool empty() const
Returns true if this work list is empty.
Definition WorkLists.h:72
T shift()
Remove and return the item from the front of the work list.
Definition WorkLists.h:236
Sawyer::Container::Graph< V, E >::VertexValue get_ast_node(const Sawyer::Container::Graph< V, E > &cfg, size_t vertexId)
Return the AST node associated with a vertex.
void put_ast_node(Sawyer::Container::Graph< V, E > &cfg, size_t vertexId, AstNode *astNode)
Set the AST node associated with a vertex.
ROSE_UTIL_API std::string addrToString(uint64_t value, size_t nbits=0)
Convert a virtual address to a string.
ROSE_UTIL_API std::string numberToString(long long)
Convert an integer to a string.
The ROSE library.
const size_t INVALID_INDEX
Invalid array index.
Definition Constants.h:25
Default vertex property writer is a no-op.
List of vertices and intra-function edges for one function.