ROSE 0.11.145.205
ControlFlow.h
1#ifndef ROSE_BinaryAnalysis_ControlFlow_H
2#define ROSE_BinaryAnalysis_ControlFlow_H
3#include <featureTests.h>
4#ifdef ROSE_ENABLE_BINARY_ANALYSIS
5
6#include <Rose/BinaryAnalysis/InstructionMap.h>
7
8#include "Map.h"
9#include "WorkLists.h"
10#include "SageBuilderAsm.h"
11#include <AstSimpleProcessing.h>
12
13#include <boost/foreach.hpp> // needed for iteration over boost::vertices
14#include <boost/graph/adjacency_list.hpp>
15#include <boost/graph/reverse_graph.hpp>
16#include <boost/graph/depth_first_search.hpp>
17#include <Sawyer/GraphBoost.h>
18
19class SgNode;
20class SgAsmBlock;
21
22namespace Rose {
23namespace BinaryAnalysis {
24
137public:
139 : vertex_filter(NULL), edge_filter(NULL)
140 {}
141
142
161 typedef boost::adjacency_list<boost::setS, /* edges of each vertex in std::list */
162 boost::vecS, /* vertices in std::vector */
163 boost::bidirectionalS,
164 boost::property<boost::vertex_name_t, SgAsmBlock*> > BlockGraph;
165
184 typedef boost::adjacency_list<boost::setS,
185 boost::vecS,
186 boost::bidirectionalS,
187 boost::property<boost::vertex_name_t, SgAsmInstruction*> > InsnGraph;
188
192
193
194 /**********************************************************************************************************************
195 * Filters
196 **********************************************************************************************************************/
197public:
198
204 public:
205 virtual ~VertexFilter() {}
206 virtual bool operator()(ControlFlow*, SgAsmNode*) = 0;
207 };
208
214 public:
215 virtual ~EdgeFilter() {}
216 virtual bool operator()(ControlFlow*, SgAsmNode *source, SgAsmNode *target) = 0;
217 };
218
225 void set_vertex_filter(VertexFilter *filter) { vertex_filter = filter; }
226 VertexFilter *get_vertex_filter() const { return vertex_filter; }
235 void set_edge_filter(EdgeFilter *filter) { edge_filter = filter; }
236 EdgeFilter *get_edge_filter() const { return edge_filter; }
244 bool is_vertex_filtered(SgAsmNode *bb_or_insn, VertexFilter *filter) { return filter && !(*filter)(this, bb_or_insn); }
245 bool is_vertex_filtered(SgAsmNode *bb_or_insn) { return is_vertex_filtered(bb_or_insn, vertex_filter); }
254 bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst, EdgeFilter *filter) {
255 return filter && !(*filter)(this, src, dst);
256 }
258 return is_edge_filtered(src, dst, edge_filter);
259 }
262protected:
263 VertexFilter *vertex_filter;
264 EdgeFilter *edge_filter;
265
266 /**********************************************************************************************************************
267 * Methods that modify the AST
268 **********************************************************************************************************************/
269public:
270
277 void clear_ast(SgNode *ast);
278
293 template<class ControlFlowGraph>
294 void apply_to_ast(const ControlFlowGraph&);
295
309 template<class ControlFlowGraph>
310 void cache_vertex_descriptors(const ControlFlowGraph&);
311
312 /**********************************************************************************************************************
313 * Graph construction methods
314 **********************************************************************************************************************/
315public:
316
342 template<class ControlFlowGraph>
343 ControlFlowGraph build_block_cfg_from_ast(SgNode *root);
344
345 template<class ControlFlowGraph>
346 void build_block_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
347
348 template<class ControlFlowGraph>
349 ControlFlowGraph build_insn_cfg_from_ast(SgNode *root);
350
351 template<class ControlFlowGraph>
352 void build_insn_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
357 template<class BlockCFG, class InsnCFG>
358 void explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi/*out*/);
364 template<class InsnCFG>
365 void fixup_fcall_fret(InsnCFG &cfg/*in,out*/, bool preserve_call_fallthrough_edges);
366
376 template<class ControlFlowGraph>
377 ControlFlowGraph build_cg_from_ast(SgNode *root);
378
379 template<class ControlFlowGraph>
380 void build_cg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/);
393 template<class ControlFlowGraph>
394 ControlFlowGraph copy(const ControlFlowGraph &src);
395
396 template<class ControlFlowGraph>
397 void copy(const ControlFlowGraph &src, ControlFlowGraph &dst/*out*/);
400 /***********************************************************************************************************************
401 * Graph output
402 ***********************************************************************************************************************/
403
405 template<class CFG>
407 std::vector<typename boost::graph_traits<CFG>::vertex_descriptor> vertices;
408 std::vector<typename boost::graph_traits<CFG>::edge_descriptor> edges;
409 };
410
412 template<class CFG>
414 void operator()(std::ostream &, typename boost::graph_traits<CFG>::vertex_descriptor) const {}
415 };
416
418 template<class CFG>
420 void operator()(std::ostream&, typename boost::graph_traits<CFG>::edge_descriptor /*vertex*/) const {}
421 };
422
425 template<typename CFG, class VertexPropertyWriter, class EdgePropertyWriter>
426 void write_graphviz(std::ostream&, const CFG&, const VertexPropertyWriter&, const EdgePropertyWriter&);
427
428 template<typename CFG>
429 void write_graphviz(std::ostream &out, const CFG &cfg) {
431 }
432
433 template<typename CFG, class VertexPropertyWriter>
434 void write_graphviz(std::ostream &out, const CFG &cfg, const VertexPropertyWriter &vpw) {
436 }
439 /**********************************************************************************************************************
440 * Miscellaneous members
441 **********************************************************************************************************************/
442
443private:
444 /* Visitor used by flow_order(). Declaring this in function scope results in boost errors (boost-1.42, 2011-05). */
445 template<class ControlFlowGraph>
446 struct FlowOrder: public boost::default_dfs_visitor {
447 typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
448 typedef std::vector<Vertex> VertexList;
449 typedef std::vector<size_t> ReverseVertexList;
450 VertexList *forward_order;
451 FlowOrder(VertexList *forward_order): forward_order(forward_order) {}
452 void compute(const ControlFlowGraph &g, Vertex v0, ReverseVertexList *reverse_order);
453 void finish_vertex(Vertex v, ControlFlowGraph g);
454 };
455
456 /* Helper class for build_block_cfg_from_ast(). Adds vertices to its 'cfg' member. Vertices are any SgAsmBlock that
457 * contains at least one SgAsmInstruction. */
458 template<class ControlFlowGraph>
459 class VertexInserter: public AstSimpleProcessing {
460 public:
461 ControlFlow *analyzer;
462 ControlFlowGraph &cfg;
463 typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
464 typedef Map<SgAsmBlock*, Vertex> BlockVertexMap;
465 BlockVertexMap &bv_map;
466 VertexInserter(ControlFlow *analyzer, ControlFlowGraph &cfg, BlockVertexMap &bv_map)
467 : analyzer(analyzer), cfg(cfg), bv_map(bv_map)
468 {}
469 // Add basic block to graph if it hasn't been added already.
470 void conditionally_add_vertex(SgAsmBlock *block);
471
472 void visit(SgNode *node) {
473 if (isSgAsmFunction(node)) {
474 // Add the function entry block before the other blocks of the function. This ensures that the entry block
475 // of a function has a lower vertex number than the other blocks of the function (the traversal is not
476 // guaranteed to visit the function basic blocks in that order).
477 conditionally_add_vertex(isSgAsmFunction(node)->get_entryBlock());
478 } else {
479 conditionally_add_vertex(isSgAsmBlock(node));
480 }
481 }
482 };
483
484public:
517 template<class ControlFlowGraph>
518 std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
519 flow_order(const ControlFlowGraph&,
520 typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start,
521 std::vector<size_t> *reverse_order=NULL);
522
523private:
524 /* Visitor used by return_blocks(). Declaring this in function scope results in boost errors (boost-1.42, 2011-05). */
525 template<class ControlFlowGraph>
526 struct ReturnBlocks: public boost::default_dfs_visitor {
527 typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
528 typedef std::vector<Vertex> Vector;
529 Vector &blocks;
530 ReturnBlocks(Vector &blocks): blocks(blocks) {}
531 void finish_vertex(Vertex v, ControlFlowGraph g);
532 };
533
534public:
542 template<class ControlFlowGraph>
543 std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
544 return_blocks(const ControlFlowGraph &cfg,
545 typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start);
546};
547
548
549/*******************************************************************************************************************************
550 * Functions
551 *******************************************************************************************************************************/
552
555template<class V, class E>
557get_ast_node(const Sawyer::Container::Graph<V, E> &cfg, size_t vertexId) {
558 typedef typename Sawyer::Container::Graph<V, E> CFG;
559 typename CFG::ConstVertexValueIterator iter = cfg.findVertex(vertexId);
560 ASSERT_forbid2(iter==cfg.vertices().end(), "invalid vertex ID " + StringUtility::numberToString(vertexId));
561 return *iter;
562}
563
566template<class V, class E, class AstNode>
567void
568put_ast_node(Sawyer::Container::Graph<V, E> &cfg, size_t vertexId, AstNode *astNode) {
569 typedef typename Sawyer::Container::Graph<V, E> CFG;
570 typename CFG::VertexValueIterator iter = cfg.findVertex(vertexId);
571 ASSERT_forbid2(iter==cfg.vertices().end(), "invalid vertex ID " + StringUtility::numberToString(vertexId));
572 *iter = astNode;
573}
574
575// Sorry about this mess! The goal is to match only boost::adjacency_list graphs.
576template<class A, class B, class C, class D, class E, class F, class G>
577typename boost::property_traits<typename boost::property_map<boost::adjacency_list<A, B, C, D, E, F, G>,
578 boost::vertex_name_t>::type>::value_type
579get_ast_node(const boost::adjacency_list<A, B, C, D, E, F, G> &cfg,
580 typename boost::graph_traits<boost::adjacency_list<A, B, C, D, E, F, G> >::vertex_descriptor vertex) {
581 return boost::get(boost::vertex_name, cfg, vertex);
582}
583
584// Sorry about this mess! The goal is to match only boost::adjacency_list graphs.
585template<class A, class B, class C, class D, class E, class F, class G>
586void
587put_ast_node(boost::adjacency_list<A, B, C, D, E, F, G> &cfg,
588 typename boost::graph_traits<boost::adjacency_list<A, B, C, D, E, F, G> >::vertex_descriptor vertex,
589 typename boost::property_traits<
590 typename boost::property_map<boost::adjacency_list<A, B, C, D, E, F, G>, boost::vertex_name_t>::type
591 >::value_type ast_node) {
592 boost::put(boost::vertex_name, cfg, vertex, ast_node);
593}
594
595/******************************************************************************************************************************
596 * Function template definitions
597 ******************************************************************************************************************************/
598
599template<class ControlFlowGraph>
600void
601ControlFlow::apply_to_ast(const ControlFlowGraph &cfg)
602{
603 typename boost::graph_traits<ControlFlowGraph>::vertex_iterator vi, vi_end;
604 for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
605 SgAsmBlock *block = get_ast_node(cfg, *vi); // FIXME: Instruction CFGs not supported yet
606 if (!block || is_vertex_filtered(block))
607 continue;
608
609 /* Delete old targets */
610 const SgAsmIntegerValuePtrList &targets = block->get_successors();
611 for (SgAsmIntegerValuePtrList::const_iterator ti=targets.begin(); ti!=targets.end(); ++ti)
612 delete *ti;
613
614 /* Add new targets */
615 block->set_successorsComplete(true);
616 block->get_successors().clear();
617 typename boost::graph_traits<ControlFlowGraph>::out_edge_iterator ei, ei_end;
618 for (boost::tie(ei, ei_end)=boost::out_edges(*vi, cfg); ei!=ei_end; ++ei) {
619 SgAsmBlock *target_block = get_ast_node(cfg, boost::target(*ei, cfg));
620 if (target_block && !is_edge_filtered(block, target_block)) {
621 SgAsmIntegerValueExpression *target = SageBuilderAsm::buildValueU64(target_block->get_address());
622 target->makeRelativeTo(target_block);
623 target->set_parent(block);
624 block->get_successors().push_back(target);
625 }
626 }
627 }
628}
629
630template<class ControlFlowGraph>
631void
632ControlFlow::cache_vertex_descriptors(const ControlFlowGraph &cfg)
633{
634 typename boost::graph_traits<ControlFlowGraph>::vertex_iterator vi, vi_end;
635 for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
636 SgAsmBlock *block = get_ast_node(cfg, *vi); // FIXME: Instruction CFGs not supported yet
637 if (block && !is_vertex_filtered(block))
638 block->set_cachedVertex(*vi);
639 }
640}
641
642template<class ControlFlowGraph>
643void
644ControlFlow::VertexInserter<ControlFlowGraph>::conditionally_add_vertex(SgAsmBlock *block)
645{
646 if (block && block->hasInstructions() && !analyzer->is_vertex_filtered(block) && !bv_map.exists(block)) {
647 Vertex vertex = boost::add_vertex(cfg);
648 bv_map[block] = vertex;
649 put_ast_node(cfg, vertex, block);
650 }
651}
652
653template<class ControlFlowGraph>
654void
656{
657 typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
658 Vertex NO_VERTEX = boost::graph_traits<ControlFlowGraph>::null_vertex();
659 typedef Map<SgAsmBlock*, Vertex> BlockVertexMap;
660 BlockVertexMap bv_map;
661
662 // Add the vertices
663 cfg.clear();
664 VertexInserter<ControlFlowGraph>(this, cfg, bv_map).traverse(root, preorder);
665
666 // Mapping from block entry address to CFG vertex
667 Map<rose_addr_t, Vertex> addrToVertex;
668 for (typename BlockVertexMap::iterator bvi=bv_map.begin(); bvi!=bv_map.end(); ++bvi)
669 addrToVertex[bvi->first->get_address()] = bvi->second;
670
671 // Add the edges
672 BOOST_FOREACH (Vertex sourceVertex, boost::vertices(cfg)) {
673 SgAsmBlock *sourceBlock = get_ast_node(cfg, sourceVertex);
674 for (SgAsmIntegerValueExpression *integerValue: sourceBlock->get_successors()) {
675 Vertex targetVertex = addrToVertex.get_value_or(integerValue->get_absoluteValue(), NO_VERTEX);
676 if (targetVertex!=NO_VERTEX) {
677 SgAsmBlock *targetBlock = get_ast_node(cfg, targetVertex);
678 assert(targetBlock!=NULL); // since we have a vertex, there must be an SgAsmBlock!
679 if (!is_edge_filtered(sourceBlock, targetBlock))
680 boost::add_edge(sourceVertex, targetVertex, cfg);
681 }
682 }
683 }
684}
685
686template<class ControlFlowGraph>
687void
688ControlFlow::build_insn_cfg_from_ast(SgNode *root, ControlFlowGraph &cfg)
689{
690 BlockGraph cfgb;
691 build_block_cfg_from_ast(root, cfgb);
692 explode_blocks(cfgb, cfg);
693 bool preserve_call_fallthrough_edges = false;
694 fixup_fcall_fret(cfg, preserve_call_fallthrough_edges);
695}
696
697template<class ControlFlowGraph>
698void
699ControlFlow::build_cg_from_ast(SgNode *root, ControlFlowGraph &cfg/*out*/)
700{
701 struct T1: public EdgeFilter {
702 EdgeFilter *parent;
703 T1(EdgeFilter *parent): parent(parent) {}
704 bool operator()(ControlFlow *analyzer, SgAsmNode *src, SgAsmNode *dst) {
705 SgAsmFunction *src_func = SageInterface::getEnclosingNode<SgAsmFunction>(src, true);
706 SgAsmBlock *dst_block = SageInterface::getEnclosingNode<SgAsmBlock>(dst, true);
707 SgAsmFunction *dst_func = SageInterface::getEnclosingNode<SgAsmFunction>(dst_block);
708 if (!src_func || !dst_func || dst_block!=dst_func->get_entryBlock()) {
709 return false;
710 } else if (src_func!=dst_func) {
711 // inter-function call, not a return edge
712 } else {
713 // FIXME: this might not actually be a recursive call [Robb P. Matzke 2013-09-05]
714 }
715 return parent ? (*parent)(analyzer, src, dst) : true;
716 }
717 };
718
719 EdgeFilter *parent = get_edge_filter();
720 T1 edge_filter(parent);
721 try {
722 set_edge_filter(&edge_filter);
723 build_block_cfg_from_ast(root, cfg);
724 } catch (...) {
725 set_edge_filter(parent);
726 throw;
727 }
728 set_edge_filter(parent);
729}
730
731template<class ControlFlowGraph>
732void
733ControlFlow::copy(const ControlFlowGraph &src, ControlFlowGraph &dst/*out*/)
734{
735 typedef typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor Vertex;
736 Vertex NO_VERTEX = boost::graph_traits<ControlFlowGraph>::null_vertex();
737
738 dst.clear();
739 std::vector<Vertex> src_to_dst(boost::num_vertices(src), NO_VERTEX);
740
741 typename boost::graph_traits<const ControlFlowGraph>::vertex_iterator vi, vi_end;
742 for (boost::tie(vi, vi_end)=boost::vertices(src); vi!=vi_end; ++vi) {
743 SgAsmNode *node = get_ast_node(src, *vi);
744 if (!is_vertex_filtered(node)) {
745 src_to_dst[*vi] = boost::add_vertex(dst);
746 put_ast_node(dst, src_to_dst[*vi], get_ast_node(src, *vi));
747 }
748 }
749
750 typename boost::graph_traits<const ControlFlowGraph>::edge_iterator ei, ei_end;
751 for (boost::tie(ei, ei_end)=boost::edges(src); ei!=ei_end; ++ei) {
752 if (NO_VERTEX!=src_to_dst[boost::source(*ei, src)] && NO_VERTEX!=src_to_dst[boost::target(*ei, src)]) {
753 SgAsmNode *node1 = get_ast_node(src, boost::source(*ei, src));
754 SgAsmNode *node2 = get_ast_node(src, boost::target(*ei, src));
755 if (!is_edge_filtered(node1, node2))
756 boost::add_edge(src_to_dst[boost::source(*ei, src)], src_to_dst[boost::target(*ei, src)], dst);
757 }
758 }
759}
760
761template<class ControlFlowGraph>
762ControlFlowGraph
763ControlFlow::copy(const ControlFlowGraph &src)
764{
765 ControlFlowGraph dst;
766 copy(src, dst);
767 return dst;
768}
769
770template<class BlockCFG, class InsnCFG>
771void
772ControlFlow::explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi/*out*/)
773{
774 // BlockCFG is the basic-block binary control flow graph
775 typedef typename boost::graph_traits<const BlockCFG>::vertex_descriptor BlockCFG_Vertex;
776 typedef typename boost::graph_traits<const BlockCFG>::vertex_iterator BlockCFG_VertexIterator;
777 typedef typename boost::graph_traits<const BlockCFG>::edge_iterator BlockCFG_EdgeIterator;
778
779 // InsnCFG is the instruction binary control flow graph--it points to instructions rather than basic blocks, and changes
780 // some edges regarding function calls.
781 typedef typename boost::graph_traits<InsnCFG>::vertex_descriptor InsnCFG_Vertex;
782 typedef std::pair<InsnCFG_Vertex, InsnCFG_Vertex> InsnCFG_VertexPair;
783
784 // Expand the cfgb basic blocks to create a cfgi that has instructions instead of blocks, and add the intra-block edges
785 cfgi.clear();
786 Map<BlockCFG_Vertex, InsnCFG_VertexPair> vertex_translation; // enter and leave instructions for each of the blocks in cfgb
787 {
788 BlockCFG_VertexIterator vi, vi_end;
789 for (boost::tie(vi, vi_end)=boost::vertices(cfgb); vi!=vi_end; ++vi) {
790 SgAsmBlock *blk = get_ast_node(cfgb, *vi);
791 const SgAsmStatementPtrList &insns = blk->get_statementList();
792 assert(!insns.empty());
793 InsnCFG_Vertex enter_vertex = boost::graph_traits<InsnCFG>::null_vertex();
794 InsnCFG_Vertex prev_vertex = boost::graph_traits<InsnCFG>::null_vertex();
795 for (SgAsmStatementPtrList::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
796 SgAsmInstruction *insn = isSgAsmInstruction(*ii);
797 assert(insn!=NULL); // basic blocks contain only instructions, no other type of asm statement
798 InsnCFG_Vertex vertex = boost::add_vertex(cfgi);
799 put_ast_node(cfgi, vertex, insn);
800 if (ii==insns.begin()) {
801 enter_vertex = vertex;
802 } else {
803 boost::add_edge(prev_vertex, vertex, cfgi);
804 }
805 prev_vertex = vertex;
806 }
807 assert(prev_vertex!=boost::graph_traits<InsnCFG>::null_vertex()); // basic block had no instructions but was in CFG!
808 vertex_translation[*vi] = InsnCFG_VertexPair(enter_vertex, prev_vertex);
809 }
810 }
811
812 // Insert the edges from cfgb. The corresponding edge in cfgi must emanate from the final instruction of the source basic
813 // block and enter at the first instruction of the target basic block.
814 {
815 BlockCFG_EdgeIterator ei, ei_end;
816 for (boost::tie(ei, ei_end)=boost::edges(cfgb); ei!=ei_end; ++ei) {
817 InsnCFG_Vertex src_leave_vertex = vertex_translation.get_one(boost::source(*ei, cfgb)).second;
818 InsnCFG_Vertex dst_enter_vertex = vertex_translation.get_one(boost::target(*ei, cfgb)).first;
819 assert(src_leave_vertex!=boost::graph_traits<InsnCFG>::null_vertex());
820 assert(dst_enter_vertex!=boost::graph_traits<InsnCFG>::null_vertex());
821 boost::add_edge(src_leave_vertex, dst_enter_vertex, cfgi);
822 }
823 }
824}
825
826template<class InsnCFG>
827void
828ControlFlow::fixup_fcall_fret(InsnCFG &cfg, bool preserve_call_fallthrough_edges)
829{
830 typedef typename boost::graph_traits<InsnCFG>::vertex_descriptor CFG_Vertex;
831 typedef typename boost::graph_traits<InsnCFG>::vertex_iterator CFG_VertexIterator;
832 typedef typename boost::graph_traits<InsnCFG>::in_edge_iterator CFG_InEdgeIterator;
833 typedef std::pair<CFG_Vertex, CFG_Vertex> CFG_VertexPair;
834 typedef Map<SgAsmInstruction*, CFG_Vertex> InsnToVertex;
835 CFG_Vertex NO_VERTEX = boost::graph_traits<InsnCFG>::null_vertex();
836
837 // Build mappings needed later and find the function return points. We just look for the x86
838 // RET instruction for now and assume that each one we find is a return if it has no control flow successors. They have no
839 // successors at this point because CFG1 didn't have any.
840 InstructionMap insns;
841 InsnToVertex insn_to_vertex;
842 std::vector<bool> isret(boost::num_vertices(cfg), false);
843 {
844 CFG_VertexIterator vi, vi_end;
845 for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
846 SgAsmInstruction *insn = get_ast_node(cfg, *vi);
847 insns[insn->get_address()] = insn;
848 insn_to_vertex[insn] = *vi;
849
850 if (0==boost::out_degree(*vi, cfg)) {
851 // FIXME: Architecture-specific code here
852 if (SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(insn)) {
853 isret[*vi] = x86_ret==insn_x86->get_kind();
854 }
855 }
856 }
857 }
858
859 // Return the entry vertex for a function that owns the indicated instruction
860 struct FunctionEntryVertex {
861 const InsnToVertex &insn_to_vertex;
862 const InstructionMap &imap;
863 FunctionEntryVertex(const InsnToVertex &insn_to_vertex, const InstructionMap &imap)
864 : insn_to_vertex(insn_to_vertex), imap(imap) {}
865 CFG_Vertex operator()(SgAsmInstruction *insn) {
866 SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(insn, true);
867 SgAsmInstruction *entry_insn = imap.get_one(func->get_entryVa());
868 CFG_Vertex entry_vertex = insn_to_vertex.get_one(entry_insn);
869 return entry_vertex;
870 }
871 } function_entry_vertex(insn_to_vertex, insns);
872
873 // Process each return site in order to add edges from the return site to the vertex representing the return address
874 std::vector<CFG_VertexPair> edges_to_insert, edges_to_erase;
875 {
876 CFG_VertexIterator vi, vi_end;
877 for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
878 CFG_Vertex returner_vertex = *vi;
879 if (!isret[returner_vertex])
880 continue;
881 SgAsmInstruction *returner_insn = get_ast_node(cfg, returner_vertex);
882
883 // Find all of the true call sites for the function that owns the returner instruction (e.g., RET) by recursively
884 // following inter-function CFG edges until we find the true calls (those edges that follow CALL semantics).
885 // Inter-function CFG edges can represent true calls or simply inter-function branches such as thunks. We have to
886 // gather up the information without adding it to the CFG yet (can't add while we're iterating)
887 std::vector<bool> seen(boost::num_vertices(cfg), false);
888 WorkList<CFG_Vertex> worklist; // targets of inter-function CFG edges; function callees
889 worklist.push(function_entry_vertex(returner_insn));
890 while (!worklist.empty()) {
891 CFG_Vertex callee_vertex = worklist.shift();
892 CFG_InEdgeIterator ei, ei_end;
893 for (boost::tie(ei, ei_end)=boost::in_edges(callee_vertex, cfg); ei!=ei_end; ++ei) {
894 CFG_Vertex caller_vertex = boost::source(*ei, cfg); // caller is a inter-function call or branch site
895 if (!seen[caller_vertex]) {
896 seen[caller_vertex] = true;
897 SgAsmInstruction *caller_insn = get_ast_node(cfg, caller_vertex);
898 SgAsmBlock *caller_block = SageInterface::getEnclosingNode<SgAsmBlock>(caller_insn);
899 assert(caller_block!=NULL);
900 rose_addr_t target_va, returnee_va; // returnee_va is usually the call's fall-through address
901 if (caller_block->isFunctionCall(target_va/*out*/, returnee_va/*out*/)) {
902 // This is a true call, so we need to add a return edge from the return instruction (the
903 // "returner") to what is probably the fall-through address of the call site (the returnee).
904 SgAsmInstruction *returnee_insn = insns.get_value_or(returnee_va, NULL);
905 CFG_Vertex returnee_vertex = insn_to_vertex.get_value_or(returnee_insn, NO_VERTEX);
906 if (returnee_vertex!=NO_VERTEX) {
907 edges_to_insert.push_back(CFG_VertexPair(returner_vertex, returnee_vertex));
908 edges_to_erase.push_back(CFG_VertexPair(caller_vertex, returnee_vertex));
909 }
910 } else {
911 // This is a non-call inter-function edge; probably a thunk. We need to find its call sites and add
912 // the returnee addresses (call fall throughs) to the returnee addresses of the RET we're
913 // processing.
914 worklist.push(function_entry_vertex(caller_insn));
915 }
916 }
917 }
918 }
919 }
920 }
921
922 // Erase and insert edges now that we're done iterating.
923 if (!preserve_call_fallthrough_edges) {
924 for (size_t i=0; i<edges_to_erase.size(); ++i)
925 boost::remove_edge(edges_to_erase[i].first, edges_to_erase[i].second, cfg);
926 }
927 for (size_t i=0; i<edges_to_insert.size(); ++i)
928 boost::add_edge(edges_to_insert[i].first, edges_to_insert[i].second, cfg);
929}
930
931template<class ControlFlowGraph>
932void
933ControlFlow::FlowOrder<ControlFlowGraph>::compute(const ControlFlowGraph &g, Vertex v0,
934 ReverseVertexList *reverse_order) {
935 forward_order->clear();
936 std::vector<boost::default_color_type> colors(boost::num_vertices(g), boost::white_color);
937 boost::depth_first_visit(g, v0, *this, &(colors[0]));
938 assert(!forward_order->empty()); /* it should at least contain v0 */
939 std::reverse(forward_order->begin(), forward_order->end());
940 if (reverse_order) {
941 reverse_order->clear();
942 reverse_order->resize(boost::num_vertices(g), INVALID_INDEX);
943 for (size_t i=0; i<forward_order->size(); i++)
944 (*reverse_order)[(*forward_order)[i]] = i;
945 }
946}
947
948template<class ControlFlowGraph>
949void
950ControlFlow::FlowOrder<ControlFlowGraph>::finish_vertex(Vertex v, ControlFlowGraph) {
951 forward_order->push_back(v);
952}
953
954template<class ControlFlowGraph>
955std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
956ControlFlow::flow_order(const ControlFlowGraph &cfg,
957 typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start,
958 std::vector<size_t> *reverse_order/*=NULL*/)
959{
960 std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor> forward_order;
961 FlowOrder<ControlFlowGraph>(&forward_order).compute(cfg, start, reverse_order);
962 return forward_order;
963}
964
965template<class ControlFlowGraph>
966void
967ControlFlow::ReturnBlocks<ControlFlowGraph>::finish_vertex(Vertex v, ControlFlowGraph g)
968{
969 typename boost::graph_traits<ControlFlowGraph>::out_edge_iterator ei, ei_end;
970 boost::tie(ei, ei_end) = boost::out_edges(v, g);
971 if (ei==ei_end)
972 blocks.push_back(v);
973}
974
975template<class ControlFlowGraph>
976std::vector<typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor>
977ControlFlow::return_blocks(const ControlFlowGraph &cfg,
978 typename boost::graph_traits<ControlFlowGraph>::vertex_descriptor start)
979{
980 typename ReturnBlocks<ControlFlowGraph>::Vector result;
981 ReturnBlocks<ControlFlowGraph> visitor(result);
982 std::vector<boost::default_color_type> colors(boost::num_vertices(cfg), boost::white_color);
983 boost::depth_first_visit(cfg, start, visitor, &(colors[0]));
984 return result;
985}
986
987template<class ControlFlowGraph>
988ControlFlowGraph
990{
991 ControlFlowGraph cfg;
992 build_block_cfg_from_ast(root, cfg);
993 return cfg;
994}
995
996template<class ControlFlowGraph>
997ControlFlowGraph
999{
1000 ControlFlowGraph cfg;
1001 build_insn_cfg_from_ast(root, cfg);
1002 return cfg;
1003}
1004
1005template<class ControlFlowGraph>
1006ControlFlowGraph
1008{
1009 ControlFlowGraph cfg;
1010 build_cg_from_ast(root, cfg);
1011 return cfg;
1012}
1013
1015template<typename CFG, class VertexPropertyWriter, class EdgePropertyWriter>
1016void
1017ControlFlow::write_graphviz(std::ostream &out, const CFG &cfg,
1018 const VertexPropertyWriter &vpw, const EdgePropertyWriter &epw)
1019{
1020 // typedef typename boost::graph_traits<CFG>::vertex_descriptor CFG_Vertex;
1021 typedef typename boost::graph_traits<CFG>::edge_descriptor CFG_Edge;
1022 typedef typename boost::graph_traits<CFG>::vertex_iterator CFG_VertexIterator;
1023 typedef typename boost::graph_traits<CFG>::out_edge_iterator CFG_OutEdgeIterator;
1024
1025 // Partition the graph into functions and inter-function edges
1027 Functions funcs;
1028 std::vector<CFG_Edge> interfunc_edges;
1029 CFG_VertexIterator vi, vi_end;
1030 for (boost::tie(vi, vi_end)=boost::vertices(cfg); vi!=vi_end; ++vi) {
1031 SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(get_ast_node(cfg, *vi), true);
1032 FunctionSubgraphInfo<CFG> &f = funcs[func];
1033 f.vertices.push_back(*vi);
1034 CFG_OutEdgeIterator ei, ei_end;
1035 for (boost::tie(ei, ei_end)=boost::out_edges(*vi, cfg); ei!=ei_end; ++ei) {
1036 SgNode *tgt_node = get_ast_node(cfg, boost::target(*ei, cfg));
1037 SgAsmFunction *tgt_func = SageInterface::getEnclosingNode<SgAsmFunction>(tgt_node, true);
1038 if (tgt_func==func) {
1039 f.edges.push_back(*ei);
1040 } else {
1041 interfunc_edges.push_back(*ei);
1042 }
1043 }
1044 }
1045
1046 // Output subgraph info, each function in its own cluster
1047 out <<"digraph G {\n";
1048 for (typename Functions::iterator fi=funcs.begin(); fi!=funcs.end(); ++fi) {
1049 FunctionSubgraphInfo<CFG> &f = fi->second;
1050 if (!f.vertices.empty() || !f.edges.empty()) {
1051 SgNode *node = get_ast_node(cfg, f.vertices.front());
1052 SgAsmFunction *func = SageInterface::getEnclosingNode<SgAsmFunction>(node, true);
1053 const size_t maxNameSize = 63;
1054 char cluster_name[maxNameSize+1];
1055 snprintf(cluster_name, maxNameSize, "cluster_F%" PRIx64, func->get_entryVa());
1056 out <<" subgraph " <<cluster_name <<" {\n"
1057 <<" style=filled;\n"
1058 <<" color=lightgrey;\n"
1059 <<" label=\"Function " <<StringUtility::addrToString(func->get_entryVa())
1060 <<(func->get_name().empty()?std::string(""):(" <"+func->get_name()+">")) <<"\";\n";
1061 for (size_t i=0; i<f.vertices.size(); ++i) {
1062 out <<" " <<f.vertices[i];
1063 vpw(out, f.vertices[i]);
1064 out <<";\n";
1065 }
1066 for (size_t i=0; i<f.edges.size(); ++i) {
1067 out <<" " <<boost::source(f.edges[i], cfg) <<"->" <<boost::target(f.edges[i], cfg);
1068 epw(out, f.edges[i]);
1069 out <<";\n";
1070 }
1071 out <<" }\n"; // subgraph
1072 }
1073 }
1074
1075 // Inter-function edges
1076 for (size_t i=0; i<interfunc_edges.size(); ++i) {
1077 out <<" " <<boost::source(interfunc_edges[i], cfg) <<"->" <<boost::target(interfunc_edges[i], cfg);
1078 epw(out, interfunc_edges[i]);
1079 out <<";\n";
1080 }
1081 out <<"}\n"; // digraph
1082}
1083
1084} // namespace
1085} // namespace
1086
1087#endif
1088#endif
Class for traversing the AST.
Extends std::map with methods that return optional values.
Definition util/Map.h:13
const T & get_value_or(const Key &key, const T &dflt) const
Convenience for getting a value from an Option.
Definition util/Map.h:78
const T & get_one(const Key &key) const
Look up one value or throw an exception.
Definition util/Map.h:61
Binary control flow analysis.
void fixup_fcall_fret(InsnCFG &cfg, bool preserve_call_fallthrough_edges)
Fix up a CFG by changing function call and return edges.
void set_vertex_filter(VertexFilter *filter)
Manipulate the vertex filter.
ControlFlowGraph build_insn_cfg_from_ast(SgNode *root)
Builds a control flow graph for part of an AST.
void clear_ast(SgNode *ast)
Clears successor information from the AST.
void apply_to_ast(const ControlFlowGraph &)
Applies graph to AST.
bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst, EdgeFilter *filter)
Determines if an edge is filtered out.
std::vector< typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor > flow_order(const ControlFlowGraph &, typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor start, std::vector< size_t > *reverse_order=NULL)
Orders nodes by depth first search reverse post order.
ControlFlowGraph copy(const ControlFlowGraph &src)
Copies a graph while filtering.
void explode_blocks(const BlockCFG &cfgb, InsnCFG &cfgi)
Create an instruction control flow graph from a basic block control flow graph.
void set_edge_filter(EdgeFilter *filter)
Manipulate the edge filter.
void write_graphviz(std::ostream &, const CFG &, const VertexPropertyWriter &, const EdgePropertyWriter &)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
bool is_edge_filtered(SgAsmNode *src, SgAsmNode *dst)
Determines if an edge is filtered out.
VertexFilter * get_vertex_filter() const
Manipulate the vertex filter.
void write_graphviz(std::ostream &out, const CFG &cfg, const VertexPropertyWriter &vpw)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
boost::adjacency_list< boost::setS, boost::vecS, boost::bidirectionalS, boost::property< boost::vertex_name_t, SgAsmInstruction * > > InsnGraph
Default instruction-based control flow graph.
EdgeFilter * get_edge_filter() const
Manipulate the edge filter.
ControlFlowGraph build_block_cfg_from_ast(SgNode *root)
Builds a control flow graph for part of an AST.
bool is_vertex_filtered(SgAsmNode *bb_or_insn)
Determines if a vertex is filtered out.
boost::adjacency_list< boost::setS, boost::vecS, boost::bidirectionalS, boost::property< boost::vertex_name_t, SgAsmBlock * > > BlockGraph
Default basic block control flow graph type.
void cache_vertex_descriptors(const ControlFlowGraph &)
Cache basic block vertex descriptors in AST.
bool is_vertex_filtered(SgAsmNode *bb_or_insn, VertexFilter *filter)
Determines if a vertex is filtered out.
BlockGraph Graph
Default control flow graph.
ControlFlowGraph build_cg_from_ast(SgNode *root)
Builds a control flow graph with only function call edges.
void write_graphviz(std::ostream &out, const CFG &cfg)
Write a CFG to a graphviz file, creating a cluster subgraph for each function.
std::vector< typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor > return_blocks(const ControlFlowGraph &cfg, typename boost::graph_traits< ControlFlowGraph >::vertex_descriptor start)
Returns list of function return blocks.
Graph containing user-defined vertices and edges.
Definition Graph.h:634
V VertexValue
User-level data associated with vertices.
Definition Graph.h:636
Instruction basic block.
void set_cachedVertex(size_t const &)
Property: Cached vertex for control flow graphs.
bool isFunctionCall(rose_addr_t &target_va, rose_addr_t &return_va)
Returns true if basic block appears to be a function call.
SgAsmIntegerValuePtrList const & get_successors() const
Property: Control flow successors.
void set_successorsComplete(bool const &)
Property: Whether the successors list is complete.
SgAsmStatementPtrList const & get_statementList() const
Property: Statements of which this block is composed.
bool hasInstructions() const
Determins if a block contains instructions.
Represents a synthesized function.
rose_addr_t const & get_entryVa() const
Property: Primary entry address.
SgAsmBlock * get_entryBlock() const
Function entry basic block.
std::string const & get_name() const
Property: Name.
Base class for machine instructions.
Base class for integer values.
Base class for all binary analysis IR nodes.
rose_addr_t const & get_address() const
Property: Starting virtual address.
Represents one Intel x86 machine instruction.
This class represents the base class for all IR nodes within Sage III.
List of things to work on.
Definition WorkLists.h:64
bool push(const T &, boost::tribool check_uniqueness=boost::logic::indeterminate)
Add an item to the back of the work list.
Definition WorkLists.h:202
bool empty() const
Returns true if this work list is empty.
Definition WorkLists.h:72
T shift()
Remove and return the item from the front of the work list.
Definition WorkLists.h:236
Sawyer::Container::Graph< V, E >::VertexValue get_ast_node(const Sawyer::Container::Graph< V, E > &cfg, size_t vertexId)
Return the AST node associated with a vertex.
void put_ast_node(Sawyer::Container::Graph< V, E > &cfg, size_t vertexId, AstNode *astNode)
Set the AST node associated with a vertex.
ROSE_UTIL_API std::string addrToString(uint64_t value, size_t nbits=0)
Convert a virtual address to a string.
ROSE_UTIL_API std::string numberToString(long long)
Convert an integer to a string.
The ROSE library.
const size_t INVALID_INDEX
Invalid array index.
Definition Constants.h:25
Default vertex property writer is a no-op.
List of vertices and intra-function edges for one function.