ROSE  0.9.9.14
SgAsmFunction.C
1 /* SgAsmFunction member definitions. Do not move them to src/ROSETTA/Grammar/BinaryInstruction.code (or any *.code file)
2  * because then they won't get indexed/formatted/etc. by C-aware tools. */
3 
4 #include "sage3basic.h"
5 #include "stringify.h"
6 
7 #include "rosePublicConfig.h"
8 #ifdef ROSE_HAVE_GCRYPT_H
9 #include <gcrypt.h>
10 #endif
11 
12 using namespace rose;
13 
14 std::string
15 SgAsmFunction::reason_key(const std::string &prefix)
16 {
17  return (prefix + "E = entry address H = CFG head C = function call(*)\n" +
18  prefix + "X = exception frame T = thunk I = imported/dyn-linked\n" +
19  prefix + "O = exported S = function symbol P = instruction pattern\n" +
20  prefix + "G = CFG graph analysis U = user-def detection N = NOP/zero padding\n" +
21  prefix + "D = discontiguous blocks V = intra-function block L = leftover blocks\n" +
22  prefix + "Mxxx are miscellaneous reasons (at most one misc reason per function):\n" +
23  prefix + " M001 = code between function padding bytes\n" +
24  prefix + "Note: \"c\" means this is the target of a call-like instruction or instruction\n" +
25  prefix + " sequence but the call is not present in the global control flow graph, while\n" +
26  prefix + " \"C\" means the call is in the CFG.\n");
27 }
28 
29 std::string
30 SgAsmFunction::reason_str(bool do_pad) const
31 {
32  return reason_str(do_pad, get_reason());
33 }
34 
35 std::string
36 SgAsmFunction::reason_str(bool do_pad, unsigned r)
37 {
38  using namespace StringUtility; // for add_to_reason_string()
39  std::string result;
40 
41  /* entry point and instruction heads are mutually exclusive, so we use the same column for both when padding. */
42  if (r & FUNC_ENTRY_POINT) {
43  add_to_reason_string(result, true, do_pad, "E", "entry point");
44  } else {
45  add_to_reason_string(result, (r & FUNC_INSNHEAD), do_pad, "H", "insn head");
46  }
47 
48  /* Function call:
49  * "C" means the function was detected because we saw a call-like instructon (such as x86 CALL or FARCALL) or instruction
50  * sequence (such as pushing the return value and then branching) in code that was determined to be reachable by
51  * analyzing the control flow graph.
52  *
53  * "c" means this function is the target of some call-like instruction (such as x86 CALL or FARCALL) but could not
54  * determine whether the instruction is actually executed.
55  */
56  if (r & FUNC_CALL_TARGET) {
57  add_to_reason_string(result, true, do_pad, "C", "function call");
58  } else {
59  add_to_reason_string(result, (r & FUNC_CALL_INSN), do_pad, "c", "call instruction");
60  }
61 
62  if (r & FUNC_EH_FRAME) {
63  add_to_reason_string(result, true, do_pad, "X", "exception frame");
64  } else {
65  add_to_reason_string(result, (r & FUNC_THUNK), do_pad, "T", "thunk");
66  }
67  add_to_reason_string(result, (r & FUNC_IMPORT), do_pad, "I", "import");
68  add_to_reason_string(result, (r & FUNC_EXPORT), do_pad, "E", "export");
69  add_to_reason_string(result, (r & FUNC_SYMBOL), do_pad, "S", "symbol");
70  add_to_reason_string(result, (r & FUNC_PATTERN), do_pad, "P", "pattern");
71  add_to_reason_string(result, (r & FUNC_GRAPH), do_pad, "G", "graph");
72  add_to_reason_string(result, (r & FUNC_USERDEF), do_pad, "U", "user defined");
73  add_to_reason_string(result, (r & FUNC_PADDING), do_pad, "N", "padding");
74  add_to_reason_string(result, (r & FUNC_DISCONT), do_pad, "D", "discontiguous");
75  add_to_reason_string(result, (r & FUNC_LEFTOVERS), do_pad, "L", "leftovers");
76  add_to_reason_string(result, (r & FUNC_INTRABLOCK), do_pad, "V", "intrablock");
77 
78  /* The miscellaneous marker is special. It's a single letter like the others, but is followed by a fixed width
79  * integer indicating the (user-defined) algorithm that added the function. */
80  {
81  char abbr[32], full[64];
82  int width = snprintf(abbr, sizeof abbr, "%u", FUNC_MISCMASK);
83  snprintf(abbr, sizeof abbr, "M%0*u", width, (r & FUNC_MISCMASK));
84  abbr[sizeof(abbr)-1] = '\0';
85  if (!do_pad) {
86  std::string miscname = stringifySgAsmFunctionFunctionReason((r & FUNC_MISCMASK), "FUNC_");
87  if (miscname.empty() || miscname[0]=='(') {
88  snprintf(full, sizeof full, "misc-%u", (r & FUNC_MISCMASK));
89  } else {
90  for (size_t i=0; i<miscname.size(); ++i)
91  miscname[i] = tolower(miscname[i]);
92  strncpy(full, miscname.c_str(), sizeof full);
93  }
94  full[sizeof(full)-1] = '\0';
95  } else {
96  full[0] = '\0';
97  }
98  add_to_reason_string(result, (r & FUNC_MISCMASK), do_pad, abbr, full);
99  }
100 
101  return result;
102 }
103 
104 size_t
105 SgAsmFunction::get_extent(AddressIntervalSet *extents, rose_addr_t *lo_addr, rose_addr_t *hi_addr, NodeSelector *selector)
106 {
107  struct T1: public AstSimpleProcessing {
108  AddressIntervalSet *extents;
109  rose_addr_t *lo_addr, *hi_addr;
110  NodeSelector *selector;
111  size_t nnodes;
112  T1(AddressIntervalSet *extents, rose_addr_t *lo_addr, rose_addr_t *hi_addr, NodeSelector *selector)
113  : extents(extents), lo_addr(lo_addr), hi_addr(hi_addr), selector(selector), nnodes(0) {
114  if (lo_addr)
115  *lo_addr = 0;
116  if (hi_addr)
117  *hi_addr = 0;
118  }
119  void visit(SgNode *node) {
120  if (selector && !(*selector)(node))
121  return;
122  SgAsmInstruction *insn = isSgAsmInstruction(node);
123  SgAsmStaticData *data = isSgAsmStaticData(node);
124  rose_addr_t lo, hi;
125  if (insn) {
126  lo = insn->get_address();
127  hi = lo + insn->get_size();
128  } else if (data) {
129  lo = data->get_address();
130  hi = lo + data->get_size();
131  } else {
132  return;
133  }
134 
135  if (0==nnodes++) {
136  if (lo_addr)
137  *lo_addr = lo;
138  if (hi_addr)
139  *hi_addr = hi;
140  } else {
141  if (lo_addr)
142  *lo_addr = std::min(*lo_addr, lo);
143  if (hi_addr)
144  *hi_addr = std::max(*hi_addr, hi);
145  }
146  if (extents && hi>lo)
147  extents->insert(AddressInterval::baseSize(lo, hi-lo));
148  }
149  } t1(extents, lo_addr, hi_addr, selector);
150  t1.traverse(this, preorder);
151  return t1.nnodes;
152 }
153 
154 bool
155 SgAsmFunction::get_sha1(uint8_t digest[20], NodeSelector *selector)
156 {
157 #ifdef ROSE_HAVE_GCRYPT_H
158  struct T1: public AstSimpleProcessing {
159  NodeSelector *selector;
160  gcry_md_hd_t md; // message digest
161  T1(NodeSelector *selector): selector(selector) {
162  gcry_error_t error __attribute__((unused)) = gcry_md_open(&md, GCRY_MD_SHA1, 0);
163  assert(GPG_ERR_NO_ERROR==error);
164  }
165  ~T1() {
166  gcry_md_close(md);
167  }
168  void visit(SgNode *node) {
169  if (selector && !(*selector)(node))
170  return;
171  SgAsmInstruction *insn = isSgAsmInstruction(node);
172  SgAsmStaticData *data = isSgAsmStaticData(node);
173  if (insn) {
174  SgUnsignedCharList buf = insn->get_raw_bytes();
175  gcry_md_write(md, &buf[0], buf.size());
176  } else if (data) {
177  SgUnsignedCharList buf = data->get_raw_bytes();
178  gcry_md_write(md, &buf[0], buf.size());
179  }
180  }
181  void read(uint8_t digest[20]) {
182  assert(gcry_md_get_algo_dlen(GCRY_MD_SHA1)==20);
183  gcry_md_final(md);
184  unsigned char *d = gcry_md_read(md, GCRY_MD_SHA1);
185  assert(d!=NULL);
186  memcpy(digest, d, 20);
187  }
188  } t1(selector);
189  t1.traverse(this, preorder);
190  t1.read(digest);
191  return true;
192 #else
193  memset(digest, 0, 20);
194  return false;
195 #endif
196 }
197 
200 SgAsmBlock *
202  for (SgAsmStatementPtrList::const_iterator si=p_statementList.begin(); si!=p_statementList.end(); ++si) {
203  SgAsmBlock *bb = isSgAsmBlock(*si);
204  if (bb && bb->get_address()==p_entry_va)
205  return bb;
206  }
207  return NULL;
208 }
209 
210 int
211 SgAsmFunction::nrOfValidInstructions( std::vector<SgNode*>& succs ) {
212 // std::vector<SgNode*> succs = this->get_traversalSuccessorContainer();
213  std::vector<SgNode*>::reverse_iterator j = succs.rbegin();
214  int instructions = succs.size();
215  bool foundRet=false;
216  bool nodeOtherThanNopAfterRetExists=false;
217 /*
218  if (j!=succs.begin())
219  j--;
220  else
221  return 0;
222 */
223  for (;j!=succs.rend(); j++) {
224  SgAsmX86Instruction* n = isSgAsmX86Instruction(*j);
225  if (n && (n->get_kind() == x86_ret || n->get_kind() == x86_hlt)) {
226  foundRet=true;
227  break;
228  } else {
229  if (n && n->get_kind() != x86_nop) {
230  nodeOtherThanNopAfterRetExists= true;
231  }
232  instructions--;
233  }
234  }
235  if (!foundRet)
236  instructions = succs.size();
237  // if we find a return and there are NOPs following it somewhere,
238  // we cut off the CFG at the NOP but we keep valid instructions
239  // after the RET
240  if (foundRet)
241  if (nodeOtherThanNopAfterRetExists)
242  return succs.size();
243 
244  return instructions;
245 }
246 
247 void
249  {
250  p_statementList.clear();
251  }
252 
253 void
254 SgAsmFunction::append_dest( SgAsmStatement* statement )
255  {
256  p_dest.push_back(statement);
257  }
258 
259 void
261  {
262  p_statementList.push_back(statement);
263  }
264 
265 void
267  {
268  SgAsmStatementPtrList::iterator l = p_statementList.begin();
269  for (;l!=p_statementList.end();l++) {
270  SgAsmStatement* st = *l;
271  if (st==statement) {
272  break;
273  }
274  }
275  if (l!=p_statementList.end())
276  p_statementList.erase(l);
277  }
278 
279 // DQ (4/29/2010): Added function to support scoring functions as likely valid functions (work with CERT).
280 int
281 SgAsmFunction::get_stackNutralityMetric() const
282  {
283  // This function computes the positon of the stack at the end of the function relative to the
284  // start of the function and contributes to a scoring of functions as valid functions.
285 
286  printf ("Error: This SgAsmFunction::get_stackNutralityMetric() function is not yet implemented. \n");
287  ROSE_ASSERT(false);
288 
289  return 0;
290  }
void remove_statement(SgAsmStatement *statement)
Erases statement from statement list.
void remove_children()
Removes all statements.
virtual size_t get_size() const
Returns the size of an instruction in bytes.
Instruction basic block.
size_t get_size() const
Property: Size of static data in bytes.
Class for traversing the AST.
Base class for machine instructions.
X86InstructionKind get_kind() const
Property: Instruction kind.
SgAsmBlock * get_entry_block() const
Function entry basic block.
Selection functor for SgAsmFunction::get_extent().
void insert(const Interval2 &interval)
Insert specified values.
Definition: IntervalSet.h:532
static Interval baseSize(rose_addr_t lo, rose_addr_t size)
Construct an interval from one endpoint and a size.
Definition: Interval.h:161
static std::string reason_key(const std::string &prefix="")
Multi-line description of function reason keys from unparser.
Definition: SgAsmFunction.C:15
size_t get_extent(AddressIntervalSet *emap=NULL, rose_addr_t *lo_addr=NULL, rose_addr_t *hi_addr=NULL, NodeSelector *selector=NULL)
Returns information about the function addresses.
This class represents the base class for all IR nodes within Sage III.
Definition: Cxx_Grammar.h:8212
const SgUnsignedList & get_raw_bytes() const
Property: Raw bytes of an instruction.
Represents static data in an executable.
bool get_sha1(uint8_t digest[20], NodeSelector *selector=NULL)
Computes the SHA1 message digest for the bytes of a function.
Represents one Intel x86 machine instruction.
const SgUnsignedCharList & get_raw_bytes() const
Property: Raw bytes.
std::string stringifySgAsmFunctionFunctionReason(long int n, const char *strip=NULL, bool canonic=false)
Converts an enum of type SgAsmFunction::FunctionReason to a string.
Definition: stringify.C:25186
ROSE_UTIL_API void add_to_reason_string(std::string &result, bool isset, bool do_pad, const std::string &abbr, const std::string &full)
Append an abbreviation or full name to a string.
void append_statement(SgAsmStatement *)
Adds statement to end of statement list.
std::string reason_str(bool pad) const
Returns a very short string describing the reason mask.
Definition: SgAsmFunction.C:30
Main namespace for the ROSE library.