/* ROSE 0.9.9.14 -- Partitioner.C (listing header retained from source extraction) */
1 /* Algorithms to detect what instructions make up basic blocks and which blocks make up functions, and how to create the
2  * necessary SgAsmBlock and SgAsmFunction IR nodes from this information. */
3 #include "sage3basic.h"
4 
5 #include "Diagnostics.h"
6 #include "Partitioner.h"
7 #include "Assembler.h"
8 #include "AssemblerX86.h"
9 #include "AsmUnparser_compat.h"
10 #include "BinaryLoader.h"
11 #include "MemoryCellList.h"
12 #include "PartialSymbolicSemantics.h" // FIXME: expensive to compile; remove when no longer needed [RPM 2012-05-06]
13 #include "stringify.h"
14 
15 #include "PartialSymbolicSemantics2.h"
16 #include "DispatcherX86.h"
17 
18 #include <errno.h>
19 #include <fcntl.h>
20 #include <math.h>
21 #include <Sawyer/Optional.h>
22 #include <Sawyer/ProgressBar.h>
23 #include <stdarg.h>
24 
25 namespace rose {
26 namespace BinaryAnalysis {
27 
28 using namespace Diagnostics;
29 using namespace StringUtility;
30 
31 /* See header file for full documentation. */
32 
// Stream-insertion for partitioner exceptions; delegates to Exception::print().
std::ostream& operator<<(std::ostream &o, const Partitioner::Exception &e)
{
    e.print(o);
    return o;
}
38 
40 
41 // class method
42 void Partitioner::initDiagnostics() {
43  static bool initialized = false;
44  if (!initialized) {
45  initialized = true;
46  Diagnostics::initAndRegister(&mlog, "rose::BinaryAnalysis::Partitioner");
47  }
48 }
49 
50 /* class method */
53 {
54  return insn ? isSgAsmInstruction(insn->node) : NULL;
55 }
56 
57 /* class method */
60 {
61  return ::isSgAsmInstruction(node);
62 }
63 
64 /* class method */
67 {
68  return insn ? isSgAsmX86Instruction(insn->node) : NULL;
69 }
70 
71 /* class method */
74 {
75  return ::isSgAsmX86Instruction(node);
76 }
77 
78 /* class method */
81 {
82  return insn ? isSgAsmM68kInstruction(insn->node) : NULL;
83 }
84 
85 /* class method */
88 {
89  return ::isSgAsmM68kInstruction(node);
90 }
91 
/* Progress report class variables. */
double Partitioner::progress_interval = 10.0;   // minimum seconds between reports; negative disables reporting
double Partitioner::progress_time = 0.0;        // time at which the most recent report was emitted
95 
/* Set progress reporting values. */
// NOTE(review): the signature line was lost from this copy of the file; the body shows a
// 'min_interval' parameter stored as the class-wide reporting interval -- confirm upstream.
void
{
    progress_interval = min_interval;
}
102 
103 /* Produce a progress report if enabled. */
104 void
105 Partitioner::update_progress(SgAsmBlock::Reason reason, size_t pass) const
106 {
107  if (progress_interval>=0 && mlog[INFO]) {
108  double curtime = Sawyer::Message::now();
109  if (curtime - progress_time >= progress_interval) {
110  mlog[INFO] <<"starting " <<stringifySgAsmBlockReason(reason, "BLK_") <<" pass " <<pass
111  <<": " <<StringUtility::plural(functions.size(), "functions")
112  <<", " <<StringUtility::plural(insns.size(), "instructions")
113  <<", " <<StringUtility::plural(basic_blocks.size(), "blocks")
114  <<"\n";
115  progress_time = curtime;
116  }
117  }
118 }
119 
// NOTE(review): the opening of this struct (its 'struct ProgressSuffix {' line and any
// preceding comment) was lost from this copy.  It is the suffix functor for the Sawyer
// progress bar below: it prints block/function counts after the bar's numeric value.
    const Partitioner *p;                       // partitioner being reported on; may be null
    ProgressSuffix(): p(NULL) {}
    ProgressSuffix(const Partitioner *p): p(p) {}
    void print(std::ostream &o) const {
        if (p!=NULL) {
            o <<(1==p->basic_blocks.size() ? " block" : " blocks");// label for value printed by ProgressBar
            o <<" " <<StringUtility::plural(p->functions.size(), "functions");
        }
    }
};
131 
// Stream-insertion for the progress-bar suffix; delegates to ProgressSuffix::print().
std::ostream& operator<<(std::ostream &o, const ProgressSuffix &suffix) {
    suffix.print(o);
    return o;
}
136 
137 void
138 Partitioner::update_progress() const
139 {
140  static Sawyer::ProgressBar<size_t, ProgressSuffix> *progressBar = NULL;
141  if (!progressBar)
142  progressBar = new Sawyer::ProgressBar<size_t, ProgressSuffix>(mlog[MARCH], "");
143  progressBar->suffix(ProgressSuffix(this));
144  progressBar->value(basic_blocks.size());
145 }
146 
/* Parse argument for "-rose:partitioner_search" command-line switch.
 *
 * The string is a comma-separated list of heuristic names, each optionally prefixed with
 * '+' (set bits), '-' (clear bits), or '=' (replace the entire flag vector).  Starting
 * from 'flags', returns the adjusted flag vector.  Throws Partitioner::Exception for a
 * malformed string or an unknown heuristic name. */
unsigned
Partitioner::parse_switches(const std::string &s, unsigned flags)
{
    size_t at=0;
    while (at<s.size()) {
        /* Optional qualifier character before each heuristic name. */
        enum { SET_BIT, CLEAR_BIT, SET_VALUE, NOT_SPECIFIED } howset = NOT_SPECIFIED;

        if (s[at]=='-') {
            howset = CLEAR_BIT;
            at++;
        } else if (s[at]=='+') {
            howset = SET_BIT;
            at++;
        } else if (s[at]=='=') {
            howset = SET_VALUE;
            at++;
        }
        if (at>=s.size())
            throw Exception("heuristic name must follow qualifier");

        /* The name extends to the next comma (or end of string). */
        size_t comma = s.find(",", at);
        std::string word = std::string(s, at, comma-at);
        if (word.size()==0)
            throw Exception("heuristic name must follow comma");

        unsigned bits = 0;
        // NOTE(review): the 'bits = SgAsmFunction::FUNC_*;' assignment inside each branch
        // below appears to have been lost from this copy of the file -- as written every
        // named heuristic leaves 'bits' zero.  Restore from the upstream ROSE sources.
        if (word=="entry" || word=="entry_point") {
        } else if (word=="call_target") {
        } else if (word=="call_insn") {
        } else if (word=="call") {
        } else if (word=="eh" || word=="eh_frame") {
        } else if (word=="import") {
        } else if (word=="export") {
        } else if (word=="symbol") {
        } else if (word=="pattern") {
        } else if (word=="userdef") {
        } else if (word=="pad" || word=="padding" || word=="interpad") {
        } else if (word=="intrablock") {
        } else if (word=="thunk") {
        } else if (word=="misc" || word=="miscellaneous" || word=="interpadfunc") {
        } else if (word=="unassigned" || word=="unclassified" || word=="leftover" || word=="leftovers") {
        } else if (word=="default") {
            if (howset==NOT_SPECIFIED) howset = SET_VALUE;
        } else if (isdigit(word[0])) {
            bits = strtol(word.c_str(), NULL, 0);       // numeric literal supplies the bits directly
        } else {
            throw Exception("unknown partitioner heuristic: \"" + word + "\"");
        }

        switch (howset) {
            case SET_VALUE:
                flags = 0;
                // fall through: after clearing, the new bits are OR'd in below
            case NOT_SPECIFIED:
            case SET_BIT:
                flags |= bits;
                break;
            case CLEAR_BIT:
                flags &= ~bits;
                break;
        }

        at = comma==std::string::npos ? s.size() : comma+1;
    }
    return flags;
}
229 
/* Set disassembly and memory initialization maps. */
// NOTE(review): the signature line was lost from this copy; per the body this takes a
// main 'map' and an optional read-only 'ro_map' (MemoryMap pointers) -- confirm upstream.
void
{
    this->map = map;
    if (map) {
        if (ro_map) {
            this->ro_map = ro_map->shallowCopy();
        } else {
            // No explicit read-only map supplied: derive one by keeping only the parts
            // of 'map' that are readable but not writable.
            this->ro_map = map->shallowCopy();
            this->ro_map->require(MemoryMap::READABLE).prohibit(MemoryMap::WRITABLE).keep();
        }
    } else {
        this->ro_map = MemoryMap::Ptr();        // clearing the map clears the read-only map too
    }
}
246 
247 /* Looks for a jump table. Documented in header file. */
249 Partitioner::discover_jump_table(BasicBlock *bb, bool do_create, ExtentMap *table_extent)
250 {
251  using namespace BinaryAnalysis::InstructionSemantics2;
252 
253  /* Do some cheap up-front checks. */
254  SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(bb->last_insn());
255  if (!insn_x86 || (insn_x86->get_kind()!=x86_jmp && insn_x86->get_kind()==x86_farjmp) ||
256  1!=insn_x86->get_operandList()->get_operands().size())
257  return Disassembler::AddressSet();
258  SgAsmExpression *target_expr = insn_x86->get_operandList()->get_operands()[0];
259  SgAsmMemoryReferenceExpression *mre = isSgAsmMemoryReferenceExpression(target_expr);
260  SgAsmRegisterReferenceExpression *rre = isSgAsmRegisterReferenceExpression(target_expr);
261  if (!mre && !rre)
262  return Disassembler::AddressSet(); // no indirection
263 
264  /* Evaluate the basic block semantically to get an expression for the final EIP. */
265  const RegisterDictionary *regdict = RegisterDictionary::dictionary_amd64(); // compatible w/ older x86 models
266  const RegisterDescriptor *REG_EIP = regdict->lookup("eip");
267  PartialSymbolicSemantics::RiscOperatorsPtr ops = PartialSymbolicSemantics::RiscOperators::instance(regdict);
268  BaseSemantics::DispatcherPtr dispatcher = DispatcherX86::instance(ops, 32);
269  ops->set_memory_map(ro_map);
270  try {
271  for (size_t i=0; i<bb->insns.size(); ++i) {
272  insn_x86 = isSgAsmX86Instruction(bb->insns[i]->node);
273  ASSERT_not_null(insn_x86); // we know we're in a basic block of x86 instructions already
274  dispatcher->processInstruction(insn_x86);
275  }
276  } catch (...) {
277  return Disassembler::AddressSet(); // something went wrong, so just give up (e.g., unhandled instruction)
278 
279  }
280 
281  /* Scan through memory to find from whence the EIP value came. There's no need to scan for an EIP which is a known value
282  * since such control flow successors would be picked the usual way elsewhere. It's also quite possible that the EIP value
283  * is also stored at some other memory addresses outside the jump table (e.g., a function pointer argument stored on the
284  * stack), so we also skip over any memory whose address is known. */
285  Disassembler::AddressSet successors;
286  BaseSemantics::SValuePtr eip = ops->readRegister(*REG_EIP);
287  static const size_t entry_size = 4; // FIXME: bytes per jump table entry
288  uint8_t *buf = new uint8_t[entry_size];
289  if (!eip->is_number()) {
290  BaseSemantics::MemoryCellListPtr mem = BaseSemantics::MemoryCellList::promote(ops->currentState()->memoryState());
291  for (BaseSemantics::MemoryCellList::CellList::iterator mi=mem->get_cells().begin(); mi!=mem->get_cells().end(); ++mi) {
292  BaseSemantics::MemoryCellPtr cell = *mi;
293  if (cell->get_address()->is_number() && cell->get_value()->must_equal(eip)) {
294  rose_addr_t base_va = cell->get_address()->get_number();
295  size_t nentries = 0;
296  while (1) {
297  size_t nread = ro_map->readQuick(buf, base_va+nentries*entry_size, entry_size);
298  if (nread!=entry_size)
299  break;
300  rose_addr_t target_va = 0;
301  for (size_t i=0; i<entry_size; i++)
302  target_va |= buf[i] << (i*8);
303  if (!map->at(target_va).require(MemoryMap::EXECUTABLE).exists())
304  break;
305  successors.insert(target_va);
306  ++nentries;
307  }
308  if (nentries>0) {
309  if (table_extent)
310  table_extent->insert(Extent(base_va, nentries*entry_size));
311  if (do_create) {
312  DataBlock *dblock = find_db_starting(base_va, nentries*entry_size);
313  append(bb, dblock, SgAsmBlock::BLK_JUMPTABLE);
314  }
315  mlog[TRACE] <<"[jump table at " <<addrToString(base_va) <<"+" <<nentries <<"*" <<entry_size <<"]";
316  }
317  }
318  }
319  }
320  delete [] buf;
321  return successors;
322 }
323 
// Compute and cache per-block analyses for one basic block: control-flow successors,
// function-call detection, and function-return detection.  No-op when the block's cache
// is already valid.
// NOTE(review): the signature line was lost from this copy; call sites such as
// is_function_call() indicate this is Partitioner::update_analyses(BasicBlock *bb) --
// confirm upstream.
void
{
    ASSERT_not_null(bb);
    ASSERT_forbid(bb->insns.empty());
    if (bb->valid_cache()) return;

    /* Successor analysis. */
    std::vector<SgAsmInstruction*> inodes;
    for (InstructionVector::const_iterator ii=bb->insns.begin(); ii!=bb->insns.end(); ++ii)
        inodes.push_back(isSgAsmInstruction(*ii));
    bb->cache.sucs = bb->insns.front()->node->getSuccessors(inodes, &(bb->cache.sucs_complete), ro_map);

    /* Try to handle indirect jumps of the form "jmp ds:[BASE+REGISTER*WORDSIZE]". The trick is to assume that some kind of
     * jump table exists beginning at address BASE, and that the table contains only addresses of valid code. All we need to
     * do is look for the first entry in the table that doesn't point into an executable region of the disassembly memory
     * map. We use a "do" loop so the logic nesting doesn't get so deep: just break when we find that something doesn't match
     * what we expect. */
    if (!bb->cache.sucs_complete && bb->cache.sucs.empty()) {
        ExtentMap table_extent;
        Disassembler::AddressSet table_entries = discover_jump_table(bb, true, &table_extent);
        if (!table_entries.empty()) {
            bb->cache.sucs.insert(table_entries.begin(), table_entries.end());
            mlog[TRACE] <<"[jump table at " <<table_extent <<"]";
        }
    }

    // Remove successors for certain kinds of indirect calls (calls made through a register or memory). If this is an indirect
    // call, then scan through the successors and remove some. Each successor will be in one of the following categories:
    //    1. a mapped address at which we can make an instruction [keep the successor]
    //    2. a mapped address at which we cannot make an instruction, e.g., illegal opcode [erase the successor]
    //    3. an address which is not mapped, but which might be mapped in the future [keep the successor]
    //    4. a non-address, e.g., an "ordinal" from a PE Import Address Table [erase the successor]
    //
    // For instance, the x86 PE code "call ds:[IAT+X]" is a call to an imported function. If the dynamic linker hasn't run or
    // been simulated yet, then the Import Address Table entry [IAT+X] could be either an address that isn't mapped, or a
    // non-address "ordinal". We want to erase successors that are ordinals or other garbage, but keep those that are
    // addresses. We keep even the unmapped addresses because the Partitioner might not have the whole picture right now (the
    // user might run the Partitioner, then simulate dynamic linking, then run another Partitioner on the libraries, then join
    // things together into one large control flow graph). It's not generally possible to distinguish between ordinals and
    // addresses, but we can use the fact that ordinals are table indices and are therefore probably relatively small. We also
    // look for indirect calls through registers so that we can support things like "mov edi, ds:[IAT+X]; ...; call edi"
    if (SgAsmX86Instruction *last_insn = isSgAsmX86Instruction(bb->last_insn())) {
        static const rose_addr_t largest_ordinal = 10000; // arbitrary
        bool is_call = last_insn->get_kind() == x86_call || last_insn->get_kind() == x86_farcall;
        const SgAsmExpressionPtrList &operands = last_insn->get_operandList()->get_operands();
        if (is_call && 1==operands.size() &&
            (isSgAsmRegisterReferenceExpression(operands[0]) || isSgAsmMemoryReferenceExpression(operands[0])) &&
            bb->cache.sucs.size() > 0) {
            for (Disassembler::AddressSet::iterator si=bb->cache.sucs.begin(); si!=bb->cache.sucs.end(); /*void*/) {
                rose_addr_t successor_va = *si;
                if (find_instruction(successor_va)) { // category 1
                    ++si;
                } else if (map->at(successor_va).exists()) { // category 2
                    bb->cache.sucs.erase(si++);
                } else if (successor_va > largest_ordinal) { // category 3
                    ++si;
                } else { // category 4
                    bb->cache.sucs.erase(si++);
                }
            }
        }
    }

    /* Call target analysis. A function call is any CALL-like instruction except when the call target is the fall-through
     * address and the instruction at the fall-through address pops the top of the stack (this is one way position independent
     * code loads the instruction pointer register into a general-purpose register). FIXME: For now we'll assume that any call
     * to the fall-through address is not a function call. */
    rose_addr_t fallthrough_va = bb->last_insn()->get_address() + bb->last_insn()->get_size();
    rose_addr_t target_va = NO_TARGET;
    bool looks_like_call = bb->insns.front()->node->isFunctionCallSlow(inodes, &target_va, NULL);
    if (looks_like_call && target_va!=fallthrough_va) {
        bb->cache.is_function_call = true;
        bb->cache.call_target = target_va;
    } else {
        bb->cache.is_function_call = false;
        bb->cache.call_target = NO_TARGET;
    }

    /* Function return analysis */
    // NOTE(review): the result below is discarded; the assignment target (presumably
    // 'bb->cache.function_return =') appears to have been lost from this copy -- confirm
    // against upstream before relying on the cached value.
    bb->insns.front()->node->isFunctionReturnSlow(inodes);

    bb->validate_cache();
}
412 
416 bool
417 Partitioner::is_function_call(BasicBlock *bb, rose_addr_t *target_va)
418 {
419  update_analyses(bb); /*make sure cache is current*/
420  if (target_va) *target_va = bb->cache.call_target;
421  return bb->cache.is_function_call;
422 }
423 
// NOTE(review): the doxygen comment, return type, signature, and the declaration of
// 'retval' (a Disassembler::AddressSet) were lost from this copy of the file.  Per the
// body this is Partitioner::successors(BasicBlock *bb, bool *complete) -- confirm
// against upstream ROSE sources.
{
    update_analyses(bb); /*make sure cache is current*/

    /* Follow alias_for links. */
    for (Disassembler::AddressSet::const_iterator si=bb->cache.sucs.begin(); si!=bb->cache.sucs.end(); ++si)
        retval.insert(canonic_block(*si));
    if (complete) *complete = bb->cache.sucs_complete;

    /* Run non-local analyses if necessary. These are never cached here in this block. */

    // If this block ends with what appears to be a function call then we should perhaps add the fall-through address as a
    // successor. In fact, since most calls may return, assume that this call also may return unless we already know there's a
    // function there and we haven't yet proven that it may return.
    if (bb->cache.is_function_call) {
        rose_addr_t fall_through_va = canonic_block(bb->last_insn()->get_address() + bb->last_insn()->get_size());
        rose_addr_t call_target_va = call_target(bb);
        if (call_target_va!=NO_TARGET) {
            Instruction *target_insn = find_instruction(call_target_va, true);
            BasicBlock *target_bb = target_insn ? find_bb_starting(call_target_va, false) : NULL;
            if (!target_insn) {
                // We know the call target, but could not obtain an instruction there. The target might be a dynamically
                // linked function that isn't mapped yet. Assume it may return since most of them can.
                retval.insert(fall_through_va);
            } else if (target_bb && target_bb->function) {
                // There is a basic block at the call target and the block belongs to a function already. This call may
                // return if the target function may return
                if (target_bb->function->possible_may_return())
                    retval.insert(fall_through_va);
            } else if (target_bb) {
                // There is a basic block at the call target but it hasn't been assigned to a function yet. Assume that it can
                // return since most calls can.
                retval.insert(fall_through_va);
            } else {
                // We were able to disassemble an instruction at the call target, but no basic block exists there yet. Assume
                // that the call may return since most calls can.
                assert(target_insn);
                assert(!target_bb);
                retval.insert(fall_through_va);
            }
        } else {
            retval.insert(fall_through_va); // most calls may return, so assume this one can
        }
    }

    return retval;
}
485 
// NOTE(review): the doxygen comment and signature line were lost from this copy; per the
// body this is Partitioner::call_target(BasicBlock *bb): it returns the canonic form of
// the block's cached call target, or NO_TARGET -- confirm upstream.
rose_addr_t
{
    update_analyses(bb); /*make sure cache is current*/
    if (bb->cache.call_target==NO_TARGET) return NO_TARGET;
    return canonic_block(bb->cache.call_target);
}
496 
/* Returns true if the basic block at the specified virtual address appears to pop the return address from the top of the
 * stack without returning.
 *
 * FIXME: This is far from perfect: it analyzes only the first basic block; it may have incomplete information about where the
 * basic block ends due to not yet having discovered all incoming CFG edges; it doesn't consider cases where the return
 * value is popped but saved and restored later; etc. It also only handles x86 instructions at this time.
 * [RPM 2010-04-30] */
// NOTE(review): the signature line was lost from this copy; per the body it takes the
// block's virtual address 'va' (presumably Partitioner::pops_return_address(rose_addr_t)).
bool
{
    using namespace BinaryAnalysis::InstructionSemantics;

    bool on_stack = true; /*assume return value stays on stack; prove otherwise*/

    /* Create the basic block if possible, but if we created it here then we should clear it below. */
    BasicBlock *bb = find_bb_containing(va, false);
    bool preexisting = bb!=NULL;
    if (!bb) bb = find_bb_containing(va);
    if (!bb) return false;
    try {

        SgAsmX86Instruction *last_insn = isSgAsmX86Instruction(bb->last_insn());

        typedef PartialSymbolicSemantics::Policy<> Policy;
        typedef X86InstructionSemantics<Policy, PartialSymbolicSemantics::ValueType> Semantics;
        Policy policy;
        policy.set_map(get_map());
        // Plant a marker value at the top of the stack so we can later ask whether it is
        // still somewhere on the stack after executing the block.
        PartialSymbolicSemantics::ValueType<32> orig_retaddr;
        policy.writeMemory(x86_segreg_ss, policy.readRegister<32>("esp"), orig_retaddr, policy.true_());
        Semantics semantics(policy);

        try {
            for (InstructionVector::iterator ii=bb->insns.begin(); ii!=bb->insns.end(); ++ii) {
                SgAsmX86Instruction *insn = isSgAsmX86Instruction(*ii);
                // NOTE(review): this early return bypasses the !preexisting discard()
                // cleanup below, leaking a block created by this analysis -- confirm.
                if (!insn) return false;
                if (insn==last_insn && insn->get_kind()==x86_ret) break;
                semantics.processInstruction(insn);
            }
            on_stack = policy.on_stack(orig_retaddr);
            if (!on_stack)
                mlog[TRACE] <<"[B" <<addrToString(va) <<"#" <<bb->insns.size() <<" discards return address]";
        } catch (const Semantics::Exception&) {
            /*void*/
        } catch (const Policy::Exception&) {
            /*void*/
        }

    } catch(...) {
        if (!preexisting)
            discard(bb);
        throw;
    }

    /* We don't want to have a basic block created just because we did some analysis. */
    if (!preexisting)
        discard(bb);

    /* Is the original return value still on the stack? */
    return !on_stack;
}
557 
// NOTE(review): the signature line was lost from this copy.  Per the body a block's
// address is its first instruction's address (presumably BasicBlock::address()).
rose_addr_t
{
    ASSERT_forbid(insns.empty());
    return insns.front()->get_address();
}
566 
// NOTE(review): the signature line was lost from this copy.  Per the body this returns
// the address of the first node (presumably DataBlock::address()).
rose_addr_t
{
    ASSERT_forbid(nodes.empty());
    return nodes.front()->get_address();
}
575 
581 {
582  if (dblock->function)
583  return dblock->function;
584  if (dblock->basic_block)
585  return dblock->basic_block->function;
586  return NULL;
587 }
588 
/* Returns last instruction, the one that exits from the block. */
// NOTE(review): the signature line was lost from this copy (presumably BasicBlock::last_insn()).
{
    ASSERT_forbid(insns.empty());
    return insns.back();
}
596 
/* Removes association between data blocks and basic blocks. */
// NOTE(review): the signature line was lost from this copy (presumably BasicBlock::clear_data_blocks()).
void
{
    for (std::set<DataBlock*>::iterator di=data_blocks.begin(); di!=data_blocks.end(); ++di)
        (*di)->basic_block = NULL;      // sever each back pointer before forgetting the block
    data_blocks.clear();
}
605 
/* Release all basic blocks from a function. Do not delete the blocks. */
// NOTE(review): the signature line was lost from this copy (presumably Function::clear_basic_blocks()).
void
{
    for (BasicBlocks::iterator bi=basic_blocks.begin(); bi!=basic_blocks.end(); ++bi)
        bi->second->function = NULL;    // blocks survive; only the ownership link is cut
    basic_blocks.clear();
}
614 
/* Release all data blocks from a function. Do not delete the blocks. */
// NOTE(review): the signature line was lost from this copy (presumably Function::clear_data_blocks()).
void
{
    for (DataBlocks::iterator bi=data_blocks.begin(); bi!=data_blocks.end(); ++bi)
        bi->second->function = NULL;    // blocks survive; only the ownership link is cut
    data_blocks.clear();
}
623 
/* Move basic blocks from the other function to this one. */
// NOTE(review): the signature line was lost from this copy (a Function member taking 'other').
void
{
    for (BasicBlocks::iterator bbi=other->basic_blocks.begin(); bbi!=other->basic_blocks.end(); ++bbi) {
        basic_blocks[bbi->first] = bbi->second;
        bbi->second->function = this;   // re-point each block at its new owner
    }
    other->basic_blocks.clear();

    heads.insert(other->heads.begin(), other->heads.end());     // adopt explicit block heads too
    other->heads.clear();
}
637 
/* Move data blocks from the other function to this one. */
// NOTE(review): the signature line was lost from this copy (a Function member taking 'other').
void
{
    for (DataBlocks::iterator dbi=other->data_blocks.begin(); dbi!=other->data_blocks.end(); ++dbi) {
        data_blocks[dbi->first] = dbi->second;
        dbi->second->function = this;   // re-point each block at its new owner
    }
    other->data_blocks.clear();
}
648 
/* Increase knowledge about may-return property. */
// NOTE(review): the signature line AND the 'case' labels of this switch were lost from
// this copy; as written it does not compile.  Reconstruct from upstream ROSE sources
// before building.
void
    switch (get_may_return()) {
            set_may_return(new_value);
            break;
            if (SgAsmFunction::RET_ALWAYS==new_value || SgAsmFunction::RET_NEVER==new_value)
                set_may_return(new_value);
            break;
            break;
    }
}
665 
// NOTE(review): the signature line was lost from this copy.  The body streams a
// property summary (block/data-block counts plus the lower-cased may-return state) to
// an ostream 'o' -- confirm name/signature upstream.
void
{
    std::string may_return_str = stringifySgAsmFunctionMayReturn(get_may_return(), "RET_");
    for (size_t i=0; i<may_return_str.size(); ++i)
        may_return_str[i] = tolower(may_return_str[i]);
    o <<"{nbblocks=" <<basic_blocks.size() <<", ndblocks=" <<data_blocks.size() <<", " <<may_return_str;
}
674 
// NOTE(review): the signature lines were lost from this copy.  The body looks up a
// basic block by its entry address in this function's 'basic_blocks' map, returning
// NULL when absent -- confirm name/signature upstream.
{
    BasicBlocks::const_iterator bi=basic_blocks.find(entry_va);
    return bi==basic_blocks.end() ? NULL : bi->second;
}
681 
/* Return partitioner to initial state */
// NOTE(review): the signature line was lost from this copy (presumably Partitioner::clear()).
void
{
    /* Delete all functions */
    for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
        fi->second->clear_basic_blocks();       // detach before delete so blocks hold no stale pointers
        fi->second->clear_data_blocks();
        delete fi->second;
    }
    functions.clear();

    /* Delete all basic blocks. We don't need to call Partitioner::discard() to fix up ptrs because all functions that might
     * have pointed to this block have already been deleted. */
    for (BasicBlocks::iterator bi=basic_blocks.begin(); bi!=basic_blocks.end(); ++bi)
        delete bi->second;
    basic_blocks.clear();

    /* Delete all data blocks, but not the SgAsmStaticData nodes to which they point. */
    for (DataBlocks::iterator bi=data_blocks.begin(); bi!=data_blocks.end(); ++bi)
        delete bi->second;
    data_blocks.clear();

    /* Delete all block IPD configuration data. */
    for (BlockConfigMap::iterator bci=block_config.begin(); bci!=block_config.end(); ++bci)
        delete bci->second;
    block_config.clear();

    /* Delete all instructions by deleting the Partitioner::Instruction objects, but not the underlying SgAsmInstruction
     * objects because the latter might be used by whatever's calling the Partitioner. */
    for (InstructionMap::iterator ii=insns.begin(); ii!=insns.end(); ++ii)
        delete ii->second;
    insns.clear();

    /* Clear all disassembly failures from the cache. */
    clear_disassembler_errors();

    /* Clear statistics and code criteria. This object manages memory for its statistics, but the caller manages the memory
     * for the code criteria. */
    delete aggregate_mean; aggregate_mean = NULL;
    delete aggregate_variance; aggregate_variance = NULL;
    code_criteria = NULL; // owned by user
}
725 
726 void
727 Partitioner::load_config(const std::string &filename) {
728  if (filename.empty())
729  return;
730 #ifdef _MSC_VER /* tps (06/23/2010) : Does not work under Windows */
731  throw IPDParser::Exception("IPD parsing not supported on Windows platforms");
732 #else
733  int fd = open(filename.c_str(), O_RDONLY);
734  if (fd<0)
735  throw IPDParser::Exception(strerror(errno), filename);
736  struct stat sb;
737  fstat(fd, &sb);
738  char *config = new char[sb.st_size];
739  ssize_t nread = read(fd, config, sb.st_size);
740  if (nread<0 || nread<sb.st_size) {
741  delete[] config;
742  close(fd);
743  throw IPDParser::Exception(strerror(errno), filename);
744  }
745  IPDParser(this, config, sb.st_size, filename).parse();
746  delete[] config;
747  close(fd);
748 #endif
749 }
750 
758 void
759 Partitioner::truncate(BasicBlock* bb, rose_addr_t va)
760 {
761  ASSERT_not_null(bb);
762  ASSERT_require(bb==find_bb_containing(va));
763 
764  /* Find the cut point in the instruction vector. I.e., the first instruction to remove from the vector. */
765  InstructionVector::iterator cut = bb->insns.begin();
766  while (cut!=bb->insns.end() && (*cut)->get_address()!=va) ++cut;
767  ASSERT_require(cut!=bb->insns.begin()); /*we can't remove them all since basic blocks are never empty*/
768 
769  /* Remove instructions (from the cut point and beyond) and all the data blocks. */
770  for (InstructionVector::iterator ii=cut; ii!=bb->insns.end(); ++ii) {
771  Instruction *insn = *ii;
772  ASSERT_require(insn->bblock==bb);
773  insn->bblock = NULL;
774  }
775  if (cut!=bb->insns.end()) {
776  bb->insns.erase(cut, bb->insns.end());
777  bb->clear_data_blocks();
778  }
779 }
780 
/* Append instruction to basic block */
// NOTE(review): the signature line was lost from this copy (presumably
// Partitioner::append(BasicBlock *bb, Instruction *insn)).
void
{
    ASSERT_not_null(bb);
    ASSERT_not_null(insn);
    ASSERT_require2(NULL==insn->bblock, "instruction must not have already belonged to a basic block");
    insn->bblock = bb;
    bb->insns.push_back(insn);
}
791 
801 void
802 Partitioner::append(BasicBlock *bb, DataBlock *db, unsigned reason)
803 {
804  ASSERT_not_null(bb);
805  ASSERT_not_null(db);
806  db->reason |= reason;
807 
808  if (db->basic_block!=NULL)
809  db->basic_block->data_blocks.erase(db);
810 
811  bb->data_blocks.insert(db);
812  db->basic_block = bb;
813 }
814 
// Attach basic block 'bb' to function 'f', adding 'reason' bits; with 'keep' the block's
// address is also recorded as a permanent function head.
void
Partitioner::append(Function* f, BasicBlock *bb, unsigned reason, bool keep/*=false*/)
{
    ASSERT_not_null(f);
    ASSERT_not_null(bb);

    if (keep)
        f->heads.insert(bb->address());
    bb->reason |= reason;

    if (bb->function==f)
        return;

    ASSERT_require(bb->function==NULL);         // a block may belong to at most one function
    bb->function = f;
    f->basic_blocks[bb->address()] = bb;

    /* If the block is a function return then mark the function as returning. On a transition from a non-returning function
     * to a returning function, we must mark all calling functions as pending so that the fall-through address of their
     * function calls to this function are eventually discovered. This includes recursive calls since we may have already
     * discovered the recursive call but not followed the fall-through address. Marking callers as "pending" happens in the
     * analyze_cfg() method, were we can handle all the calls at the same time (more efficient than doing one at a time right
     * here). */
    update_analyses(bb);
    if (bb->cache.function_return)
        // NOTE(review): the statement that belongs here was lost from this copy; per the
        // comment above, upstream presumably promotes the function's may-return property
        // -- restore from upstream ROSE sources.
}
853 
863 void
864 Partitioner::append(Function *func, DataBlock *block, unsigned reason, bool force)
865 {
866  ASSERT_not_null(func);
867  ASSERT_not_null(block);
868 
869  if (force) {
870  if (block->function)
871  remove(block->function, block);
872  if (block->basic_block)
873  remove(block->basic_block, block);
874  }
875 
876  block->reason |= reason;
877  if (block->function==func)
878  return;
879 
880  ASSERT_require(block->function==NULL);
881  block->function = func;
882  func->data_blocks[block->address()] = block;
883 }
884 
// NOTE(review): the doxygen/signature lines were lost from this copy (presumably
// Partitioner::remove(Function *f, BasicBlock *bb)).  Detaches a basic block from the
// function that owns it.
void
{
    ASSERT_not_null(f);
    ASSERT_not_null(bb);
    ASSERT_require(bb->function==f);
    bb->function = NULL;
    f->basic_blocks.erase(bb->address());
}
896 
// NOTE(review): the doxygen/signature lines were lost from this copy (presumably
// Partitioner::remove(Function *f, DataBlock *db)).  Detaches a data block from the
// function that owns it.
void
{
    ASSERT_not_null(f);
    ASSERT_not_null(db);
    ASSERT_require(db->function==f);
    db->function = NULL;
    f->data_blocks.erase(db->address());
}
909 
// NOTE(review): the doxygen/signature lines were lost from this copy (presumably
// Partitioner::remove(BasicBlock *bb, DataBlock *db)).  Detaches a data block from a
// basic block, if it is attached there; otherwise a no-op.
void
{
    ASSERT_not_null(bb);
    if (db && db->basic_block==bb) {
        bb->data_blocks.erase(db);
        db->basic_block = NULL;
    }
}
921 
922 /* Remove instruction from consideration. */
924 Partitioner::discard(Instruction *insn, bool discard_entire_block)
925 {
926  if (insn) {
927  rose_addr_t va = insn->get_address();
928  BasicBlock *bb = find_bb_containing(va, false);
929  if (bb) {
930  if (discard_entire_block) {
931  discard(bb);
932  } else if (bb->insns.front()==insn) {
933  discard(bb);
934  } else {
935  truncate(bb, va);
936  }
937  }
938  insns.erase(va);
939  }
940  return NULL;
941 }
942 
/* Delete block, returning its instructions back to the (implied) list of free instructions. */
// NOTE(review): the return-type/signature lines were lost from this copy (returns NULL;
// presumably Partitioner::discard(BasicBlock *bb)).
{
    if (bb) {
        ASSERT_require(NULL==bb->function);     // caller must detach the block from its function first

        /* Remove instructions from the block, returning them to the (implied) list of free instructions. */
        for (InstructionVector::iterator ii=bb->insns.begin(); ii!=bb->insns.end(); ++ii) {
            Instruction *insn = *ii;
            ASSERT_require(insn->bblock==bb);
            insn->bblock = NULL;
        }

        /* Remove the association between data blocks and this basic block. */
        bb->clear_data_blocks();

        /* Remove the block from the partitioner. */
        basic_blocks.erase(bb->address());
        delete bb;
    }
    return NULL;
}
966 
967 /* Finds (or possibly creates) an instruction beginning at the specified address. */
969 Partitioner::find_instruction(rose_addr_t va, bool create/*=true*/)
970 {
971  InstructionMap::iterator ii = insns.find(va);
972  if (create && disassembler && ii==insns.end() && bad_insns.find(va)==bad_insns.end()) {
973  Instruction *insn = NULL;
974  try {
975  insn = new Instruction(disassembler->disassembleOne(map, va, NULL));
976  ii = insns.insert(std::make_pair(va, insn)).first;
977  } catch (const Disassembler::Exception &e) {
978  bad_insns.insert(std::make_pair(va, e));
979  }
980  }
981  return ii==insns.end() ? NULL : ii->second;
982 }
983 
// Finds (or, when 'create' is set, builds) the basic block containing the instruction at
// 'va', appending subsequent instructions until something terminates the block.
// NOTE(review): the doxygen comment and return-type line were lost from this copy; the
// 'return bb' below shows it returns a BasicBlock* -- confirm signature upstream.
Partitioner::find_bb_containing(rose_addr_t va, bool create/*true*/)
{
    Instruction *insn = find_instruction(va);
    if (!insn)
        return NULL;                            // no instruction (and none creatable) at va
    if (!create || insn->bblock!=NULL)
        return insn->bblock;                    // existing block, or NULL when not creating
    update_progress();
    BasicBlock *bb = insn->bblock;
    if (!bb) {
        bb = new BasicBlock;
        basic_blocks.insert(std::make_pair(va, bb));
    }

    while (1) {
        append(bb, insn);

        /* Find address of next instruction, or whether this insn is the end of the block */
        va += insn->get_size();
        if (insn->terminates_basic_block()) { /*naively terminates?*/
            bool complete;
            const Disassembler::AddressSet& sucs = successors(bb, &complete);
            if ((func_heuristics & SgAsmFunction::FUNC_CALL_TARGET) && is_function_call(bb, NULL)) {
                /* When we are detecting functions based on x86 CALL instructions (or similar for other architectures) then
                 * the instruction after the CALL should never be part of this basic block. Otherwise allow the call to be
                 * part of the basic block initially and we'll split the block later if we need to. */
                break;
            } else if (allow_discont_blocks) {
                if (!complete || sucs.size()!=1)
                    break;
                va = *(sucs.begin());           // discontiguous block: continue at the lone successor
            } else {
                if (!complete || sucs.size()!=1 || *(sucs.begin())!=va)
                    break;                      // only fall-through continues a contiguous block
            }
        }

        /* Get the next instruction */
        insn = find_instruction(va);
        if (!insn || insn->bblock)
            break;                              // stop at undisassemblable or already-claimed instruction
    }
    return bb;
}
1048 
1053 Partitioner::find_bb_starting(rose_addr_t va, bool create/*true*/)
1054 {
1055  BasicBlock *bb = find_bb_containing(va, create);
1056  if (!bb)
1057  return NULL;
1058  if (va==bb->address())
1059  return bb;
1060  if (!create)
1061  return NULL;
1062  mlog[TRACE] <<"[split from B" <<addrToString(bb->address()) <<"#" <<bb->insns.size() <<"]";
1063  if (bb->function!=NULL)
1064  bb->function->pending = true;
1065  truncate(bb, va);
1066  bb = find_bb_containing(va);
1067  ASSERT_not_null(bb);
1068  ASSERT_require(va==bb->address());
1069  return bb;
1070 }
1071 
/* Follows basic-block "alias_for" links to the final (canonical) block address.
 * NOTE(review): the line bearing this function's name and parameter list was lost in the extracted listing between the
 * return type and the opening brace; the body expects a rose_addr_t parameter named "va". */
rose_addr_t
{
    for (size_t i=0; i<100; i++) {                      // arbitrary bound; a longer chain indicates an alias cycle
        BasicBlock *bb = find_bb_starting(va, false);   // never create blocks while merely resolving aliases
        if (!bb || !bb->cache.alias_for) return va;
        mlog[TRACE] <<"[B" <<addrToString(va) <<"->B" <<addrToString(bb->cache.alias_for) <<"]";
        va = bb->cache.alias_for;
    }
    ASSERT_not_reachable("possible alias loop");
    return va;
}
1087 
1088 /* Finds an existing function definition. */
1090 Partitioner::find_function(rose_addr_t entry_va)
1091 {
1092  Functions::iterator fi = functions.find(entry_va);
1093  if (fi==functions.end()) return NULL;
1094  return fi->second;
1095 }
1096 
1097 /* Adds or updates a function definition. */
1099 Partitioner::add_function(rose_addr_t entry_va, unsigned reasons, std::string name)
1100 {
1101  Function *f = NULL;
1102  Functions::iterator fi = functions.find(entry_va);
1103  if (fi==functions.end()) {
1104  f = new Function(entry_va, reasons, name);
1105  functions[entry_va] = f;
1106  } else {
1107  f = fi->second;
1108  ASSERT_require(f->entry_va==entry_va);
1109  if (reasons & SgAsmFunction::FUNC_MISCMASK)
1110  f->reason &= ~SgAsmFunction::FUNC_MISCMASK;
1111  f->reason |= reasons;
1112  if (name!="") f->name = name;
1113  }
1114  return f;
1115 }
1116 
/* Do whatever's necessary to finish loading IPD configuration. */
// NOTE(review): the line bearing this function's name (between "void" and the opening brace) was lost in the extracted
// listing.  The body iterates over the IPD block configuration map, fixing up each configured basic block and optionally
// "executing" a per-block successors program under partial-symbolic semantics.
void
{
    using namespace BinaryAnalysis::InstructionSemantics;

    for (BlockConfigMap::iterator bci=block_config.begin(); bci!=block_config.end(); ++bci) {
        rose_addr_t va = bci->first;
        BlockConfig *bconf = bci->second;

        /* The configured block must exist and have at least the configured number of instructions; any excess
         * instructions are split off into a following block. */
        BasicBlock *bb = find_bb_starting(va);
        if (!bb)
            throw Exception("cannot obtain IPD-specified basic block at " + StringUtility::addrToString(va));
        if (bb->insns.size()<bconf->ninsns)
            throw Exception("cannot obtain " + StringUtility::numberToString(bconf->ninsns) + "-instruction basic block at " +
                            StringUtility::addrToString(va) + " (only " + StringUtility::numberToString(bb->insns.size()) +
                            " available)");
        if (bb->insns.size()>bconf->ninsns)
            truncate(bb, bb->insns[bconf->ninsns]->get_address());

        /* Initial analysis followed augmented by settings from the configuration. */
        update_analyses(bb);
        bb->cache.alias_for = bconf->alias_for;
        if (bconf->sucs_specified) {
            bb->cache.sucs = bconf->sucs;
            bb->cache.sucs_complete = bconf->sucs_complete;
        }
        if (!bconf->sucs_program.empty()) {
            /* "Execute" the program that will detect successors. We do this by interpreting the basic block to initialize
             * registers, loading the successor program, pushing some arguments onto the program's stack, interpreting the
             * program, extracting return values from memory, and unloading the program. */
            char block_name_str[64];
            sprintf(block_name_str, "B%08" PRIx64, va);
            std::string block_name = block_name_str;
            mlog[DEBUG] << "running successors program for " <<block_name_str <<"\n";

            // FIXME: Use a copy (COW) version of the map so we don't need to modify the real map and so that the simulated
            // program can't accidentally modify the stuff being disassembled. [RPM 2012-05-07]
            MemoryMap::Ptr map = get_map();
            ASSERT_not_null(map);
            typedef PartialSymbolicSemantics::Policy<> Policy;
            typedef X86InstructionSemantics<Policy, PartialSymbolicSemantics::ValueType> Semantics;
            Policy policy;
            policy.set_map(map);
            Semantics semantics(policy);

            /* Seed the machine state by interpreting the basic block itself. */
            mlog[DEBUG] <<" running semantics for the basic block...\n";
            for (InstructionVector::iterator ii=bb->insns.begin(); ii!=bb->insns.end(); ++ii) {
                SgAsmX86Instruction *insn = isSgAsmX86Instruction(*ii);
                ASSERT_not_null(insn);
                semantics.processInstruction(insn);
            }

            /* Load the program. Keep at least one unmapped byte between the program text, stack, and svec areas in order to
             * help with debugging. */
            mlog[DEBUG] <<" loading the program...\n";

            /* Load the instructions to execute */
            rose_addr_t text_va = *map->findFreeSpace(bconf->sucs_program.size(), 4096);
            AddressInterval textInterval = AddressInterval::baseSize(text_va, bconf->sucs_program.size());
            // NOTE(review): the argument that constructs the segment holding the sucs_program bytes (original line 1178)
            // was lost in the extracted listing; as written this insert() call is syntactically incomplete.
            map->insert(textInterval,
                        MemoryMap::READABLE | MemoryMap::EXECUTABLE,
                        "successors program text"));

            /* Create a stack */
            static const size_t stack_size = 8192;
            rose_addr_t stack_va = *map->findFreeSpace(stack_size, stack_size, 4096);
            AddressInterval stackInterval = AddressInterval::baseSize(stack_va, stack_size);
            map->insert(stackInterval,
                        MemoryMap::Segment::anonymousInstance(stack_size, MemoryMap::READABLE|MemoryMap::WRITABLE,
                                                              block_name + " successors stack"));
            rose_addr_t stack_ptr = stack_va + stack_size;

            /* Create an area for the returned vector of successors */
            static const size_t svec_size = 8192;
            rose_addr_t svec_va = *map->findFreeSpace(text_va, svec_size, 4096);
            AddressInterval svecInterval = AddressInterval::baseSize(svec_va, svec_size);
            map->insert(svecInterval,
                        MemoryMap::Segment::anonymousInstance(svec_size, MemoryMap::READABLE|MemoryMap::WRITABLE,
                                                              block_name + " successors vector"));

            /* What is the "return" address. Eventually the successors program will execute a "RET" instruction that will
             * return to this address. We can choose something arbitrary as long as it doesn't conflict with anything else.
             * We'll use the first byte past the end of the successor program, which gives the added benefit that the
             * successor program doesn't actually have to even return -- it can just fall off the end. */
            rose_addr_t return_va = text_va + bconf->sucs_program.size();
            if (mlog[DEBUG]) {
                mlog[DEBUG] <<" memory map after program is loaded:\n";
                map->dump(mlog[DEBUG], " ");
            }

            /* Push arguments onto the stack in reverse order. */
            mlog[DEBUG] <<" setting up the call frame...\n";

            /* old stack pointer */
            stack_ptr -= 4;
            policy.writeMemory<32>(x86_segreg_ss, policy.number<32>(stack_ptr),
                                   policy.readRegister<32>("esp"), policy.true_());

            /* address past the basic block's last instruction */
            stack_ptr -= 4;
            policy.writeMemory<32>(x86_segreg_ss, policy.number<32>(stack_ptr),
                                   policy.number<32>(bb->insns.back()->get_address()+bb->insns.back()->get_size()),
                                   policy.true_());

            /* address of basic block's first instruction */
            stack_ptr -= 4;
            policy.writeMemory<32>(x86_segreg_ss, policy.number<32>(stack_ptr),
                                   policy.number<32>(bb->insns.front()->get_address()), policy.true_());

            /* size of svec in bytes */
            stack_ptr -= 4;
            policy.writeMemory<32>(x86_segreg_ss, policy.number<32>(stack_ptr),
                                   policy.number<32>(svec_size), policy.true_());

            /* address of svec */
            stack_ptr -= 4;
            policy.writeMemory<32>(x86_segreg_ss, policy.number<32>(stack_ptr),
                                   policy.number<32>(svec_va), policy.true_());

            /* return address for successors program */
            stack_ptr -= 4;
            policy.writeMemory<32>(x86_segreg_ss, policy.number<32>(stack_ptr),
                                   policy.number<32>(return_va), policy.true_());

            /* Adjust policy stack pointer */
            policy.writeRegister("esp", policy.number<32>(stack_ptr));

            /* Interpret the program */
            mlog[DEBUG] <<" running the program...\n";
            // NOTE(review): a line was lost in the extracted listing here (original line 1248).
            ASSERT_not_null(disassembler);
            policy.writeRegister("eip", policy.number<32>(text_va));
            while (1) {
                rose_addr_t ip = policy.readRegister<32>("eip").known_value();
                if (ip==return_va) break;       // program returned, or simply fell off its own end
                SgAsmX86Instruction *insn = isSgAsmX86Instruction(disassembler->disassembleOne(map, ip));
                mlog[DEBUG] <<" " <<addrToString(ip) <<": " <<(insn?unparseInstruction(insn):std::string("<null>")) <<"\n";
                ASSERT_not_null(insn);
                semantics.processInstruction(insn);
                ASSERT_require(policy.readRegister<32>("eip").is_known());
                // NOTE(review): a line was lost in the extracted listing here (original line 1259).
            }

            /* Extract the list of successors. The number of successors is the first element of the list. */
            mlog[DEBUG] <<" extracting program return values...\n";
            PartialSymbolicSemantics::ValueType<32> nsucs = policy.readMemory<32>(x86_segreg_ss, policy.number<32>(svec_va),
                                                                                 policy.true_());
            ASSERT_require(nsucs.is_known());
            mlog[DEBUG] <<" number of successors: " <<nsucs.known_value() <<"\n";
            ASSERT_require(nsucs.known_value()*4 <= svec_size-4); /*first entry is size*/
            for (size_t i=0; i<nsucs.known_value(); i++) {
                PartialSymbolicSemantics::ValueType<32> suc_va = policy.readMemory<32>(x86_segreg_ss,
                                                                                      policy.number<32>(svec_va+4+i*4),
                                                                                      policy.true_());
                if (suc_va.is_known()) {
                    mlog[DEBUG] <<" #" <<i <<": " <<addrToString(suc_va.known_value()) <<"\n";
                    bb->cache.sucs.insert(suc_va.known_value());
                } else {
                    /* A successor whose value could not be determined means the cached set is incomplete. */
                    mlog[DEBUG] <<" #" <<i <<": unknown\n";
                    bb->cache.sucs_complete = false;
                }
            }

            /* Unmap the program */
            mlog[DEBUG] <<" unmapping the program...\n";
            map->erase(textInterval);
            map->erase(stackInterval);
            map->erase(svecInterval);

            mlog[DEBUG] <<" done.\n";
        }
    }
}
1292 
/* Marks program entry addresses as functions. */
// NOTE(review): the line bearing this function's name (it dereferences an SgAsmGenericHeader* named "fhdr") was lost in
// the extracted listing.
void
{
    assert(fhdr!=NULL);
    SgRVAList entries = fhdr->get_entry_rvas();

    // Libraries don't have entry addresses
    // NOTE(review): the second half of this condition and its opening brace (original line 1302) were lost in the
    // extracted listing; as written the "if" is syntactically incomplete.
    if ((entries.empty() || (1==entries.size() && 0==entries[0].get_rva())) &&
        return;
    }

    /* Entry RVAs are relative to the header's base address; only mark addresses that actually disassemble. */
    for (size_t i=0; i<entries.size(); i++) {
        rose_addr_t entry_va = entries[i].get_rva() + fhdr->get_base_va();
        if (find_instruction(entry_va))
            add_function(entry_va, SgAsmFunction::FUNC_ENTRY_POINT);
    }
}
1312 
/* Use the Frame Descriptor Entry Records of the ELF .eh_frame section to mark functions. */
// NOTE(review): the line bearing this function's name (it dereferences an SgAsmGenericHeader* named "fhdr") was lost in
// the extracted listing.
void
{
    SgAsmGenericSectionList *sections = fhdr->get_sections();
    for (size_t i=0; i<sections->get_sections().size(); i++) {
        SgAsmElfEHFrameSection *ehframe = isSgAsmElfEHFrameSection(sections->get_sections()[i]);
        if (ehframe!=NULL) {
            /* Each Common Information Entry (CIE) owns a list of Frame Description Entries (FDEs); every FDE names the
             * beginning of a code region, which is taken to be a function entry point if an instruction exists there. */
            SgAsmElfEHFrameEntryCIList *ci_entries = ehframe->get_ci_entries();
            for (size_t j=0; j<ci_entries->get_entries().size(); j++) {
                SgAsmElfEHFrameEntryCI *cie = ci_entries->get_entries()[j];
                SgAsmElfEHFrameEntryFDList *fd_entries = cie->get_fd_entries();
                for (size_t k=0; k<fd_entries->get_entries().size(); k++) {
                    SgAsmElfEHFrameEntryFD *fde = fd_entries->get_entries()[k];
                    rose_addr_t target = fde->get_begin_rva().get_rva();
                    if (find_instruction(target))
                        add_function(target, SgAsmFunction::FUNC_EH_FRAME);
                }
            }
        }
    }
}
1335 
/* Adds each entry of the ELF procedure lookup table (.plt section) to the list of functions. */
// NOTE(review): the line bearing this function's name (it takes an SgAsmGenericHeader* named "fhdr") was lost in the
// extracted listing.
void
{
    /* This function is ELF, x86 specific. */
    SgAsmElfFileHeader *elf = isSgAsmElfFileHeader(fhdr);
    if (!elf) return;

    /* Find important sections */
    SgAsmGenericSection *plt = elf->get_section_by_name(".plt");
    if (!plt || !plt->is_mapped()) return;
    SgAsmGenericSection *gotplt = elf->get_section_by_name(".got.plt");
    if (!gotplt || !gotplt->is_mapped()) return;

    /* Find all relocation sections */
    std::set<SgAsmElfRelocSection*> rsects;
    const SgAsmGenericSectionPtrList &sections = elf->get_sections()->get_sections();
    for (SgAsmGenericSectionPtrList::const_iterator si=sections.begin(); si!=sections.end(); ++si) {
        SgAsmElfRelocSection *reloc_section = isSgAsmElfRelocSection(*si);
        if (reloc_section)
            rsects.insert(reloc_section);
    }
    if (rsects.empty()) return;

    /* Look at each instruction in the .plt section. If the instruction is a computed jump to an address stored in the
     * .got.plt then we've found the beginning of a plt trampoline. */
    rose_addr_t plt_offset = 14; /* skip the first entry (PUSH ds:XXX; JMP ds:YYY; 0x00; 0x00)--the JMP is not a function*/
    while (plt_offset<plt->get_mapped_size()) {

        /* Find an x86 instruction */
        Instruction *insn = find_instruction(plt->get_mapped_actual_va()+plt_offset);
        if (!insn) {
            ++plt_offset;       // no instruction here; resynchronize one byte at a time
            continue;
        }
        plt_offset += insn->get_size();
        SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(insn);
        if (!insn_x86) continue;

        /* A trampoline's indirect jump must read its target from somewhere inside the .got.plt mapping. */
        rose_addr_t gotplt_va = get_indirection_addr(insn_x86, elf->get_base_va()+gotplt->get_mapped_preferred_rva());
        if (gotplt_va < elf->get_base_va() + gotplt->get_mapped_preferred_rva() ||
            gotplt_va >= elf->get_base_va() + gotplt->get_mapped_preferred_rva() + gotplt->get_mapped_size()) {
            continue; /* jump is not indirect through the .got.plt section */
        }

        /* Find the relocation entry whose offset is the gotplt_va and use that entry's symbol for the function name. */
        std::string name;
        for (std::set<SgAsmElfRelocSection*>::iterator ri=rsects.begin(); ri!=rsects.end() && name.empty(); ++ri) {
            SgAsmElfRelocEntryList *entries = (*ri)->get_entries();
            SgAsmElfSymbolSection *symbol_section = isSgAsmElfSymbolSection((*ri)->get_linked_section());
            SgAsmElfSymbolList *symbols = symbol_section ? symbol_section->get_symbols() : NULL;
            for (size_t ei=0; ei<entries->get_entries().size() && name.empty() && symbols; ++ei) {
                SgAsmElfRelocEntry *rel = entries->get_entries()[ei];
                if (rel->get_r_offset()==gotplt_va) {
                    unsigned long symbol_idx = rel->get_sym();
                    if (symbol_idx < symbols->get_symbols().size()) {
                        SgAsmElfSymbol *symbol = symbols->get_symbols()[symbol_idx];
                        name = symbol->get_name()->get_string() + "@plt";
                    }
                }
            }
        }

        Function *plt_func = add_function(insn->get_address(), SgAsmFunction::FUNC_IMPORT, name);

        /* FIXME: Assume that most PLT functions return. We make this assumption for now because the PLT table contains an
         * indirect jump through the .plt.got data area and we don't yet do static analysis of the data. Because of
         * that, all the PLT functons will contain only a basic block with the single indirect jump, and no return
         * (e.g., x86 RET or RETF) instruction, and therefore the function would not normally be marked as returning.
         * [RPM 2010-05-11] */
        if ("abort@plt"!=name && "execl@plt"!=name && "execlp@plt"!=name && "execv@plt"!=name && "execvp@plt"!=name &&
            "exit@plt"!=name && "_exit@plt"!=name && "fexecve@plt"!=name &&
            "longjmp@plt"!=name && "__longjmp@plt"!=name && "siglongjmp@plt"!=name) {
            // NOTE(review): the body of this branch (original line 1409; presumably marks plt_func as possibly
            // returning, per the FIXME above) was lost in the extracted listing.
        } else {
            // NOTE(review): the body of this branch (original line 1411; presumably the non-returning case) was lost in
            // the extracted listing.
        }
    }
}
1415 
/* Use symbol tables to determine function entry points. */
// NOTE(review): the line bearing this function's name (it takes an SgAsmGenericHeader* named "fhdr") was lost in the
// extracted listing.
void
{
    SgAsmGenericSectionList *sections = fhdr->get_sections();
    for (size_t i=0; i<sections->get_sections().size(); i++) {

        /* If this is a symbol table of some sort, then get the list of symbols. */
        std::vector<SgAsmGenericSymbol*> symbols;
        if (isSgAsmElfSymbolSection(sections->get_sections()[i])) {
            SgAsmElfSymbolList *elf_symbols = isSgAsmElfSymbolSection(sections->get_sections()[i])->get_symbols();
            for (size_t j=0; j<elf_symbols->get_symbols().size(); j++) {
                symbols.push_back(elf_symbols->get_symbols()[j]);
            }
        } else if (isSgAsmCoffSymbolTable(sections->get_sections()[i])) {
            SgAsmCoffSymbolList *coff_symbols = isSgAsmCoffSymbolTable(sections->get_sections()[i])->get_symbols();
            for (size_t j=0; j<coff_symbols->get_symbols().size(); j++) {
                symbols.push_back(coff_symbols->get_symbols()[j]);
            }
        }

        for (size_t j=0; j<symbols.size(); j++) {
            SgAsmGenericSymbol *symbol = symbols[j];
            // NOTE(review): the beginning of this condition (original lines 1439-1440, presumably testing that the symbol
            // denotes a function) was lost in the extracted listing; as written the "if" is syntactically incomplete.
                symbol->get_value()!=0) {
                rose_addr_t value = fhdr->get_base_va() + symbol->get_value();
                SgAsmGenericSection *section = symbol->get_bound();

                // Add a function at the symbol's value. If the symbol is bound to a section and the section is mapped at a
                // different address than it expected to be mapped, then adjust the symbol's value by the same amount.
                rose_addr_t va_1 = value;
                if (section!=NULL && section->is_mapped() &&
                    section->get_mapped_preferred_va()!=section->get_mapped_actual_va()) {
                    va_1 += section->get_mapped_actual_va() - section->get_mapped_preferred_va();
                }
                if (find_instruction(va_1))
                    add_function(va_1, SgAsmFunction::FUNC_SYMBOL, symbol->get_name()->get_string());

                // Sometimes weak symbol values are offsets from a section (this code handles that), but other times they're
                // the value is used directly (the above code handled that case). */
                if (section && symbol->get_binding()==SgAsmGenericSymbol::SYM_WEAK)
                    value += section->get_mapped_actual_va();
                if (find_instruction(value))
                    add_function(value, SgAsmFunction::FUNC_SYMBOL, symbol->get_name()->get_string());
            }
        }
    }
}
1465 
/* Adds PE exports as function entry points. */
// NOTE(review): the line bearing this function's name (it takes an SgAsmGenericHeader* named "fhdr") was lost in the
// extracted listing.
void
{
    SgAsmGenericSectionList *sections = fhdr->get_sections();
    for (size_t i=0; i<sections->get_sections().size(); ++i) {
        if (SgAsmPEExportSection *export_section = isSgAsmPEExportSection(sections->get_sections()[i])) {
            const SgAsmPEExportEntryPtrList &exports = export_section->get_exports()->get_exports();
            for (SgAsmPEExportEntryPtrList::const_iterator ei=exports.begin(); ei!=exports.end(); ++ei) {
                /* Only mark exports whose address actually disassembles (data exports are skipped). */
                rose_addr_t va = (*ei)->get_export_rva().get_va();
                if (find_instruction(va))
                    add_function(va, SgAsmFunction::FUNC_EXPORT, (*ei)->get_name()->get_string());
            }
        }
    }
}
1482 
1483 /* Tries to match x86 "(mov rdi,rdi)?; push rbp; mov rbp,rsp" (or the 32-bit equivalent). */
1484 Partitioner::InstructionMap::const_iterator
1485 Partitioner::pattern1(const InstructionMap& insns, InstructionMap::const_iterator first, Disassembler::AddressSet &exclude)
1486 {
1487  InstructionMap::const_iterator ii = first;
1488  Disassembler::AddressSet matches;
1489 
1490  /* Look for optional "mov rdi, rdi"; if found, advance ii iterator to fall-through instruction */
1491  do {
1492  SgAsmX86Instruction *insn = isSgAsmX86Instruction(ii->second);
1493  if (!insn || insn->get_kind()!=x86_mov)
1494  break;
1495  const SgAsmExpressionPtrList &opands = insn->get_operandList()->get_operands();
1496  if (opands.size()!=2)
1497  break;
1498  SgAsmRegisterReferenceExpression *rre = isSgAsmRegisterReferenceExpression(opands[0]);
1499  if (!rre ||
1500  rre->get_descriptor().get_major()!=x86_regclass_gpr ||
1501  rre->get_descriptor().get_minor()!=x86_gpr_di)
1502  break;
1503  rre = isSgAsmRegisterReferenceExpression(opands[1]);
1504  if (!rre ||
1505  rre->get_descriptor().get_major()!=x86_regclass_gpr ||
1506  rre->get_descriptor().get_minor()!=x86_gpr_di)
1507  break;
1508  matches.insert(ii->first);
1509  ii = insns.find(ii->first + insn->get_size());
1510  } while (0);
1511 
1512  /* Look for "push rbp" */
1513  {
1514  if (ii==insns.end())
1515  return insns.end();
1516  SgAsmX86Instruction *insn = isSgAsmX86Instruction(ii->second);
1517  if (!insn || insn->get_kind()!=x86_push)
1518  return insns.end();
1519  const SgAsmExpressionPtrList &opands = insn->get_operandList()->get_operands();
1520  if (opands.size()!=1)
1521  return insns.end();
1522  SgAsmRegisterReferenceExpression *rre = isSgAsmRegisterReferenceExpression(opands[0]);
1523  if (!rre ||
1524  rre->get_descriptor().get_major()!=x86_regclass_gpr ||
1525  rre->get_descriptor().get_minor()!=x86_gpr_bp)
1526  return insns.end();
1527  matches.insert(ii->first);
1528  ii = insns.find(ii->first + insn->get_size());
1529  }
1530 
1531  /* Look for "mov rbp,rsp" */
1532  {
1533  if (ii==insns.end())
1534  return insns.end();
1535  SgAsmX86Instruction *insn = isSgAsmX86Instruction(ii->second);
1536  if (!insn || insn->get_kind()!=x86_mov)
1537  return insns.end();
1538  const SgAsmExpressionPtrList &opands = insn->get_operandList()->get_operands();
1539  if (opands.size()!=2)
1540  return insns.end();
1541  SgAsmRegisterReferenceExpression *rre = isSgAsmRegisterReferenceExpression(opands[0]);
1542  if (!rre ||
1543  rre->get_descriptor().get_major()!=x86_regclass_gpr ||
1544  rre->get_descriptor().get_minor()!=x86_gpr_bp)
1545  return insns.end();
1546  rre = isSgAsmRegisterReferenceExpression(opands[1]);
1547  if (!rre ||
1548  rre->get_descriptor().get_major()!=x86_regclass_gpr ||
1549  rre->get_descriptor().get_minor()!=x86_gpr_sp)
1550  return insns.end();
1551  matches.insert(ii->first);
1552  }
1553 
1554  exclude.insert(matches.begin(), matches.end());
1555  return first;
1556 }
1557 
#if 0 /*commented out in Partitioner::mark_func_patterns()*/
/* Tries to match x86 "nop;nop;nop" followed by something that's not a nop.
 * Unlike pattern1, the iterator returned on success is the non-NOP instruction following the padding. */
Partitioner::InstructionMap::const_iterator
Partitioner::pattern2(const InstructionMap& insns, InstructionMap::const_iterator first, Disassembler::AddressSet &exclude)
{
    InstructionMap::const_iterator ii = first;
    Disassembler::AddressSet matches;

    /* Look for three "nop" instructions */
    for (size_t i=0; i<3; i++) {
        SgAsmX86Instruction *nop = isSgAsmX86Instruction(ii->second);
        if (!nop) return insns.end();
        if (nop->get_kind()!=x86_nop) return insns.end();
        if (nop->get_operandList()->get_operands().size()!=0) return insns.end(); /*only zero-arg NOPs allowed*/
        matches.insert(ii->first);
        ii = insns.find(ii->first + nop->get_size());
        if (ii==insns.end()) return insns.end();
    }

    /* Look for something that's not a "nop"; this is the function entry point. */
    SgAsmX86Instruction *notnop = isSgAsmX86Instruction(ii->second);
    if (!notnop) return insns.end();
    if (notnop->get_kind()==x86_nop) return insns.end();
    matches.insert(ii->first);

    exclude.insert(matches.begin(), matches.end());
    return ii;
}
#endif
1587 
#if 0 /* commented out in Partitioner::mark_func_patterns() */
/* Tries to match x86 "leave;ret" followed by one or more "nop" followed by a non-nop
 * The iterator returned on success is the non-NOP instruction following the padding. */
Partitioner::InstructionMap::const_iterator
Partitioner::pattern3(const InstructionMap& insns, InstructionMap::const_iterator first, Disassembler::AddressSet &exclude)
{
    InstructionMap::const_iterator ii = first;
    Disassembler::AddressSet matches;

    /* leave; ret; nop */
    for (size_t i=0; i<3; i++) {
        SgAsmX86Instruction *insn = isSgAsmX86Instruction(ii->second);
        if (!insn) return insns.end();
        if ((i==0 && insn->get_kind()!=x86_leave) ||
            (i==1 && insn->get_kind()!=x86_ret) ||
            (i==2 && insn->get_kind()!=x86_nop))
            return insns.end();
        matches.insert(ii->first);
        ii = insns.find(ii->first + insn->get_size());
        if (ii==insns.end()) return insns.end();
    }

    /* Zero or more "nop" instructions */
    while (1) {
        SgAsmX86Instruction *insn = isSgAsmX86Instruction(ii->second);
        if (!insn) return insns.end();
        if (insn->get_kind()!=x86_nop) break;
        matches.insert(ii->first);
        ii = insns.find(ii->first + insn->get_size());
        if (ii==insns.end()) return insns.end();
    }

    /* This must be something that's not a "nop", but make sure it's an x86 instruction anyway. */
    SgAsmX86Instruction *insn = isSgAsmX86Instruction(ii->second);
    if (!insn) return insns.end();
    matches.insert(ii->first);

    exclude.insert(matches.begin(), matches.end());
    return ii;
}
#endif
1628 
1629 // class method: Matches an x86 "ENTER xxxx, 0" instruction.
1630 Partitioner::InstructionMap::const_iterator
1631 Partitioner::pattern4(const InstructionMap &insns, InstructionMap::const_iterator first, Disassembler::AddressSet &exclude)
1632 {
1633  SgAsmX86Instruction *insn = isSgAsmX86Instruction(first->second);
1634  if (!insn || insn->get_kind()!=x86_enter)
1635  return insns.end();
1636  const SgAsmExpressionPtrList &args = insn->get_operandList()->get_operands();
1637  if (args.size()!=2)
1638  return insns.end();
1639  SgAsmIntegerValueExpression *arg = isSgAsmIntegerValueExpression(args[1]);
1640  if (!arg || 0!=arg->get_absoluteValue())
1641  return insns.end();
1642 
1643  for (size_t i=0; i<insn->get_size(); ++i)
1644  exclude.insert(insn->get_address() + i);
1645  return first;
1646 }
1647 
1648 // class method: tries to match m68k "link.w a6, IMM16" where IMM16 is zero or negative
1649 Partitioner::InstructionMap::const_iterator
1650 Partitioner::pattern5(const InstructionMap &insns, InstructionMap::const_iterator first, Disassembler::AddressSet &exclude)
1651 {
1652  SgAsmM68kInstruction *insn = isSgAsmM68kInstruction(first->second);
1653  if (!insn || insn->get_kind()!=m68k_link)
1654  return insns.end();
1655  const SgAsmExpressionPtrList &args = insn->get_operandList()->get_operands();
1656  if (args.size()!=2)
1657  return insns.end();
1658  SgAsmDirectRegisterExpression *rre = isSgAsmDirectRegisterExpression(args[0]);
1659  SgAsmIntegerValueExpression *ival = isSgAsmIntegerValueExpression(args[1]);
1660  if (!rre || !ival)
1661  return insns.end();
1662  RegisterDescriptor reg = rre->get_descriptor();
1663  if (reg.get_major()!=m68k_regclass_addr || reg.get_minor()!=6/*link register*/)
1664  return insns.end();
1665  int64_t displacement = ival->get_signedValue();
1666  if (displacement>0)
1667  return insns.end();
1668 
1669  for (size_t i=0; i<insn->get_size(); ++i)
1670  exclude.insert(insn->get_address() + i);
1671  return first;
1672 }
1673 
1674 // class method: tries to match m68k instructions: "rts; (trapf)?; lea.l [a7-X], a7"
1675 Partitioner::InstructionMap::const_iterator
1676 Partitioner::pattern6(const InstructionMap &insns, InstructionMap::const_iterator first, Disassembler::AddressSet &exclude)
1677 {
1678  // rts
1679  SgAsmM68kInstruction *insn = isSgAsmM68kInstruction(first->second);
1680  if (!insn || insn->get_kind()!=m68k_rts)
1681  return insns.end();
1682  ++first;
1683 
1684  // trapf (padding)
1685  insn = isSgAsmM68kInstruction(first->second);
1686  if (insn && insn->get_kind()==m68k_trapf)
1687  ++first;
1688 
1689  // leal. [a7-X], a7
1690  insn = isSgAsmM68kInstruction(first->second);
1691  if (!insn || insn->get_kind()!=m68k_lea || insn->get_operandList()->get_operands().size()!=2)
1692  return insns.end();
1693  const SgAsmExpressionPtrList &args = insn->get_operandList()->get_operands();
1694  SgAsmMemoryReferenceExpression *mre = isSgAsmMemoryReferenceExpression(args[0]);
1695  SgAsmBinaryAdd *sum = mre ? isSgAsmBinaryAdd(mre->get_address()) : NULL;
1696  SgAsmDirectRegisterExpression *reg1 = sum ? isSgAsmDirectRegisterExpression(sum->get_lhs()) : NULL;
1697  SgAsmIntegerValueExpression *addend = sum ? isSgAsmIntegerValueExpression(sum->get_rhs()) : NULL;
1698  SgAsmDirectRegisterExpression *reg2 = isSgAsmDirectRegisterExpression(args[1]);
1699  if (!reg1 || reg1->get_descriptor()!=RegisterDescriptor(m68k_regclass_addr, 7, 0, 32) ||
1700  !reg2 || reg2->get_descriptor()!=RegisterDescriptor(m68k_regclass_addr, 7, 0, 32) ||
1701  !addend || addend->get_signedValue()>0 || addend->get_signedValue()<4096 /*arbitrary*/)
1702  return insns.end();
1703 
1704  return first; // the LEA instruction is the start of a function
1705 }
1706 
// Creates functions whose entry points match common prologue patterns, both at the raw-byte level (unassigned memory) and
// at the instruction level (already-disassembled instructions).
// NOTE(review): the line bearing this function's name (between "void" and the opening brace) was lost in the extracted
// listing.
void
{
    // Create functions when we see certain patterns of bytes
    struct T1: ByteRangeCallback {
        Partitioner *p;
        T1(Partitioner *p): p(p) {}
        virtual bool operator()(bool enabled, const Args &args) ROSE_OVERRIDE {
            ASSERT_not_null(args.restrict_map);
            uint8_t buf[4096];
            if (enabled) {
                /* Scan the range in buffer-sized windows, looking for literal prologue byte sequences. */
                rose_addr_t va = args.range.first();
                while (va<=args.range.last()) {
                    size_t nbytes = std::min(args.range.last()+1-va, (rose_addr_t)sizeof buf);
                    size_t nread = args.restrict_map->readQuick(buf, va, nbytes);
                    for (size_t i=0; i<nread; ++i) {
                        if (i+5<nread && // x86:
                            0x8b==buf[i+0] && 0xff==buf[i+1] && // mov edi, edi
                            0x55==buf[i+2] && // push ebp
                            0x8b==buf[i+3] && 0xec==buf[i+4]) { // mov ebp, esp
                            // NOTE(review): the action taken for this match (original line 1731, presumably an
                            // add_function call) was lost in the extracted listing.
                            i += 4;
                        } else if (i+3<nread && // x86:
                                   0x55==buf[i+0] && // push ebp
                                   0x8b==buf[i+1] && 0xec==buf[i+2]) { // mov ebp, esp
                            // NOTE(review): the action taken for this match (original line 1736) was lost in the
                            // extracted listing.
                            i += 2;
                        }
                    }
                    va += nread;
                }
            }
            return enabled;
        }
    } t1(this);
    MemoryMap::Ptr mm = get_map();
    if (mm)
        scan_unassigned_bytes(&t1, mm);

    // Create functions when we see certain patterns of instructions. Note that this will only work if we've already
    // disassembled the instructions.
    Disassembler::AddressSet exclude;
    InstructionMap::const_iterator found;

    for (InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
        if (exclude.find(ii->first)==exclude.end() &&
            ((found=pattern1(insns, ii, exclude))!=insns.end() ||
             (found=pattern4(insns, ii, exclude))!=insns.end() ||
             (found=pattern5(insns, ii, exclude))!=insns.end() ||
             (found=pattern6(insns, ii, exclude))!=insns.end()))
            add_function(found->first, SgAsmFunction::FUNC_PATTERN);
    }
#if 0 /* Disabled because NOPs sometimes legitimately appear inside functions */
    for (InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
        if (exclude.find(ii->first)==exclude.end() && (found=pattern2(insns, ii, exclude))!=insns.end())
            add_function(found->first, SgAsmFunction::FUNC_PATTERN);
    }
#endif
#if 0 /* Disabled because NOPs sometimes follow "leave;ret" for functions with multiple returns. */
    for (InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
        if (exclude.find(ii->first)==exclude.end() && (found=pattern3(insns, ii, exclude))!=insns.end())
            add_function(found->first, SgAsmFunction::FUNC_PATTERN);
    }
#endif
}
1776 
/* Make all CALL/FARCALL targets functions. This is a naive approach that won't work for some obfuscated software. A more
 * thorough approach considers only those calls that are reachable. A CALL whose target is the address following the CALL
 * instruction is not counted as a function call. */
// NOTE(review): the line bearing this function's name (between "void" and the opening brace) was lost in the extracted
// listing.
void
{
    for (InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
        std::vector<SgAsmInstruction*> iv;
        iv.push_back(ii->second->node);
        rose_addr_t target_va=NO_TARGET;
        /* Skip call-to-fall-through (a common trick for reading the instruction pointer rather than a real call). */
        if (ii->second->node->isFunctionCallSlow(iv, &target_va, NULL) && target_va!=NO_TARGET &&
            target_va!=ii->first + ii->second->get_size()) {
            add_function(target_va, SgAsmFunction::FUNC_CALL_TARGET, "");
        }
    }
}
1793 
/* Scan through ranges of contiguous instructions, invoking the callback list once per maximal contiguous run. "prev" is the
 * instruction preceding the set (or null) and "end" is the instruction following it (or null). Each processed run is removed
 * from "insns" until the map is exhausted. */
void
 Instruction *prev, Instruction *end)
{
 while (!insns.empty()) {
 /* Grow a run of address-contiguous instructions starting at the lowest remaining address. */
 Instruction *first = insns.begin()->second;
 rose_addr_t va = first->get_address();
 InstructionMap::iterator ii = insns.find(va);
 InstructionVector contig;
 while (ii!=insns.end()) {
 contig.push_back(ii->second);
 va += ii->second->get_size();
 ii = insns.find(va);
 }
 /* Invoke all callbacks for this run, then remove the run's instructions so the outer loop terminates. */
 cblist.apply(true, InsnRangeCallback::Args(this, prev, first, end, contig.size()));
 for (size_t i=0; i<contig.size(); i++)
 insns.erase(contig[i]->get_address());
 }
}
1814 
/* Scan through all unassigned instructions, i.e., instructions not belonging to any function's basic block. Runs of
 * consecutive unassigned instructions are handed to scan_contiguous_insns() along with the nearest assigned instruction
 * before ("prev") and after ("end") the run, when such instructions exist. */
void
{
 if (cblist.empty())
 return;

 /* We can't iterate over the instruction list while invoking callbacks because one of them might change the instruction
 * list. Therefore, iterate over a copy of the list. Instructions are never deleted by the partitioner (only added), so
 * this is safe to do. */
 InstructionMap all = this->insns;
 InstructionMap range; // current run of unassigned instructions
 Instruction *prev = NULL; // most recent function-assigned instruction seen
 for (InstructionMap::iterator ai=all.begin(); ai!=all.end(); ++ai) {
 BasicBlock *bb = find_bb_containing(ai->first, false);
 Function *func = bb ? bb->function : NULL;
 if (func) {
 /* Assigned instruction terminates the pending run (if any) and becomes the next run's "prev". */
 if (!range.empty()) {
 scan_contiguous_insns(range, cblist, prev, ai->second);
 range.clear();
 }
 prev = ai->second;
 } else {
 range.insert(*ai);
 }
 }
 /* Final run has no following assigned instruction. */
 if (!range.empty())
 scan_contiguous_insns(range, cblist, prev, NULL);
}
1844 
/* Similar to scan_unassigned_insns except only invokes callbacks when the "prev" and "end" instructions belong to different
 * functions or one of them doesn't exist. */
void
{
 if (cblist.empty())
 return;

 /* Filter prepended to the user's callbacks: passes the call through only for inter-function ranges. */
 struct Filter: public InsnRangeCallback {
 virtual bool operator()(bool enabled, const Args &args) {
 if (enabled) {
 if (!args.insn_prev || !args.insn_end)
 return true; // a missing bound counts as "different functions"
 BasicBlock *bb_lt = args.partitioner->find_bb_containing(args.insn_prev->get_address(), false);
 BasicBlock *bb_rt = args.partitioner->find_bb_containing(args.insn_end->get_address(), false);
 ASSERT_require(bb_lt && bb_lt->function); // because we're invoked from scan_unassigned_insns
 ASSERT_require(bb_rt && bb_rt->function); // ditto
 enabled = bb_lt->function != bb_rt->function;
 }
 return enabled;
 }
 } filter;
 InsnRangeCallbacks cblist2 = cblist;
 cblist2.prepend(&filter);
 scan_unassigned_insns(cblist2);
}
1871 
/* Similar to scan_unassigned_insns except only invokes callbacks when "prev" and "end" instructions belong to the same
 * function. */
void
{
 if (cblist.empty())
 return;

 /* Filter prepended to the user's callbacks: passes the call through only for intra-function ranges. */
 struct Filter: public InsnRangeCallback {
 virtual bool operator()(bool enabled, const Args &args) {
 if (enabled) {
 if (!args.insn_prev || !args.insn_end)
 return false; // a missing bound can never be "same function"
 BasicBlock *bb_lt = args.partitioner->find_bb_containing(args.insn_prev->get_address(), false);
 BasicBlock *bb_rt = args.partitioner->find_bb_containing(args.insn_end->get_address(), false);
 ASSERT_require(bb_lt && bb_lt->function); // because we're invoked from scan_unassigned_insns
 ASSERT_require(bb_rt && bb_rt->function); // ditto
 enabled = bb_lt->function == bb_rt->function;
 }
 return enabled;
 }
 } filter;
 InsnRangeCallbacks cblist2 = cblist;
 cblist2.prepend(&filter);
 scan_unassigned_insns(cblist2);
}
1898 
/* Invokes the callbacks once per address range that is not assigned to any function, optionally restricted to addresses
 * present in the supplied memory map. */
void
{
 if (cblist.empty())
 return;

 /* Get range map for addresses assigned to functions (function instructions and data w/ function pointers). */
 FunctionRangeMap assigned;
 function_extent(&assigned);

 /* Unassigned ranges are the inverse of everything assigned. Then further restrict the unassigned range map according to
 * the supplied memory map. */
 ExtentMap unassigned = assigned.invert<ExtentMap>();
 if (restrict_map) {
 /* Remove from "unassigned" all addresses that are not mapped in restrict_map. */
 AddressIntervalSet nonmappedAddrs(*restrict_map);
 nonmappedAddrs.invert();
 ExtentMap toRemove = toExtentMap(nonmappedAddrs);
 unassigned.erase_ranges(toRemove);
 }

 /* Traverse the unassigned map, invoking the callbacks for each range. */
 for (ExtentMap::iterator ri=unassigned.begin(); ri!=unassigned.end(); ++ri)
 cblist.apply(true, ByteRangeCallback::Args(this, restrict_map, assigned, ri->first));
}
1923 
/* Similar to scan_unassigned_bytes except only invokes callbacks for byte ranges whose nearest preceding and following
 * function-assigned ranges belong to the same function. */
void
{
 if (cblist.empty())
 return;

 /* Filter prepended to the user's callbacks: passes the call through only when the range is inside one function. */
 struct Filter: public ByteRangeCallback {
 virtual bool operator()(bool enabled, const Args &args) {
 if (enabled) {
 if (args.range.first()<=Extent::minimum() || args.range.last()>=Extent::maximum())
 return false; // nothing before and/or after

 /* Find the closest function before this range. */
 FunctionRangeMap::const_iterator prev = args.ranges.find_prior(args.range.first()-1);
 if (prev==args.ranges.end())
 return false; // nothing before this range

 /* Find the closest function above this range. */
 FunctionRangeMap::const_iterator next = args.ranges.lower_bound(args.range.last()+1);
 if (next==args.ranges.end())
 return false; // nothing after this range

 /* Continue only if this range is between two of the same functions. */
 enabled = prev->second.get()==next->second.get();
 }
 return enabled;
 }
 } filter;
 ByteRangeCallbacks cblist2 = cblist;
 cblist2.prepend(&filter);
 scan_unassigned_bytes(cblist2, restrict_map);
}
1956 
/* Similar to scan_unassigned_bytes except only invokes callbacks for byte ranges that are NOT bracketed by the same
 * function on both sides (i.e., between two different functions, or with no function on one side). */
void
{
 if (cblist.empty())
 return;

 /* Filter prepended to the user's callbacks: inverse of the same-function filter used elsewhere in this file. */
 struct Filter: public ByteRangeCallback {
 virtual bool operator()(bool enabled, const Args &args) {
 if (enabled) {
 if (args.range.first()<=Extent::minimum() || args.range.last()>=Extent::maximum())
 return true; // nothing before and/or after

 /* Find the closest function before this range. */
 FunctionRangeMap::const_iterator prev = args.ranges.find_prior(args.range.first()-1);
 if (prev==args.ranges.end())
 return true; // nothing before this range

 /* Find the closest function above this range. */
 FunctionRangeMap::const_iterator next = args.ranges.lower_bound(args.range.last()+1);
 if (next==args.ranges.end())
 return true; // nothing after this range

 /* Continue only if this range is between two different functions. */
 enabled = prev->second.get()!=next->second.get();
 }
 return enabled;
 }
 } filter;
 ByteRangeCallbacks cblist2 = cblist;
 cblist2.prepend(&filter);
 scan_unassigned_bytes(cblist2, restrict_map);
}
1989 
/* Byte-range callback that searches an unassigned range for repetitions of configured byte patterns and attaches each
 * matching run as a BLK_PADDING data block to the function preceding the range. Behavior is tuned by the members
 * begins_contiguously, ends_contiguously, minimum_nrep, maximum_nrep, and maximum_range_size. Always allows later
 * callbacks to run (returns "enabled"). */
bool
{
 Stream trace(mlog[TRACE]);
 trace.facilityName("Partitioner::FindDataPadding");
 if (!enabled)
 return false;
 Partitioner *p = args.partitioner;
 Extent range = args.range;

 /* What is the maximum pattern length in bytes? */
 if (patterns.empty())
 return true;
 size_t max_psize = patterns[0].size();
 for (size_t pi=1; pi<patterns.size(); ++pi)
 max_psize = std::max(max_psize, patterns[pi].size());

 /* What is the previous function? The one to which padding is to be attached. */
 if (range.first()<=Extent::minimum())
 return true;
 FunctionRangeMap::const_iterator prev = begins_contiguously ?
 args.ranges.find(range.first()-1) :
 args.ranges.find_prior(range.first()-1);
 if (prev==args.ranges.end())
 return true;
 Function *func = prev->second.get();
 ASSERT_not_null(func);

 /* Do we need to be contiguous with a following function? This only checks whether the incoming range ends at the
 * beginning of a function. We'll check below whether a padding sequence also ends at the end of this range. */
 if (ends_contiguously) {
 /* Range too large to be covered even by maximal repetition of the longest pattern. */
 if (max_psize*maximum_nrep < range.size())
 return true;
 if (range.last()>=Extent::maximum())
 return true;
 FunctionRangeMap::const_iterator next = args.ranges.find(range.last()+1);
 if (next==args.ranges.end())
 return true;
 }

 /* To keep things simple, we read the entire range (which might not be contigous in the MemoryMap) into a contiguous
 * buffer. However, that means we had better not try to read huge, anonymous ranges. Also handle the case of a short
 * read, although this shouldn't happen if the caller supplied the correct memory map to the scan_*_bytes() method. */
 if (range.size() > maximum_range_size)
 return true;
 MemoryMap::Ptr map = args.restrict_map ? args.restrict_map : p->ro_map;
 SgUnsignedCharList buf = map->readVector(range.first(), range.size());
 if (ends_contiguously && buf.size()<range.size())
 return true;
 range.resize(buf.size());

 /* There might be more than one sequence of padding bytes. Look for all of them, but constrained according to
 * begins_contiguously and ends_contiguously. */
 size_t nblocks = 0; // number of blocks added to function
 while (!range.empty()) {
 if (begins_contiguously && range.first()>args.range.first())
 return true;
 /* Try each pattern at the current position, counting how many times it repeats back-to-back. */
 for (size_t pi=0; pi<patterns.size(); ++pi) {
 size_t psize = patterns[pi].size();
 ASSERT_require(psize>0);
 size_t nrep = 0;
 for (size_t offset=range.first()-args.range.first();
 offset+psize<=buf.size() && nrep<maximum_nrep;
 offset+=psize, ++nrep) {
 if (memcmp(&buf[offset], &patterns[pi][0], psize))
 break;
 }
 if (nrep>0 && nrep>=minimum_nrep && (!ends_contiguously || nrep*psize==range.size())) {
 /* Found a matching repeated pattern. Add data block to function. */
 DataBlock *dblock = p->find_db_starting(range.first(), nrep*psize);
 ASSERT_not_null(dblock);
 p->append(func, dblock, SgAsmBlock::BLK_PADDING);
 ++nblocks;
 ++nfound;
 if (trace) {
 if (1==nblocks)
 trace <<"FindDataPadding for F" <<addrToString(func->entry_va) <<": added";
 trace <<" D" <<addrToString(range.first());
 }
 range.first(range.first()+nrep*psize-1); // will be incremented after break
 break;
 }
 }
 if (!range.empty())
 range.first(range.first()+1);
 }
 if (trace && nblocks>0)
 trace <<"\n";

 return true;
}
2081 
2082 bool
2083 Partitioner::FindData::operator()(bool enabled, const Args &args)
2084 {
2085  if (!enabled)
2086  return false;
2087  Partitioner *p = args.partitioner;
2088 
2089  /* We must have a function immediately before this range and to which this range's data can be attached. */
2090  if (args.range.first()<=Extent::minimum())
2091  return true;
2092  FunctionRangeMap::const_iterator prev = args.ranges.find(args.range.first()-1);
2093  if (prev==args.ranges.end())
2094  return true;
2095  Function *func = prev->second.get();
2096  ASSERT_not_null(func);
2097 
2098  /* Don't append data to non-functions. */
2099  if (0!=(func->reason & excluded_reasons))
2100  return true;
2101 
2102  /* Padding ranges are computed once and cached. */
2103  if (NULL==padding_ranges) {
2104  padding_ranges = new DataRangeMap;
2105  p->padding_extent(padding_ranges);
2106  }
2107 
2108  /* Don't append data if the previous thing is padding. */
2109  if (padding_ranges->find(args.range.first()-1)!=padding_ranges->end())
2110  return true;
2111 
2112  /* Create a data block and add it to the previous function. */
2113  DataBlock *dblock = p->find_db_starting(args.range.first(), args.range.size());
2114  ASSERT_not_null(dblock);
2115  p->append(func, dblock, SgAsmBlock::BLK_FINDDATA);
2116  ++nfound;
2117  mlog[TRACE] <<"FindData: for F" <<addrToString(func->entry_va) <<": added D" <<addrToString(args.range.first()) <<"\n";
2118 
2119  return true;
2120 }
2121 
/* Create functions or data for inter-function padding instruction sequences. Returns true if we did not find interfunction
 * padding and other padding callbacks should proceed; returns false if we did find padding and the others should be skipped. */
bool
{
 Stream trace(mlog[TRACE]);
 trace.facilityName("Partitioner::FindInsnPadding");

 if (!enabled)
 return false;
 if (!args.insn_prev)
 return true;
 ASSERT_require(args.ninsns>0);
 ASSERT_not_null(args.insn_prev);
 ASSERT_not_null(args.insn_begin);

 /* If padding must begin right after the previous function's last instruction, bail when there's a gap. */
 if (begins_contiguously &&
 args.insn_begin->get_address()!=args.insn_prev->get_address()+args.insn_prev->get_size())
 return true;

 /* The preceding function. We'll add the padding as data to this function, unless we're creating explicity padding
 * functions. */
 Partitioner *p = args.partitioner;
 Function *prev_func = NULL;
 {
 BasicBlock *last_block = p->find_bb_containing(args.insn_prev->get_address(), false);
 ASSERT_not_null(last_block);
 prev_func = last_block->function;
 }

 /* Loop over the inter-function instructions and accumulate contiguous ranges of padding. */
 bool retval = true;
 InstructionVector padding;
 rose_addr_t va = args.insn_begin->get_address();
 Instruction *insn = p->find_instruction(va);
 for (size_t i=0; i<args.ninsns && insn!=NULL; i++) {

 /* Does this instruction match? */
 bool matches = false;
 ASSERT_not_null(insn); // callback is being invoked over instructions
 BasicBlock *bb = p->find_bb_containing(va, false);
 if (bb && bb->function)
 break; // insn is already assigned to a function

 /* Match either by x86 instruction kind (e.g., the kinds listed in x86_kinds)... */
 SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(p->find_instruction(va));
 if (!matches && insn_x86) {
 if (x86_kinds.find(insn_x86->get_kind())!=x86_kinds.end())
 matches = true;
 }

 /* ...or by exact raw byte sequence. */
 for (size_t j=0; !matches && j<byte_patterns.size(); j++) {
 if (byte_patterns[j]==insn->get_raw_bytes())
 matches = true;
 }


 /* Advance to next instruction, or null. We do this inside the loop so we only need one copy of the code that inserts
 * the basic blocks for padding. */
 va += insn->get_size();
 if (matches)
 padding.push_back(insn);
 insn = i+1<args.ninsns ? p->find_instruction(va) : NULL;

 /* Do we have padding to insert, and are we at the end of that padding? If not, then continue looping. */
 if ((matches && insn) || padding.empty())
 continue; // try to grab more padding instructions
 if (begins_contiguously &&
 padding.front()->get_address()!=args.insn_prev->get_address()+args.insn_prev->get_size())
 return true; // no point in even continuing the loop, since we're already past the first instruction now.
 if (ends_contiguously) {
 /* The padding run must abut the following instruction (or the end of the scanned set); otherwise discard it. */
 if (!matches) {
 padding.clear();
 continue;
 } else if (args.insn_end) {
 if (padding.back()->get_address()+padding.back()->get_size() != args.insn_end->get_address()) {
 padding.clear();
 continue;
 }
 } else if (i+1<args.ninsns) {
 padding.clear();
 continue;
 }
 }

 /* Make sure we got enough bytes. We can subtract first from last since we know they're contiguous in memory. */
 if (padding.back()->get_address()+padding.back()->get_size() - padding.front()->get_address() < minimum_size) {
 padding.clear();
 continue;
 }

 /* If we get here, then we have padding instructions. Either create a data block to hold the padding, or a new
 * function to hold the padding. When creating a function, the basic blocks are added as CFG heads, which cause the
 * block to remain with the function even though it might not be reachable by the CFG starting from the function's
 * entry point. This is especially true for padding like x86 INT3 instructions, which have no known CFG successors and
 * occupy singleton basic blocks. */
 ++nfound;
 ASSERT_forbid(padding.empty());
 if (add_as_data) {
 ASSERT_not_null(prev_func);
 rose_addr_t begin_va = padding.front()->get_address();
 rose_addr_t end_va = padding.back()->get_address() + padding.back()->get_size();
 ASSERT_require(end_va>begin_va);
 size_t size = end_va - begin_va;
 DataBlock *dblock = p->find_db_starting(begin_va, size);
 ASSERT_not_null(dblock);
 p->append(prev_func, dblock, SgAsmBlock::BLK_PADDING);
 /* The bytes are now data, so the instructions covering them are discarded. */
 for (size_t i=0; i<padding.size(); i++)
 p->discard(padding[i]);
 trace <<"for F" <<addrToString(prev_func->entry_va) <<": added D" <<addrToString(begin_va) <<"\n";
 } else {
 Function *new_func = p->add_function(padding.front()->get_address(), SgAsmFunction::FUNC_PADDING);
 p->find_bb_starting(padding.front()->get_address()); // split first block if necessary
 p->find_bb_starting(va); // split last block if necessary
 trace <<"for F" <<addrToString(new_func->entry_va) <<": added";
 for (size_t i=0; i<padding.size(); i++) {
 BasicBlock *bb = p->find_bb_containing(padding[i]->get_address());
 if (bb && !bb->function) {
 p->append(new_func, bb, SgAsmBlock::BLK_PADDING, true/*head of CFG subgraph*/);
 trace <<" B" <<addrToString(bb->address()) <<"#" <<bb->insns.size();
 }
 }
 trace <<"\n";
 }

 retval = padding.size()!=args.ninsns; // allow other callbacks to run only if we didn't suck up all the instructions
 padding.clear();
 }
 return retval;
}
2251 
/* Returns the data block starting at start_va, creating or extending one as necessary. If size is zero, this is a pure
 * lookup: the existing block (or NULL) is returned and nothing is created. If size is nonzero, the returned block is
 * guaranteed to cover [start_va, start_va+size); when the existing block doesn't cover all those addresses (or no block
 * exists) a new SgAsmStaticData node spanning the whole range is appended. */
Partitioner::find_db_starting(rose_addr_t start_va, size_t size/*=0*/)
{
 DataBlock *db = NULL;
 DataBlocks::iterator dbi = data_blocks.find(start_va);
 if (dbi!=data_blocks.end()) {
 db = dbi->second;
 if (0==size)
 return db; /* caller doesn't care about the size, only whether the block is present. */

 /* Check whether the block contains all the addresses we want. They might not all be in the first node. */
 DataRangeMap want; want.insert(Extent(start_va, size));
 DataRangeMap have; datablock_extent(db, &have);
 want.erase_ranges(have);
 if (want.empty())
 return db;
 }
 if (0==size)
 return NULL;

 /* Create a new SgAsmStaticData node to represent the entire address range and add it to the (possibly new) data block.
 * When adding to an existing block, the new SgAsmStaticData node will overlap with at least the initial node, and possibly
 * others. */
 if (!db) {
 db = new DataBlock;
 data_blocks[start_va] = db;
 }

 /* Read the raw bytes now so the IR node is self-contained; a short read would violate the requested size. */
 SgUnsignedCharList raw_bytes = map->readVector(start_va, size, 0);
 ASSERT_require(raw_bytes.size()==size);
 SgAsmStaticData *datum = new SgAsmStaticData;
 datum->set_address(start_va);
 datum->set_raw_bytes(raw_bytes);
 db->nodes.push_back(datum);

 return db;
}
2294 
/* Byte-range callback that attaches unassigned basic blocks following a function to that function as BLK_FRAGMENT blocks,
 * but only when region statistics indicate the bytes look like code. The preceding function must end immediately before
 * the range; optional knobs additionally require the range to end at the same function (require_intrafunction) and the
 * function to be non-interleaved (require_noninterleaved). Always allows subsequent callbacks to run. */
bool
{
 if (!enabled)
 return false;
 Partitioner *p = args.partitioner;

 /* Compute and cache the extents of all known functions. */
 if (!function_extents) {
 function_extents = new FunctionRangeMap;
 p->function_extent(function_extents);
 }

 /* Compute and cache code criteria. */
 if (!code_criteria) {
 RegionStats *mean = p->aggregate_statistics();
 RegionStats *variance = p->get_aggregate_variance();
 code_criteria = p->new_code_criteria(mean, variance, threshold);
 }

 /* This range must begin contiguously with a valid function. */
 if (args.range.first()<=Extent::minimum())
 return true;
 FunctionRangeMap::iterator prev = function_extents->find(args.range.first()-1);
 if (prev==function_extents->end())
 return true;
 Function *func = prev->second.get();
 if (0!=(func->reason & excluded_reasons))
 return true;

 /* Should this range end contiguously with the same function? Perhaps we should relax this and only require that the
 * preceding function has an address after this range? [RPM 2011-11-25] */
 if (require_intrafunction) {
 if (args.range.last()>=Extent::maximum())
 return true;
 FunctionRangeMap::iterator next = function_extents->find(args.range.last()+1);
 if (next==function_extents->end() || next->second.get()!=func)
 return true;
 }

 /* If the preceding function is interleaved with another then how can we know that the instructions in question should
 * actually belong to this function? If we're interleaved with one other function, then we could very easily be
 * interleaved with additional functions and this address region could belong to any of them. */
 if (require_noninterleaved && !p->is_contiguous(func, false))
 return true;

 /* Bail unless the region statistically looks like code. */
 ExtentMap pending;
 pending.insert(args.range);
 RegionStats *stats = p->region_statistics(pending);
 double raw_vote;
 if (!code_criteria->satisfied_by(stats, &raw_vote))
 return true;

 /* Get the list of basic blocks for the instructions in this range and their address extents. Bail if a basic block
 * extends beyond the address ranges we're considering, rather than splitting the block. Also bail if we encounter two or
 * more instructions that overlap since that's a pretty strong indication that this isn't code. */
 std::set<BasicBlock*> bblocks;
 while (!pending.empty()) {
 rose_addr_t va = pending.min();
 BasicBlock *bb = va==args.range.first() ? p->find_bb_starting(va) : p->find_bb_containing(va);
 if (!bb || bb->function)
 return true;
 if (bblocks.find(bb)==bblocks.end()) {
 bblocks.insert(bb);
 /* Remove each instruction's extent from "pending"; overlapping instructions fail the contains() test. */
 for (InstructionVector::iterator ii=bb->insns.begin(); ii!=bb->insns.end(); ++ii) {
 Extent ie((*ii)->get_address(), (*ii)->get_size());
 if (!pending.contains(ie))
 return true;
 pending.erase(ie);
 }
 }
 }

 /* All looks good. Add the basic blocks to the preceding function. */
 for (std::set<BasicBlock*>::iterator bi=bblocks.begin(); bi!=bblocks.end(); ++bi) {
 (*bi)->code_likelihood = raw_vote;
 p->append(func, *bi, SgAsmBlock::BLK_FRAGMENT, true/*CFG head*/);
 ++nfound;
 }

 return true;
}
2378 
/* Create functions for any basic blocks that consist of only a JMP to another function. */
bool
Partitioner::FindThunks::operator()(bool enabled, const Args &args)
{
 if (!enabled)
 return false;

 Partitioner *p = args.partitioner;
 rose_addr_t va = args.insn_begin->get_address();
 rose_addr_t next_va = 0;
 for (size_t i=0; i<args.ninsns; i++, va=next_va) {
 Instruction *insn = p->find_instruction(va);
 ASSERT_not_null(insn);
 next_va = va + insn->get_size();

 /* Instruction must be an x86 JMP */
 SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(insn);
 if (!insn_x86 || (insn_x86->get_kind()!=x86_jmp && insn_x86->get_kind()!=x86_farjmp))
 continue;

 /* Instruction must not be in the middle of an existing basic block. */
 BasicBlock *bb = p->find_bb_containing(va, false);
 if (bb && bb->address()!=va)
 continue;

 if (validate_targets) {
 /* Instruction must have a single successor */
 /* NOTE(review): the condition below uses "&&", which only rejects incomplete successor sets whose size differs
  * from one; a complete set with more than one successor slips through and *succs.begin() picks an arbitrary
  * target. The comment suggests "||" may have been intended -- confirm against upstream ROSE sources. */
 bool complete;
 Disassembler::AddressSet succs = insn->getSuccessors(&complete);
 if (!complete && 1!=succs.size())
 continue;
 rose_addr_t target_va = *succs.begin();

 /* The target (single successor) must be a known function which is not padding. */
 Functions::iterator fi = p->functions.find(target_va);
 if (fi==p->functions.end() || 0!=(fi->second->reason & SgAsmFunction::FUNC_PADDING))
 continue;
 }

 /* Create the basic block for the JMP instruction. This block must be a single instruction, which it should be since
 * we already checked that its only successor is another function. */
 if (!bb)
 bb = p->find_bb_starting(va);
 ASSERT_not_null(bb);
 ASSERT_require(1==bb->insns.size());
 /* NOTE(review): "thunk" is used below but not declared in this copy of the file -- a line (presumably
  * "Function *thunk = p->add_function(va, SgAsmFunction::FUNC_THUNK);") appears to have been lost during
  * extraction. Confirm against upstream ROSE sources. */
 p->append(thunk, bb, SgAsmBlock::BLK_ENTRY_POINT);
 ++nfound;

 mlog[TRACE] <<"FindThunks: found F" <<addrToString(va) <<"\n";
 }

 return true;
}
2433 
/* Byte-range callback that looks for an unassigned range bracketed on both sides by padding data blocks, creates a new
 * function there, and moves the following padding into the new function. Always allows subsequent callbacks to run. */
bool
{
 if (!enabled)
 return false;
 Partitioner *p = args.partitioner;

 /* Initialize the data block ranges once and cache it in this object. */
 if (!padding_ranges) {
 padding_ranges = new DataRangeMap;
 p->padding_extent(padding_ranges);
 }

 /* Range must be immediately preceded by a padding block. */
 if (args.range.first()<=Extent::minimum())
 return true;
 if (padding_ranges->find(args.range.first()-1) == padding_ranges->end())
 return true;

 /* Range must be immediately followed by a padding block. */
 if (args.range.last()>=Extent::maximum())
 return true;
 DataRangeMap::iterator next = padding_ranges->find(args.range.last()+1);
 if (next==padding_ranges->end())
 return true;
 DataBlock *next_dblock = next->second.get();

 /* Create a new function and move the following padding to the new function. */
 /* NOTE(review): "new_func" is used below but not declared in this copy of the file -- a line creating the function
  * (presumably via p->add_function(args.range.first(), ...)) appears to have been lost during extraction. Confirm
  * against upstream ROSE sources. */
 p->append(new_func, next_dblock, SgAsmBlock::BLK_PADDING, true/*force*/);
 ++nfound;

 mlog[TRACE] <<"FindInterPadFunctions: added F" <<addrToString(new_func->entry_va) <<"\n";
 return true;
}
2469 
/* Byte-range callback that searches an unassigned range for tables of consecutive single-JMP thunks. Each table member
 * becomes its own FUNC_THUNK function, provided the table has more than minimum_nthunks entries and satisfies the
 * begins_contiguously/ends_contiguously constraints. Always allows subsequent callbacks to run. */
bool
{
 if (!enabled)
 return false;
 Partitioner *p = args.partitioner;
 Extent range = args.range;

 while (!range.empty()) {

 /* Find a single, contiguous thunk table. */
 InstructionMap thunks; // each thunk is a single JMP instruction
 bool in_table = false;
 rose_addr_t va;
 for (va=range.first(); va<=range.last() && (in_table || thunks.empty()); va++) {
 if (begins_contiguously && !in_table && va>args.range.first())
 return true;

 /* Must be a JMP instruction. */
 Instruction *insn = p->find_instruction(va);
 SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(insn);
 if (!insn_x86 || (insn_x86->get_kind()!=x86_jmp && insn_x86->get_kind()!=x86_farjmp)) {
 in_table = false;
 continue;
 }

 /* Instruction must not be part of a larger basic block. Be careful not to create basic blocks unecessarily. */
 BasicBlock *bb = p->find_bb_containing(va, false);
 if (bb && bb->insns.size()>1) {
 in_table = false;
 continue;
 }
 if (!bb)
 bb = p->find_bb_starting(va);
 ASSERT_not_null(bb);

 if (validate_targets) {
 /* Find successors of the JMP instruction. */
 /* NOTE(review): "succs" is used below but its declaration (presumably "Disassembler::AddressSet succs;")
  * appears to have been lost during extraction from this copy of the file. Confirm against upstream ROSE. */
 bool complete;
 if (bb->insns.size()>1) {
 succs.insert(bb->insns[1]->get_address());
 complete = true;
 } else {
 succs = p->successors(bb, &complete);
 }

 /* If the successor is known, it should point to another instruction. */
 bool points_to_insn = true;
 for (Disassembler::AddressSet::iterator si=succs.begin(); si!=succs.end() && points_to_insn; ++si)
 points_to_insn = NULL != p->find_instruction(*si);
 if (!points_to_insn) {
 in_table = false;
 continue;
 }
 }

 /* This is a thunk. Save it and skip ahead to the start of the following instruction. */
 in_table = true;
 thunks[insn->get_address()] = insn;
 va += insn->get_size() - 1; // also incremented by loop
 }

 /* This is only a thunk table if we found enough thunks and (if appropriate) ends at the end of the range. */
 if (thunks.size()>minimum_nthunks && (!ends_contiguously || va==args.range.last()+1)) {
 for (InstructionMap::iterator ii=thunks.begin(); ii!=thunks.end(); ++ii) {
 Function *thunk = p->add_function(ii->first, SgAsmFunction::FUNC_THUNK);
 BasicBlock *bb = p->find_bb_starting(ii->first);
 p->append(thunk, bb, SgAsmBlock::BLK_ENTRY_POINT);
 ++nfound;
 mlog[TRACE] <<"FindThunkTable: thunk F" <<addrToString(thunk->entry_va) <<"\n";
 }
 }

 range.first(va);
 }
 return true;
}
2548 
/* Predicate: true iff "func" looks like a thunk -- a function consisting of exactly one basic block containing exactly one
 * x86 JMP/FARJMP whose single, completely-known successor is the entry address of another known function that is neither a
 * FUNC_LEFTOVERS nor a FUNC_PADDING pseudo-function. */
bool
{
 ASSERT_not_null(func);
 if (1!=func->basic_blocks.size())
 return false;

 BasicBlock *bb = func->basic_blocks.begin()->second;
 if (1!=bb->insns.size())
 return false;

 Instruction *insn = bb->insns.front();
 SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(insn);
 if (!insn_x86 || (insn_x86->get_kind()!=x86_jmp && insn_x86->get_kind()!=x86_farjmp))
 return false;

 /* The JMP must have exactly one successor and the successor set must be complete (fully known). */
 bool complete;
 Disassembler::AddressSet succs = successors(bb, &complete);
 if (!complete || 1!=succs.size())
 return false;

 rose_addr_t target_va = *succs.begin();
 Functions::iterator fi = functions.find(target_va);
 Function *target_func = fi==functions.end() ? NULL : fi->second;
 if (!target_func)
 return false;

 if (0!=(target_func->reason & SgAsmFunction::FUNC_LEFTOVERS) ||
 0!=(fi->second->reason & SgAsmFunction::FUNC_PADDING))
 return false;

 return true;
}
2589 
/* Instruction-range callback that appends unassigned basic blocks immediately following a function to that function as
 * BLK_POSTFUNC CFG heads, then marks the function as pending re-analysis. Padding and thunk pseudo-functions are never
 * extended this way. Always allows subsequent callbacks to run. */
bool
{
 Stream trace(mlog[TRACE]);
 trace.facilityName("Partitioner::FindPostFunctionInsns");

 if (!enabled)
 return false;
 /* The range must begin at the address immediately following the previous (assigned) instruction. */
 if (!args.insn_prev || args.insn_begin->get_address()!=args.insn_prev->get_address()+args.insn_prev->get_size())
 return true;
 Partitioner *p = args.partitioner;
 BasicBlock *bb = p->find_bb_containing(args.insn_prev->get_address());
 if (!bb || !bb->function)
 return true;
 Function *func = bb->function;
 if (0!=(func->reason & SgAsmFunction::FUNC_PADDING) ||
 0!=(func->reason & SgAsmFunction::FUNC_THUNK))
 return true; // don't append instructions to certain "functions"

 size_t nadded = 0;
 rose_addr_t va = args.insn_begin->get_address();
 for (size_t i=0; i<args.ninsns; i++) {
 bb = p->find_bb_containing(va);
 ASSERT_not_null(bb); // because we know va is an instruction
 if (!bb->function) {
 if (trace) {
 if (0==nadded)
 trace <<"for F" <<addrToString(func->entry_va) <<": added";
 trace <<" B" <<addrToString(bb->address()) <<"#" <<bb->insns.size();
 }
 p->append(func, bb, SgAsmBlock::BLK_POSTFUNC, true/*head of CFG subgraph*/);
 func->pending = true; // the function's CFG must be re-discovered
 ++nadded;
 ++nfound;
 }

 /* Advance to the next instruction in the range. */
 Instruction *insn = p->find_instruction(va);
 ASSERT_not_null(insn);
 va += insn->get_size();
 }
 if (nadded)
 trace <<"\n";

 return true;
}
2635 
/* class method */
/* Returns the integer value of an expression, or zero when the expression is null or is not an
 * SgAsmIntegerValueExpression. Note that zero is therefore ambiguous: it is also a legitimate expression value. */
rose_addr_t
{
 if (!e) {
 return 0;
 } else if (isSgAsmIntegerValueExpression(e)) {
 return isSgAsmIntegerValueExpression(e)->get_value();
 } else {
 return 0;
 }
}
2648 
/* class method */
/* Given an instruction, returns the memory address through which an unconditional x86 branch indirects, or zero. Handles
 * three operand shapes: [reg+const] where reg is the instruction pointer (address is IP-relative: const + fall-through
 * address), [reg+const] where reg is a general-purpose register (address is const + the supplied "offset", e.g., a table
 * base), and a bare constant address. All other shapes, and non-x86 or non-branch instructions, yield zero. */
rose_addr_t
{
 rose_addr_t retval = 0;

 /* Only single-operand unconditional x86 branches are considered. */
 SgAsmX86Instruction *insn = isSgAsmX86Instruction(g_insn);
 if (!insn ||
 !x86InstructionIsUnconditionalBranch(insn) ||
 1!=insn->get_operandList()->get_operands().size())
 return retval;

 /* The operand must be a memory reference (an indirect branch). */
 SgAsmMemoryReferenceExpression *mref = isSgAsmMemoryReferenceExpression(insn->get_operandList()->get_operands()[0]);
 if (!mref)
 return retval;

 SgAsmExpression *mref_addr = mref->get_address();
 if (isSgAsmBinaryExpression(mref_addr)) {
 /* Address of the form "register + constant". */
 SgAsmBinaryExpression *mref_bin = isSgAsmBinaryExpression(mref_addr);
 SgAsmRegisterReferenceExpression *reg = isSgAsmRegisterReferenceExpression(mref_bin->get_lhs());
 SgAsmValueExpression *val = isSgAsmValueExpression(mref_bin->get_rhs());
 if (reg!=NULL && val!=NULL) {
 if (reg->get_descriptor().get_major()==x86_regclass_ip) {
 retval = value_of(val) + insn->get_address() + insn->get_size();
 } else if (reg->get_descriptor().get_major()==x86_regclass_gpr) {
 retval = value_of(val) + offset;
 }
 }
 } else if (isSgAsmValueExpression(mref_addr)) {
 /* Address is a bare constant. */
 retval = value_of(isSgAsmValueExpression(mref_addr));
 }

 return retval; /*calculated value, or defaults to zero*/
}
2683 
/* Gives names to unnamed functions that are ELF .plt trampolines, using the relocation entry whose
 * offset matches the .got.plt slot the trampoline jumps through; names get an "@plt" suffix.
 * NOTE(review): the signature line is missing from this extracted listing; the body reads a
 * SgAsmGenericHeader* parameter named fhdr — confirm against the header. */
void
{
    /* This function is ELF, x86 specific. [FIXME RPM 2009-02-06] */
    SgAsmElfFileHeader *elf = isSgAsmElfFileHeader(fhdr);
    if (!elf) return;

    /* Find important sections */
    SgAsmGenericSection *plt = elf->get_section_by_name(".plt");
    if (!plt || !plt->is_mapped()) return;
    SgAsmGenericSection *gotplt = elf->get_section_by_name(".got.plt");
    if (!gotplt || !gotplt->is_mapped()) return;

    /* Find all relocation sections */
    std::set<SgAsmElfRelocSection*> rsects;
    for (SgAsmGenericSectionPtrList::iterator si=elf->get_sections()->get_sections().begin();
         si!=elf->get_sections()->get_sections().end();
         si++) {
        SgAsmElfRelocSection *reloc_section = isSgAsmElfRelocSection(*si);
        if (reloc_section)
            rsects.insert(reloc_section);
    }
    if (rsects.empty()) return;

    /* Process each .plt trampoline */
    for (Functions::iterator fi=functions.begin(); fi!=functions.end(); fi++) {
        rose_addr_t func_addr = fi->first;

        if (fi->second->name!="")
            continue; /* function already has a name */

        // Only consider functions whose entry lies inside the mapped .plt section.
        if (func_addr < elf->get_base_va() + plt->get_mapped_preferred_rva() ||
            func_addr >= elf->get_base_va() + plt->get_mapped_preferred_rva() + plt->get_mapped_size())
            continue; /* function is not in the .plt section */

        /* Sometimes the first instruction of a basic block cannot be disassembled and the basic block will have a different
         * starting address than its first instruction. If that basic block is also the start of a function then the
         * function also will have no initial instruction. */
        Instruction *insn = find_instruction(func_addr);
        if (NULL==insn)
            continue;

        /* The target in the ".plt" section will be an indirect (through the .got.plt section) jump to the actual dynamically
         * linked function (or to the dynamic linker itself). The .got.plt address is what we're really interested in. */
        SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(insn);
        ASSERT_not_null(insn_x86);
        // The second argument supplies the .got.plt base for GPR-relative addressing modes.
        rose_addr_t gotplt_va = get_indirection_addr(insn_x86, elf->get_base_va() + gotplt->get_mapped_preferred_rva());

        if (gotplt_va < elf->get_base_va() + gotplt->get_mapped_preferred_rva() ||
            gotplt_va >= elf->get_base_va() + gotplt->get_mapped_preferred_rva() + gotplt->get_mapped_size())
            continue; /* PLT entry doesn't dereference a value in the .got.plt section */

        /* Find the relocation entry whose offset is the gotplt_rva and use that entry's symbol for the function name. */
        for (std::set<SgAsmElfRelocSection*>::iterator ri=rsects.begin(); ri!=rsects.end() && fi->second->name==""; ri++) {
            SgAsmElfRelocEntryList *entries = (*ri)->get_entries();
            SgAsmElfSymbolSection *symbol_section = isSgAsmElfSymbolSection((*ri)->get_linked_section());
            if (symbol_section) {
                SgAsmElfSymbolList *symbols = symbol_section->get_symbols();
                for (size_t ei=0; ei<entries->get_entries().size() && fi->second->name==""; ei++) {
                    SgAsmElfRelocEntry *rel = entries->get_entries()[ei];
                    if (rel->get_r_offset()==gotplt_va) {
                        unsigned long symbol_idx = rel->get_sym();
                        ASSERT_require(symbol_idx < symbols->get_symbols().size());
                        SgAsmElfSymbol *symbol = symbols->get_symbols()[symbol_idx];
                        fi->second->name = symbol->get_name()->get_string() + "@plt";
                    }
                }
            }
        }
    }
}
2759 
/* Gives names to unnamed functions whose first instruction is an indirect jump through a PE bound
 * import address; names get an "@import" suffix.
 * NOTE(review): the signature line is missing from this extracted listing; the body reads a
 * SgAsmGenericHeader* parameter named fhdr — confirm against the header. */
void
{
    /* This function is PE x86 specific */
    SgAsmPEFileHeader *pe = isSgAsmPEFileHeader(fhdr);
    if (!pe)
        return;

    /* Build an index mapping memory addresses to function names. The addresses are the virtual address through which an
     * indirect jump would go when calling an imported function. */
    struct ImportIndexBuilder: public AstSimpleProcessing {
        typedef std::map<rose_addr_t, std::string> Index;
        typedef Index::iterator iterator;
        Partitioner *partitioner;
        Index index;                    // bound import VA -> imported function name
        SgAsmGenericHeader *fhdr;
        ImportIndexBuilder(Partitioner *partitioner, SgAsmGenericHeader *fhdr): partitioner(partitioner), fhdr(fhdr) {
            traverse(fhdr, preorder);   // populates 'index' via visit() below
        }
        void visit(SgNode *node) {
            SgAsmPEImportItem *import = isSgAsmPEImportItem(node);
            if (import) {
                std::string name = import->get_name()->get_string();
                rose_addr_t va = import->get_bound_rva().get_va();
                if (va!=0 && !name.empty())
                    index[va] = name;   // skip unbound (va==0) or anonymous imports
            }
        }
    } imports(this, fhdr);

    /* Look for functions whose first instruction is an indirect jump through one of the memory addresses we indexed above. */
    for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
        Function *func = fi->second;
        if (!func->name.empty())
            continue;                   // function already has a name
        Instruction *insn = find_instruction(func->entry_va);
        SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(insn);
        if (!insn_x86 ||
            (insn_x86->get_kind()!=x86_jmp && insn_x86->get_kind()!=x86_farjmp) ||
            1!=insn_x86->get_operandList()->get_operands().size())
            continue;                   // must be a single-operand JMP/FARJMP
        SgAsmMemoryReferenceExpression *mre = isSgAsmMemoryReferenceExpression(insn_x86->get_operandList()->get_operands()[0]);
        if (!mre)
            continue;                   // must be an indirect jump (memory operand)
        SgAsmValueExpression *base = isSgAsmValueExpression(mre->get_address());
        if (!base)
            continue;                   // must jump through a constant address
        rose_addr_t base_va = value_of(base);

        ImportIndexBuilder::iterator found = imports.index.find(base_va);
        if (found==imports.index.end())
            continue;                   // address is not a known import slot
        func->name = found->second + "@import";
        mlog[TRACE] <<"name_import_entries: F" <<addrToString(func->entry_va) <<": named \"" <<func->name <<"\"\n";
    }
}
2823 
/* Records in pe_iat_extents the mapped address ranges of PE "Import Address Table" sections so that
 * other methods (e.g., is_pe_dynlink_thunk) can test whether an address falls inside an IAT.
 * NOTE(review): the signature line is missing from this extracted listing; the body reads a
 * header pointer parameter named hdr — confirm against the header. */
void
{
    SgAsmGenericSectionPtrList iat_sections = hdr->get_sections_by_name("Import Address Table");
    for (size_t i=0; i<iat_sections.size(); ++i) {
        // Only synthesized (id == -1) and mapped IAT sections contribute an extent.
        if (-1==iat_sections[i]->get_id() && iat_sections[i]->is_mapped())
            pe_iat_extents.insert(Extent(iat_sections[i]->get_mapped_actual_va(), iat_sections[i]->get_mapped_size()));
    }
}
2834 
/* Seed function starts based on criteria other than control flow graph. */
/* Runs each enabled non-CFG heuristic (entry points, .eh_frame, symbols, imports/exports, byte
 * patterns, call instructions) plus any user-defined detectors, creating candidate functions.
 * NOTE(review): the signature line is missing from this extracted listing; the body reads
 * members 'interp', 'func_heuristics' and 'user_detectors' — confirm the exact signature. */
void
{
    // NOTE(review): the continuation (and terminating semicolon) of this trace statement was dropped
    // by the extraction — a line is missing from the listing immediately below.
    mlog[TRACE] <<"function reasons referenced by Partitioner debugging output:\n"

    mark_ipd_configuration(); /*seed partitioner based on IPD configuration information*/

    if (interp) {
        const SgAsmGenericHeaderPtrList &headers = interp->get_headers()->get_headers();
        for (size_t i=0; i<headers.size(); i++) {
            find_pe_iat_extents(headers[i]);
            if (func_heuristics & SgAsmFunction::FUNC_ENTRY_POINT)
                mark_entry_targets(headers[i]);
            if (func_heuristics & SgAsmFunction::FUNC_EH_FRAME)
                mark_eh_frames(headers[i]);
            if (func_heuristics & SgAsmFunction::FUNC_SYMBOL)
                mark_func_symbols(headers[i]);
            if (func_heuristics & SgAsmFunction::FUNC_IMPORT)
                mark_elf_plt_entries(headers[i]);
            if (func_heuristics & SgAsmFunction::FUNC_EXPORT)
                mark_export_entries(headers[i]);
        }
    }
    if (func_heuristics & SgAsmFunction::FUNC_PATTERN)
        mark_func_patterns();
    if (func_heuristics & SgAsmFunction::FUNC_CALL_INSN)
        mark_call_insns();



    /* Run user-defined function detectors, making sure that the basic block starts are up-to-date for each call. */
    if (func_heuristics & SgAsmFunction::FUNC_USERDEF) {
        if (interp) {
            const SgAsmGenericHeaderPtrList &headers = interp->get_headers()->get_headers();
            for (size_t i=0; i<user_detectors.size(); i++) {
                // j runs from 0 through headers.size() inclusive: iteration 0 deliberately passes a
                // NULL header, then each real header is passed in turn.
                for (size_t j=0; j<=headers.size(); j++) {
                    SgAsmGenericHeader *hdr = 0==j ? NULL : headers[j-1];
                    user_detectors[i](this, hdr);
                }
            }
        } else {
            for (size_t i=0; i<user_detectors.size(); i++) {
                user_detectors[i](this, NULL);
            }
        }
    }
}
2884 
2905 void
2907 {
2908  Stream trace(mlog[TRACE]);
2909  trace.facilityName("Partitioner::discover_first_block");
2910 
2911  trace <<"1st block " <<SgAsmFunction::reason_str(true, func->reason)
2912  <<" F" <<addrToString(func->entry_va) <<" \"" <<func->name <<"\": B" <<addrToString(func->entry_va) <<" ";
2913  BasicBlock *bb = find_bb_containing(func->entry_va);
2914 
2915  /* If this function's entry block collides with some other function, then truncate that other function's block and
2916  * subsume part of it into this function. Mark the other function as pending because its block may have new
2917  * successors now. */
2918  if (bb && func->entry_va!=bb->address()) {
2919  ASSERT_require(bb->function!=func);
2920  trace <<"[split from B" <<addrToString(bb->address());
2921  if (bb->function) {
2922  trace <<" in F" <<addrToString(bb->address()) <<" \"" <<bb->function->name <<"\"";
2923  bb->function->pending = true;
2924  }
2925  trace <<"] ";
2926  truncate(bb, func->entry_va);
2927  bb = find_bb_containing(func->entry_va);
2928  ASSERT_not_null(bb);
2929  ASSERT_require(func->entry_va==bb->address());
2930  } else if (bb && bb->function!=NULL && bb->function!=func) {
2931  trace <<"[removing B" <<addrToString(func->entry_va) <<" from F" <<addrToString(bb->function->entry_va) <<"]";
2932  bb->function->pending = true;
2933  remove(bb->function, bb);
2934  }
2935 
2936  if (bb) {
2937  append(func, bb, SgAsmBlock::BLK_ENTRY_POINT);
2938  trace <<"#" <<bb->insns.size() <<" ";
2939  } else {
2940  trace <<"no instruction at function entry address ";
2941  }
2942  if (trace) {
2943  func->show_properties(trace);
2944  trace <<"\n";
2945  }
2946 }
2947 
/* Recursively discovers the basic blocks reachable from address @p va and appends them to function
 * @p f with block reason @p reason.  Stops at the entry blocks of other functions (treated as calls),
 * creates FUNC_GRAPH functions or marks functions pending when block ownership conflicts arise, and
 * throws AbandonFunctionDiscovery when discovery for @p f must be restarted on a later pass. */
void
Partitioner::discover_blocks(Function *f, rose_addr_t va, unsigned reason)
{
    Stream trace(mlog[TRACE]);
    trace.facilityName("Partitioner::discover_blocks");
    trace <<" B" <<addrToString(va);

    Instruction *insn = find_instruction(va);
    if (!insn) return; /* No instruction at this address. */
    rose_addr_t target_va = NO_TARGET; /*target of function call instructions (e.g., x86 CALL and FARCALL)*/

    /* This block might be the entry address of a function even before that function has any basic blocks assigned to it. This
     * can happen when a new function was discovered during the current pass. It can't happen for functions discovered in a
     * previous pass since we would have called discover_first_block() by now for any such functions. */
    Functions::iterator fi = functions.find(va);
    if (fi!=functions.end() && fi->second!=f) {
        trace <<"[entry \"" <<fi->second->name <<"\"]";
        return;
    }

    /* Find basic block at address, creating it if necessary. */
    BasicBlock *bb = find_bb_starting(va);
    ASSERT_not_null(bb);
    trace <<"#" <<bb->insns.size();

    /* If the current function has been somehow marked as pending then we might as well give up discovering its blocks because
     * some of its blocks' successors may have changed. This can happen, for instance, if the create_bb() called above had to
     * split one of this function's blocks. */
    if (f->pending) {
        trace <<" abandon";
        throw AbandonFunctionDiscovery();
    }

    /* Don't reprocess blocks for this function. However, we need to reprocess the first block because it was added by
     * discover_first_block(), which is not recursive. Care should be taken so none of the recursive calls below are invoked
     * for the first block, or we'll have infinite recurision! */
    if (bb->function==f && bb->address()!=f->entry_va)
        return;

    if (bb->function && bb->function!=f) {
        if (va==bb->function->entry_va) {
            /* This is a call to some other existing function. Do not add it to the current function. */
            trace <<"[entry \"" <<bb->function->name <<"\"]";
        } else {
            /* This block belongs internally to some other function. Since ROSE requires that blocks be owned by exactly one
             * function (the function/block relationship is an edge in the abstract syntax tree), we have to remove this block
             * from the other function. We'll mark both the other function and this function as being in conflict and try
             * again later.
             *
             * However, there is a special case we need to watch out for: the case when the block in conflict is no longer
             * reachable from the original function due to having made changes to other blocks in the original function. For
             * instance, consider the following sequence of events:
             *     F000 contains B000 (the entry block) and B010
             *     B000 has 10 instructions, and ends with a call to F100 which returns
             *     B010 is the fall-through address of B000
             * We then begin to discover F005 whose entry address is the fifth instruction of B000, so
             *     B000 is split into B000 containing the first five instrucitons and B005 containing the second five
             *     F000 is marked as pending due to the splitting of its B000 block
             *     B005 is added to F005 as its entry block
             *     B005 calls F100 which returns to B010, so we want to add B010 to F005
             * So we have a conflict:
             *     B010 belongs to F000 because we never removed it, but we need B010 also in F005.
             * In this example, the only CFG edge to B010 inside F000 was the fall-through edge from the call to F100, which
             * no longer exists in F000. Unfortunately we have no way of knowing (short of doing a CFG analysis in F000) that
             * the last edge was removed. Even if we did a CFG analysis, we may be working with incomplete information (F000
             * might not be fully discovered yet).
             *
             * The way we handle this special case is as follows:
             *     If the original function (F000 in the example) is marked as pending then the blocks it currently owns might
             *     not actually belong to the function anymore. Therefore we will not create a new FUNC_GRAPH function for the
             *     block in conflict, but rather mark both functions as pending and abandon until the next pass. Otherwise we
             *     assume the block in conflict really is in conflict and we'll create a FUNC_GRAPH function. */
            if (functions.find(va)==functions.end() && !bb->function->pending) {
                add_function(va, SgAsmFunction::FUNC_GRAPH);
                trace <<"[conflict F" <<addrToString(bb->function->entry_va) <<" \"" <<bb->function->name <<"\"]";
            } else {
                trace <<"[possible conflict F" <<addrToString(bb->function->entry_va) <<" \"" <<bb->function->name <<"\"]";
            }
            bb->function->pending = f->pending = true;
            trace <<" abandon";
            throw AbandonFunctionDiscovery();
        }
    } else if ((target_va=call_target(bb))!=NO_TARGET) {
        /* Call to a known target */
        trace <<"[call F" <<addrToString(target_va) <<"]";

        append(f, bb, reason);
        BasicBlock *target_bb = find_bb_containing(target_va);

        /* Optionally create or add reason flags to called function. */
        Function *new_function = NULL;
        if ((func_heuristics & SgAsmFunction::FUNC_CALL_TARGET)) {
            new_function = add_function(target_va, SgAsmFunction::FUNC_CALL_TARGET);
        } else if (find_function(target_va)!=NULL) {
            find_function(target_va)->reason |= SgAsmFunction::FUNC_CALL_TARGET;
        }

        /* If the call target is part of a function (the current function or some other) and it's not the entry block then we
         * might need to rediscover the blocks of that function. We don't need to rediscover the blocks of that function if
         * that function is the current function and should remain in the current function (i.e., we didn't create a new
         * function). */
        if (target_bb && target_bb->function && target_va!=target_bb->function->entry_va &&
            (target_bb->function!=f || new_function!=NULL))
            target_bb->function->pending = true;

        /* Discovery continues at the successors. */
        const Disassembler::AddressSet &suc = successors(bb);
        for (Disassembler::AddressSet::const_iterator si=suc.begin(); si!=suc.end(); ++si) {
            if (*si!=f->entry_va)       // never recurse into the entry block (see comment above)
                discover_blocks(f, *si, reason);
        }

    } else {
        /* Not a call and not owned by another function: claim the block and recurse into successors. */
        append(f, bb, reason);
        const Disassembler::AddressSet& suc = successors(bb);
        for (Disassembler::AddressSet::const_iterator si=suc.begin(); si!=suc.end(); ++si) {
            if (*si!=f->entry_va)       // never recurse into the entry block (see comment above)
                discover_blocks(f, *si, reason);
        }
    }
}
3073 
3074 void
3075 Partitioner::discover_blocks(Function *f, unsigned reason)
3076 {
3077  Disassembler::AddressSet heads = f->heads;
3078  heads.insert(f->entry_va);
3079  for (Disassembler::AddressSet::iterator hi=heads.begin(); hi!=heads.end(); ++hi)
3080  discover_blocks(f, *hi, reason);
3081 }
3082 
/* Returns true if the given instruction looks like a PE dynamic-linking thunk: a single-operand x86
 * JMP that jumps indirectly through a constant address lying within one of the import address table
 * extents recorded by find_pe_iat_extents().
 * NOTE(review): the signature line is missing from this extracted listing; the body reads an
 * Instruction* parameter named insn — confirm against the header. */
bool
{
    SgAsmX86Instruction *insn_x86 = insn ? isSgAsmX86Instruction(insn->node) : NULL;
    if (!insn_x86 || x86_jmp!=insn_x86->get_kind() || insn_x86->get_operandList()->get_operands().size()!=1)
        return false; // not a thunk: wrong instruction
    SgAsmMemoryReferenceExpression *mre = isSgAsmMemoryReferenceExpression(insn_x86->get_operandList()->get_operands()[0]);
    SgAsmIntegerValueExpression *addr = mre ? isSgAsmIntegerValueExpression(mre->get_address()) : NULL;
    if (!addr)
        return false; // not a dynamic linking thunk: wrong addressing mode
    // The referenced 4-byte slot must fall inside a recorded IAT extent.
    return pe_iat_extents.contains(Extent(addr->get_absoluteValue(), 4));
}
3095 
/* Basic-block overload: true when the block consists of exactly one instruction and that
 * instruction is a PE dynamic-linking thunk.
 * NOTE(review): signature line missing from this extracted listing (parameter is a BasicBlock* bb). */
bool
{
    return bb && bb->insns.size()==1 && is_pe_dynlink_thunk(bb->insns.front());
}
3101 
/* Function overload: true when the function consists of exactly one basic block and that block's
 * entry is a PE dynamic-linking thunk.
 * NOTE(review): signature line missing from this extracted listing (parameter is a Function* func). */
bool
{
    return func && func->basic_blocks.size()==1 && is_pe_dynlink_thunk(func->entry_basic_block());
}
3107 
/* Iteratively analyzes the control flow graph: each pass updates function may-return properties,
 * propagates "newly returning" information to callers, then (re)discovers the basic blocks of every
 * function marked pending.  Terminates when a pass leaves no function pending.
 * NOTE(review): the signature line is missing from this extracted listing; the body reads a
 * parameter named 'reason' passed to update_progress() and discover_blocks() — confirm the header. */
void
{
    Stream trace(mlog[TRACE]);
    trace.facilityName("Partitioner::analyze_cfg");

    for (size_t pass=1; true; pass++) {
        trace <<"========== Partitioner::analyze_cfg() pass " <<pass <<" ==========\n";
        update_progress(reason, pass);

        /* Analyze function return characteristics. */
        for (BasicBlocks::iterator bi=basic_blocks.begin(); bi!=basic_blocks.end(); ++bi) {
            BasicBlock *bb = bi->second;
            if (!bb->function)
                continue;               // only blocks already assigned to a function matter here

            rose_addr_t target_va = NO_TARGET; /*call target*/
            bool iscall = is_function_call(bb, &target_va);
            rose_addr_t return_va = canonic_block(bb->last_insn()->get_address() + bb->last_insn()->get_size()); // fall through
            BasicBlock *return_bb = NULL, *target_bb = NULL; /* computed only if needed since they might split basic blocks */
            bool succs_complete;
            Disassembler::AddressSet succs = successors(bb, &succs_complete);

            if (iscall && target_va!=NO_TARGET &&
                NULL!=(return_bb=find_bb_starting(return_va)) &&
                NULL!=(target_bb=find_bb_starting(target_va, false)) &&
                target_bb->function && target_bb->function->possible_may_return()) {
                if (return_bb->function && return_va==target_bb->function->entry_va && !bb->function->possible_may_return()) {
                    /* This handles the case when function A's return from B falls through into B. In this case, since B
                     * returns then A also returns. We mark A as returning.
                     *    function_A:
                     *        ...
                     *        CALL function_B
                     *    function_B:
                     *        RET
                     */
                    // NOTE(review): a statement was dropped from this extracted listing here —
                    // from the trace text below it likely set bb->function's may-return property.
                    trace <<" function F" <<addrToString(bb->function->entry_va)
                          <<" may return by virtue of call fall-through at B" <<addrToString(bb->address()) <<"\n";
                }
            } else if (!bb->function->possible_may_return() && !is_function_call(bb, NULL) && succs_complete) {
                for (Disassembler::AddressSet::iterator si=succs.begin();
                     si!=succs.end() && !bb->function->possible_may_return();
                     ++si) {
                    target_va = *si;
                    target_bb = target_va!=0 ? find_bb_starting(target_va, false) : NULL;
                    if (target_bb && target_bb->function && target_bb->function!=bb->function &&
                        target_va==target_bb->function->entry_va && target_bb->function->possible_may_return()) {
                        /* The block bb isn't a function call, but branches to the entry point of another function. If that
                         * function returns then so does this one. This handles situations like:
                         *    function_A:
                         *        ...
                         *        JMP function_B
                         *        ...
                         *    function_B:
                         *        RET
                         * We don't need to set function_A->pending because the reachability of the instruction after its JMP
                         * won't change regardless of whether the "called" function returns (i.e., the return is to the caller
                         * of function_A, not to function_A itself. */
                        // NOTE(review): a statement was dropped from this extracted listing here —
                        // likely the call that marks bb->function as may-return.
                        trace <<" F" <<addrToString(bb->function->entry_va)
                              <<" may return by virtue of branching to function F" <<addrToString(target_bb->function->entry_va)
                              <<" which may return\n";
                    }
                }
            } else if (!bb->function->possible_may_return() && !is_function_call(bb, NULL) && !succs_complete) {
                /* If the basic block's successor is not known, then we must assume that it branches to something that could
                 * return. */
                // NOTE(review): a statement was dropped from this extracted listing here —
                // likely the call that marks bb->function as may-return.
                trace <<" F" <<addrToString(bb->function->entry_va) <<" may return by virtue of incomplete successors\n";
            }

            // PE dynamic linking thunks are typically placed in the .text section and consist of an indirect jump through one
            // of the import address tables. If we didn't dynamically link in ROSE, then the IATs probably don't hold valid
            // function addresses, in which case we can't determine if the thunk returns. Therefore, when this situation
            // happens, we assume that the imported function returns.
            if (!bb->function->possible_may_return() && is_pe_dynlink_thunk(bb->function)) {
                bool invalid_callee_va = !succs_complete;
                for (Disassembler::AddressSet::iterator si=succs.begin(); !invalid_callee_va && si!=succs.end(); ++si)
                    invalid_callee_va = NULL==find_instruction(*si);
                if (invalid_callee_va) {// otherwise we can just analyze the linked-in code
                    // NOTE(review): the statement that was inside this branch was dropped from the
                    // extracted listing — per the comment above, it likely marked the thunk as may-return.
                }
            }
        }

        /* Which functions did we think didn't return but now think they might return? */
        Disassembler::AddressSet might_now_return;
        for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
            Function *func = fi->second;
            if (func->changed_may_return() && func->possible_may_return()) {
                trace <<(might_now_return.empty()?"newly returning functions:":"") <<" F" <<addrToString(func->entry_va);
                might_now_return.insert(func->entry_va);
                func->commit_may_return();
            }
        }
        if (!might_now_return.empty())
            trace <<"\n";

        /* If we previously thought a function didn't return, but now we think it might return, we need to mark as pending all
         * callers if the return address in that caller isn't already part of the caller function. There's no need to do this
         * fairly expensive loop of we didn't transition any functions from does-not-return to may-return. We use the
         * might_now_return set rather than looking up functions with find_function() because the former is probably faster,
         * especially if we have lots of functions but only a few transitioned from does-not-return to may-return, which is the
         * common case. */
        if (!might_now_return.empty()) {
            for (BasicBlocks::iterator bi=basic_blocks.begin(); bi!=basic_blocks.end(); ++bi) {
                BasicBlock *bb = bi->second;
                if (bb->function && !bb->function->pending) {
                    Disassembler::AddressSet succs = successors(bb, NULL);
                    for (Disassembler::AddressSet::iterator si=succs.begin(); si!=succs.end(); ++si) {
                        if (might_now_return.find(*si)!=might_now_return.end()) {
                            // This is a call from a basic block (bb) to a function that we now think might return. If the
                            // return-to block is not already part of the calling function, then we should mark the calling
                            // function as pending.
                            rose_addr_t return_va = canonic_block(bb->last_insn()->get_address() + bb->last_insn()->get_size());
                            BasicBlock *return_bb = find_bb_starting(return_va, false); // do not create the block
                            if (return_bb && return_bb->function!=bb->function) {
                                bb->function->pending = true;
                                if (trace) {
                                    Function *called_func = find_function(*si); // don't call this unless debugging (performance)
                                    ASSERT_not_null(called_func);
                                    trace <<"newreturn " <<SgAsmFunction::reason_str(true, called_func->reason)
                                          <<" F" <<addrToString(called_func->entry_va) <<" \"" <<called_func->name <<"\""
                                          <<" returns to B" <<addrToString(return_bb->address())
                                          <<" in F" <<addrToString(bb->function->entry_va) <<"\n";
                                }
                            }
                        }
                    }
                }
            }
        }

        /* Get a list of functions we need to analyze because they're marked as pending. */
        std::vector<Function*> pending;
        for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
            ASSERT_require(fi->second->entry_va==fi->first);
            if (fi->second->pending) {
                fi->second->clear_basic_blocks();
                fi->second->pending = false; /*might be set back to true by discover_blocks() in loop below*/
                pending.push_back(fi->second);
            }
        }

        if (pending.size()==0) {
            trace <<"finished for " <<stringifySgAsmBlockReason(reason, "BLK_");
            break;              // fixed point reached: no function needs rediscovery
        }

        /* Make sure all functions have an initial basic block if possible. */
        for (size_t i=0; i<pending.size(); ++i)
            discover_first_block(pending[i]);

        /* (Re)discover each function's blocks starting with the function entry point */
        for (size_t i=0; i<pending.size(); ++i) {
            trace <<"analyzing " <<SgAsmFunction::reason_str(true, pending[i]->reason)
                  <<" F" <<addrToString(pending[i]->entry_va) <<" \"" <<pending[i]->name <<"\" pass " <<pass <<": ";
            try {
                discover_blocks(pending[i], reason);
            } catch (const AbandonFunctionDiscovery&) {
                /* thrown when discover_blocks() decides it needs to start over on a function */
            }
            if (trace) {
                trace <<" ";
                pending[i]->show_properties(trace);
                trace <<"\n";
            }
        }
    }
}
3279 
/* Repeatedly applies detach_thunk() to every function until no more thunks can be split off, and
 * returns the number of thunks that were detached.
 * NOTE(review): the signature line is missing from this extracted listing; presumably
 * Partitioner::detach_thunks() — confirm against the header. */
size_t
{
    size_t retval = 0;
    Functions functions = this->functions; // so iterators remain valid inside the loop
    for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
        while (detach_thunk(fi->second)) // each call may expose another detachable thunk
            ++retval;
    }
    return retval;
}
3291 
/* If the function begins with a single x86 JMP into its own body, splits it into a thunk (which keeps
 * the original entry address and gains a "-thunk" name suffix) and a new function holding everything
 * else.  Returns true if a split was performed.
 * NOTE(review): the signature line is missing from this extracted listing; the body reads a
 * Function* parameter named func — confirm against the header. */
bool
{
    /* Don't split functions if it has only zero or one instruction. */
    if (func->basic_blocks.empty())
        return false;
    BasicBlock *entry_bb = find_bb_starting(func->entry_va, false);
    if (NULL==entry_bb || entry_bb->function!=func)
        return false;
    if (func->basic_blocks.size()==1 && entry_bb->insns.size()==1)
        return false;

    /* Don't split function whose first instruction is not an x86 JMP. */
    SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(entry_bb->insns[0]);
    if (!insn_x86 || (insn_x86->get_kind()!=x86_jmp && insn_x86->get_kind()!=x86_farjmp))
        return false;

    /* The JMP must have a single target. */
    rose_addr_t second_va = 0;
    if (entry_bb->insns.size()>1) {
        second_va = entry_bb->insns[1]->get_address();  // target is simply the next instruction of the block
    } else {
        bool complete;
        Disassembler::AddressSet succs = successors(entry_bb, &complete);
        if (!complete || succs.size()!=1)
            return false;
        second_va = *(succs.begin());
    }

    /* The JMP target must be an instruction in the same function. */
    if (BasicBlock *target_bb = find_bb_containing(second_va)) {
        if (target_bb->function!=func)
            return false;
    } else {
        return false;
    }

    /* Don't split the function if the first instruction is a successor of any of the function's blocks. */
    for (BasicBlocks::iterator bi=func->basic_blocks.begin(); bi!=func->basic_blocks.end(); ++bi) {
        BasicBlock *bb = bi->second;
        Disassembler::AddressSet succs = successors(bb, NULL);
        // NOTE(review): linear std::find over an AddressSet; succs.find(func->entry_va) would be
        // logarithmic — candidate cleanup, left unchanged here.
        if (std::find(succs.begin(), succs.end(), func->entry_va) != succs.end())
            return false;
    }

    /* Create a new function to hold everything but the entry instruction */
    mlog[TRACE] <<"detach_thunk: detaching thunk F" <<addrToString(func->entry_va)
                <<" from body F" <<addrToString(second_va) <<"\n";
    Function *new_func = add_function(second_va, func->reason);
    new_func->name = func->name;
    new_func->set_may_return(func->get_may_return());

    /* Adjust the old function, which now represents the thunk. */
    // NOTE(review): a statement was dropped from this extracted listing here — likely the one that
    // resets func->reason for the thunk.
    func->pending = false;
    if (!func->name.empty() && std::string::npos==func->name.find("-thunk"))
        func->name += "-thunk";

    /* Transfer all instructions (except the thunk itself) to new_func. */
    new_func->heads = func->heads;
    func->heads.clear();
    new_func->heads.erase(func->entry_va);
    BasicBlocks bblocks = func->basic_blocks;   // copy: remove()/append() below mutate the originals
    for (BasicBlocks::iterator bi=bblocks.begin(); bi!=bblocks.end(); ++bi) {
        if (bi->first==func->entry_va) {
            // The thunk's own block stays with func; the block at the JMP target becomes new_func's entry.
            BasicBlock *new_bb = find_bb_starting(second_va);
            assert(new_bb!=NULL);
            if (new_bb->function==func) {
                remove(func, new_bb);
                append(new_func, new_bb, SgAsmBlock::BLK_ENTRY_POINT);
            } else if (new_bb->function==new_func) {
                /*void*/
            } else {
                ASSERT_require(NULL==new_bb->function);
                append(new_func, new_bb, SgAsmBlock::BLK_ENTRY_POINT);
            }
            // NOTE(review): this unconditional append repeats the append already done by two of the
            // three branches above — looks redundant (or the branch appends are); verify append()'s
            // idempotence before changing.
            append(new_func, new_bb, SgAsmBlock::BLK_ENTRY_POINT);
        } else {
            BasicBlock *bb = bi->second;
            if (bb->function!=new_func) {
                ASSERT_require(bb->function==func);
                remove(func, bb);
                append(new_func, bb, bb->reason);
            }
        }
    }

    /* Transfer all data blocks to new_func. */
    DataBlocks dblocks = func->data_blocks;     // copy for the same reason as bblocks above
    for (DataBlocks::iterator di=dblocks.begin(); di!=dblocks.end(); ++di) {
        DataBlock *dblock = di->second;
        remove(func, dblock);
        append(new_func, dblock, dblock->reason);
    }
    return true;
}
3388 
/* Moves padding blocks to correct functions. */
/* Each inter-function padding data block is reassigned to the function whose (non-padding) extent
 * most closely precedes the padding in the address space.
 * NOTE(review): the signature line is missing from this extracted listing — confirm the method
 * name (takes no visible parameters; operates on member state). */
void
{
    /* Compute two maps: one for non-padding bytes belonging to functions, and one for the padding bytes. */
    FunctionRangeMap nonpadding_ranges;
    function_extent(&nonpadding_ranges);
    DataRangeMap padding_ranges;
    padding_extent(&padding_ranges);
    for (DataRangeMap::iterator pi=padding_ranges.begin(); pi!=padding_ranges.end(); ++pi)
        nonpadding_ranges.erase(pi->first);     // make the two maps disjoint

    /* For each padding block, find the closest prior function and make that the owner of the padding. */
    for (DataRangeMap::iterator pi=padding_ranges.begin(); pi!=padding_ranges.end(); ++pi) {
        DataBlock *padding = pi->second.get();
        FunctionRangeMap::iterator npi = nonpadding_ranges.find_prior(pi->first.first());
        if (npi==nonpadding_ranges.end())
            continue;                           // no function precedes this padding; leave it alone
        Function *func = npi->second.get();
        if (func!=effective_function(padding))
            append(func, padding, padding->reason, true/*force*/);
    }
}
3412 
3413 /* Merge function fragments when possible. */
3414 void
// NOTE(review): the declarator line (presumably "Partitioner::merge_function_fragments()")
// was elided by the listing generator.
3416 {
3417  Stream trace(mlog[TRACE]);
3418  trace.facilityName("Partitioner::merge_function_fragments");
3419 
3420  // Find connected components of the control flow graph, but only considering function fragments. We do this in a single
3421  // pass, and at the end of this loop each function fragment, F, will have group number group_number[traversal_number[F]].
3422  typedef std::map<Function*, size_t> TravNumMap;
3423  TravNumMap traversal_number; // which DFS traversal first visited the function?
3424  std::vector<size_t> group_number; // group number for each traversal (indexed by traversal number)
3425  for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
// A fragment is a function whose only reason bit is FUNC_GRAPH (equality test, not mask test).
3426  if (SgAsmFunction::FUNC_GRAPH!=fi->second->reason)
3427  continue; // only consider functions that are strictly fragments
3428  if (traversal_number.find(fi->second)!=traversal_number.end())
3429  continue; // we already visited this function
3430 
3431  size_t tnum = group_number.size();
3432  group_number.push_back(tnum);
3433  traversal_number[fi->second] = tnum;
3434 
3435  // Depth first search considering only function fragments
3436  std::vector<Function*> dfs_functions;
3437  dfs_functions.push_back(fi->second);
3438  while (!dfs_functions.empty()) {
3439  Function *source_func = dfs_functions.back(); dfs_functions.pop_back();
3440  for (BasicBlocks::iterator bi=source_func->basic_blocks.begin(); bi!=source_func->basic_blocks.end(); ++bi) {
3441  BasicBlock *source_bb = bi->second;
3442  Disassembler::AddressSet succs = successors(source_bb);
3443  for (Disassembler::AddressSet::iterator si=succs.begin(); si!=succs.end(); ++si) {
3444  BasicBlock *target_bb = find_bb_starting(*si, false); // do not create the block
3445  Function *target_func = target_bb ? target_bb->function : NULL;
3446  if (target_func && target_func!=source_func && SgAsmFunction::FUNC_GRAPH==target_func->reason) {
// insert() returns false in .second when target_func was already numbered by an earlier
// traversal; in that case link the earlier traversal's group to this one.
3447  bool inserted = traversal_number.insert(std::make_pair(target_func, tnum)).second;
3448  if (inserted) {
3449  dfs_functions.push_back(target_func);
3450  } else {
// NOTE(review): this re-labels only the earlier traversal's own slot; it is a single-pass
// approximation rather than a full union-find with path compression -- confirm intended.
3451  group_number[traversal_number[target_func]] = tnum;
3452  }
3453  }
3454  }
3455  }
3456  }
3457  }
3458 
3459  /* Reorganize so that we have lists of function fragments by group number. */
3460  typedef std::vector<std::vector<Function*> > FragmentIndex;
3461  FragmentIndex fragment_index(group_number.size(), std::vector<Function*>());
3462  for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
3463  TravNumMap::iterator tn_found = traversal_number.find(fi->second);
3464  if (tn_found!=traversal_number.end()) {
3465  size_t gnum = group_number[tn_found->second];
3466  fragment_index[gnum].push_back(fi->second);
3467  }
3468  }
3469 
3470  /* Find the non-fragment predecessors of each fragment group. A fragment group can be merged into another function only if
3471  * the fragment group has a single predecessor. */
// parent[gnum] is NULL until a predecessor is seen; set to that predecessor on first sighting;
// reset to NULL (and the group discarded) if a second distinct predecessor appears.
3472  std::vector<Function*> parent(fragment_index.size(), NULL);
3473  for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
3474  Function *source_func = fi->second;
3475  if (SgAsmFunction::FUNC_GRAPH!=source_func->reason) {
3476  bool multi_parents = false;
3477  for (BasicBlocks::iterator bi=source_func->basic_blocks.begin();
3478  bi!=source_func->basic_blocks.end() && !multi_parents;
3479  ++bi) {
3480  Disassembler::AddressSet succs = successors(bi->second);
3481  for (Disassembler::AddressSet::iterator si=succs.begin(); si!=succs.end() && !multi_parents; ++si) {
3482  BasicBlock *target_bb = find_bb_starting(*si, false/*do not create*/);
3483  Function *target_func = target_bb ? target_bb->function : NULL;
3484  TravNumMap::iterator tn_found = target_func ? traversal_number.find(target_func) : traversal_number.end();
// (size_t)(-1) is the sentinel for "target is not in any fragment group".
3485  size_t gnum = tn_found!=traversal_number.end() ? group_number[tn_found->second] : (size_t)(-1);
3486  if (gnum!=(size_t)(-1)) {
3487  /* source_func (non-fragment) branches to fragment group number <gnum> */
3488  if (parent[gnum]) {
3489  parent[gnum] = NULL;
3490  fragment_index[gnum].clear(); // multiple non-fragment predecessors of this group; discard group
3491  multi_parents = true;
3492  } else {
3493  parent[gnum] = source_func;
3494  }
3495  }
3496  }
3497  }
3498  }
3499  }
3500 
3501  /* Merge functions */
3502  for (size_t gnum=0; gnum<fragment_index.size(); ++gnum) {
3503  if (parent[gnum]!=NULL && !fragment_index[gnum].empty()) {
3504  trace <<"fragments " <<SgAsmFunction::reason_str(true, parent[gnum]->reason)
3505  <<" F" <<addrToString(parent[gnum]->entry_va) <<" \"" <<parent[gnum]->name <<"\" merging";
3506  for (std::vector<Function*>::iterator fi=fragment_index[gnum].begin(); fi!=fragment_index[gnum].end(); ++fi) {
3507  trace <<" F" <<addrToString((*fi)->entry_va);
// merge_functions() deletes *fi, so null the pointer to avoid accidental reuse. The parent
// absorbed FUNC_GRAPH from the fragment's reason; strip it again since parent is not a fragment.
3508  merge_functions(parent[gnum], *fi); *fi = NULL;
3509  parent[gnum]->reason &= ~SgAsmFunction::FUNC_GRAPH;
3510  }
3511  if (trace) {
3512  trace <<" ";
3513  parent[gnum]->show_properties(trace);
3514  trace <<"\n";
3515  }
3516  }
3517  }
3518 }
3519 
3520 void
// Merges function <other> into function <parent> and destroys <other>: reason bits are
// unioned, names are concatenated when they differ, all basic/data blocks are moved, the
// "pending" flag and may-return property are propagated, and <other> is removed from the
// function table and deleted.
// NOTE(review): the declarator line (presumably
// "Partitioner::merge_functions(Function *parent, Function *other)") was elided by the
// listing generator.
3522 {
3523  parent->reason |= other->reason;
3524 
// Keep parent's name when both are set and equal; otherwise combine as "parent+other".
3525  if (parent->name.empty()) {
3526  parent->name = other->name;
3527  } else if (!other->name.empty() && 0!=parent->name.compare(other->name)) {
3528  parent->name += "+" + other->name;
3529  }
3530 
3531  parent->move_basic_blocks_from(other);
3532  parent->move_data_blocks_from(other);
3533 
3534  if (other->pending)
3535  parent->pending = true;
3536 
3537  parent->promote_may_return(other->get_may_return());
3538 
// The function table is keyed by entry virtual address; erase before delete so no dangling
// pointer remains in the map.
3539  functions.erase(other->entry_va);
3540  delete other;
3541 }
3542 
3544 void
// Gives PE dynamic-linking thunk functions the name of the import they branch to.
// NOTE(review): the declarator line (presumably
// "Partitioner::name_pe_dynlink_thunks(SgAsmInterpretation *interp)") was elided by the
// listing generator, as was a doxygen comment line preceding the return type.
3546 {
3547  // AST visitor finds PE Import Items and adds their address/name pair to a map.
3548  struct AddrName: AstSimpleProcessing {
3549  typedef std::map<rose_addr_t, std::string> NameMap;
3550  NameMap names; // import address (VA and RVA) -> imported symbol name
3551  SgAsmGenericHeader *hdr; // header currently being traversed (used for base VA)
3552  AddrName(SgAsmInterpretation *interp) {
3553  if (interp) {
3554  const SgAsmGenericHeaderPtrList &hdrs = interp->get_headers()->get_headers();
3555  for (SgAsmGenericHeaderPtrList::const_iterator hi=hdrs.begin(); hi!=hdrs.end(); ++hi) {
3556  hdr = *hi;
3557  traverse(hdr, preorder);
3558  }
3559  }
3560  }
3561  void visit(SgNode *node) {
3562  if (SgAsmPEImportItem *import_item = isSgAsmPEImportItem(node)) {
3563  std::string name = import_item->get_name()->get_string();
// By-ordinal imports have no usable name, so skip them.
3564  if (!name.empty() && !import_item->get_by_ordinal()) {
3565  // Add a name for both the absolute virtual address and the relative virtual address. The IAT will contain
3566  // relative addresses unless BinaryLoader applied fixups.
3567  rose_addr_t va = import_item->get_hintname_rva().get_va();
3568  names[va] = name;
3569  rose_addr_t rva = va - hdr->get_base_va();
3570  names[rva] = name;
3571  }
3572  }
3573  }
// Lookup functor: returns the import name for an address, or the empty string.
3574  std::string operator()(rose_addr_t va) const {
3575  NameMap::const_iterator found = names.find(va);
3576  return found==names.end() ? std::string() : found->second;
3577  }
3578  } names(interp);
3579 
3580  // Identify PE dynamic linking thunks and give them the name of the imported function to which they branch. FIXME: we
3581  // might want to change the name slightly because otherwise the thunk will have the same name as the linked-in function
3582  // if/after BinaryLoader does the linking. In contrast, the ELF executables typically place their dynamic linking
3583  // thunks in a ".plt" section (Procedure Lookup Table) and we name the thunks so that if the linked-in function is
3584  // named "printf", the thunk is named "printf@plt".
3585  for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
3586  Function *func = fi->second;
3587  if (is_pe_dynlink_thunk(func)) {
// NOTE(review): one source line (3588) was elided from this listing at this point.
3589  if (func->name.empty()) {
3590  BasicBlock *bb = func->entry_basic_block();
3591  bool complete;
3592  Disassembler::AddressSet succs = successors(bb, &complete);
// Name the thunk only when its successor set is fully known and unambiguous (exactly one
// target); otherwise leave it unnamed.
3593  if (complete && 1==succs.size())
3594  func->name = names(*succs.begin());
3595  }
3596  }
3597  }
3598 }
3599 
// Returns the function owning the instruction at virtual address <va>, or NULL when there is
// no such instruction, the instruction belongs to no basic block, or the block has no function.
// NOTE(review): the return-type and declarator lines (presumably
// "Partitioner::Function * Partitioner::find_function_containing_code(rose_addr_t va)") were
// elided by the listing generator.
3602 {
3603  if (Instruction *insn = find_instruction(va, false /* do not create */)) {
3604  if (insn->bblock)
3605  return insn->bblock->function;
3606  }
3607  return NULL;
3608 }
3609 
// Returns the function that owns a data block containing virtual address <va>, or NULL.
// NOTE(review): the return-type and declarator lines (presumably
// "Partitioner::Function * Partitioner::find_function_containing_data(rose_addr_t va)") were
// elided by the listing generator.
3612 {
// Builds the full data-block extent map on every call; the FIXME below acknowledges the cost.
3613  DataRangeMap dblock_ranges;
3614  datablock_extent(&dblock_ranges /*out*/); // FIXME[Robb P. Matzke 2014-04-11]: this is not particularly fast
3615  DataRangeMap::iterator found = dblock_ranges.find(va);
3616  if (found!=dblock_ranges.end()) {
3617  DataBlock *dblock = found->second.get();
3618  return dblock->function;
3619  }
3620  return NULL;
3621 }
3622 
// Returns the function containing <va> as code if any, otherwise as data, otherwise NULL.
// NOTE(review): the return-type and declarator lines (presumably
// "Partitioner::Function * Partitioner::find_function_containing(rose_addr_t va)") were
// elided by the listing generator.
3625 {
3626  if (Function *func = find_function_containing_code(va))
3627  return func;
3628  return find_function_containing_data(va);
3629 }
3630 
3631 ExtentMap
// Returns the complement of all function extents: every address not owned by any function.
// NOTE(review): the declarator line (presumably "Partitioner::unused_addresses()") was elided
// by the listing generator.
3633 {
3634  FunctionRangeMap used_addresses;
3635  function_extent(&used_addresses /*out*/);
3636  return used_addresses.invert<ExtentMap>();
3637 }
3638 
3639 bool
// Predicate: true iff <va> falls within the extent of some function (code or data).
// NOTE(review): the declarator line (presumably
// "Partitioner::is_used_address(rose_addr_t va)") was elided by the listing generator.
// Note this recomputes the full function extent map on each call, like
// find_function_containing_data() above.
3641 {
3642  FunctionRangeMap used_addresses;
3643  function_extent(&used_addresses /*out*/);
3644  return used_addresses.find(va) != used_addresses.end();
3645 }
3646 
// Returns the lowest address >= start_va that is both mapped in <map> and not owned by any
// function, or nothing when no such address exists.
// NOTE(review): the return-type line (an optional-address type, judging by the
// Sawyer::Nothing return below) was elided by the listing generator.
3648 Partitioner::next_unused_address(const MemoryMap::Ptr &map, rose_addr_t start_va)
3649 {
3650  ASSERT_not_null(map);
3651  Sawyer::Nothing NOT_FOUND;
3652  ExtentMap unused = unused_addresses(); // all unused addresses regardless of whether they're mapped
3653 
3654  while (1) {
3655  // get the next unused virtual address, but it might not be mapped
3656  ExtentMap::iterator ui = unused.lower_bound(start_va);
3657  if (ui==unused.end())
3658  return NOT_FOUND;
// The found extent may begin below start_va; clamp so we never return an address < start_va.
3659  rose_addr_t unused_va = std::max(start_va, ui->first.first());
3660 
3661  // get the next mapped address, but it might not be unused
3662  rose_addr_t mapped_unused_va = 0;
3663  if (!map->atOrAfter(unused_va).next().assignTo(mapped_unused_va))
3664  return NOT_FOUND; // no higher mapped address
3665  if (unused.contains(Extent(mapped_unused_va)))
3666  return mapped_unused_va; // found
3667 
3668  // try again at a higher address
3669  start_va = mapped_unused_va + 1;
// Wrap-around to zero means we walked off the top of the address space.
3670  if (start_va==0)
3671  return NOT_FOUND; // overflow
3672  }
3673 }
3674 
3675 void
// Scans <map> for runs of inter-function padding bytes (x86 NOP 0x90, INT3 0xcc, or zeros)
// and, when the address immediately following a sufficiently long run looks like code,
// creates a function there and re-runs CFG analysis.
// NOTE(review): the declarator line (presumably
// "Partitioner::discover_post_padding_functions(const MemoryMap::Ptr &map)" or similar)
// was elided by the listing generator.
3677 {
3678  ASSERT_not_null(map);
3679  Stream debug = mlog[DEBUG];
3680  if (map->isEmpty())
3681  return;
3682 
3683  std::vector<uint8_t> padding_bytes;
3684  padding_bytes.push_back(0x90); // x86 NOP
3685  padding_bytes.push_back(0xcc); // x86 INT3
3686  padding_bytes.push_back(0); // zero padding
3687 
3688  debug <<"discover_post_padding_functions()\n";
3689 
3690  rose_addr_t next_va = map->hull().least(); // first address in the map
3691  while (1) {
3692  debug <<" current position is " <<addrToString(next_va) <<"\n";
3693 
3694  // Find an address that is mapped but not part of any function.
3695  rose_addr_t unused_va;
3696  if (!next_unused_address(map, next_va).assignTo(unused_va))
3697  break;
3698  debug <<" next unused address is " <<addrToString(unused_va) <<"\n";
3699 
3700  // Find the next occurrence of padding bytes.
3701  Extent search_limits = Extent::inin(unused_va, map->hull().greatest());
3702  rose_addr_t padding_va;
3703  if (!map->findAny(search_limits, padding_bytes).assignTo(padding_va))
3704  break;
3705 
3706  // Skip over all padding bytes. After loop, candidate_va is one past end of padding (but possibly not mapped).
3707  rose_addr_t candidate_va = padding_va;
3708  while (1) {
3709  uint8_t byte;
// Stop at the first unreadable byte (the limit(1).singleSegment() read fails) or the first
// byte that is not one of the recognized padding values.
3710  if (1!=map->at(candidate_va).limit(1).singleSegment().read(&byte).size() ||
3711  std::find(padding_bytes.begin(), padding_bytes.end(), byte)==padding_bytes.end())
3712  break;
3713  ++candidate_va;
3714  }
3715  rose_addr_t npadding = candidate_va - padding_va;
3716  next_va = candidate_va + 1; // for next time through this loop
3717  debug <<" address after padding is " <<addrToString(candidate_va) <<"\n"
3718  <<" number of padding bytes is " <<npadding <<"\n";
3719 
3720  // Only consider this to be padding if we found some minimum number of padding bytes.
3721  if (npadding < 5) // arbitrary
3722  continue;
3723 
3724  // Make sure we found padding and that the address after the padding is not already part of some function
3725  if (candidate_va<=padding_va || is_used_address(candidate_va))
3726  continue;
3727 
3728  // Look at the next few bytes and do some simple tests to see if this looks like code or data.
3729  if (NULL==find_instruction(candidate_va))
3730  continue; // can't be a function if there's no instruction
3731  uint8_t buf[64]; // arbitrary
3732  size_t nread = map->readQuick(buf, candidate_va, sizeof buf);
3733  if (nread < 5) // arbitrary
3734  continue; // too small to be a function
// Heuristic content classification: count zero bytes and printable bytes in the window.
3735  size_t nzeros = 0, nprint = 0;
3736  for (size_t i=0; i<nread; ++i) {
3737  if (buf[i]==0)
3738  ++nzeros;
3739  if (isprint(buf[i]))
3740  ++nprint;
3741  }
3742  if ((double)nzeros / nread > 0.5) // arbitrary
3743  continue; // probably data since there are so many zero bytes
3744  if ((double)nprint / nread > 0.8) // arbitrary
3745  continue; // looks like ASCII data
3746  debug <<" discovering function at " <<addrToString(candidate_va) <<"\n"
3747  <<" nread=" <<nread <<", nzeros=" <<nzeros <<", nprint=" <<nprint <<"\n";
3748 
3749  // Mark the candidate address as a function entry point and discover the basic blocks for this function.
3750  mlog[TRACE] <<"Partitioner::discover_post_padding_functions: candidate function at "
3751  <<addrToString(candidate_va) <<"\n";
3752  add_function(candidate_va, SgAsmFunction::FUNC_INTERPADFUNC);
3753  analyze_cfg(SgAsmBlock::BLK_GRAPH2);
3754  }
3755  debug <<" discover_post_padding_functions analysis has completed\n";
3756 }
3757 
3758 void
// Post-CFG analysis driver: after the main CFG-based partitioning, this pass (in order)
// caches aggregate code statistics, attaches intra-function fragments, finds inter-function
// padding, finds thunks and thunk tables, discovers functions between padding, re-analyzes
// the CFG, detaches thunks, appends data blocks, fixes padding ownership, merges function
// fragments, and applies names from import/PLT tables.
// NOTE(review): the declarator line (presumably "Partitioner::post_cfg(SgAsmInterpretation
// *interp)" or similar) was elided by the listing generator.
3760 {
3761  /* Obtain aggregate statistics over all the functions, and cache them. These statistics describe code, so we want to do
3762  * this before we add data blocks to the functions. Any statistics already cached should be considered outdated. */
3763  clear_aggregate_statistics();
3764  RegionStats *mean = aggregate_statistics();
3765  RegionStats *variance = get_aggregate_variance();
3766  mlog[TRACE] <<"=== Mean ===\n" <<*mean <<"\n"
3767  <<"=== Variance ===\n" <<*variance <<"\n";
3768 
3769  /* A memory map that contains only the executable regions. I.e., those that might contain instructions. */
3770  ASSERT_not_null(map);
3771  MemoryMap::Ptr exe_map = map->shallowCopy();
3772  exe_map->require(MemoryMap::EXECUTABLE).keep();
3773 
3774  /* Add unassigned intra-function blocks to the surrounding function. This needs to come before detecting inter-function
3775  * padding, otherwise it will also try to add the stuff between the true function and its following padding. */
3776  if (func_heuristics & SgAsmFunction::FUNC_INTRABLOCK) {
// NOTE(review): one source line (3777) was elided here -- evidently the declaration of the
// callback object "fff" configured below.
3778  fff.require_noninterleaved = true;
3779  fff.require_intrafunction = true;
3780  fff.threshold = 0; // treat all intra-function regions as code
3781  scan_unassigned_bytes(&fff, exe_map);
3782  }
3783 
3784 #if 0 // [Robb P. Matzke 2014-04-29]: experimental, slow, heuristic, and a bit too greedy, but perhaps more accurate
3785  // Try to discover a function at each inter-function padding pattern, but interleave the discovery with the search. Each
3786  // time we find padding we will discover that function (by following its control flow graph) before we search for another
3787  // inter-function padding pattern.
3788  if (func_heuristics & SgAsmFunction::FUNC_PADDING)
3789  discover_post_padding_functions(exe_map);
3790 #endif
3791 
3792  /* Detect inter-function padding */
3793  if (func_heuristics & SgAsmFunction::FUNC_PADDING) {
3794  FindDataPadding cb;
3795  cb.minimum_nrep = 2;
3796  cb.maximum_nrep = 1024*1024;
3797  cb.begins_contiguously = false;
3798  cb.ends_contiguously = false;
3799 
// Two single-byte patterns: NOP (0x90) and INT3 (0xcc).
3800  SgUnsignedCharList pattern;
3801  pattern.push_back(0x90); /* x68 NOP */
3802  cb.patterns.push_back(pattern);
3803 
3804  pattern.clear();
3805  pattern.push_back(0xcc); /* x86 INT3 */
3806  cb.patterns.push_back(pattern);
3807 
3808  /* Scan only executable regions of memory. */
3809  MemoryMap::Ptr exe_map = map->shallowCopy();
3810  exe_map->require(MemoryMap::EXECUTABLE).keep();
3811  scan_interfunc_bytes(&cb, exe_map);
3812  }
3813 
3814  /* Find thunks. First use FindThunkTables, which has a more relaxed definition of a "thunk" but requires some minimum
3815  * number of consecutive thunks in order to trigger. Then use FindThunks, which has a strict definition including that the
3816  * thunk target must be a function. By running the latter in a loop, we can find thunks that branch to other thunks. */
3817  if (func_heuristics & SgAsmFunction::FUNC_THUNK) {
3818  FindThunkTables find_thunk_tables;
3819  find_thunk_tables.minimum_nthunks = 3; // at least this many JMPs per table
3820  find_thunk_tables.validate_targets = false;
3821  scan_unassigned_bytes(&find_thunk_tables, exe_map);
// Bounded fixed-point loop: repeat until a pass finds nothing (max 5 passes).
3822  for (size_t npasses=0; npasses<5; ++npasses) {
3823  FindThunks find_thunks;
3824  scan_unassigned_insns(&find_thunks);
3825  if (0==find_thunks.nfound)
3826  break;
3827  }
3828  }
3829 
3830  /* Find functions that we missed between inter-function padding. */
3831  if (func_heuristics & SgAsmFunction::FUNC_MISCMASK) {
3832  FindInterPadFunctions find_interpad_functions;
3833  scan_unassigned_bytes(&find_interpad_functions, exe_map);
3834  }
3835 
3836  /* Find code fragments that appear after a function. */
3837  if (func_heuristics & SgAsmFunction::FUNC_INTRABLOCK) {
// NOTE(review): one source line (3838) was elided here -- evidently the declaration of the
// callback object "fff" configured below (cf. the similar elision near line 3777).
3839  fff.require_noninterleaved = false;
3840  fff.require_intrafunction = false;
3841  fff.threshold = 0.7;
3842  scan_unassigned_bytes(&fff, exe_map);
3843  }
3844 
3845  /* Run another analysis of the CFG because we may need to fix some things up after having added more blocks from the
3846  * post-cfg analyses we did above. If nothing happened above, then analyze_cfg() should be fast. */
3847  analyze_cfg(SgAsmBlock::BLK_GRAPH2);
3848 
3849  /* Split thunks off from their jumped-to function. Not really necessary, but the result is more like other common
3850  * disassemblers and also more closely matches what would happen if we had debugging information in the executable. This
3851  * should only run after analyze_cfg() because it assumes that a function's blocks have all been discovered -- it does some
3852  * analysis on the function's internal control flow. */
3853  if (0!=(func_heuristics & SgAsmFunction::FUNC_THUNK) && detach_thunks()>0)
3854  analyze_cfg(SgAsmBlock::BLK_GRAPH3);
3855 
3856  /* Find thunks again. We might have more things satisfying the relatively strict thunk definition. */
3857  if (func_heuristics & SgAsmFunction::FUNC_THUNK) {
3858  for (size_t npasses=0; npasses<5; ++npasses) {
3859  FindThunks find_thunks;
3860  scan_unassigned_insns(&find_thunks);
3861  if (0==find_thunks.nfound)
3862  break;
3863  }
3864  }
3865 
3866  /* Append data to the end(s) of each normal function. */
3867  FindData find_data;
3868  scan_unassigned_bytes(&find_data, ro_map);
3869 
3870  /* Make sure padding is back where it belongs. */
3871  adjust_padding();
3872 
3873  /* Merge extra functions that we might have created. Sometimes it's possible that we break a function into too many parts,
3874  * and we can recombine those parts now. */
3875  merge_function_fragments();
3876 
3877  /* Give existing functions names from symbol tables. Don't create more functions. */
3878  if (interp && 0!=(func_heuristics & SgAsmFunction::FUNC_IMPORT)) {
3879  name_pe_dynlink_thunks(interp);
3880  const SgAsmGenericHeaderPtrList &headers = interp->get_headers()->get_headers();
3881  for (size_t i=0; i<headers.size(); i++) {
3882  name_plt_entries(headers[i]); // give names to ELF .plt trampolines
3883  name_import_entries(headers[i]); // give names to PE import thunks
3884  }
3885  }
3886 
3887  /* Add the BLK_CFGHEAD reason to all blocks that are also in the function's CFG head list. We do this once here rather
3888  * than searching the heads list in each pass of analyze_cfg(). */
3889  for (BasicBlocks::iterator bi=basic_blocks.begin(); bi!=basic_blocks.end(); ++bi) {
3890  BasicBlock *bb = bi->second;
3891  if (bb->function && 0==(bb->function->reason & SgAsmFunction::FUNC_LEFTOVERS) &&
3892  bb->function->heads.find(bb->address())!=bb->function->heads.end())
// NOTE(review): one source line (3893) was elided here -- per the comment above, evidently
// the statement that ORs BLK_CFGHEAD into the block's reason bits.
3894  }
3895 
3896  mlog[INFO] <<"completed " <<StringUtility::plural(functions.size(), "functions")
3897  <<", " <<StringUtility::plural(insns.size(), "instructions")
3898  <<", " <<StringUtility::plural(basic_blocks.size(), "blocks") <<"\n";
3899 }
3900 
3901 size_t
// Accumulates the extents of every function into <extents> and returns the total number of
// nodes (instructions plus data nodes) contributed.
// NOTE(review): the declarator line (presumably
// "Partitioner::function_extent(FunctionRangeMap *extents)") was elided by the listing
// generator.
3903 {
3904  size_t retval = 0;
3905  for (Functions::iterator fi=functions.begin(); fi!=functions.end(); ++fi)
3906  retval += function_extent(fi->second, extents);
3907  return retval;
3908 }
3909 
3910 size_t
// Computes the extent of a single function: the address ranges of every instruction of every
// basic block plus those of the function's data blocks (including data blocks attached to its
// basic blocks that are not explicitly owned by another function). Optionally fills <extents>
// and reports the lowest/highest addresses touched; returns the node count.
// NOTE(review): the first line of the declarator (presumably
// "Partitioner::function_extent(Function *func,") was elided by the listing generator;
// only the trailing parameter lines below survive.
3912  FunctionRangeMap *extents/*out*/,
3913  rose_addr_t *lo_addr_ptr/*out*/, rose_addr_t *hi_addr_ptr/*out*/)
3914 {
3915  size_t nnodes=0;
3916  rose_addr_t lo_addr=0, hi_addr=0;
3917  std::set<DataBlock*> my_dblocks;
3918 
3919  for (BasicBlocks::iterator bi=func->basic_blocks.begin(); bi!=func->basic_blocks.end(); ++bi) {
3920  /* Find the extents for all the instructions in this basic block. */
3921  BasicBlock *bb = bi->second;
3922  for (InstructionVector::iterator ii=bb->insns.begin(); ii!=bb->insns.end(); ++ii) {
3923  rose_addr_t start = (*ii)->get_address();
3924  size_t size = (*ii)->get_size();
// The first node initializes lo/hi; subsequent nodes widen the range.
3925  if (0==nnodes++) {
3926  lo_addr = start;
3927  hi_addr = start + size;
3928  } else {
3929  lo_addr = std::min(lo_addr, start);
3930  hi_addr = std::max(hi_addr, start + size);
3931  }
3932  if (extents)
3933  extents->insert(Extent(start, size), func);
3934  }
3935 
3936  /* Gather data blocks associated with this basic block, but only if they aren't explicitly assigned to a function. */
3937  for (std::set<DataBlock*>::iterator di=bb->data_blocks.begin(); di!=bb->data_blocks.end(); ++di) {
3938  if (NULL==(*di)->function)
3939  my_dblocks.insert(*di);
3940  }
3941  }
3942 
3943  /* Gather the data blocks associated with this function. */
3944  for (DataBlocks::iterator bi=func->data_blocks.begin(); bi!=func->data_blocks.end(); ++bi)
3945  my_dblocks.insert(bi->second);
3946 
3947  /* Add the extents of all this function's data blocks. */
3948  for (std::set<DataBlock*>::iterator di=my_dblocks.begin(); di!=my_dblocks.end(); ++di) {
3949  DataBlock *dblock = *di;
// Collect the data block's extents into a temporary map keyed by DataBlock, then re-insert
// them into the caller's map keyed by Function.
3950  DataRangeMap data_extents;
3951  DataRangeMap *data_extents_ptr = extents ? &data_extents : NULL;
3952  rose_addr_t lo, hi;
3953  size_t n = datablock_extent(dblock, data_extents_ptr, &lo, &hi);
3954  if (n>0) {
3955  if (0==nnodes) {
3956  lo_addr = lo;
3957  hi_addr = hi;
3958  } else {
3959  lo_addr = std::min(lo_addr, lo);
3960  hi_addr = std::max(hi_addr, hi);
3961  }
3962  nnodes += n;
3963  if (extents) {
3964  for (DataRangeMap::iterator di2=data_extents.begin(); di2!=data_extents.end(); ++di2)
3965  extents->insert(di2->first, func);
3966  }
3967  }
3968  }
3969 
3970  /* Return values */
3971  if (lo_addr_ptr)
3972  *lo_addr_ptr = lo_addr;
3973  if (hi_addr_ptr)
3974  *hi_addr_ptr = hi_addr;
3975  return nnodes;
3976 }
3977 
3978 size_t
// Accumulates into <extents> the extents of all data blocks that are padding (BLK_PADDING)
// and are owned, directly or via a basic block, by some function; returns how many such
// blocks were found.
// NOTE(review): the declarator line (presumably
// "Partitioner::padding_extent(DataRangeMap *extents)") was elided by the listing generator.
3980 {
3981  size_t nblocks = 0;
3982  for (DataBlocks::const_iterator di=data_blocks.begin(); di!=data_blocks.end(); ++di) {
3983  DataBlock *dblock = di->second;
3984  if (0!=(dblock->reason & SgAsmBlock::BLK_PADDING) && NULL!=effective_function(dblock)) {
3985  datablock_extent(dblock, extents);
3986  ++nblocks;
3987  }
3988  }
3989  return nblocks;
3990 }
3991 
3992 size_t
// Accumulates into <extents> the extents of all data blocks that have an effective owning
// function (same loop as padding_extent() above, without the BLK_PADDING filter); returns
// the number of such blocks.
// NOTE(review): the declarator line (presumably
// "Partitioner::datablock_extent(DataRangeMap *extents)") was elided by the listing generator.
3994 {
3995  size_t nblocks = 0;
3996  for (DataBlocks::const_iterator di=data_blocks.begin(); di!=data_blocks.end(); ++di) {
3997  DataBlock *dblock = di->second;
3998  if (NULL!=effective_function(dblock)) {
3999  datablock_extent(dblock, extents);
4000  ++nblocks;
4001  }
4002  }
4003  return nblocks;
4004 }
4005 
4006 size_t
// Computes the extent of a single data block <db> from its SgAsmStaticData nodes, optionally
// inserting each node's range into <extents> and reporting the lowest/highest addresses;
// returns the number of nodes. An empty block yields lo=hi=0.
// NOTE(review): the first line of the declarator (presumably
// "Partitioner::datablock_extent(DataBlock *db,") was elided by the listing generator;
// only the trailing parameter lines below survive.
4008  DataRangeMap *extents/*in,out*/,
4009  rose_addr_t *lo_addr_ptr/*out*/, rose_addr_t *hi_addr_ptr/*out*/)
4010 {
// The first node (handled separately) seeds lo/hi; the loop below starts at index 1.
4011  if (db->nodes.empty()) {
4012  if (lo_addr_ptr)
4013  *lo_addr_ptr = 0;
4014  if (hi_addr_ptr)
4015  *hi_addr_ptr = 0;
4016  } else {
4017  rose_addr_t start = db->nodes.front()->get_address();
4018  size_t size = db->nodes.front()->get_size();
4019  if (lo_addr_ptr)
4020  *lo_addr_ptr = start;
4021  if (hi_addr_ptr)
4022  *hi_addr_ptr = start+size;
4023  if (extents)
4024  extents->insert(Extent(start, size), db);
4025  }
4026 
4027  for (size_t i=1; i<db->nodes.size(); i++) {
4028  SgAsmStaticData *node = db->nodes[i];
4029  rose_addr_t start = node->get_address();
4030  size_t size = node->get_size();
4031 
4032  if (lo_addr_ptr)
4033  *lo_addr_ptr = std::min(*lo_addr_ptr, start);
4034  if (hi_addr_ptr)
4035  *hi_addr_ptr = std::max(*hi_addr_ptr, start+size);
4036  if (extents)
4037  extents->insert(Extent(start, size), db);
4038  }
4039  return db->nodes.size();
4040 }
4041 
4042 /* The function is contiguous if the stuff between its extent doesn't belong to any other function. */
4043 bool
// In strict mode the function must occupy a single unbroken extent; in non-strict mode gaps
// are allowed as long as no other function's instructions or data fall inside [lo,hi).
// NOTE(review): the declarator line (presumably
// "Partitioner::is_contiguous(Function *func, bool strict)") was elided by the listing
// generator.
4045 {
4046  FunctionRangeMap extents;
4047  rose_addr_t lo_addr, hi_addr;
// Trivially contiguous: the function has no nodes at all, or exactly one extent.
4048  if (0==function_extent(func, &extents, &lo_addr, &hi_addr) || 1==extents.size())
4049  return true;
4050  if (strict)
4051  return false;
4052 
4053  /* Check for instructions belonging to other functions. */
// Start the scan max_insn_size bytes below lo_addr so an instruction that merely overlaps
// lo_addr from below is also considered (guarding against underflow with the std::max).
4054  const rose_addr_t max_insn_size = 16; /* FIXME: This is a kludge, but should work 99% of the time [RPM 2011-09-16] */
4055  InstructionMap::iterator ii = insns.lower_bound(std::max(lo_addr,max_insn_size)-max_insn_size);
4056  for (/*void*/; ii!=insns.end() && ii->first<hi_addr; ++ii) {
4057  if (ii->first>=lo_addr) {
4058  BasicBlock *bb = find_bb_containing(ii->first, false);
4059  if (bb && bb->function && bb->function!=func)
4060  return false;
4061  }
4062  }
4063 
4064  /* Check for data belonging to other functions.
4065  * FIXME: we could use a faster method of doing this! [RPM 2011-09-29] */
4066  for (DataBlocks::iterator dbi=data_blocks.begin(); dbi!=data_blocks.end(); ++dbi) {
4067  DataBlock *block = dbi->second;
4068  Function *block_func = effective_function(block);
4069  if (block_func!=NULL && block_func!=func) {
// Standard half-open interval overlap test against [lo_addr, hi_addr).
4070  for (size_t i=0; i<block->nodes.size(); i++) {
4071  if (block->nodes[i]->get_address() < hi_addr &&
4072  block->nodes[i]->get_address() + block->nodes[i]->get_size() > lo_addr)
4073  return false;
4074  }
4075  }
4076  }
4077 
4078  return true;
4079 }
4080 
4081 /* Update CFG edge nodes. */
4082 void
// Two-phase traversal over <ast>: first map each basic block's starting address to its
// SgAsmBlock, then rewrite each block's successor integer values to be relative to the
// target SgAsmBlock when one exists.
// NOTE(review): the declarator line (presumably
// "Partitioner::fixup_cfg_edges(SgNode *ast)") was elided by the listing generator.
4084 {
4085  typedef std::map<rose_addr_t, SgAsmBlock*> BlockMap;
4086 
4087  /* Build a map from address to SgAsmBlock so we can do lookups quickly. */
4088  struct BlockMapBuilder: public SgSimpleProcessing {
4089  BlockMap *block_map;
4090  BlockMapBuilder(SgNode *ast, BlockMap *block_map): block_map(block_map) {
4091  traverse(ast, preorder);
4092  }
4093  void visit(SgNode *node) {
4094  SgAsmBlock *block = isSgAsmBlock(node);
4095  if (block!=NULL) {
// A block is keyed by the address of its first instruction; blocks whose first statement is
// not an instruction (or that are empty) are not entered into the map.
4096  const SgAsmStatementPtrList &stmts = block->get_statementList();
4097  SgAsmInstruction *insn = stmts.empty() ? NULL : isSgAsmInstruction(stmts.front());
4098  if (insn)
4099  block_map->insert(std::make_pair(insn->get_address(), block));
4100  }
4101  }
4102  };
4103 
4104  /* Now add block pointers to the successor targets. */
4105  struct TargetPopulator: public SgSimpleProcessing {
4106  const BlockMap &block_map;
4107  TargetPopulator(SgNode *ast, const BlockMap &block_map): block_map(block_map) {
4108  traverse(ast, preorder);
4109  }
4110  void visit(SgNode *node) {
4111  SgAsmBlock *block = isSgAsmBlock(node);
4112  if (block) {
4113  for (size_t i=0; i<block->get_successors().size(); i++) {
4114  SgAsmIntegerValueExpression *target = block->get_successors()[i];
// Skip successors already made relative to some base node by an earlier fixup.
4115  if (target && NULL==target->get_baseNode()) {
4116  BlockMap::const_iterator bi=block_map.find(target->get_absoluteValue());
4117  if (bi!=block_map.end())
4118  target->makeRelativeTo(bi->second);
4119  }
4120  }
4121  }
4122  }
4123  };
4124 
4125  BlockMap block_map;
4126  BlockMapBuilder(ast, &block_map);
4127  TargetPopulator(ast, block_map);
4128 }
4129 
4130 /* Make pointers relative to what they point into. */
4131 void
// Walks the AST rewriting absolute integer constants inside instructions so they become
// relative to the thing they point at: a function entry, an instruction, a static data node,
// or a non-executable mapped section (in that order of preference).
// NOTE(review): the declarator line (presumably
// "Partitioner::fixup_pointers(SgNode *ast, SgAsmInterpretation *interp)") was elided by the
// listing generator.
4133 {
4134 
4135  struct FixerUpper: public AstPrePostProcessing {
4136  Partitioner *p;
4137  SgAsmInterpretation *interp;
// Non-null only while the traversal is inside an instruction subtree (set in preOrderVisit,
// cleared in postOrderVisit); constants outside instructions are not touched.
4138  SgAsmInstruction *insn;
4139  SgAsmGenericSectionPtrList mapped_sections;
4140  DataRangeMap static_data;
4141 
4142  FixerUpper(Partitioner *p, SgAsmInterpretation *interp)
4143  : p(p), interp(interp), insn(NULL) {}
4144 
4145  void atTraversalStart() {
4146  /* Get a list of all memory-mapped sections in the interpretation. */
4147  if (interp) {
4148  const SgAsmGenericHeaderPtrList &headers = interp->get_headers()->get_headers();
4149  for (SgAsmGenericHeaderPtrList::const_iterator hi=headers.begin(); hi!=headers.end(); ++hi) {
// A mapped header counts as a section itself, in addition to its mapped file sections.
4150  if ((*hi)->is_mapped())
4151  mapped_sections.push_back(*hi);
4152  SgAsmGenericSectionPtrList file_sections = (*hi)->get_mapped_sections();
4153  mapped_sections.insert(mapped_sections.end(), file_sections.begin(), file_sections.end());
4154  }
4155  }
4156 
4157  /* Get a list of all static data blocks */
4158  p->datablock_extent(&static_data);
4159  }
4160 
4161  void preOrderVisit(SgNode *node) {
4162  if (!insn) {
4163  insn = isSgAsmInstruction(node);
4164  } else if (isSgAsmIntegerValueExpression(node)) {
4165  SgAsmIntegerValueExpression *ival = isSgAsmIntegerValueExpression(node);
4166 
4167  /* Don't monkey with constants that are already relative to some other node. These are things that have been
4168  * already fixed up by other methods. */
4169  if (ival->get_baseNode()!=NULL)
4170  return;
4171  rose_addr_t va = ival->get_absoluteValue();
4172 
4173  /* If this constant is a code pointer, then make the pointer relative to the instruction it points to. If that
4174  * instruction is the entry instruction of a function, then point to the function instead. A value is
4175  * considered a code pointer only if it points to an existing instruction that's contained in a basic block,
4176  * and that basic block is part of a valid function. This constraint weeds out pointers to code that was
4177  * disassembled but later discarded. */
4178  Instruction *target_insn = p->find_instruction(va, false/*do not create*/);
4179  if (target_insn && target_insn->bblock && target_insn->bblock->function &&
4180  0==(target_insn->bblock->function->reason & SgAsmFunction::FUNC_LEFTOVERS)) {
4181  SgAsmFunction *target_func = SageInterface::getEnclosingNode<SgAsmFunction>(target_insn->node);
4182  if (target_func && target_func->get_entry_va()==target_insn->get_address()) {
4183  ival->makeRelativeTo(target_func);
4184  } else {
4185  ival->makeRelativeTo(target_insn->node);
4186  }
4187  return;
4188  }
4189 
4190  /* If this constant points into a static data block, then make it relative to that block. */
4191  DataRangeMap::iterator dbi = static_data.find(va);
4192  if (dbi!=static_data.end()) {
4193  DataBlock *dblock = dbi->second.get();
// Find the specific SgAsmStaticData node within the block whose byte range contains va.
4194  for (size_t i=0; i<dblock->nodes.size(); ++i) {
4195  SgAsmStaticData *sd = dblock->nodes[i];
4196  if (va>=sd->get_address() && va<sd->get_address()+sd->get_size()) {
4197  ival->makeRelativeTo(sd);
4198  return;
4199  }
4200  }
4201  }
4202 
4203  /* If this constant points into a non-executable data segment, then make the pointer relative to that data
4204  * segment. */
4205  SgAsmGenericSection *section = SgAsmGenericFile::best_section_by_va(mapped_sections, ival->get_absoluteValue());
4206  if (section && !section->get_mapped_xperm()) {
4207  ival->makeRelativeTo(section);
4208  return;
4209  }
4210  }
4211  }
4212 
4213  void postOrderVisit(SgNode *node) {
4214  if (isSgAsmInstruction(node))
4215  insn = NULL;
4216  }
4217  };
4218 
4219  FixerUpper(this, interp).traverse(ast);
4220 }
4221 
4222 /* Build the global block containing all functions. */
// NOTE(review): the line carrying this function's signature (Partitioner.C:4224)
// was lost when this listing was extracted.  Per the cross-reference index it is
// "SgAsmBlock* Partitioner::build_ast(SgAsmInterpretation *interp)" -- confirm
// against the original source before editing.
4223 SgAsmBlock *
4225 {
4226  /* Build a function to hold all the unassigned instructions. Update documentation if changing the name of
4227  * this generated function! We do this by traversing the instructions and obtaining a basic block for each one. If the
4228  * basic block doesn't belong to a function yet, we add it to this special one. Note that we cannot traverse the list of
4229  * instructions directly because creating the basic block might cause additional instructions to be created.
4230  *
4231  * Do not include the instruction in the leftovers functions if that instruction is completely overlapped by the bytes of
4232  * an existing, non-leftovers function. */
4233  Function *catchall = NULL;
4234  if ((func_heuristics & SgAsmFunction::FUNC_LEFTOVERS)) {
4235 
4236  /* List of all bytes occupied by functions. */
4237  FunctionRangeMap existing;
4238  function_extent(&existing);
4239 
4240  /* Repeatedly add unassigned instructions to the leftovers function. */
4241  bool process_instructions;
4242  do {
4243  process_instructions = false;
     /* Iterate over a copy: find_bb_containing()/append() may create new
      * instructions, which would invalidate iterators into `insns` (see the
      * block comment above). */
4244  InstructionMap insns_copy = insns;
4245  for (InstructionMap::iterator ii=insns_copy.begin(); ii!=insns_copy.end(); ++ii) {
4246  rose_addr_t va = ii->first;
4247  size_t size = ii->second->get_size();
     /* Skip instructions whose bytes are fully covered by an existing function. */
4248  if (!existing.contains(Extent(va, size))) {
4249  BasicBlock *bb = find_bb_containing(ii->first);
4250  ASSERT_not_null(bb);
4251  if (!bb->function) {
     /* Lazily create the single catch-all leftovers function on first need. */
4252  if (!catchall)
4253  catchall = add_function(ii->first, SgAsmFunction::FUNC_LEFTOVERS, "***uncategorized blocks***");
4254  append(catchall, bb, SgAsmBlock::BLK_LEFTOVERS);
     /* Creating blocks may have materialized more instructions; loop again. */
4255  process_instructions = true;
4256  }
4257  }
4258  }
4259  } while (process_instructions);
4260  }
4261 
4262  /* Build the AST */
4263  SgAsmBlock *retval = new SgAsmBlock;
4264  for (Functions::const_iterator fi=functions.begin(); fi!=functions.end(); ++fi) {
4265  SgAsmFunction *func_decl = build_ast(fi->second);
     /* build_ast(Function*) returns NULL for functions with no basic blocks. */
4266  if (!func_decl) continue;
4267  retval->get_statementList().push_back(func_decl);
4268  func_decl->set_parent(retval);
4269  }
4270 
4271  /* Return catchall blocks to the free pool */
     /* clear_basic_blocks()/clear_data_blocks() detach without deleting the
      * blocks (their SgAsm nodes are now owned by the AST built above); only
      * the temporary catch-all Function bookkeeping object is destroyed. */
4272  if (catchall) {
4273  catchall->clear_basic_blocks();
4274  catchall->clear_data_blocks();
4275  functions.erase(catchall->entry_va);
4276  delete catchall;
4277  }
4278 
4279  /* Make pointers relative to the thing into which they point. */
4280  fixup_cfg_edges(retval);
4281  fixup_pointers(retval, interp);
4282  return retval;
4283 }
4284 
4285 /* Build a function node containing all basic blocks and data blocks of the function. */
// NOTE(review): the signature line (Partitioner.C:4287, presumably
// "SgAsmFunction* Partitioner::build_ast(Function *f)") was lost in extraction.
4286 SgAsmFunction *
4288 {
     /* A function with no basic blocks produces no AST node at all. */
4289  if (f->basic_blocks.empty()) {
4290  mlog[TRACE] <<"function F" <<addrToString(f->entry_va) <<" \"" <<f->name <<"\" has no basic blocks!\n";
4291  return NULL;
4292  }
4293 
4294  /* Get the list of basic blocks and data blocks. We'll want them to be added to the function in order of their starting
4295  * address, with basic blocks and data blocks interleaved. */
     /* multimap keeps children sorted by starting address; duplicates allowed. */
4296  typedef std::multimap<rose_addr_t, SgAsmStatement*> NodeMap;
4297  std::set<DataBlock*> my_data_blocks;
4298  NodeMap nodes;
4299  BasicBlock *first_basic_block = NULL;
4300  for (BasicBlocks::iterator bi=f->basic_blocks.begin(); bi!=f->basic_blocks.end(); ++bi) {
4301  BasicBlock *bblock = bi->second;
4302  if (!first_basic_block)
4303  first_basic_block = bblock;
4304 
4305  /* The instructions for this basic block */
4306  SgAsmStatement *node = build_ast(bblock);
4307  nodes.insert(std::make_pair(bblock->address(), node));
4308 
4309  /* The data associated with this basic block */
     /* Only adopt a data block if it effectively belongs to this function. */
4310  for (std::set<DataBlock*>::iterator di=bblock->data_blocks.begin(); di!=bblock->data_blocks.end(); ++di) {
4311  DataBlock *dblock = *di;
4312  Function *dblock_func = effective_function(dblock);
4313  if (dblock_func==f)
4314  my_data_blocks.insert(dblock);
4315  }
4316  }
4317 
     /* Data blocks assigned directly to the function (not via a basic block). */
4318  for (DataBlocks::iterator di=f->data_blocks.begin(); di!=f->data_blocks.end(); ++di) {
4319  DataBlock *dblock = di->second;
4320  ASSERT_require(dblock->function==f);
4321  my_data_blocks.insert(dblock);
4322  }
4323 
4324  for (std::set<DataBlock*>::iterator di=my_data_blocks.begin(); di!=my_data_blocks.end(); ++di) {
4325  DataBlock *dblock = *di;
4326  SgAsmBlock *ast_block = build_ast(dblock);
4327  nodes.insert(std::make_pair(dblock->address(), ast_block));
4328  }
4329 
4330  /* Create the AST function node. */
4331  SgAsmFunction *retval = new SgAsmFunction;
4332  retval->set_entry_va(f->entry_va);
4333  retval->set_name(f->name);
4334  retval->set_address(first_basic_block->address());
4335 
4336  /* Set the SgAsmFunction::can_return property. If we've never indicated that a function might return then assume it
4337  * doesn't return. We're all done with analysis now, so it must not return. */
// NOTE(review): lines Partitioner.C:4338-4339 were lost in extraction -- they
// presumably held the "if (!f->changed_may_return()) { retval->set_may_return(...);"
// branch described by the comment above.  Confirm against the original source.
4340  } else {
4341  retval->set_may_return(f->get_may_return());
4342  }
4343 
     /* Attach children in address order (basic blocks and data interleaved). */
4344  for (NodeMap::iterator ni=nodes.begin(); ni!=nodes.end(); ++ni) {
4345  retval->get_statementList().push_back(ni->second);
4346  ni->second->set_parent(retval);
4347  }
4348 
     /* Mark the function discontiguous if its bytes span more than one range. */
4349  unsigned reasons = f->reason;
4350  if (0==(reasons & SgAsmFunction::FUNC_DISCONT)) {
4351  FunctionRangeMap extent;
4352  function_extent(f, &extent);
4353  if (extent.nranges()>1)
4354  reasons |= SgAsmFunction::FUNC_DISCONT;
4355  }
4356  retval->set_reason(reasons);
4357  return retval;
4358 }
4359 
4360 /* Build a basic block node containing all instructions for the basic block. */
// NOTE(review): the signature line (Partitioner.C:4362, presumably
// "SgAsmBlock* Partitioner::build_ast(BasicBlock *block)") was lost in extraction.
4361 SgAsmBlock *
4363 {
4364  SgAsmBlock *retval = new SgAsmBlock;
4365  retval->set_id(block->address());
4366  retval->set_address(block->address());
4367  retval->set_reason(block->reason);
4368  retval->set_code_likelihood(block->code_likelihood);
4369 
     /* Reparent each instruction's SgAsmInstruction node under this block. */
4370  for (InstructionVector::const_iterator ii=block->insns.begin(); ii!=block->insns.end(); ++ii) {
4371  Instruction *insn = *ii;
4372  retval->get_statementList().push_back(insn->node);
4373  insn->node->set_parent(retval);
4374  }
4375 
4376  /* Cache block successors so other layers don't have to constantly compute them. We fill in the successor
4377  * SgAsmIntegerValueExpression objects with only the address and not pointers to blocks since we don't have all the blocks
4378  * yet. The pointers will be initialized in the no-argument version build_ast() higher up on the stack. */
4379  bool complete;
4380  Disassembler::AddressSet successor_addrs = successors(block, &complete);
4381  for (Disassembler::AddressSet::iterator si=successor_addrs.begin(); si!=successor_addrs.end(); ++si) {
4382  SgAsmIntegerValueExpression *value = SageBuilderAsm::buildValueU64(*si);
4383  value->set_parent(retval);
4384  retval->get_successors().push_back(value);
4385  }
     /* Record whether the successor list is known to be exhaustive. */
4386  retval->set_successors_complete(complete);
4387  return retval;
4388 }
4389 
4390 /* Build a data block node for each data block. */
// NOTE(review): the signature line (Partitioner.C:4392, presumably
// "SgAsmBlock* Partitioner::build_ast(DataBlock *block)") was lost in extraction.
4391 SgAsmBlock *
4393 {
4394  SgAsmBlock *retval = new SgAsmBlock;
4395  retval->set_id(block->address());
4396  retval->set_address(block->address());
4397  retval->set_reason(block->reason);
4398 
     /* Adopt each static-data node; it must not already have a parent. */
4399  for (std::vector<SgAsmStaticData*>::const_iterator ni=block->nodes.begin(); ni!=block->nodes.end(); ++ni) {
4400  retval->get_statementList().push_back(*ni);
4401  ASSERT_require(NULL==(*ni)->get_parent());
4402  (*ni)->set_parent(retval);
4403  }
4404  return retval;
4405 }
4406 
4407 /* Top-level function to run the partitioner in passive mode. */
// NOTE(review): the first signature line (Partitioner.C:4409) was lost in
// extraction.  Per the cross-reference index the full signature is
// "SgAsmBlock* Partitioner::partition(SgAsmInterpretation*, const
// Disassembler::InstructionMap &insns, const MemoryMap::Ptr &mmap)".
4408 SgAsmBlock *
4410  const MemoryMap::Ptr &map)
4411 {
     /* Passive mode: work only from the supplied instructions; never call a
      * disassembler to fetch more. */
4412  disassembler = NULL;
4413  add_instructions(insns);
4414 
     /* Save the current maps so they can be restored on exit (even on error). */
4415  MemoryMap::Ptr old_map = get_map();
4416  MemoryMap::Ptr old_ro_map = ro_map;
4417  if (!map && !old_map)
4418  throw Exception("no memory map");
4419  if (map)
4420  set_map(map);
4421 
4422  SgAsmBlock *retval = NULL;
4423  try {
4424  pre_cfg(interp);
4425  analyze_cfg(SgAsmBlock::BLK_GRAPH1);
4426  post_cfg(interp);
4427  retval = build_ast(interp);
4428  set_map(old_map, old_ro_map);
4429  } catch (...) {
     /* Restore the caller's maps before propagating the exception. */
4430  set_map(old_map, old_ro_map);
4431  throw;
4432  }
4433 
4434  return retval;
4435 }
4436 
4437 /* Top-level function to run the partitioner in active mode. */
// NOTE(review): the signature line (Partitioner.C:4439, presumably
// "SgAsmBlock* Partitioner::partition(SgAsmInterpretation *interp,
// Disassembler *d, const MemoryMap::Ptr &m)") was lost in extraction.
4438 SgAsmBlock *
4440 {
     /* Active mode: a disassembler is required so the CFG analysis can fetch
      * additional instructions on demand. */
4441  ASSERT_not_null(d);
4442  disassembler = d;
4443  ASSERT_not_null(m);
4444  set_map(m);
4445  pre_cfg(interp);
4446  analyze_cfg(SgAsmBlock::BLK_GRAPH1);
4447  post_cfg(interp);
4448  return build_ast(interp);
4449 }
4450 
// Wraps each disassembler-produced SgAsmInstruction in a partitioner
// Instruction object and adds it to this partitioner's instruction map.
// NOTE(review): the signature line (Partitioner.C:4452, presumably
// "Partitioner::add_instructions(const Disassembler::InstructionMap &insns)")
// was lost in extraction.
4451 void
4453 {
4454  for (Disassembler::InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
4455  Instruction *insn = new Instruction(ii->second);
     /* `this->` disambiguates the member map from the `insns` parameter. */
4456  this->insns.insert(std::make_pair(ii->first, insn));
4457  }
4458 }
4459 
// Converts the partitioner's Instruction map back to a plain
// address-to-SgAsmInstruction map for callers outside the partitioner.
// NOTE(review): extraction dropped lines Partitioner.C:4460-4461 (the return
// type and signature, presumably "Disassembler::InstructionMap
// Partitioner::get_instructions() const") and line 4463 (presumably the
// declaration "Disassembler::InstructionMap retval;").  Confirm against the
// original source before editing.
4462 {
4464  for (InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
4465  SgAsmInstruction *insn = ii->second->node;
4466  retval.insert(std::make_pair(ii->first, insn));
4467  }
4468  return retval;
4469 }
4470 
4471 // class method
// Convenience driver: maps the interpretation into memory if necessary,
// obtains and configures a disassembler and partitioner from the enclosing
// SgFile's command-line switches, disassembles starting from the file entry
// points (and optionally function symbols), and installs the resulting global
// block on the interpretation.  No-op if a global block already exists.
// NOTE(review): the signature line (Partitioner.C:4473, presumably
// "Partitioner::disassembleInterpretation(SgAsmInterpretation *interp)") was
// lost in extraction.
4472 void
4474 {
4475  assert(interp!=NULL);
4476  if (interp->get_global_block())
4477  return;
4478 
4479  // Map segments into virtual memory if this hasn't been done yet
4480  MemoryMap::Ptr map = interp->get_map();
4481  if (map==NULL) {
4482  map = MemoryMap::instance();
4483  interp->set_map(map);
     // NOTE(review): the cloned loader is never deleted (leak), and
     // BinaryLoader::lookup() is dereferenced without a NULL check -- flagging
     // for review rather than changing behavior here.
4484  BinaryLoader *loader = BinaryLoader::lookup(interp)->clone();
4485  loader->remap(interp);
4486  }
4487 
4488  // Obtain a disassembler based on the type of file we're disassembling and configure it according to the
4489  // -rose:disassembler_search switches stored in the enclosing SgFile node.
4490  Disassembler *disassembler = Disassembler::lookup(interp);
4491  if (!disassembler)
4492  throw std::runtime_error("no valid disassembler for this interpretation");
4493  disassembler = disassembler->clone(); // so we can change settings without affecting the registry
4494  SgFile *file = SageInterface::getEnclosingNode<SgFile>(interp);
4495  assert(file!=NULL);
4496  disassembler->set_search(file->get_disassemblerSearchHeuristics());
4497 
4498  // Obtain a partitioner to organize instructions into basic blocks and basic blocks into functions.
4499  Partitioner *partitioner = new Partitioner();
4500  partitioner->set_search(file->get_partitionerSearchHeuristics());
4501  partitioner->load_config(file->get_partitionerConfigurationFileName());
4502 
4503  // Decide what to disassemble. Include at least the entry addresses.
4504  Disassembler::AddressSet worklist;
4505  const SgAsmGenericHeaderPtrList &headers = interp->get_headers()->get_headers();
4506  for (SgAsmGenericHeaderPtrList::const_iterator hi=headers.begin(); hi!=headers.end(); ++hi) {
4507  SgRVAList entry_rvalist = (*hi)->get_entry_rvas();
4508  for (size_t i=0; i<entry_rvalist.size(); ++i) {
     // Entry RVAs are relative to the header's base virtual address.
4509  rose_addr_t entry_va = (*hi)->get_base_va() + entry_rvalist[i].get_rva();
4510  worklist.insert(entry_va);
4511  }
4512  if (disassembler->get_search() & Disassembler::SEARCH_FUNCSYMS)
4513  disassembler->search_function_symbols(&worklist, map, *hi);
4514  }
4515 
4516  // Run the disassembler first to populate the instruction map for the partitioner. This will allow the partitioner to do
4517  // pattern recognition to find function boundaries if desired.
4518  Disassembler::BadMap errors;
4519  Disassembler::InstructionMap insns = disassembler->disassembleBuffer(map, worklist, NULL, &errors);
4520  partitioner->add_instructions(insns);
4521 
4522  // Organize the instructions into basic blocks and functions. This will call the disassembler to get any additional
4523  // instructions that are needed (the partitioner does deeper analysis than the disassembler).
4524  if (SgAsmBlock *block = partitioner->partition(interp, disassembler, map)) {
4525  interp->set_global_block(block);
4526  block->set_parent(interp);
4527  }
4528 
     // NOTE(review): these deletes are skipped if partition() throws -- the
     // partitioner and disassembler would leak.  Flagging for review.
4529  delete partitioner;
4530  delete disassembler;
4531 }
4532 
4533 /* FIXME: Deprecated 2010-01-01 */
// Computes, from a flat instruction map, the set of addresses that begin
// basic blocks, mapped to the set of predecessor instruction addresses that
// branch there.
// NOTE(review): extraction dropped lines Partitioner.C:4534-4535 (the return
// type and signature, presumably "Partitioner::BasicBlockStarts
// Partitioner::detectBasicBlocks(const Disassembler::InstructionMap &insns)
// const").  Confirm against the original source.
4536 {
4537  BasicBlockStarts bb_starts;
4538 
4539  /* The first instruction always starts a basic block. */
4540  if (insns.size()>0) {
4541  rose_addr_t insn_va = insns.begin()->first;
4542  bb_starts[insn_va] = BasicBlockStarts::mapped_type();
4543  }
4544 
4545  for (Disassembler::InstructionMap::const_iterator ii=insns.begin(); ii!=insns.end(); ++ii) {
4546  SgAsmInstruction *insn = ii->second;
4547  rose_addr_t insn_va = insn->get_address();
4548  rose_addr_t next_va = insn->get_address() + insn->get_size();
4549 
4550  /* If this instruction is one which terminates a basic block then make the next instruction (if any) the beginning of
4551  * a basic block. However, a sequence like the following should not be a basic block boundary because the CALL is
4552  * acting more like a "PUSH EIP" (we should probably just look at the CALL instruction itself rather than also looking
4553  * for the following POP, but since ROSE doesn't currently apply the relocation tables before disassembling, the CALL
4554  * with a zero offset is quite common. [RPM 2009-08-24] */
4555  if (insn->terminatesBasicBlock()) {
4556  Disassembler::InstructionMap::const_iterator found = insns.find(next_va);
4557  if (found!=insns.end()) {
4558  SgAsmX86Instruction *insn_x86 = isSgAsmX86Instruction(insn);
4559  SgAsmX86Instruction *insn2_x86 = isSgAsmX86Instruction(found->second);
4560  rose_addr_t branch_target_va;
     /* NOTE(review): insn2_x86 is dereferenced below without a NULL check;
      * if the follow-on instruction is not x86 while insn is, this would
      * dereference NULL.  Flagging rather than changing deprecated code. */
4561  if (insn_x86 &&
4562  (insn_x86->get_kind()==x86_call || insn_x86->get_kind()==x86_farcall) &&
4563  insn->getBranchTarget(&branch_target_va) &&
4564  branch_target_va==next_va && insn2_x86->get_kind()==x86_pop) {
4565  /* The CALL is acting more like a "PUSH EIP" and should not end the basic block. */
4566  } else if (bb_starts.find(next_va)==bb_starts.end()) {
4567  bb_starts[next_va] = BasicBlockStarts::mapped_type();
4568  }
4569  }
4570  }
4571 
4572  /* If this instruction has multiple known successors then make each of those successors the beginning of a basic
4573  * block (provided there's an instruction at that address). However, if there's only one successor and it's the
4574  * fall-through address then ignore it. */
4575  bool complete;
4576  Disassembler::AddressSet successors = insn->getSuccessors(&complete);
4577  for (Disassembler::AddressSet::const_iterator si=successors.begin(); si!=successors.end(); ++si) {
4578  rose_addr_t successor_va = *si;
4579  if ((successor_va != next_va || successors.size()>1) && insns.find(successor_va)!=insns.end())
     /* Record this instruction as a known predecessor of the successor block. */
4580  bb_starts[successor_va].insert(insn_va);
4581  }
4582  }
4583  return bb_starts;
4584 }
4585 
4586 /* FIXME: Deprecated 2010-01-01 */
// Returns a map from entry address to FunctionStart (reason flags and name)
// for every function known to the partitioner.
// NOTE(review): extraction dropped lines Partitioner.C:4587-4588 (return type
// and the first part of the signature); only the trailing parameter line
// survives below.  The bb_starts output parameter is never written in the
// visible body -- confirm against the original source whether that is
// intentional.
4589  BasicBlockStarts &bb_starts/*out*/) const
4590 {
4591  FunctionStarts retval;
4592  for (Functions::const_iterator fi=functions.begin(); fi!=functions.end(); ++fi)
4593  retval.insert(std::make_pair(fi->first, FunctionStart(fi->second->reason, fi->second->name)));
4594  return retval;
4595 }
4596 
4597 } // namespace
4598 } // namespace
virtual void scan_intrafunc_bytes(ByteRangeCallbacks &callbacks, const MemoryMap::Ptr &restrict_map=MemoryMap::Ptr())
Scans unassigned ranges of the address space within a function.
Definition: Partitioner.C:1925
List of pointers to file sections.
Instruction * insn_begin
First instruction in range of instructions.
std::vector< SgAsmStaticData * > nodes
The static data nodes belonging to this block; not deleted.
void move_basic_blocks_from(Function *other)
Move all basic blocks from the other function to this one.
Definition: Partitioner.C:626
virtual Function * find_function_containing(rose_addr_t va)
Determines if address is part of a function.
Definition: Partitioner.C:3624
virtual bool operator()(bool enabled, const Args &args)
The actual callback function.
Definition: Partitioner.C:2381
void clear_basic_blocks()
Remove all basic blocks from this function w/out deleting the blocks.
Definition: Partitioner.C:608
virtual void scan_interfunc_bytes(ByteRangeCallbacks &callbacks, const MemoryMap::Ptr &restrict_map=MemoryMap::Ptr())
Scans unassigned ranges of the address space between functions.
Definition: Partitioner.C:1958
virtual Function * effective_function(DataBlock *)
Returns the function to which this data block is effectively assigned.
Definition: Partitioner.C:580
Reason
Reasons why a basic block might have been assigned to a function.
Block is used for padding.
virtual void mark_entry_targets(SgAsmGenericHeader *)
Seeds functions for program entry points.
Definition: Partitioner.C:1295
Expression that adds two operands.
MayReturn
Whether a function returns.
Base class for instruction scanning callbacks.
void set_may_return(SgAsmFunction::MayReturn may_return)
Accessor for the may-return property.
static InstructionMap::const_iterator pattern6(const InstructionMap &insns, InstructionMap::const_iterator first, Disassembler::AddressSet &exclude)
Matches M68k "rts; (trapf)?; lea.l [a7+X], a7".
Definition: Partitioner.C:1676
Contiguous region of a file.
virtual void scan_unassigned_insns(InsnRangeCallbacks &callbacks)
Scans ranges of unassigned instructions.
Definition: Partitioner.C:1817
Sum< T >::Ptr sum()
Factory for value augmenter.
Definition: CommandLine.h:1900
rose::BinaryAnalysis::MemoryMap::Ptr get_map() const
Property: Memory map.
Blocks of function are not contiguous in memory.
virtual SgAsmInstruction * disassembleOne(const MemoryMap::Ptr &map, rose_addr_t start_va, AddressSet *successors=NULL)=0
This is the lowest level disassembly function and is implemented in the architecture-specific subclas...
virtual Disassembler::AddressSet successors(BasicBlock *, bool *complete=NULL)
Returns known successors of a basic block.
Definition: Partitioner.C:437
Represents a function within the Partitioner.
This block created because it seems to belong to the function although CFG traversal did not find it...
iterator find(const typename Range::Value &addr)
Find the range containing specified value.
Definition: rangemap.h:955
rose_addr_t get_mapped_actual_va() const
Property: Virtual address where ROSE maps this section.
virtual void mark_export_entries(SgAsmGenericHeader *)
Seeds functions for PE exports.
Definition: Partitioner.C:1468
rose_addr_t get_mapped_preferred_va() const
Virtual address where section prefers to be mapped.
unsigned long get_sym() const
Property: Sym.
DataBlocks data_blocks
Data blocks belonging to this function.
Base class for references to a machine register.
SymbolType get_type() const
Property: Symbol type.
bool empty() const
Returns true if this RangeMap is empty.
Definition: rangemap.h:1059
virtual Function * find_function(rose_addr_t entry_va)
Looks up a function by address.
Definition: Partitioner.C:1090
This function may return or not, depending on how it is called.
MemoryMap::Ptr restrict_map
Optional memory map supplied to scan_*_bytes() method.
virtual size_t get_size() const
Returns the size of an instruction in bytes.
Instruction basic block.
This class represents a source file for a project (which may contain many source files and/or directo...
size_t get_size() const
Property: Size of static data in bytes.
One entry of an ELF relocation table.
unsigned reason
SgAsmFunction::FunctionReason bit flags.
Function or other code.
Represents the file header of an ELF binary container.
SgAsmExpression * get_lhs() const
Property: Left-hand side operand.
void parse()
Top-level parsing function.
User-defined algorithm.
rose_addr_t get_rva() const
Returns the numeric RVA.
Definition: Rva.C:85
Class for traversing the AST.
Information about each function starting address.
rose_addr_t alias_for
If non-zero then this block is an alias for another block.
virtual void update_analyses(BasicBlock *)
Runs local block analyses if their cached results are invalid and caches the results.
Definition: Partitioner.C:328
static rose_addr_t get_indirection_addr(SgAsmInstruction *, rose_addr_t offset)
Return the virtual address that holds the branch target for an indirect branch.
Definition: Partitioner.C:2651
Block was added by a third pass of CFG analysis.
Represents a basic block within the Partitioner.
void set_reason(unsigned)
Property: Reason that function exists.
virtual bool operator()(bool enabled, const Args &args)
The actual callback function.
Definition: Partitioner.C:2125
virtual bool pops_return_address(rose_addr_t)
Determines if a block pops the stack w/o returning.
Definition: Partitioner.C:505
Base class for machine instructions.
Collection of streams.
Definition: Message.h:1579
iterator lower_bound(const typename Range::Value &addr)
Finds the first range ending above the specified value.
Definition: rangemap.h:973
rose_addr_t get_mapped_size() const
Property: Mapped size.
const SgAsmStatementPtrList & get_statementList() const
Property: Statements that make up a function.
bool is_pe_dynlink_thunk(Instruction *)
Returns true if the basic block is a PE dynamic linking thunk.
Definition: Partitioner.C:3084
virtual bool operator()(bool enabled, const Args &args)
The actual callback function.
Definition: Partitioner.C:2435
X86InstructionKind get_kind() const
Property: Instruction kind.
const rose_rva_t & get_begin_rva() const
Property: Beginning relative virtual address.
virtual void clear()
Reset partitioner to initial conditions by discarding all instructions, basic blocks, functions, and configuration file settings and definitions.
Definition: Partitioner.C:684
void set_successors_complete(bool)
Property: Whether the successors list is complete.
unsigned reason
Reasons this block was created; SgAsmBlock::Reason bit flags.
static InstructionMap::const_iterator pattern4(const InstructionMap &insns, InstructionMap::const_iterator first, Disassembler::AddressSet &exclude)
Matches an x86 "enter xxxx, 0" instruction and creates a function at that address.
Definition: Partitioner.C:1631
BlockAnalysisCache cache
Cached results of local analyses.
virtual void set_search(unsigned heuristics)
Sets the set of heuristics used by the partitioner.
const SgAsmElfRelocEntryPtrList & get_entries() const
Property: List of relocation entries.
virtual size_t padding_extent(DataRangeMap *extent)
Adds padding datablocks to extent.
Definition: Partitioner.C:3979
int64_t get_signedValue() const
Returns the current absolute value (base+offset) as a signed value.
const SgAsmExpressionPtrList & get_operands() const
Property: Ordered list of instruction operands.
virtual void mark_ipd_configuration()
Seeds partitioner with IPD configuration information.
Definition: Partitioner.C:1119
virtual BasicBlock * find_bb_starting(rose_addr_t, bool create=true)
Makes sure the block at the specified address exists.
Definition: Partitioner.C:1053
void set_may_return(MayReturn)
Property: Whether a function could return to its caller.
void set_parent(SgNode *parent)
All nodes in the AST contain a reference to a parent node.
void validate_cache()
Marks the block analysis cache as being up to date.
virtual void scan_unassigned_bytes(ByteRangeCallbacks &callbacks, const MemoryMap::Ptr &restrict_map=MemoryMap::Ptr())
Scans ranges of the address space that have not been assigned to any function.
Definition: Partitioner.C:1900
SgAsmGenericSectionPtrList get_sections_by_name(std::string, char sep=0) const
Returns sections in this header that have the specified name.
std::vector< Instruction * > insns
Non-empty set of instructions composing this basic block, in address order.
const SgAsmGenericSectionPtrList & get_sections() const
Property: List of section pointers.
Disassembler::AddressSet sucs
Address to which this block might branch or fall through.
bool valid_cache() const
Returns true if the block analysis cache is up to date.
Block is being assigned to a FUNC_LEFTOVERS function because it could not be assigned to any other fu...
rose_addr_t get_value() const
Property: Symbol value.
Function is a thunk.
const SgRVAList & get_entry_rvas() const
Property: Code entry point wrt base virtual address.
static InstructionMap::const_iterator pattern5(const InstructionMap &insns, InstructionMap::const_iterator first, Disassembler::AddressSet &exclude)
Matches an M68k "link a6, xxxx" instruction where xxxx is zero or negative and creates a function at ...
Definition: Partitioner.C:1650
std::map< rose_addr_t, FunctionStart > FunctionStarts
Map describing the starting address of each known function.
const FunctionRangeMap & ranges
The range map over which we are iterating.
virtual bool operator()(bool enabled, const Args &args)
The actual callback function.
Definition: Partitioner.C:1991
Represents a synthesized function.
double code_likelihood
Likelihood (0..1) that this is code.
virtual bool operator()(bool enabled, const Args &args)
The actual callback function.
Definition: Partitioner.C:2471
bool is_function_call
True if this block ends with a CALL-like instruction.
static SgAsmX86Instruction * isSgAsmX86Instruction(const Instruction *)
Augments dynamic casts defined from ROSETTA.
Definition: Partitioner.C:66
Represents a region of static data within the address space being disassembled.
virtual DataBlock * find_db_starting(rose_addr_t, size_t size)
Finds (or creates) a data block.
Definition: Partitioner.C:2258
Map< rose_addr_t, SgAsmInstruction * > InstructionMap
The InstructionMap is a mapping from (absolute) virtual address to disassembled instruction.
Definition: Disassembler.h:225
void facilityName(const std::string &s, bool asDefault=true)
Set message or stream property.
SgAsmExpression * get_address() const
Property: Memory address expression.
SgAsmElfEHFrameEntryFDList * get_fd_entries() const
Property: FD entries.
SgAsmGenericHeaderList * get_headers() const
Property: File headers.
It is unknown whether this function ever returns or not.
Instruction * last_insn() const
Returns the last executed (exit) instruction of the block.
Definition: Partitioner.C:591
Detected by Partitioner::FindInterPadFunctions, which looks for unassigned space between two inter-fu...
List of callback functors.
Definition: callbacks.h:80
size_t minimum_nthunks
Minimum number of JMPs necessary to be considered a thunk table.
virtual void find_pe_iat_extents(SgAsmGenericHeader *)
Find the addresses for all PE Import Address Tables.
Definition: Partitioner.C:2826
void set_global_block(SgAsmBlock *)
Property: Global block.
static unsigned parse_switches(const std::string &, unsigned initial_flags)
Parses a string describing the heuristics and returns the bit vector that can be passed to set_search...
Definition: Partitioner.C:149
void set_raw_bytes(const SgUnsignedCharList &)
Property: Raw bytes.
Callback to insert unreachable code fragments.
Exception thrown to defer function block discovery.
virtual bool operator()(bool enabled, const Args &args)
The actual callback function.
Definition: Partitioner.C:2083
void show_properties(std::ostream &) const
Emit function property values.
Definition: Partitioner.C:667
void name_pe_dynlink_thunks(SgAsmInterpretation *interp)
Gives names to PE dynamic linking thunks if possible.
Definition: Partitioner.C:3545
Added by Partitioner::FindData, which attaches unassigned parts of the disassembly address space to t...
bool is_mapped() const
Whether section desires to be mapped to memory.
SgAsmBlock * get_global_block() const
Property: Global block.
Block serves as an explicit starting point for CFG analysis.
void search_function_symbols(AddressSet *worklist, const MemoryMap::Ptr &, SgAsmGenericHeader *)
Adds addresses that correspond to function symbols.
Appears to be a function based on pattern of instructions.
static AddressSegment staticInstance(Value *buffer, Address size, unsigned accessBits=0, const std::string &name="")
Create a segment that points to a static buffer.
double threshold
Threshold for determining whether range is code.
void initAndRegister(Facility *mlog, const std::string &name)
Initialize and register a logging facility.
Callback to add unassigned addresses to a function.
bool apply(bool b, const ArgumentType &args, Direction dir=FORWARD) const
Invokes all functors in the callback list.
Definition: callbacks.h:258
Range::Value min() const
Returns the minimum value in an extent map.
Definition: rangemap.h:1081
ELF error handling frame entry frame description entry.
virtual CodeCriteria * new_code_criteria()
Create a new criteria object.
Represents an ELF relocation section.
rose_addr_t address() const
Returns the first address of a data block.
Definition: Partitioner.C:570
Block is an entry point for the function.
bool require_noninterleaved
If set, then preceding function cannot be interleaved.
virtual Instruction * find_instruction(rose_addr_t, bool create=true)
Finds an instruction at the specified address.
Definition: Partitioner.C:969
std::string plural(T n, const std::string &plural_word, const std::string &singular_word="")
Helpful way to print singular or plural words.
virtual rose_addr_t call_target(BasicBlock *)
Returns call target if block could be a function call.
Definition: Partitioner.C:490
virtual bool operator()(bool enabled, const Args &args)
The actual callback function.
Definition: Partitioner.C:2591
List of ELF relocation entries.
virtual SgAsmBlock * build_ast(SgAsmInterpretation *interp=NULL)
Builds the AST describing all the functions.
Definition: Partitioner.C:4224
virtual void scan_interfunc_insns(InsnRangeCallbacks &callbacks)
Scans the instructions between functions.
Definition: Partitioner.C:1848
virtual void mark_func_symbols(SgAsmGenericHeader *)
Seeds functions that correspond to function symbols.
Definition: Partitioner.C:1418
void set_name(const std::string &)
Property: Name.
Disassembler::AddressSet discover_jump_table(BasicBlock *bb, bool do_create=true, ExtentMap *table_addresses=NULL)
Looks for a jump table.
Definition: Partitioner.C:249
unsigned reason
Reasons this block was created; SgAsmBlock::Reason bit flags.
void erase(const Range &erase_range)
Erases the specified range from this map.
Definition: rangemap.h:1122
bool sucs_specified
True if IPD file specifies successors for this block.
SgAsmGenericSection * get_bound() const
Property: Associated file section.
static Interval baseSize(rose_addr_t lo, rose_addr_t size)
Construct an interval from one endpoint and a size.
Definition: Interval.h:161
This function is known to never return.
size_t maximum_nrep
Maximum number of matched patterns to be considered padding.
virtual void mark_elf_plt_entries(SgAsmGenericHeader *)
Seeds functions that are dynamically linked via .plt.
Definition: Partitioner.C:1338
iterator insert(Range new_range, Value new_value=Value(), bool make_hole=true)
Insert a range/value pair into the map.
Definition: rangemap.h:1193
static std::string reason_key(const std::string &prefix="")
Multi-line description of function reason keys from unparser.
Definition: SgAsmFunction.C:15
List of ELF EH frame CI entries.
bool changed_may_return() const
Accessor for the may-return property.
Represents an ELF EH frame section.
SgAsmExpression * get_rhs() const
Property: Right-hand side operand.
void clear_data_blocks()
Remove all data blocks from this basic block.
Definition: Partitioner.C:599
virtual void discover_first_block(Function *)
Adds first basic block to empty function before we start discovering blocks of any other functions...
Definition: Partitioner.C:2906
virtual SgAsmBlock * partition(SgAsmInterpretation *, const Disassembler::InstructionMap &, const MemoryMap::Ptr &mmap=MemoryMap::Ptr())
Top-level function to run the partitioner on some instructions and build an AST.
Definition: Partitioner.C:4409
const SgAsmElfEHFrameEntryCIPtrList & get_entries() const
Property: List of pointers to ELF EH frame CI entries.
const RegisterDescriptor * lookup(const std::string &name) const
Returns a descriptor for a given register name.
Definition: Registers.C:132
MemoryMap::Ptr ro_map
The read-only parts of 'map', used for insn semantics mem reads.
virtual rose_addr_t canonic_block(rose_addr_t)
Follow alias links in basic blocks.
Definition: Partitioner.C:1076
Implied by inter-basicblock branching.
Reference to memory locations.
rose_addr_t alias_for
If non-zero, then this block is an alias for the specified block.
boost::shared_ptr< class Dispatcher > DispatcherPtr
Shared-ownership pointer to a semantics instruction dispatcher.
Range::Value size() const
Returns the number of values represented by this RangeMap.
Definition: rangemap.h:1073
const SgAsmElfSymbolPtrList & get_symbols() const
Property: Symbol list.
SgAsmOperandList * get_operandList() const
Property: AST node that holds all operands.
void set_entry_va(rose_addr_t)
Property: Primary entry address.
Exception thrown when something cannot be parsed.
virtual void mark_eh_frames(SgAsmGenericHeader *)
Seeds functions for error handling frames.
Definition: Partitioner.C:1315
static rose_addr_t value_of(SgAsmValueExpression *)
Returns the integer value of a value expression since there's no virtual method for doing this...
Definition: Partitioner.C:2638
size_t ninsns
Number of instructions expected in the basic block.
Block was added by the main CFG analysis.
size_t nranges() const
Returns the number of ranges in the range map.
Definition: rangemap.h:1065
std::set< rose_addr_t > AddressSet
An AddressSet contains virtual addresses (alternatively, relative virtual addresses) for such things ...
Definition: Disassembler.h:222
void promote_may_return(SgAsmFunction::MayReturn new_value)
Increase knowledge about the returnability of this function.
Definition: Partitioner.C:651
static Sawyer::Message::Facility mlog
Logging facility for partitioners.
void set_id(rose_addr_t)
Property: Identification.
rose_addr_t get_r_offset() const
Property: Offset.
virtual Sawyer::Optional< rose_addr_t > next_unused_address(const MemoryMap::Ptr &map, rose_addr_t start_va)
Return the next unused address.
Definition: Partitioner.C:3648
Base class for container file headers.
virtual void remap(SgAsmInterpretation *interp)
Maps sections of the interpretation into the virtual address space.
void erase_ranges(const OtherMap &other)
Erase ranges from this map.
Definition: rangemap.h:1182
void set_code_likelihood(double)
Property: Likelihood that this block represents real instructions.
An entry point specified in the file header.
void commit_may_return()
Accessor for the may-return property.
SgUnsignedCharList sucs_program
i386 code to simulate to find successors.
Expression representing a machine register.
static double progress_interval
Minimum interval between progress reports in seconds.
SgAsmElfEHFrameEntryCIList * get_ci_entries() const
Property: CI entries.
unsigned get_search() const
Returns a bit mask of SearchHeuristic bits representing which heuristics would be used when searching...
Definition: Disassembler.h:503
Base class for integer values.
Instruction * insn_end
First subsequent instruction not in range, or null.
static SgAsmGenericSection * best_section_by_va(const SgAsmGenericSectionPtrList &sections, rose_addr_t va)
Definition for "best".
Definition: GenericFile.C:520
virtual void scan_intrafunc_insns(InsnRangeCallbacks &callbacks)
Scans the unassigned instructions within a function.
Definition: Partitioner.C:1875
virtual void fixup_pointers(SgNode *ast, SgAsmInterpretation *interp=NULL)
Updates pointers inside instructions.
Definition: Partitioner.C:4132
ELF file section containing symbols.
std::string name
Name of function if known.
Function contains basic blocks that were inserted by searching the address space between the blocks d...
virtual bool is_thunk(Function *)
Determines if function is a thunk.
Definition: Partitioner.C:2557
Functions dynamically linked.
This class represents the base class for all IR nodes within Sage III.
Definition: Cxx_Grammar.h:8212
void set_progress_reporting(double min_interval)
Set progress reporting properties.
Definition: Partitioner.C:98
Statistics computed over a region of an address space.
virtual void analyze_cfg(SgAsmBlock::Reason)
Detect functions by analyzing the CFG.
Definition: Partitioner.C:3109
virtual void add_instructions(const Disassembler::InstructionMap &insns)
Adds additional instructions to be processed.
Definition: Partitioner.C:4452
virtual RegionStats * get_aggregate_variance() const
Accessors for cached aggregate statistics.
std::vector< SgUnsignedCharList > patterns
Pattern of padding, repeated at least minimum_size times.
iterator find_prior(const typename Range::Value &addr)
Finds the last range starting at or below the specified value.
Definition: rangemap.h:984
Address mentioned in the ELF .eh_frame section.
bool sucs_complete
True if locally computed successors are fully known.
A single imported object.
static void disassembleInterpretation(SgAsmInterpretation *)
Called by frontend() to disassemble an entire interpretation.
Definition: Partitioner.C:4473
static BinaryLoader * lookup(SgAsmGenericHeader *)
Finds a suitable loader.
virtual bool detach_thunk(Function *)
Splits one thunk off the start of a function if possible.
Definition: Partitioner.C:3293
InstructionMap disassembleBuffer(const MemoryMap::Ptr &map, size_t start_va, AddressSet *successors=NULL, BadMap *bad=NULL)
Disassembles instructions from the content buffer beginning at the specified virtual address and incl...
virtual void truncate(BasicBlock *, rose_addr_t)
Reduces the size of a basic block by truncating its list of instructions.
Definition: Partitioner.C:759
virtual void append(BasicBlock *, Instruction *)
Add an instruction to a basic block.
Definition: Partitioner.C:783
iterator begin()
First-item iterator.
Definition: rangemap.h:906
const SgAsmIntegerValuePtrList & get_successors() const
Property: Control flow successors.
Address of a function symbol in a symbol table.
Represents a single ELF symbol.
virtual bool operator()(bool enabled, const Args &args)
The actual callback function.
Definition: Partitioner.C:2296
void makeRelativeTo(SgNode *baseNode)
Makes the value of this integer relative to some other addressable node.
Target of call, possibly not in the CFG (see Partitioner::mark_call_insns).
Extent range
Range of address space being processed by the callback.
Functions functions
All known functions, pending and complete.
static MemoryCellListPtr promote(const BaseSemantics::MemoryStatePtr &m)
Promote a base memory state pointer to a BaseSemantics::MemoryCellList pointer.
Default value for Partitioner class.
Value size() const
Returns the number of values represented by the range.
Definition: rangemap.h:147
Map< rose_addr_t, Exception > BadMap
The BadMap is a mapping from (absolute) virtual address to information about a failed disassembly att...
Definition: Disassembler.h:229
Disassembler::AddressSet sucs
Locally computed cached successors.
boost::shared_ptr< class MemoryCellList > MemoryCellListPtr
Shared-ownership pointer to a list-based memory state.
rose_addr_t get_base_va() const
Property: Base virtual address used by all relative virtual addresses.
FunctionStarts detectFunctions(SgAsmInterpretation *, const Disassembler::InstructionMap &insns, BasicBlockStarts &bb_starts) const ROSE_DEPRECATED("replaced by pre_cfg")
Returns a list of the currently defined functions.
Definition: Partitioner.C:4588
Represents static data in an executable.
virtual void scan_contiguous_insns(InstructionMap insns, InsnRangeCallbacks &cblist, Instruction *insn_prev, Instruction *insn_end)
Scans contiguous sequences of instructions.
Definition: Partitioner.C:1796
SgAsmFunction::MayReturn get_may_return() const
Accessor for the may-return property.
ValueType value() const
Value for the progress bar.
Definition: ProgressBar.h:163
rose_addr_t address() const
Returns the first address of a basic block.
Definition: Partitioner.C:561
Represents one Intel x86 machine instruction.
virtual void pre_cfg(SgAsmInterpretation *interp=NULL)
Detects functions before analyzing the CFG.
Definition: Partitioner.C:2837
void set_reason(unsigned)
Property: Reasons this block was created.
static InstructionMap::const_iterator pattern1(const InstructionMap &insns, InstructionMap::const_iterator first, Disassembler::AddressSet &exclude)
Looks for stack frame setup.
Definition: Partitioner.C:1485
BasicBlocks basic_blocks
All known basic blocks.
Defines registers available for a particular architecture.
Definition: Registers.h:30
Target of a function call instruction sequence in the CFG.
std::set< DataBlock * > data_blocks
Data blocks owned by this basic block.
static AddressSegment anonymousInstance(Address size, unsigned accessBits=0, const std::string &name="")
Create a segment with no backing store.
This function returns each time it is called.
Disassembler::AddressSet heads
CFG heads, excluding func entry: addresses of additional blocks.
bool empty() const
Returns true if this range is empty.
Definition: rangemap.h:249
virtual void load_config(const std::string &filename)
Loads the specified configuration file.
Definition: Partitioner.C:727
bool contains(Range need) const
Determines if this range map contains all of the specified range.
Definition: rangemap.h:1270
Base class for expressions.
virtual bool is_function_call(BasicBlock *, rose_addr_t *)
Returns true if basic block appears to end with a function call.
Definition: Partitioner.C:417
Table of code addresses used by indirect branches.
virtual Function * find_function_containing_code(rose_addr_t va)
Determines if address is used in a function.
Definition: Partitioner.C:3601
void last(const Value &last)
Accessor for the last value of a range.
Definition: rangemap.h:127
ROSE_DLL_API void deleteAST(SgNode *node)
Function to delete AST subtree's nodes only, users must take care of any dangling pointers...
size_t nfound
Incremented for each thunk found and added.
ROSE_UTIL_API std::string addrToString(uint64_t value, size_t nbits=0)
Convert a virtual address to a string.
std::map< rose_addr_t, Disassembler::AddressSet > BasicBlockStarts
Map of basic block starting addresses.
virtual void name_plt_entries(SgAsmGenericHeader *)
Gives names to dynamic linking trampolines for ELF.
Definition: Partitioner.C:2689
iterator end()
End-item iterator.
Definition: rangemap.h:918
virtual BasicBlock * find_bb_containing(rose_addr_t, bool create=true)
Finds a basic block containing the specified instruction address.
Definition: Partitioner.C:1004
void move_data_blocks_from(Function *other)
Move all data blocks from the other function to this one.
Definition: Partitioner.C:640
virtual void mark_func_patterns()
Seeds functions according to byte and instruction patterns.
Definition: Partitioner.C:1712
rose_addr_t get_entry_va() const
Property: Primary entry address.
virtual void post_cfg(SgAsmInterpretation *interp=NULL)
Detects functions after analyzing the CFG.
Definition: Partitioner.C:3759
BasicBlocks basic_blocks
Basic blocks belonging to this function.
virtual BinaryLoader * clone() const
Creates a new copy of a loader.
Definition: BinaryLoader.h:143
static Value maximum()
Return the maximum possible value represented by this range.
Definition: rangemap.h:417
bool get_mapped_xperm() const
Property: Whether mapped with execute permission.
const SgAsmStatementPtrList & get_statementList() const
Property: Statements of which this block is composed.
virtual ExtentMap unused_addresses()
Returns addresses that are not part of any function.
Definition: Partitioner.C:3632
SgNode * get_baseNode() const
Property: Base node associated with an integer.
std::string stringifySgAsmBlockReason(long int n, const char *strip=NULL, bool canonic=false)
Converts an enum of type SgAsmBlock::Reason to a string.
Definition: stringify.C:24397
Suffix suffix()
Property: suffix.
Definition: ProgressBar.h:194
bool pending
True if we need to (re)discover the basic blocks.
Export file section.
virtual RegionStats * aggregate_statistics(bool do_variance=true)
Computes aggregate statistics over all known functions.
void resize(const Value &new_size)
Sets the range size by adjusting the maximum value.
Definition: rangemap.h:162
Block was added by a second pass of CFG analysis.
Base class for values.
Added by Partitioner::FindPostFunctionInsns, which adds unassigned instructions to the immediately pr...
BasicBlock * bblock
Block to which this instruction belongs, if any.
boost::shared_ptr< class MemoryCell > MemoryCellPtr
Shared-ownership pointer to a semantic memory cell.
Definition: MemoryCell.h:15
size_t minimum_nrep
Minimum number of matched patterns to be considered padding.
virtual void merge_function_fragments()
Merge function fragments.
Definition: Partitioner.C:3415
Windows PE file header.
Holds an instruction along with some other information about the instruction.
Base class for loading a static or dynamic object.
Definition: BinaryLoader.h:48
Progress bars.
Definition: ProgressBar.h:113
std::string stringifySgAsmFunctionMayReturn(long int n, const char *strip=NULL, bool canonic=false)
Converts an enum of type SgAsmFunction::MayReturn to a string.
Definition: stringify.C:25234
SymbolDefState get_def_state() const
Property: Definition state.
SymbolBinding get_binding() const
Property: Symbol binding.
rose_addr_t get_mapped_preferred_rva() const
Property: Relative virtual address where section prefers to be mapped.
bool require_intrafunction
If set, range must be inside the preceding function.
virtual void mark_call_insns()
Naive marking of CALL instruction targets as functions.
Definition: Partitioner.C:1781
const RegisterDescriptor & get_descriptor() const
Property: Descriptor for accessed register.
static const RegisterDictionary * dictionary_amd64()
Amd64 registers.
Definition: Registers.C:668
Disassemble beginning at every address that corresponds to the value of a function symbol...
Definition: Disassembler.h:203
virtual void fixup_cfg_edges(SgNode *ast)
Update control flow graph edge nodes.
Definition: Partitioner.C:4083
void first(const Value &first)
Accessor for the first value of a range.
Definition: rangemap.h:103
SgAsmInstruction * node
The underlying instruction node for an AST.
virtual bool is_contiguous(Function *, bool strict=false)
Returns an indication of whether a function is contiguous.
Definition: Partitioner.C:4044
virtual void name_import_entries(SgAsmGenericHeader *)
Gives names to dynamic linking thunks for PE.
Definition: Partitioner.C:2768
Function * function
Function to which this data block is explicitly assigned, or null.
static Ptr instance()
Construct an empty memory map.
Definition: MemoryMap.h:233
virtual size_t datablock_extent(DataBlock *, DataRangeMap *extents=NULL, rose_addr_t *lo_addr=NULL, rose_addr_t *hi_addr=NULL)
Returns information about the datablock addresses.
Definition: Partitioner.C:4007
double now()
Current system time in seconds.
SgAsmElfSymbolList * get_symbols() const
Property: Symbols.
A container of ranges, somewhat like a set.
Definition: rangemap.h:853
bool validate_targets
If true, then successors must point to instructions.
Created to represent NOP padding between other functions.
virtual bool terminatesBasicBlock()
Determines if this instruction normally terminates a basic block.
Partitions instructions into basic blocks and functions.
virtual std::set< rose_addr_t > getSuccessors(bool *complete)
Control flow successors for a single instruction.
std::string reason_str(bool pad) const
Returns a very short string describing the reason mask.
Definition: SgAsmFunction.C:30
Main namespace for the ROSE library.
SgAsmGenericSection * get_section_by_name(const std::string &, char sep=0, size_t *nfound=0) const
Returns single section in this header that has the specified name.
Converts text to messages.
Definition: Message.h:1394
virtual Function * find_function_containing_data(rose_addr_t va)
Determines if address is part of a function's data.
Definition: Partitioner.C:3611
virtual size_t detach_thunks()
Splits thunks off of the start of functions.
Definition: Partitioner.C:3281
ResultMap invert() const
Create an inverse of a range map.
Definition: rangemap.h:1363
Represents no value.
Definition: Optional.h:32
M68kInstructionKind get_kind() const
Property: Instruction kind.
bool empty() const
Predicate to test whether the list is empty.
Definition: callbacks.h:112
static SgAsmInstruction * isSgAsmInstruction(const Instruction *)
Augments dynamic casts defined from ROSETTA.
Definition: Partitioner.C:52
Function * function
Function to which this basic block is assigned, or null.
virtual RegionStats * region_statistics(const ExtentMap &)
Computes various statistics over part of an address space.
BasicBlock * entry_basic_block() const
Return the pointer to the basic block at the entry address.
Definition: Partitioner.C:676
uint64_t get_absoluteValue(size_t nbits=0) const
Returns the current absolute value zero filled to 64 bits.
virtual Disassembler * clone() const =0
Creates a new copy of a disassembler.
virtual void adjust_padding()
Adjusts ownership of padding data blocks.
Definition: Partitioner.C:3391