ROSE  0.11.2.0
ParallelPartitioner.h
1 #ifndef ROSE_Partitioner2_ParallelPartitioner_H
2 #define ROSE_Partitioner2_ParallelPartitioner_H
3 #include <rosePublicConfig.h>
4 #if defined(ROSE_BUILD_BINARY_ANALYSIS_SUPPORT) && __cplusplus >= 201103L
5 
6 #include <BinaryInstructionCache.h>
7 #include <Progress.h>
8 #include <queue>
9 #include <Partitioner2/BasicTypes.h>
10 #include <Partitioner2/Semantics.h>
11 #include <Sawyer/Attribute.h>
12 #include <Sawyer/Graph.h>
13 #include <Sawyer/IntervalSetMap.h>
14 #include <Sawyer/Message.h>
15 #include <Sawyer/SharedObject.h>
16 
17 namespace Rose {
18 namespace BinaryAnalysis {
19 namespace Partitioner2 {
20 namespace Experimental {
21 namespace ParallelPartitioner {
22 
23 class Partitioner;
24 
26 // Settings and configuration, including miscellaneous basic types.
28 
30 enum class Accuracy {
31  LOW,
32  HIGH,
33  DEFAULT,
34 };
35 
37 using FunctionReasons = BitFlags<SgAsmFunction::FunctionReason>;
38 
40 struct Settings {
41  size_t maxAnalysisBBlockSize = 20;
42  Accuracy successorAccuracy = Accuracy::LOW;
43  Accuracy functionCallDetectionAccuracy = Accuracy::LOW;
44  size_t minHoleSearch = 8;
45  SemanticMemoryParadigm semanticMemoryParadigm = MAP_BASED_MEMORY;
46 };
47 
49 // Diagnostics
51 
53 extern Sawyer::Message::Facility mlog;
54 
55 // Used internally. See Rose::Diagnostics::initialize
56 void initDiagnostics();
57 
59 // Cached information
61 
63 template<class V, class K>
64 class CachedItem {
65 public:
66  using Value = V;
67  using Key = K;
68  using OptionalValue = Sawyer::Optional<Value>;
69 
70 private:
71  mutable SAWYER_THREAD_TRAITS::Mutex mutex_; // protects all following data members
73  OptionalValue value_;
74 
75 public:
79  void set(const Key &key, const Value &value) {
80  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
81  key_ = key;
82  value_ = value;
83  }
84 
91  bool maybeSet(const Key &key, const Value &value) {
92  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
93  if (key_ && *key_ == key) {
94  return false;
95  } else {
96  key_ = key;
97  value_ = value;
98  return true;
99  }
100  }
101 
105  OptionalValue get(const Key &key) const {
106  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
107  if (key_ && *key_ == key)
108  return value_;
109  return {};
110  }
111 
115  OptionalValue take(const Key &key) {
116  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
117  if (key_ && *key_ == key) {
118  auto retval = value_;
119  key_.reset();
120  value_.reset();
121  return retval;
122  }
123  return {};
124  }
125 
129  void reset() {
130  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
131  key_.reset();
132  value_.reset();
133  }
134 
138  bool exists(const Key &key) const {
139  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
140  return key_ && *key_ == key;
141  }
142 };
143 
144 // Same as above, but no keys
145 template<class V>
146 class CachedItem<V, void> {
147 public:
148  using Value = V;
149  using Key = void;
150  using OptionalValue = Sawyer::Optional<Value>;
151 
152 private:
153  mutable SAWYER_THREAD_TRAITS::Mutex mutex_;
154  OptionalValue value_;
155 
156 public:
157  void set(const Value &value) {
158  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
159  value_ = value;
160  }
161 
162  bool maybeSet(const Value &value) {
163  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
164  if (value_) {
165  return false;
166  } else {
167  value_ = value;
168  return true;
169  }
170  }
171 
172  OptionalValue get() const {
173  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
174  return value_;
175  }
176 
177  OptionalValue take() const {
178  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
179  auto retval = value_;
180  value_ = Sawyer::Nothing();
181  return retval;
182  }
183 
184  void reset() {
185  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
186  value_.reset();
187  }
188 
189  bool exists() const {
190  SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
191  return value_;
192  }
193 };
194 
199 template<class Cache>
200 class Borrowed {
201  Cache &cache_;
202  typename Cache::Key key_;
203  typename Cache::OptionalValue value_;
204  bool canceled_;
205 
206 public:
208  Borrowed(Cache &cache, typename Cache::Key key)
209  : cache_(cache), key_(key), value_(cache_.take(key)), canceled_(false) {}
210 
212  Borrowed(Cache &cache, typename Cache::Key key, const typename Cache::Value &value)
213  : cache_(cache), key_(key), value_(value), canceled_(false) {}
214 
215  ~Borrowed() {
216  rtn();
217  }
218 
225  void cancel() {
226  canceled_ = true;
227  }
228 
232  void rtn() {
233  if (!canceled_ && value_ && !cache_.maybeSet(key_, *value_)) {
234  //mlog[Diagnostics::WARN] <<"cache borrow contention detected: key=" <<StringUtility::addrToString(key_) <<"\n";
235  }
236  value_.reset();
237  }
238 
244  const typename Cache::OptionalValue& get() const {
245  return value_;
246  }
247 };
248 
252 template<class Cache>
253 Borrowed<Cache> borrow(Cache &cache, typename Cache::Key key) {
254  return Borrowed<Cache>(cache, key);
255 }
256 
261 template<class Cache>
262 Borrowed<Cache> borrow(Cache &cache, typename Cache::Key key, const typename Cache::Value &value) {
263  return Borrowed<Cache>(cache, key, value);
264 }
265 
267 // Information stored at each vertex of the CFG.
269 
271 class InsnInfo: boost::noncopyable {
272 public:
273  using Ptr = std::shared_ptr<InsnInfo>;
274  using List = std::vector<Ptr>;
283  struct Cached: boost::noncopyable {
284  CachedItem<bool, uint64_t> isFunctionCall;
285  CachedItem<Semantics::RiscOperatorsPtr, uint64_t> semantics;
286  };
287 
288 private:
289  InstructionPtr ast_; // possibly evicted AST rooted at an SgAsmInstruction node.
290  const rose_addr_t va_; // starting address accessible even for evicted instruction
291  Cached cached_; // cached items computed elsewhere
292 
293  mutable SAWYER_THREAD_TRAITS::Mutex mutex_; // protects all following data members
294  size_t size_; // instruction size in bytes even for evicted instruction
295  bool wasDecoded_; // has any DecodeInstruction work completed?
296  FunctionReasons functionReasons_; // is this insn the entry point of a function?
297 
298  // Cached items associated with basic blocks. The key is the hash of the basic block instruction addresses.
299 
300 public:
304  explicit InsnInfo(const InstructionPtr &insn)
305  : ast_(insn), va_(insn->get_address()), size_(insn->get_size()), wasDecoded_(false) {
306  ASSERT_not_null(insn);
307  ASSERT_require(size_ > 0);
308  }
309 
311  explicit InsnInfo(rose_addr_t va)
312  : va_(va), size_(0), wasDecoded_(false) {}
313 
321  rose_addr_t address() const {
322  return va_;
323  }
324 
332  Sawyer::Optional<size_t> size() const;
333 
342 
351  FunctionReasons functionReasons() const;
352  void functionReasons(FunctionReasons);
353  void insertFunctionReasons(FunctionReasons);
354  void eraseFunctionReasons(FunctionReasons);
362  const Cached& cached() const { return cached_; }
363  Cached& cached() { return cached_; }
379  bool wasDecoded() const;
380  void setDecoded();
390  InstructionPtr ast() const;
391 
397  static bool addressOrder(const Ptr &a, const Ptr &b);
398 
404  static uint64_t hash(const List&);
405 
406 private:
407  friend class Partitioner;
408 
409  // Set the underlying instruction.
410  //
411  // This sets the underlying instruction if it isn't set already, and returns the underlying instruction. If the return value
412  // is the same pointer as the argument then the instruction was set, otherwise it already had some other value. Also, the
413  // @ref wasDecoded flag is set. The instruction can be a valid instruction or a null pointer.
414  //
415  // Thread safety: This function is thread safe.
416  InstructionPtr setAstMaybe(const InstructionPtr&);
417 
418 };
419 
421 // CFG edge information
423 
424 class CfgEdge {
425 public:
426  using EdgeTypes = BitFlags<EdgeType>;
427  EdgeTypes types_;
428 
429 public:
430  CfgEdge()
431  : types_(E_NORMAL) {}
432 
433  /*implicit*/ CfgEdge(EdgeType type)
434  : types_(type) {}
435 
436  void merge(const CfgEdge &other) {
437  types_.set(other.types_);
438  }
439 
440  EdgeTypes types() const {
441  return types_;
442  }
443 
444  void types(EdgeTypes x) {
445  types_.set(x);
446  }
447 
448  friend std::ostream& operator<<(std::ostream&, const CfgEdge&);
449 };
450 
452 // Control flow graph
454 
455 // Lookup keys used internally for CFG vertices.
456 class InsnInfoKey {
457  rose_addr_t va_;
458 public:
459  /*implicit*/ InsnInfoKey(const InsnInfo::Ptr &insnInfo)
460  : va_(insnInfo->address()) {}
461  /*impilcit*/ InsnInfoKey(rose_addr_t va)
462  : va_(va) {}
463 
464  // FIXME: we should be using an unordered_map for the index.
465  bool operator<(const InsnInfoKey &other) const {
466  return va_ < other.va_;
467  }
468 };
469 
475 using InsnCfg = Sawyer::Container::Graph<std::shared_ptr<InsnInfo>, CfgEdge, InsnInfoKey>;
476 
478 // WorkItem -- base class for individual parallel units of work
480 
489 class WorkItem {
490 public:
492  enum class Priority {
493  DiscoverInstruction = 0,
494  NextUnusedVa = -100
495  };
496 
497 private:
498  Partitioner &partitioner_; // partitioner that owns this work item
499  Priority priority_; // primary sort key
500  uint64_t sort_; // secondary sort key
501 
502 protected:
510  WorkItem(Partitioner &partitioner, Priority priority, uint64_t sort)
511  : partitioner_(partitioner), priority_(priority), sort_(sort) {}
512 
513  virtual ~WorkItem() {}
514 
516  Partitioner& partitioner() const {
517  return partitioner_;
518  }
519 
521  Priority priority() const {
522  return priority_;
523  }
524 
525 public:
529  bool operator<(const WorkItem&) const;
530 
531 public:
535  virtual void run() = 0;
536 
538  virtual std::string title() const = 0;
539 };
540 
542 // DecodeInstruction -- a unit of work
544 
550 class DecodeInstruction: public WorkItem {
551 protected:
552  rose_addr_t insnVa; // starting address of the instruction
553 
554 public:
555  DecodeInstruction(Partitioner &partitioner, rose_addr_t va)
556  : WorkItem(partitioner, WorkItem::Priority::DiscoverInstruction,
557 #if 0
558  // Process instructions in order of their address. This might be better for caching other data because once
559  // we evict it we might never need it again. On the other hand, it seems to increase contention for things
560  // like basic block semantics since it's more likely that two threads will be working near each other in the
561  // specimen virtual memory.
562  va
563 #else
564  // Process instructions in random order. This seems to reduce contention for things like basic block semantics and
565  // improve performance by about 10% at this early stage of development.
567 #endif
568  ), insnVa(va) {}
569 
570 
571  std::string title() const override {
572  return "decode insn " + StringUtility::addrToString(insnVa);
573  }
574 
575  void run() override;
576 };
577 
579 // NextUnusedRegion -- a unit of work
581 
586 class NextUnusedRegion: public WorkItem {
587 protected:
588  AddressInterval where; // what part of the address space to search
589 
590 public:
591  NextUnusedRegion(Partitioner &partitioner, const AddressInterval &where)
592  : WorkItem(partitioner, WorkItem::Priority::NextUnusedVa, where ? where.least() : uint64_t(0)), where(where) {}
593 
594  std::string title() const override {
595  return "scan unused va within " + StringUtility::addrToString(where);
596  }
597 
598  void run() override;
599 };
600 
602 // Scheduler -- schedules units of work for parallel execution
604 
605 class WorkItemSorter {
606 public:
607  bool operator()(const std::shared_ptr<WorkItem>&, const std::shared_ptr<WorkItem>&) const;
608 };
609 
616 class Scheduler final {
617 public:
618  using Item = std::shared_ptr<WorkItem>;
619  using Container = std::vector<Item>;
620  using Queue = std::priority_queue<Item, Container, WorkItemSorter>;
622 private:
623  mutable SAWYER_THREAD_TRAITS::Mutex mutex_; // protects all following data members
624  Queue queue_;
625 
626 public:
628  void insert(const Item&);
629 
631  bool isEmpty() const;
632 
634  Item next();
635 
637  void reportStatus(std::ostream&) const;
638 };
639 
641 // Worker -- top level function for worker threads
643 
648 class Worker {
649 public:
650  void operator()(std::shared_ptr<WorkItem> work, Scheduler&) {
651  work->run();
652  }
653 };
654 
656 // Address usage map
658 
663 
665 // Partitioner
667 
673 class Partitioner: public Sawyer::Attribute::Storage<>, public Sawyer::SharedObject, boost::noncopyable {
674  Settings settings_;
675  Scheduler scheduler_;
676  std::shared_ptr<InstructionCache> insnCache_;
677  Progress::Ptr progress_; // reports percent of memory disassembled
678  rose_addr_t nExeVas_; // number of executable addresses in memory map
679 
680  mutable SAWYER_THREAD_TRAITS::Mutex mutex_; // protects all following data members
681  bool isRunning_; // true while "run" is called
682  InsnCfg insnCfg_; // the global control flow graph
683  Aum aum_; // address usage map
684 
685 public:
686  //--------------------------------------------------------------------------------------------------------------------
687  // Constructors and initialization
688  //--------------------------------------------------------------------------------------------------------------------
689 
695  Partitioner(const MemoryMap::Ptr &memory, Disassembler *decoder, const Settings &settings = Settings());
696 
705  const Settings& settings() const { return settings_; }
706  Settings& settings() { return settings_; }
713  static Accuracy choose(Accuracy a, Accuracy b);
714 
715  //--------------------------------------------------------------------------------------------------------------------
716  // Status
717  //--------------------------------------------------------------------------------------------------------------------
718 public:
724  Progress::Ptr progress() const;
725 
732  void statusReports();
733 
734  //--------------------------------------------------------------------------------------------------------------------
735  // Functions related to memory
736  //--------------------------------------------------------------------------------------------------------------------
737 public:
738 
742  MemoryMap::Ptr memoryMap() const;
743 
751  size_t nDecodedAddresses() const;
752 
758  AddressIntervalSet unusedExecutableVas(AddressInterval where) const;
759 
760  //--------------------------------------------------------------------------------------------------------------------
761  // Functions related to instructions
762  //--------------------------------------------------------------------------------------------------------------------
763 public:
764 
774  InstructionPtr decodeInstruction(rose_addr_t);
775 
793  InsnInfo::Ptr makeInstruction(rose_addr_t);
794  InsnInfo::Ptr makeInstruction(rose_addr_t, const InstructionPtr&);
803  InsnInfo::Ptr existingInstruction(rose_addr_t);
804 
812  InstructionPtr existingInstructionAst(rose_addr_t);
813 
819  InstructionCache& instructionCache() const;
820 
822  struct LockInCache {
823  std::vector<LockedInstruction> locks;
824  std::vector<SgAsmInstruction*> insns;
825  };
826 
845  LockInCache lockInCache(const InsnInfo::List&);
846 
848  struct CreateLinkedCfgVertices {
849  bool createdSource = false;
850  bool createdTarget = false;
851  bool createdEdge = false;
852  InsnInfo::Ptr source;
853  InsnInfo::Ptr target;
854  };
855 
863  CreateLinkedCfgVertices createLinkedCfgVertices(rose_addr_t sourceVa, rose_addr_t targetVa, const CfgEdge&);
864 
871  void scheduleDecodeInstruction(rose_addr_t insnVa);
872 
878  void scheduleNextUnusedRegion(const AddressInterval &where);
879 
880  //--------------------------------------------------------------------------------------------------------------------
881  // Basic blocks
882  //--------------------------------------------------------------------------------------------------------------------
883 public:
891  InsnInfo::List basicBlockEndingAt(rose_addr_t, size_t maxInsns = UNLIMITED) const;
892 
899  InsnInfo::List basicBlockContaining(rose_addr_t) const;
900 
914  Borrowed<CachedItem<Semantics::RiscOperatorsPtr, uint64_t>> basicBlockSemantics(const InsnInfo::List&);
915 
929  std::vector<SymbolicExpr::Ptr> computeSuccessors(const InsnInfo::List&, Accuracy accuracy);
930  std::vector<SymbolicExpr::Ptr> computeSuccessors(rose_addr_t insnVa, Accuracy accuracy);
943  AddressSet computedConcreteSuccessors(rose_addr_t insnVa, Accuracy accuracy);
944 
952  bool isFunctionCall(rose_addr_t insnVa, Accuracy accuracy);
953 
954 private:
955  // Reads the instruction pointer register from the sepcified state, which must be non-null and have a non-null current
956  // state, and splits it into a list of symbolic expressions. The most common return value is a single concrete address
957  // which occurs for almost all non-branching instructions (such as ADD, NOP, etc.). The second most common case is two
958  // concrete addresses from a conditional branch instruction. Another common case is that one of the successors will be a
959  // free variable to represent that we don't know the successors (as usually happens with x86 RET and other computed
960  // branches).
961  std::vector<SymbolicExpr::Ptr> splitSuccessors(const InstructionSemantics2::BaseSemantics::RiscOperatorsPtr &ops);
962 
963  //--------------------------------------------------------------------------------------------------------------------
964  // Functions to run things.
965  //--------------------------------------------------------------------------------------------------------------------
966 public:
967 
974  void run(size_t maxWorkers);
975 
981  bool isRunning() const;
982 
983 private:
984  // Used internally to change the isRunning value. This function is thread safe.
985  void isRunning(bool b);
986 
987  //--------------------------------------------------------------------------------------------------------------------
988  // Post processing functions. The following functons can only be called when the partitioner is not running (i.e.,
989  // only when isRunning returns false).
990  //--------------------------------------------------------------------------------------------------------------------
991 public:
992 
1002  const InsnCfg& insnCfg() const;
1003  InsnCfg& insnCfg();
1009  void printInsnCfg(std::ostream&) const;
1010 
1016  void dumpInsnCfg(std::ostream&, const Rose::BinaryAnalysis::Partitioner2::Partitioner&) const;
1017 
1025  std::vector<InsnInfo::List> allBasicBlocks() const;
1026 
1032  std::map<rose_addr_t /*insn*/, rose_addr_t /*bb*/> calculateInsnToBbMap() const;
1033 
1038  static bool addressOrder(const InsnInfo::List &a, const InsnInfo::List &b);
1039 
1047  std::map<rose_addr_t /*funcVa*/, AddressSet /*insnVas*/> assignFunctions();
1048 
1055  void transferResults(Rose::BinaryAnalysis::Partitioner2::Partitioner &out);
1056 
1061  rose_addr_t remap();
1062 };
1063 
1064 
1065 } // namespace
1066 } // namespace
1067 } // namespace
1068 } // namespace
1069 } // namespace
1070 #endif
1071 #endif
boost::shared_ptr< RiscOperators > RiscOperatorsPtr
Shared-ownership pointer to a RISC operators object.
Sawyer::SharedPointer< Node > Ptr
Shared-ownership pointer to an expression Node.
Graph containing user-defined vertices and edges.
Definition: Graph.h:625
const size_t UNLIMITED(-1)
Effictively unlimited size.
EdgeType
Partitioner control flow edge types.
Definition: BasicTypes.h:55
Collection of streams.
Definition: Message.h:1599
Mapping from integers to sets.
void reset()
Reset as if default-constructed.
Definition: Optional.h:181
Main namespace for the ROSE library.
Name space for the entire library.
size_t fastRandomIndex(size_t n, size_t seed=0)
Thread-safe random number generator.
ROSE_UTIL_API std::string addrToString(uint64_t value, size_t nbits=0)
Convert a virtual address to a string.
SemanticMemoryParadigm
Organization of semantic memory.
Definition: BasicTypes.h:85
Normal control flow edge, nothing special.
Definition: BasicTypes.h:56
Base class for reference counted objects.
Definition: SharedObject.h:64
Range of values delimited by endpoints.
Definition: Interval.h:33
void set(Word *words, const BitRange &where)
Set some bits.
API and storage for attributes.
Definition: Attribute.h:208
Represents no value.
Definition: Optional.h:32
Partitions instructions into basic blocks and functions.
Definition: Partitioner.h:322
Sawyer::SharedPointer< Progress > Ptr
Progress objects are reference counted.
Definition: Progress.h:168
Hash hash(const std::vector< Ptr > &)
Hash zero or more expressions.