ROSE 0.11.145.134
ParallelPartitioner.h
1#ifndef ROSE_BinaryAnalysis_Partitioner2_ParallelPartitioner_H
2#define ROSE_BinaryAnalysis_Partitioner2_ParallelPartitioner_H
3#include <featureTests.h>
4#ifdef ROSE_ENABLE_BINARY_ANALYSIS
5
6#include <Rose/BinaryAnalysis/InstructionCache.h>
7#include <Rose/Progress.h>
8#include <queue>
9#include <Rose/BinaryAnalysis/Partitioner2/BasicTypes.h>
10#include <Rose/BinaryAnalysis/Partitioner2/Semantics.h>
11#include <Sawyer/Attribute.h>
12#include <Sawyer/Graph.h>
13#include <Sawyer/IntervalSetMap.h>
14#include <Sawyer/Message.h>
15#include <Sawyer/SharedObject.h>
16
17namespace Rose {
18namespace BinaryAnalysis {
19namespace Partitioner2 {
20namespace Experimental {
21namespace ParallelPartitioner {
22
23class Partitioner;
24
26// Settings and configuration, including miscellaneous basic types.
28
30enum class Accuracy {
31 LOW,
32 HIGH,
33 DEFAULT,
34};
35
37using FunctionReasons = BitFlags<SgAsmFunction::FunctionReason>;
38
40struct Settings {
41 size_t maxAnalysisBBlockSize = 20;
42 Accuracy successorAccuracy = Accuracy::LOW;
43 Accuracy functionCallDetectionAccuracy = Accuracy::LOW;
44 size_t minHoleSearch = 8;
46};
47
49// Diagnostics
51
54
55// Used internally. See Rose::Diagnostics::initialize
56void initDiagnostics();
57
59// Cached information
61
63template<class V, class K>
65public:
66 using Value = V;
67 using Key = K;
69
70private:
71 mutable SAWYER_THREAD_TRAITS::Mutex mutex_; // protects all following data members
73 OptionalValue value_;
74
75public:
79 void set(const Key &key, const Value &value) {
80 SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
81 key_ = key;
82 value_ = value;
83 }
84
91 bool maybeSet(const Key &key, const Value &value) {
92 SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
93 if (key_ && *key_ == key) {
94 return false;
95 } else {
96 key_ = key;
97 value_ = value;
98 return true;
99 }
100 }
101
105 OptionalValue get(const Key &key) const {
106 SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
107 if (key_ && *key_ == key)
108 return value_;
109 return {};
110 }
111
115 OptionalValue take(const Key &key) {
116 SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
117 if (key_ && *key_ == key) {
118 auto retval = value_;
119 key_.reset();
120 value_.reset();
121 return retval;
122 }
123 return {};
124 }
125
129 void reset() {
130 SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
131 key_.reset();
132 value_.reset();
133 }
134
138 bool exists(const Key &key) const {
139 SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
140 return key_ && *key_ == key;
141 }
142};
143
144// Same as above, but no keys
145template<class V>
146class CachedItem<V, void> {
147public:
148 using Value = V;
149 using Key = void;
151
152private:
153 mutable SAWYER_THREAD_TRAITS::Mutex mutex_;
154 OptionalValue value_;
155
156public:
157 void set(const Value &value) {
158 SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
159 value_ = value;
160 }
161
162 bool maybeSet(const Value &value) {
163 SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
164 if (value_) {
165 return false;
166 } else {
167 value_ = value;
168 return true;
169 }
170 }
171
172 OptionalValue get() const {
173 SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
174 return value_;
175 }
176
177 OptionalValue take() const {
178 SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
179 auto retval = value_;
180 value_ = Sawyer::Nothing();
181 return retval;
182 }
183
184 void reset() {
185 SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
186 value_.reset();
187 }
188
189 bool exists() const {
190 SAWYER_THREAD_TRAITS::LockGuard lock(mutex_);
191 return value_;
192 }
193};
194
199template<class Cache>
200class Borrowed {
201 Cache &cache_;
202 typename Cache::Key key_;
203 typename Cache::OptionalValue value_;
204 bool canceled_;
205
206public:
208 Borrowed(Cache &cache, typename Cache::Key key)
209 : cache_(cache), key_(key), value_(cache_.take(key)), canceled_(false) {}
210
212 Borrowed(Cache &cache, typename Cache::Key key, const typename Cache::Value &value)
213 : cache_(cache), key_(key), value_(value), canceled_(false) {}
214
215 ~Borrowed() {
216 rtn();
217 }
218
225 void cancel() {
226 canceled_ = true;
227 }
228
232 void rtn() {
233 if (!canceled_ && value_ && !cache_.maybeSet(key_, *value_)) {
234 //mlog[Diagnostics::WARN] <<"cache borrow contention detected: key=" <<StringUtility::addrToString(key_) <<"\n";
235 }
236 value_.reset();
237 }
238
244 const typename Cache::OptionalValue& get() const {
245 return value_;
246 }
247};
248
252template<class Cache>
253Borrowed<Cache> borrow(Cache &cache, typename Cache::Key key) {
254 return Borrowed<Cache>(cache, key);
255}
256
261template<class Cache>
262Borrowed<Cache> borrow(Cache &cache, typename Cache::Key key, const typename Cache::Value &value) {
263 return Borrowed<Cache>(cache, key, value);
264}
265
267// Information stored at each vertex of the CFG.
269
271class InsnInfo: boost::noncopyable {
272public:
273 using Ptr = std::shared_ptr<InsnInfo>;
274 using List = std::vector<Ptr>;
287
288private:
289 InstructionPtr ast_; // possibly evicted AST rooted at an SgAsmInstruction node.
290 const rose_addr_t va_; // starting address accessible even for evicted instruction
291 Cached cached_; // cached items computed elsewhere
292
293 mutable SAWYER_THREAD_TRAITS::Mutex mutex_; // protects all following data members
294 size_t size_; // instruction size in bytes even for evicted instruction
295 bool wasDecoded_; // has any DecodeInstruction work completed?
296 FunctionReasons functionReasons_; // is this insn the entry point of a function?
297
298 // Cached items associated with basic blocks. The key is the hash of the basic block instruction addresses.
299
300public:
304 explicit InsnInfo(const InstructionPtr &insn)
305 : ast_(insn), va_(insn->get_address()), size_(insn->get_size()), wasDecoded_(false) {
306 ASSERT_not_null(insn);
307 ASSERT_require(size_ > 0);
308 }
309
311 explicit InsnInfo(rose_addr_t va)
312 : va_(va), size_(0), wasDecoded_(false) {}
313
321 rose_addr_t address() const {
322 return va_;
323 }
324
333
342
364 const Cached& cached() const { return cached_; }
365 Cached& cached() { return cached_; }
381 bool wasDecoded() const;
393
399 static bool addressOrder(const Ptr &a, const Ptr &b);
400
406 static uint64_t hash(const List&);
407
408private:
409 friend class Partitioner;
410
411 // Set the underlying instruction.
412 //
413 // This sets the underlying instruction if it isn't set already, and returns the underlying instruction. If the return value
414 // is the same pointer as the argument then the instruction was set, otherwise it already had some other value. Also, the
415 // @ref wasDecoded flag is set. The instruction can be a valid instruction or a null pointer.
416 //
417 // Thread safety: This function is thread safe.
418 InstructionPtr setAstMaybe(const InstructionPtr&);
419
420};
421
423// CFG edge information
425
426class CfgEdge {
427public:
429 EdgeTypes types_;
430
431public:
432 CfgEdge()
433 : types_(E_NORMAL) {}
434
435 /*implicit*/ CfgEdge(EdgeType type)
436 : types_(type) {}
437
438 void merge(const CfgEdge &other) {
439 types_.set(other.types_);
440 }
441
442 EdgeTypes types() const {
443 return types_;
444 }
445
446 void types(EdgeTypes x) {
447 types_.set(x);
448 }
449
450 friend std::ostream& operator<<(std::ostream&, const CfgEdge&);
451};
452
454// Control flow graph
456
457// Lookup keys used internally for CFG vertices.
459 rose_addr_t va_;
460public:
461 /*implicit*/ InsnInfoKey(const InsnInfo::Ptr &insnInfo)
462 : va_(insnInfo->address()) {}
463 /*impilcit*/ InsnInfoKey(rose_addr_t va)
464 : va_(va) {}
465
466 // FIXME: we should be using an unordered_map for the index.
467 bool operator<(const InsnInfoKey &other) const {
468 return va_ < other.va_;
469 }
470};
471
478
480// WorkItem -- base class for individual parallel units of work
482
491class WorkItem {
492public:
494 enum class Priority {
495 DiscoverInstruction = 0,
496 NextUnusedVa = -100
497 };
498
499private:
500 Partitioner &partitioner_; // partitioner that owns this work item
501 Priority priority_; // primary sort key
502 uint64_t sort_; // secondary sort key
503
504protected:
513 : partitioner_(partitioner), priority_(priority), sort_(sort) {}
514
515 virtual ~WorkItem() {}
516
519 return partitioner_;
520 }
521
524 return priority_;
525 }
526
527public:
531 bool operator<(const WorkItem&) const;
532
533public:
537 virtual void run() = 0;
538
540 virtual std::string title() const = 0;
541};
542
544// DecodeInstruction -- a unit of work
546
553protected:
554 rose_addr_t insnVa; // starting address of the instruction
555
556public:
558 : WorkItem(partitioner, WorkItem::Priority::DiscoverInstruction,
559#if 0
560 // Process instructions in order of their address. This might be better for caching other data because once
561 // we evict it we might never need it again. On the other hand, it seems to increase contention for things
562 // like basic block semantics since it's more likely that two threads will be working near each other in the
563 // specimen virtual memory.
564 va
565#else
566 // Process instructions in random order. This seems to reduce contention for things like basic block semantics and
567 // improve performance by about 10% at this early stage of development.
569#endif
570 ), insnVa(va) {}
571
572
573 std::string title() const override {
574 return "decode insn " + StringUtility::addrToString(insnVa);
575 }
576
577 void run() override;
578};
579
581// NextUnusedRegion -- a unit of work
583
589protected:
590 AddressInterval where; // what part of the address space to search
591
592public:
594 : WorkItem(partitioner, WorkItem::Priority::NextUnusedVa, where ? where.least() : uint64_t(0)), where(where) {}
595
596 std::string title() const override {
597 return "scan unused va within " + StringUtility::addrToString(where);
598 }
599
600 void run() override;
601};
602
604// Scheduler -- schedules units of work for parallel execution
606
608public:
609 bool operator()(const std::shared_ptr<WorkItem>&, const std::shared_ptr<WorkItem>&) const;
610};
611
618class Scheduler final {
619public:
620 using Item = std::shared_ptr<WorkItem>;
621 using Container = std::vector<Item>;
622 using Queue = std::priority_queue<Item, Container, WorkItemSorter>;
624private:
625 mutable SAWYER_THREAD_TRAITS::Mutex mutex_; // protects all following data members
626 Queue queue_;
627
628public:
630 void insert(const Item&);
631
633 bool isEmpty() const;
634
636 Item next();
637
639 void reportStatus(std::ostream&) const;
640};
641
643// Worker -- top level function for worker threads
645
650class Worker {
651public:
652 void operator()(std::shared_ptr<WorkItem> work, Scheduler&) {
653 work->run();
654 }
655};
656
658// Address usage map
660
665
667// Partitioner
669
675class Partitioner: public Sawyer::Attribute::Storage<>, public Sawyer::SharedObject, boost::noncopyable {
676 Settings settings_;
677 Scheduler scheduler_;
678 std::shared_ptr<InstructionCache> insnCache_;
679 Progress::Ptr progress_; // reports percent of memory disassembled
680 rose_addr_t nExeVas_; // number of executable addresses in memory map
681
682 mutable SAWYER_THREAD_TRAITS::Mutex mutex_; // protects all following data members
683 bool isRunning_; // true while "run" is called
684 InsnCfg insnCfg_; // the global control flow graph
685 Aum aum_; // address usage map
686
687public:
688 //--------------------------------------------------------------------------------------------------------------------
689 // Constructors and initialization
690 //--------------------------------------------------------------------------------------------------------------------
691
697 Partitioner(const MemoryMap::Ptr &memory, const Disassembler::BasePtr &decoder, const Settings &settings = Settings());
698
707 const Settings& settings() const { return settings_; }
708 Settings& settings() { return settings_; }
714 static Accuracy choose(Accuracy a, Accuracy b);
715
716 //--------------------------------------------------------------------------------------------------------------------
717 // Status
718 //--------------------------------------------------------------------------------------------------------------------
719public:
726
734
735 //--------------------------------------------------------------------------------------------------------------------
736 // Functions related to memory
737 //--------------------------------------------------------------------------------------------------------------------
738public:
739
744
752 size_t nDecodedAddresses() const;
753
760
761 //--------------------------------------------------------------------------------------------------------------------
762 // Functions related to instructions
763 //--------------------------------------------------------------------------------------------------------------------
764public:
765
776
805
814
821
823 struct LockInCache {
824 std::vector<LockedInstruction> locks;
825 std::vector<SgAsmInstruction*> insns;
826 };
827
847
856
864 CreateLinkedCfgVertices createLinkedCfgVertices(rose_addr_t sourceVa, rose_addr_t targetVa, const CfgEdge&);
865
872 void scheduleDecodeInstruction(rose_addr_t insnVa);
873
880
881 //--------------------------------------------------------------------------------------------------------------------
882 // Basic blocks
883 //--------------------------------------------------------------------------------------------------------------------
884public:
892 InsnInfo::List basicBlockEndingAt(rose_addr_t, size_t maxInsns = UNLIMITED) const;
893
901
916
930 std::vector<SymbolicExpression::Ptr> computeSuccessors(const InsnInfo::List&, Accuracy accuracy);
931 std::vector<SymbolicExpression::Ptr> computeSuccessors(rose_addr_t insnVa, Accuracy accuracy);
944 AddressSet computedConcreteSuccessors(rose_addr_t insnVa, Accuracy accuracy);
945
953 bool isFunctionCall(rose_addr_t insnVa, Accuracy accuracy);
954
955private:
956 // Reads the instruction pointer register from the sepcified state, which must be non-null and have a non-null current
957 // state, and splits it into a list of symbolic expressions. The most common return value is a single concrete address
958 // which occurs for almost all non-branching instructions (such as ADD, NOP, etc.). The second most common case is two
959 // concrete addresses from a conditional branch instruction. Another common case is that one of the successors will be a
960 // free variable to represent that we don't know the successors (as usually happens with x86 RET and other computed
961 // branches).
962 std::vector<SymbolicExpression::Ptr> splitSuccessors(const InstructionSemantics::BaseSemantics::RiscOperatorsPtr &ops);
963
964 //--------------------------------------------------------------------------------------------------------------------
965 // Functions to run things.
966 //--------------------------------------------------------------------------------------------------------------------
967public:
968
975 void run(size_t maxWorkers);
976
982 bool isRunning() const;
983
984private:
985 // Used internally to change the isRunning value. This function is thread safe.
986 void isRunning(bool b);
987
988 //--------------------------------------------------------------------------------------------------------------------
989 // Post processing functions. The following functons can only be called when the partitioner is not running (i.e.,
990 // only when isRunning returns false).
991 //--------------------------------------------------------------------------------------------------------------------
992public:
993
1003 const InsnCfg& insnCfg() const;
1010 void printInsnCfg(std::ostream&) const;
1011
1018
1026 std::vector<InsnInfo::List> allBasicBlocks() const;
1027
1033 std::map<rose_addr_t /*insn*/, rose_addr_t /*bb*/> calculateInsnToBbMap() const;
1034
1039 static bool addressOrder(const InsnInfo::List &a, const InsnInfo::List &b);
1040
1048 std::map<rose_addr_t /*funcVa*/, AddressSet /*insnVas*/> assignFunctions();
1049
1057
1062 rose_addr_t remap();
1063};
1064
1065
1066} // namespace
1067} // namespace
1068} // namespace
1069} // namespace
1070} // namespace
1071#endif
1072#endif
Borrowed(Cache &cache, typename Cache::Key key, const typename Cache::Value &value)
Arrange to return an already taken item back to the cache.
const Cache::OptionalValue & get() const
Return any value that has been borrowed.
Borrowed(Cache &cache, typename Cache::Key key)
Borrow a cached item by taking it from the cache.
bool maybeSet(const Key &key, const Value &value)
Set the cache only if it doesn't contain a value.
bool exists(const Key &key) const
Tests whether a value is cached.
OptionalValue get(const Key &key) const
Get the cached item if present.
OptionalValue take(const Key &key)
Take the cached item if present.
void set(const Key &key, const Value &value)
Set the cache to hold the specified key / value pair.
void functionReasons(FunctionReasons)
Property: Function reasons.
InsnInfo(const InstructionPtr &insn)
Construct info with decoded instruction.
bool wasDecoded() const
Whether any attempt was made to decode this instruction.
void eraseFunctionReasons(FunctionReasons)
Property: Function reasons.
Sawyer::Optional< size_t > size() const
Size of instruciton in bytes if known.
void setDecoded()
Whether any attempt was made to decode this instruction.
Sawyer::Optional< AddressInterval > hull() const
Location of instruction if known.
void insertFunctionReasons(FunctionReasons)
Property: Function reasons.
static bool addressOrder(const Ptr &a, const Ptr &b)
For sorty by address.
FunctionReasons functionReasons() const
Property: Function reasons.
InstructionPtr ast() const
Get the underlying instruction AST.
bool isFunctionCall(rose_addr_t insnVa, Accuracy accuracy)
Determine whether the instruction is a function call.
void transferResults(const Rose::BinaryAnalysis::Partitioner2::PartitionerPtr &out)
Build results from CFG.
std::vector< SymbolicExpression::Ptr > computeSuccessors(rose_addr_t insnVa, Accuracy accuracy)
Return the computed symbolic successors for an instruction.
InsnInfo::Ptr existingInstruction(rose_addr_t)
Return information about a vertex of the global CFG.
AddressSet computedConcreteSuccessors(rose_addr_t insnVa, Accuracy accuracy)
Return the computed concrete successors for an instruction.
AddressIntervalSet unusedExecutableVas(AddressInterval where) const
All unused executable addresses within region.
InstructionPtr decodeInstruction(rose_addr_t)
Decode an instruction at the specified address.
size_t nDecodedAddresses() const
Amount of memory with known instructions.
InsnInfo::List basicBlockContaining(rose_addr_t) const
Find basic block containing specified instruction.
std::map< rose_addr_t, rose_addr_t > calculateInsnToBbMap() const
Create a map from instructions to basic blocks.
static Accuracy choose(Accuracy a, Accuracy b)
Accuracy setting.
std::vector< InsnInfo::List > allBasicBlocks() const
List of all basic blocks.
static bool addressOrder(const InsnInfo::List &a, const InsnInfo::List &b)
Predicate to order blocks by starting address.
InstructionPtr existingInstructionAst(rose_addr_t)
Returns the AST for an instruction.
Borrowed< CachedItem< Semantics::RiscOperatorsPtr, uint64_t > > basicBlockSemantics(const InsnInfo::List &)
Calculate the semantics for a basic block.
LockInCache lockInCache(const InsnInfo::List &)
Lock instructions in the cache.
CreateLinkedCfgVertices createLinkedCfgVertices(rose_addr_t sourceVa, rose_addr_t targetVa, const CfgEdge &)
Add an edge and maybe vertices to the global CFG.
void printInsnCfg(std::ostream &) const
Output the CFG as a DOT graph.
Progress::Ptr progress() const
Progress reports for ratio of memory disassembled.
void scheduleNextUnusedRegion(const AddressInterval &where)
Add a work item to decode lowest unused addresses.
MemoryMap::Ptr memoryMap() const
Returns the memory map used for disassembling.
std::vector< SymbolicExpression::Ptr > computeSuccessors(const InsnInfo::List &, Accuracy accuracy)
Return the computed symbolic successors for an instruction.
void scheduleDecodeInstruction(rose_addr_t insnVa)
Add a work item to decode an instruction.
std::map< rose_addr_t, AddressSet > assignFunctions()
Assign instructions to functions.
InsnInfo::List basicBlockEndingAt(rose_addr_t, size_t maxInsns=UNLIMITED) const
Find a basic block's worth of instructions.
InsnInfo::Ptr makeInstruction(rose_addr_t)
Create and return information about a vertex of the CFG.
void dumpInsnCfg(std::ostream &, const Rose::BinaryAnalysis::Partitioner2::Partitioner &) const
Output the CFG as text for debugging.
InsnInfo::Ptr makeInstruction(rose_addr_t, const InstructionPtr &)
Create and return information about a vertex of the CFG.
Partitioner(const MemoryMap::Ptr &memory, const Disassembler::BasePtr &decoder, const Settings &settings=Settings())
Constructor new partitioner.
std::vector< Item > Container
Type of items stored in this scheduler.
bool isEmpty() const
Test whether any work remains to be done.
void insert(const Item &)
Insert a new work item into the work list.
Partitioner & partitioner() const
Partitioner set at construction time.
WorkItem(Partitioner &partitioner, Priority priority, uint64_t sort)
Construct new item with given priority for a specific partitioner.
Handles work items when they're dispatched to a worker thread.
Partitions instructions into basic blocks and functions.
API and storage for attributes.
Definition Attribute.h:215
BitFlags & set(Enum e)
Set the specified bit.
T least() const
Returns lower limit.
Definition Interval.h:218
Collection of streams.
Definition Message.h:1606
Represents no value.
Definition Optional.h:36
void reset()
Reset as if default-constructed.
Definition Optional.h:213
Base class for reference counted objects.
boost::shared_ptr< RiscOperators > RiscOperatorsPtr
Shared-ownership pointer to a RISC operators object.
@ E_NORMAL
Normal control flow edge, nothing special.
ROSE_UTIL_API std::string addrToString(uint64_t value, size_t nbits=0)
Convert a virtual address to a string.
The ROSE library.
const size_t UNLIMITED
Effectively unlimited size.
Definition Constants.h:19
size_t fastRandomIndex(size_t n, size_t seed=0)
Thread-safe random number generator.
CachedItem< Semantics::RiscOperatorsPtr, uint64_t > semantics
Symbolic semantics of basic block.
SemanticMemoryParadigm semanticMemoryParadigm
Chronological or address hashes for indexing memory.
size_t minHoleSearch
Do now search unused regions smaller than this many bytes.
Accuracy successorAccuracy
Max number of insns in basic block during various analyses.
Accuracy functionCallDetectionAccuracy
How to determine whether something is a function call.