ROSE 0.11.145.147
FunctionSimilarity.h
1#ifndef ROSE_BinaryAnalysis_FunctionSimilarity_H
2#define ROSE_BinaryAnalysis_FunctionSimilarity_H
3#include <featureTests.h>
4#ifdef ROSE_ENABLE_BINARY_ANALYSIS
5
6#include <Rose/BinaryAnalysis/Matrix.h>
7#include <Rose/BinaryAnalysis/Partitioner2/Function.h>
8#include <Rose/Progress.h>
9#include <Rose/Exception.h>
10#include <Sawyer/Graph.h>
11#include <Sawyer/Map.h>
12
13#ifdef ROSE_HAVE_DLIB
14 #include <dlib/optimization.h>
15#endif
16
17namespace Rose {
18namespace BinaryAnalysis {
19
62 // Public types and data members
64public:
65 /* Exceptions thrown by this analysis. */
66 class Exception: public Rose::Exception {
67 public:
68 Exception(const std::string &what): Rose::Exception(what) {}
69 ~Exception() throw () {}
70 };
71
77 typedef std::vector<double> CartesianPoint;
78 typedef std::vector<int> OrderedList;
80 // Collections of characteristic values
81 typedef std::vector<CartesianPoint> PointCloud;
82 typedef std::vector<OrderedList> OrderedLists;
85 enum Statistic { AVERAGE, MEDIAN, MAXIMUM };
86
87 typedef size_t CategoryId;
88 static const CategoryId NO_CATEGORY = -1;
91 typedef std::pair<Partitioner2::FunctionPtr, double /*distance*/> FunctionDistancePair;
92
94 typedef std::pair<Partitioner2::FunctionPtr, Partitioner2::FunctionPtr> FunctionPair;
95
98
101
103 // Private types and data members
105private:
106 // Declaration for metric categories.
107 struct Category {
108 std::string name; // name of category (or metric for singleton categories)
109 CValKind kind; // kind of characteristic values stored here
110 double weight; // weight when combining category distances
111 size_t dimensionality; // dimensionality of Cartesian points; 0 => unknown
112 double dflt; // default value when one of the functions is null
113
114 explicit Category(const std::string &name, CValKind kind, double weight = 1.0)
115 : name(name), kind(kind), weight(weight), dimensionality(0), dflt(1.0) {}
116 };
117
118 std::vector<Category> categories_;
119 Sawyer::Container::Map<std::string /*category_name*/, size_t /* categories index */> categoryNames_;
120
121 // Characteristic values for some function + category pair. Only one of these members is populated with data.
122 struct CharacteristicValues {
123 PointCloud pointCloud;
124 OrderedLists orderedLists;
125 };
126
127 // Info about each function
128 struct FunctionInfo {
129 std::vector<CharacteristicValues> categories; // characteristic values orgnized by CategoryId
130 FunctionInfo(): categories() {} // LLVM's clang++ 3.5 and 3.7 don't generate this for const objs
131 };
132
133 // Info about all functions
135 Functions functions_;
136
137 // How to combine category distances to obtain a function distance
138 Statistic categoryAccumulatorType_;
139
140 Progress::Ptr progress_;
141
143 // Constructors
145public:
146 FunctionSimilarity()
147 : categoryAccumulatorType_(AVERAGE), progress_(Progress::instance()) {}
148
149 void clear() {
150 categories_.clear();
151 categoryNames_.clear();
152 functions_.clear();
153 categoryAccumulatorType_ = AVERAGE;
154 }
155
157 // Properties
159public:
167 Statistic categoryAccumulatorType() const { return categoryAccumulatorType_; }
168 void categoryAccumulatorType(Statistic s) { categoryAccumulatorType_ = s; }
172 Rose::Progress::Ptr progress() const { return progress_; }
173
175 // Category declarations
177public:
185 CategoryId declarePointCategory(const std::string &name, size_t dimensionality, bool allowExisting = true);
186
193 CategoryId declareListCategory(const std::string &name, bool allowExisting = true);
194
198 size_t nCategories() const { return categories_.size(); }
199
204 CategoryId findCategory(const std::string &name) const { return categoryNames_.getOrElse(name, NO_CATEGORY); }
208
211
221 // Predefined metrics
223public:
245 CategoryId declareCfgConnectivity(const std::string &categoryName);
247 size_t maxPoints = UNLIMITED);
255 CategoryId declareCallGraphConnectivity(const std::string &categoryName);
265 CategoryId declareMnemonicStream(const std::string &categoryName);
271 // Low level functions for manipulating characteristic values
273public:
274
283
293
296
299
302
303
305 // Comparison interface
307public:
331 double compare(const Partitioner2::FunctionPtr&, const Partitioner2::FunctionPtr&, double dflt = NAN) const;
332
337 std::vector<FunctionDistancePair> compareOneToAll(const Partitioner2::FunctionPtr&) const;
338
347 template<class FunctionIterator>
348 std::vector<FunctionDistancePair>
350 const boost::iterator_range<FunctionIterator> &haystack) const {
351 std::vector<Partitioner2::FunctionPtr> others;
352 for (const Partitioner2::FunctionPtr &other: haystack)
353 others.push_back(other);
354 return compareOneToMany(needle, others);
355 }
356
357 template<class FunctionIterator>
358 std::vector<FunctionDistancePair>
360 const FunctionIterator &begin, const FunctionIterator &end) const {
361 return compareOneToMany(needle, boost::iterator_range<FunctionIterator>(begin, end));
362 }
363
364 std::vector<FunctionDistancePair>
366 const std::vector<Partitioner2::FunctionPtr> &haystack) const;
378 std::vector<std::vector<double> > compareManyToMany(const std::vector<Partitioner2::FunctionPtr>&,
379 const std::vector<Partitioner2::FunctionPtr>&) const;
380
390 DistanceMatrix compareManyToManyMatrix(std::vector<Partitioner2::FunctionPtr>,
391 std::vector<Partitioner2::FunctionPtr>) const;
392
402 std::vector<FunctionPair> findMinimumCostMapping(const std::vector<Partitioner2::FunctionPtr> &list1,
403 const std::vector<Partitioner2::FunctionPtr> &list2) const;
404
410 std::vector<double> computeDistances(const std::vector<Partitioner2::FunctionPtr> &list1,
411 const std::vector<Partitioner2::FunctionPtr> &list2,
412 size_t nThreads) const;
413
415 // Sorting
417public:
420 return a.second < b.second;
421 }
422
425 return b.second < a.second;
426 }
427
430 if (a.first == NULL || b.first == NULL)
431 return a.first == NULL && b.first != NULL;
432 return a.first->address() < b.first->address();
433 }
434
436 // Printing and debugging
438public:
440 static void initDiagnostics();
441
445 void printCharacteristicValues(std::ostream&) const;
446
448 // Utility functions
450public:
453
460 static std::vector<size_t> findMinimumAssignment(const DistanceMatrix&);
461
466 static double totalAssignmentCost(const DistanceMatrix&, const std::vector<size_t> &assignment);
467
469 static double maximumDistance(const DistanceMatrix&);
470
472 static double averageDistance(const DistanceMatrix&);
473
475 static double medianDistance(const DistanceMatrix&);
476
478 // Internal functions
480private:
481 static double comparePointClouds(const PointCloud&, const PointCloud&);
482 static double compareOrderedLists(const OrderedLists&, const OrderedLists&);
483};
484
485std::ostream& operator<<(std::ostream&, const FunctionSimilarity&);
486
487} // namespace
488} // namespace
489
490#endif
491#endif
Analysis to test the similarity of two functions.
std::vector< FunctionPair > findMinimumCostMapping(const std::vector< Partitioner2::FunctionPtr > &list1, const std::vector< Partitioner2::FunctionPtr > &list2) const
Minimum cost 1:1 mapping.
static bool sortByAddress(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by function address.
CValKind categoryKind(CategoryId) const
Kind of category.
std::pair< Partitioner2::FunctionPtr, double > FunctionDistancePair
Function and distance to some other function.
static void initDiagnostics()
Initializes and registers disassembler diagnostic streams.
double categoryWeight(CategoryId) const
Property: category weight.
CategoryId findCategory(const std::string &name) const
Find a category by name.
static double cartesianDistance(const FunctionSimilarity::CartesianPoint &, const FunctionSimilarity::CartesianPoint &)
Cartesian distance between two points.
static bool sortByDecreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by descending distance.
static double maximumDistance(const DistanceMatrix &)
Maximum value in the distance matrix.
std::vector< std::vector< double > > compareManyToMany(const std::vector< Partitioner2::FunctionPtr > &, const std::vector< Partitioner2::FunctionPtr > &) const
Compare many functions to many others.
size_t nCategories() const
Number of categories.
CategoryId declareListCategory(const std::string &name, bool allowExisting=true)
Declare a new category of ordered lists of integers.
std::vector< OrderedList > OrderedLists
Ordered collection of ordered lists of integers.
static const CategoryId NO_CATEGORY
Invalid category ID.
const PointCloud & points(const Partitioner2::FunctionPtr &, CategoryId) const
Catesian points contained in a category.
static double medianDistance(const DistanceMatrix &)
Median distance in the matrix.
DistanceMatrix compareManyToManyMatrix(std::vector< Partitioner2::FunctionPtr >, std::vector< Partitioner2::FunctionPtr >) const
Compare many functions to many others.
void insertPoint(const Partitioner2::FunctionPtr &, CategoryId, const CartesianPoint &)
Insert a Cartesian point characteristic value.
double compare(const Partitioner2::FunctionPtr &, const Partitioner2::FunctionPtr &, double dflt=NAN) const
Compare two functions.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::FunctionPtr &needle, const std::vector< Partitioner2::FunctionPtr > &haystack) const
Compare one function with many others.
static double totalAssignmentCost(const DistanceMatrix &, const std::vector< size_t > &assignment)
Total cost of a mapping.
void categoryWeight(CategoryId, double)
Property: category weight.
static std::vector< size_t > findMinimumAssignment(const DistanceMatrix &)
Find minimum mapping from rows to columns.
void printCharacteristicValues(std::ostream &) const
Print characteristic values for this analysis.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::FunctionPtr &needle, const boost::iterator_range< FunctionIterator > &haystack) const
Compare one function with many others.
size_t categoryDimensionality(CategoryId) const
Dimensionality of Cartesian characteristic points.
std::vector< int > OrderedList
Characteristic value that's an ordered list of integers.
Rose::Progress::Ptr progress() const
Property: Object to which progress reports are made.
CategoryId declareCfgConnectivity(const std::string &categoryName)
Control flow graph connectivity.
void measureMnemonicStream(CategoryId, const Partitioner2::PartitionerConstPtr &, const Partitioner2::FunctionPtr &)
Instruction mnemonic stream.
static Sawyer::Message::Facility mlog
Diagnostic streams.
size_t CategoryId
ID number unique within this analysis context.
CategoryId declareMnemonicStream(const std::string &categoryName)
Instruction mnemonic stream.
std::vector< double > computeDistances(const std::vector< Partitioner2::FunctionPtr > &list1, const std::vector< Partitioner2::FunctionPtr > &list2, size_t nThreads) const
Compute distances between sets of functions.
Matrix< double > DistanceMatrix
Square matrix representing distances.
void categoryAccumulatorType(Statistic s)
Property: Statistic for combining category distances.
CValKind
Kinds of characteristic values.
@ ORDERED_LIST
Values are lists of integers.
@ CARTESIAN_POINT
Values are N-dimensional Cartesian points.
std::vector< double > CartesianPoint
Characteristic value that's a Cartesian point.
CategoryId declareCallGraphConnectivity(const std::string &categoryName)
Function calls.
void insertList(const Partitioner2::FunctionPtr &, CategoryId, const OrderedList &)
Insert an ordered list characteristic value.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::FunctionPtr &needle, const FunctionIterator &begin, const FunctionIterator &end) const
Compare one function with many others.
std::pair< Partitioner2::FunctionPtr, Partitioner2::FunctionPtr > FunctionPair
Pair of functions.
size_t size(const Partitioner2::FunctionPtr &, CategoryId) const
Number of characteristic points in a category.
static bool sortByIncreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by ascending distance.
Statistic
Ways that values can be combined.
const OrderedLists & lists(const Partitioner2::FunctionPtr &, CategoryId) const
Ordered lists contained in a category.
std::vector< FunctionDistancePair > compareOneToAll(const Partitioner2::FunctionPtr &) const
Compare one function with all others.
CategoryId declarePointCategory(const std::string &name, size_t dimensionality, bool allowExisting=true)
Declare a new category of Cartesian points.
void measureCfgConnectivity(CategoryId, const Partitioner2::PartitionerConstPtr &, const Partitioner2::FunctionPtr &, size_t maxPoints=UNLIMITED)
Control flow graph connectivity.
void measureCallGraphConnectivity(CategoryId, const Partitioner2::PartitionerConstPtr &, const Partitioner2::FunctionPtr &)
Function calls.
std::vector< CartesianPoint > PointCloud
Unordered collection of Cartesian points.
Statistic categoryAccumulatorType() const
Property: Statistic for combining category distances.
static double averageDistance(const DistanceMatrix &)
Average distance in the matrix.
Base class for all ROSE exceptions.
ProgressPtr Ptr
Progress objects are reference counted.
Definition Progress.h:170
Container associating values with keys.
Definition Sawyer/Map.h:72
Collection of streams.
Definition Message.h:1606
Sawyer::Container::Map< rose_addr_t, FunctionPtr > Functions
Mapping from address to function.
Sawyer::SharedPointer< Function > FunctionPtr
Shared-ownership pointer for Function.
The ROSE library.
const size_t UNLIMITED
Effectively unlimited size.
Definition Constants.h:19