ROSE  0.9.11.56
BinaryFunctionSimilarity.h
1 #ifndef ROSE_BinaryAnalysis_FunctionSimilarity_H
2 #define ROSE_BinaryAnalysis_FunctionSimilarity_H
3 
4 #include <BinaryMatrix.h>
5 #include <Partitioner2/Function.h>
6 #include <Progress.h>
7 #include <RoseException.h>
8 #include <Sawyer/Graph.h>
9 #include <Sawyer/Map.h>
10 
11 #ifdef ROSE_HAVE_DLIB
12  #include <dlib/optimization.h>
13 #endif
14 
15 namespace Rose {
16 namespace BinaryAnalysis {
17 
60  // Public types and data members
62 public:
63  /* Exceptions thrown by this analysis. */
64  class Exception: public Rose::Exception {
65  public:
66  Exception(const std::string &what): Rose::Exception(what) {}
67  ~Exception() throw () {}
68  };
69 
71  enum CValKind {
74  };
75  typedef std::vector<double> CartesianPoint;
76  typedef std::vector<int> OrderedList;
78  // Collections of characteristic values
79  typedef std::vector<CartesianPoint> PointCloud;
80  typedef std::vector<OrderedList> OrderedLists;
83  enum Statistic { AVERAGE, MEDIAN, MAXIMUM };
84 
85  typedef size_t CategoryId;
86  static const CategoryId NO_CATEGORY = -1;
89  typedef std::pair<Partitioner2::Function::Ptr, double /*distance*/> FunctionDistancePair;
90 
92  typedef std::pair<Partitioner2::Function::Ptr, Partitioner2::Function::Ptr> FunctionPair;
93 
96 
99 
101  // Private types and data members
103 private:
104  // Declaration for metric categories.
105  struct Category {
106  std::string name; // name of category (or metric for singleton categories)
107  CValKind kind; // kind of characteristic values stored here
108  double weight; // weight when combining category distances
109  size_t dimensionality; // dimensionality of Cartesian points; 0 => unknown
110  double dflt; // default value when one of the functions is null
111 
112  explicit Category(const std::string &name, CValKind kind, double weight = 1.0)
113  : name(name), kind(kind), weight(weight), dimensionality(0), dflt(1.0) {}
114  };
115 
116  std::vector<Category> categories_;
117  Sawyer::Container::Map<std::string /*category_name*/, size_t /* categories index */> categoryNames_;
118 
119  // Characteristic values for some function + category pair. Only one of these members is populated with data.
120  struct CharacteristicValues {
121  PointCloud pointCloud;
122  OrderedLists orderedLists;
123  };
124 
125  // Info about each function
126  struct FunctionInfo {
127  std::vector<CharacteristicValues> categories; // characteristic values orgnized by CategoryId
128  FunctionInfo(): categories() {} // LLVM's clang++ 3.5 and 3.7 don't generate this for const objs
129  };
130 
131  // Info about all functions
133  Functions functions_;
134 
135  // How to combine category distances to obtain a function distance
136  Statistic categoryAccumulatorType_;
137 
138  Progress::Ptr progress_;
139 
141  // Constructors
143 public:
144  FunctionSimilarity()
145  : categoryAccumulatorType_(AVERAGE), progress_(Progress::instance()) {}
146 
147  void clear() {
148  categories_.clear();
149  categoryNames_.clear();
150  functions_.clear();
151  categoryAccumulatorType_ = AVERAGE;
152  }
153 
155  // Properties
157 public:
165  Statistic categoryAccumulatorType() const { return categoryAccumulatorType_; }
166  void categoryAccumulatorType(Statistic s) { categoryAccumulatorType_ = s; }
170  Rose::Progress::Ptr progress() const { return progress_; }
171 
173  // Category declarations
175 public:
183  CategoryId declarePointCategory(const std::string &name, size_t dimensionality, bool allowExisting = true);
184 
191  CategoryId declareListCategory(const std::string &name, bool allowExisting = true);
192 
196  size_t nCategories() const { return categories_.size(); }
197 
202  CategoryId findCategory(const std::string &name) const { return categoryNames_.getOrElse(name, NO_CATEGORY); }
205  CValKind categoryKind(CategoryId) const;
206 
208  size_t categoryDimensionality(CategoryId) const;
209 
213  double categoryWeight(CategoryId) const;
214  void categoryWeight(CategoryId, double);
218  // Predefined metrics
221 public:
243  CategoryId declareCfgConnectivity(const std::string &categoryName);
244  void measureCfgConnectivity(CategoryId, const Partitioner2::Partitioner&, const Partitioner2::Function::Ptr&,
245  size_t maxPoints = (size_t)(-1));
253  CategoryId declareCallGraphConnectivity(const std::string &categoryName);
254  void measureCallGraphConnectivity(CategoryId, const Partitioner2::Partitioner&, const Partitioner2::Function::Ptr&);
263  CategoryId declareMnemonicStream(const std::string &categoryName);
264  void measureMnemonicStream(CategoryId, const Partitioner2::Partitioner&, const Partitioner2::Function::Ptr&);
268  // Low level functions for manipulating characteristic values
271 public:
272 
280  void insertPoint(const Partitioner2::Function::Ptr&, CategoryId, const CartesianPoint&);
281 
290  void insertList(const Partitioner2::Function::Ptr&, CategoryId, const OrderedList&);
291 
293  size_t size(const Partitioner2::Function::Ptr&, CategoryId) const;
294 
296  const PointCloud& points(const Partitioner2::Function::Ptr&, CategoryId) const;
297 
299  const OrderedLists& lists(const Partitioner2::Function::Ptr&, CategoryId) const;
300 
301 
303  // Comparison interface
305 public:
329  double compare(const Partitioner2::Function::Ptr&, const Partitioner2::Function::Ptr&, double dflt = NAN) const;
330 
335  std::vector<FunctionDistancePair> compareOneToAll(const Partitioner2::Function::Ptr&) const;
336 
345  template<class FunctionIterator>
346  std::vector<FunctionDistancePair>
347  compareOneToMany(const Partitioner2::Function::Ptr &needle,
348  const boost::iterator_range<FunctionIterator> &haystack) const {
349  std::vector<Partitioner2::Function::Ptr> others;
350  BOOST_FOREACH (const Partitioner2::Function::Ptr &other, haystack)
351  others.push_back(other);
352  return compareOneToMany(needle, others);
353  }
354 
355  template<class FunctionIterator>
356  std::vector<FunctionDistancePair>
357  compareOneToMany(const Partitioner2::Function::Ptr &needle,
358  const FunctionIterator &begin, const FunctionIterator &end) const {
359  return compareOneToMany(needle, boost::iterator_range<FunctionIterator>(begin, end));
360  }
361 
362  std::vector<FunctionDistancePair>
363  compareOneToMany(const Partitioner2::Function::Ptr &needle,
364  const std::vector<Partitioner2::Function::Ptr> &haystack) const;
376  std::vector<std::vector<double> > compareManyToMany(const std::vector<Partitioner2::Function::Ptr>&,
377  const std::vector<Partitioner2::Function::Ptr>&) const;
378 
388  DistanceMatrix compareManyToManyMatrix(std::vector<Partitioner2::Function::Ptr>,
389  std::vector<Partitioner2::Function::Ptr>) const;
390 
400  std::vector<FunctionPair> findMinimumCostMapping(const std::vector<Partitioner2::Function::Ptr> &list1,
401  const std::vector<Partitioner2::Function::Ptr> &list2) const;
402 
408  std::vector<double> computeDistances(const std::vector<Partitioner2::Function::Ptr> &list1,
409  const std::vector<Partitioner2::Function::Ptr> &list2,
410  size_t nThreads) const;
411 
413  // Sorting
415 public:
417  static bool sortByIncreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b) {
418  return a.second < b.second;
419  }
420 
422  static bool sortByDecreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b) {
423  return b.second < a.second;
424  }
425 
427  static bool sortByAddress(const FunctionDistancePair &a, const FunctionDistancePair &b) {
428  if (a.first == NULL || b.first == NULL)
429  return a.first == NULL && b.first != NULL;
430  return a.first->address() < b.first->address();
431  }
432 
434  // Printing and debugging
436 public:
438  static void initDiagnostics();
439 
443  void printCharacteristicValues(std::ostream&) const;
444 
446  // Utility functions
448 public:
451 
458  static std::vector<size_t> findMinimumAssignment(const DistanceMatrix&);
459 
464  static double totalAssignmentCost(const DistanceMatrix&, const std::vector<size_t> &assignment);
465 
467  static double maximumDistance(const DistanceMatrix&);
468 
470  static double averageDistance(const DistanceMatrix&);
471 
473  static double medianDistance(const DistanceMatrix&);
474 
476  // Internal functions
478 private:
479  static double comparePointClouds(const PointCloud&, const PointCloud&);
480  static double compareOrderedLists(const OrderedLists&, const OrderedLists&);
481 };
482 
483 std::ostream& operator<<(std::ostream&, const FunctionSimilarity&);
484 
485 } // namespace
486 } // namespace
487 
488 #endif
static bool sortByDecreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by descending distance.
Statistic
Ways that values can be combined.
std::vector< FunctionPair > findMinimumCostMapping(const std::vector< Partitioner2::Function::Ptr > &list1, const std::vector< Partitioner2::Function::Ptr > &list2) const
Minimum cost 1:1 mapping.
Matrix< double > DistanceMatrix
Square matrix representing distances.
void measureMnemonicStream(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &)
Instruction mnemonic stream.
std::pair< Partitioner2::Function::Ptr, double > FunctionDistancePair
Function and distance to some other function.
CategoryId declareCfgConnectivity(const std::string &categoryName)
Control flow graph connectivity.
std::pair< Partitioner2::Function::Ptr, Partitioner2::Function::Ptr > FunctionPair
Pair of functions.
Collection of streams.
Definition: Message.h:1579
static void initDiagnostics()
Initializes and registers disassembler diagnostic streams.
void measureCallGraphConnectivity(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &)
Function calls.
void printCharacteristicValues(std::ostream &) const
Print characteristic values for this analysis.
std::vector< OrderedList > OrderedLists
Ordered collection of ordered lists of integers.
void measureCfgConnectivity(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &, size_t maxPoints=(size_t)(-1))
Control flow graph connectivity.
CategoryId findCategory(const std::string &name) const
Find a category by name.
static bool sortByAddress(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by function address.
size_t nCategories() const
Number of categories.
static double totalAssignmentCost(const DistanceMatrix &, const std::vector< size_t > &assignment)
Total cost of a mapping.
Main namespace for the ROSE library.
const OrderedLists & lists(const Partitioner2::Function::Ptr &, CategoryId) const
Ordered lists contained in a category.
static Sawyer::Message::Facility mlog
Diagnostic streams.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::Function::Ptr &needle, const boost::iterator_range< FunctionIterator > &haystack) const
Compare one function with many others.
std::vector< FunctionDistancePair > compareOneToAll(const Partitioner2::Function::Ptr &) const
Compare one function with all others.
double categoryWeight(CategoryId) const
Property: category weight.
Rose::Progress::Ptr progress() const
Property: Object to which progress reports are made.
static const CategoryId NO_CATEGORY
Invalid category ID.
void categoryAccumulatorType(Statistic s)
Property: Statistic for combining category distances.
const PointCloud & points(const Partitioner2::Function::Ptr &, CategoryId) const
Catesian points contained in a category.
size_t categoryDimensionality(CategoryId) const
Dimensionality of Cartesian characteristic points.
CValKind categoryKind(CategoryId) const
Kind of category.
Statistic categoryAccumulatorType() const
Property: Statistic for combining category distances.
double compare(const Partitioner2::Function::Ptr &, const Partitioner2::Function::Ptr &, double dflt=NAN) const
Compare two functions.
static double cartesianDistance(const FunctionSimilarity::CartesianPoint &, const FunctionSimilarity::CartesianPoint &)
Cartesian distance between two points.
CategoryId declarePointCategory(const std::string &name, size_t dimensionality, bool allowExisting=true)
Declare a new category of Cartesian points.
static double medianDistance(const DistanceMatrix &)
Median distance in the matrix.
DistanceMatrix compareManyToManyMatrix(std::vector< Partitioner2::Function::Ptr >, std::vector< Partitioner2::Function::Ptr >) const
Compare many functions to many others.
std::vector< double > CartesianPoint
Characteristic value that's a Cartesian point.
std::vector< double > computeDistances(const std::vector< Partitioner2::Function::Ptr > &list1, const std::vector< Partitioner2::Function::Ptr > &list2, size_t nThreads) const
Compute distances between sets of functions.
CategoryId declareListCategory(const std::string &name, bool allowExisting=true)
Declare a new category of ordered lists of integers.
CategoryId declareCallGraphConnectivity(const std::string &categoryName)
Function calls.
CategoryId declareMnemonicStream(const std::string &categoryName)
Instruction mnemonic stream.
Analysis to test the similarity of two functions.
size_t size(const Partitioner2::Function::Ptr &, CategoryId) const
Number of characteristic points in a category.
std::vector< int > OrderedList
Characteristic value that's an ordered list of integers.
static double averageDistance(const DistanceMatrix &)
Average distance in the matrix.
static double maximumDistance(const DistanceMatrix &)
Maximum value in the distance matrix.
static std::vector< size_t > findMinimumAssignment(const DistanceMatrix &)
Find minimum mapping from rows to columns.
size_t CategoryId
ID number unique within this analysis context.
void insertPoint(const Partitioner2::Function::Ptr &, CategoryId, const CartesianPoint &)
Insert a Cartesian point characteristic value.
std::vector< CartesianPoint > PointCloud
Unordered collection of Cartesian points.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::Function::Ptr &needle, const FunctionIterator &begin, const FunctionIterator &end) const
Compare one function with many others.
Partitions instructions into basic blocks and functions.
Definition: Partitioner.h:317
FunctionPtr Ptr
Shared-ownership pointer for function.
Definition: Function.h:48
Base class for all ROSE exceptions.
Definition: RoseException.h:9
void insertList(const Partitioner2::Function::Ptr &, CategoryId, const OrderedList &)
Insert an ordered list characteristic value.
Container associating values with keys.
Definition: Sawyer/Map.h:66
static bool sortByIncreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by ascending distance.
Sawyer::SharedPointer< Progress > Ptr
Progress objects are reference counted.
Definition: Progress.h:167
std::vector< std::vector< double > > compareManyToMany(const std::vector< Partitioner2::Function::Ptr > &, const std::vector< Partitioner2::Function::Ptr > &) const
Compare many functions to many others.