ROSE  0.11.145.0
FunctionSimilarity.h
1 #ifndef ROSE_BinaryAnalysis_FunctionSimilarity_H
2 #define ROSE_BinaryAnalysis_FunctionSimilarity_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
5 
6 #include <Rose/BinaryAnalysis/Matrix.h>
7 #include <Rose/BinaryAnalysis/Partitioner2/Function.h>
8 #include <Rose/Progress.h>
9 #include <Rose/Exception.h>
10 #include <Sawyer/Graph.h>
11 #include <Sawyer/Map.h>
12 
13 #ifdef ROSE_HAVE_DLIB
14  #include <dlib/optimization.h>
15 #endif
16 
17 namespace Rose {
18 namespace BinaryAnalysis {
19 
62  // Public types and data members
64 public:
65  /* Exceptions thrown by this analysis. */
66  class Exception: public Rose::Exception {
67  public:
68  Exception(const std::string &what): Rose::Exception(what) {}
69  ~Exception() throw () {}
70  };
71 
73  enum CValKind {
76  };
77  typedef std::vector<double> CartesianPoint;
78  typedef std::vector<int> OrderedList;
80  // Collections of characteristic values
81  typedef std::vector<CartesianPoint> PointCloud;
82  typedef std::vector<OrderedList> OrderedLists;
85  enum Statistic { AVERAGE, MEDIAN, MAXIMUM };
86 
87  typedef size_t CategoryId;
88  static const CategoryId NO_CATEGORY = -1;
91  typedef std::pair<Partitioner2::FunctionPtr, double /*distance*/> FunctionDistancePair;
92 
94  typedef std::pair<Partitioner2::FunctionPtr, Partitioner2::FunctionPtr> FunctionPair;
95 
98 
101 
103  // Private types and data members
105 private:
106  // Declaration for metric categories.
107  struct Category {
108  std::string name; // name of category (or metric for singleton categories)
109  CValKind kind; // kind of characteristic values stored here
110  double weight; // weight when combining category distances
111  size_t dimensionality; // dimensionality of Cartesian points; 0 => unknown
112  double dflt; // default value when one of the functions is null
113 
114  explicit Category(const std::string &name, CValKind kind, double weight = 1.0)
115  : name(name), kind(kind), weight(weight), dimensionality(0), dflt(1.0) {}
116  };
117 
118  std::vector<Category> categories_;
119  Sawyer::Container::Map<std::string /*category_name*/, size_t /* categories index */> categoryNames_;
120 
121  // Characteristic values for some function + category pair. Only one of these members is populated with data.
122  struct CharacteristicValues {
123  PointCloud pointCloud;
124  OrderedLists orderedLists;
125  };
126 
127  // Info about each function
128  struct FunctionInfo {
129  std::vector<CharacteristicValues> categories; // characteristic values orgnized by CategoryId
130  FunctionInfo(): categories() {} // LLVM's clang++ 3.5 and 3.7 don't generate this for const objs
131  };
132 
133  // Info about all functions
135  Functions functions_;
136 
137  // How to combine category distances to obtain a function distance
138  Statistic categoryAccumulatorType_;
139 
140  Progress::Ptr progress_;
141 
143  // Constructors
145 public:
146  FunctionSimilarity()
147  : categoryAccumulatorType_(AVERAGE), progress_(Progress::instance()) {}
148 
149  void clear() {
150  categories_.clear();
151  categoryNames_.clear();
152  functions_.clear();
153  categoryAccumulatorType_ = AVERAGE;
154  }
155 
157  // Properties
159 public:
167  Statistic categoryAccumulatorType() const { return categoryAccumulatorType_; }
168  void categoryAccumulatorType(Statistic s) { categoryAccumulatorType_ = s; }
172  Rose::Progress::Ptr progress() const { return progress_; }
173 
175  // Category declarations
177 public:
185  CategoryId declarePointCategory(const std::string &name, size_t dimensionality, bool allowExisting = true);
186 
193  CategoryId declareListCategory(const std::string &name, bool allowExisting = true);
194 
198  size_t nCategories() const { return categories_.size(); }
199 
204  CategoryId findCategory(const std::string &name) const { return categoryNames_.getOrElse(name, NO_CATEGORY); }
207  CValKind categoryKind(CategoryId) const;
208 
210  size_t categoryDimensionality(CategoryId) const;
211 
215  double categoryWeight(CategoryId) const;
216  void categoryWeight(CategoryId, double);
220  // Predefined metrics
223 public:
245  CategoryId declareCfgConnectivity(const std::string &categoryName);
246  void measureCfgConnectivity(CategoryId, const Partitioner2::PartitionerConstPtr&, const Partitioner2::FunctionPtr&,
247  size_t maxPoints = UNLIMITED);
255  CategoryId declareCallGraphConnectivity(const std::string &categoryName);
256  void measureCallGraphConnectivity(CategoryId, const Partitioner2::PartitionerConstPtr&, const Partitioner2::FunctionPtr&);
265  CategoryId declareMnemonicStream(const std::string &categoryName);
266  void measureMnemonicStream(CategoryId, const Partitioner2::PartitionerConstPtr&, const Partitioner2::FunctionPtr&);
270  // Low level functions for manipulating characteristic values
273 public:
274 
282  void insertPoint(const Partitioner2::FunctionPtr&, CategoryId, const CartesianPoint&);
283 
292  void insertList(const Partitioner2::FunctionPtr&, CategoryId, const OrderedList&);
293 
295  size_t size(const Partitioner2::FunctionPtr&, CategoryId) const;
296 
298  const PointCloud& points(const Partitioner2::FunctionPtr&, CategoryId) const;
299 
301  const OrderedLists& lists(const Partitioner2::FunctionPtr&, CategoryId) const;
302 
303 
305  // Comparison interface
307 public:
331  double compare(const Partitioner2::FunctionPtr&, const Partitioner2::FunctionPtr&, double dflt = NAN) const;
332 
337  std::vector<FunctionDistancePair> compareOneToAll(const Partitioner2::FunctionPtr&) const;
338 
347  template<class FunctionIterator>
348  std::vector<FunctionDistancePair>
349  compareOneToMany(const Partitioner2::FunctionPtr &needle,
350  const boost::iterator_range<FunctionIterator> &haystack) const {
351  std::vector<Partitioner2::FunctionPtr> others;
352  for (const Partitioner2::FunctionPtr &other: haystack)
353  others.push_back(other);
354  return compareOneToMany(needle, others);
355  }
356 
357  template<class FunctionIterator>
358  std::vector<FunctionDistancePair>
359  compareOneToMany(const Partitioner2::FunctionPtr &needle,
360  const FunctionIterator &begin, const FunctionIterator &end) const {
361  return compareOneToMany(needle, boost::iterator_range<FunctionIterator>(begin, end));
362  }
363 
364  std::vector<FunctionDistancePair>
365  compareOneToMany(const Partitioner2::FunctionPtr &needle,
366  const std::vector<Partitioner2::FunctionPtr> &haystack) const;
378  std::vector<std::vector<double> > compareManyToMany(const std::vector<Partitioner2::FunctionPtr>&,
379  const std::vector<Partitioner2::FunctionPtr>&) const;
380 
390  DistanceMatrix compareManyToManyMatrix(std::vector<Partitioner2::FunctionPtr>,
391  std::vector<Partitioner2::FunctionPtr>) const;
392 
402  std::vector<FunctionPair> findMinimumCostMapping(const std::vector<Partitioner2::FunctionPtr> &list1,
403  const std::vector<Partitioner2::FunctionPtr> &list2) const;
404 
410  std::vector<double> computeDistances(const std::vector<Partitioner2::FunctionPtr> &list1,
411  const std::vector<Partitioner2::FunctionPtr> &list2,
412  size_t nThreads) const;
413 
415  // Sorting
417 public:
419  static bool sortByIncreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b) {
420  return a.second < b.second;
421  }
422 
424  static bool sortByDecreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b) {
425  return b.second < a.second;
426  }
427 
429  static bool sortByAddress(const FunctionDistancePair &a, const FunctionDistancePair &b) {
430  if (a.first == NULL || b.first == NULL)
431  return a.first == NULL && b.first != NULL;
432  return a.first->address() < b.first->address();
433  }
434 
436  // Printing and debugging
438 public:
440  static void initDiagnostics();
441 
445  void printCharacteristicValues(std::ostream&) const;
446 
448  // Utility functions
450 public:
453 
460  static std::vector<size_t> findMinimumAssignment(const DistanceMatrix&);
461 
466  static double totalAssignmentCost(const DistanceMatrix&, const std::vector<size_t> &assignment);
467 
469  static double maximumDistance(const DistanceMatrix&);
470 
472  static double averageDistance(const DistanceMatrix&);
473 
475  static double medianDistance(const DistanceMatrix&);
476 
478  // Internal functions
480 private:
481  static double comparePointClouds(const PointCloud&, const PointCloud&);
482  static double compareOrderedLists(const OrderedLists&, const OrderedLists&);
483 };
484 
485 std::ostream& operator<<(std::ostream&, const FunctionSimilarity&);
486 
487 } // namespace
488 } // namespace
489 
490 #endif
491 #endif
void insertPoint(const Partitioner2::FunctionPtr &, CategoryId, const CartesianPoint &)
Insert a Cartesian point characteristic value.
void measureCallGraphConnectivity(CategoryId, const Partitioner2::PartitionerConstPtr &, const Partitioner2::FunctionPtr &)
Function calls.
static bool sortByDecreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by descending distance.
Values are N-dimensional Cartesian points.
Statistic
Ways that values can be combined.
Matrix< double > DistanceMatrix
Square matrix representing distances.
CategoryId declareCfgConnectivity(const std::string &categoryName)
Control flow graph connectivity.
Collection of streams.
Definition: Message.h:1606
static void initDiagnostics()
Initializes and registers disassembler diagnostic streams.
std::pair< Partitioner2::FunctionPtr, double > FunctionDistancePair
Function and distance to some other function.
void printCharacteristicValues(std::ostream &) const
Print characteristic values for this analysis.
std::vector< OrderedList > OrderedLists
Ordered collection of ordered lists of integers.
CategoryId findCategory(const std::string &name) const
Find a category by name.
static bool sortByAddress(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by function address.
size_t nCategories() const
Number of categories.
void measureMnemonicStream(CategoryId, const Partitioner2::PartitionerConstPtr &, const Partitioner2::FunctionPtr &)
Instruction mnemonic stream.
static double totalAssignmentCost(const DistanceMatrix &, const std::vector< size_t > &assignment)
Total cost of a mapping.
Main namespace for the ROSE library.
const size_t UNLIMITED(static_cast< size_t >(-1))
Effictively unlimited size.
static Sawyer::Message::Facility mlog
Diagnostic streams.
CValKind
Kinds of characteristic values.
double categoryWeight(CategoryId) const
Property: category weight.
Rose::Progress::Ptr progress() const
Property: Object to which progress reports are made.
Sawyer::SharedPointer< Function > FunctionPtr
Shared-ownership pointer for Function.
static const CategoryId NO_CATEGORY
Invalid category ID.
void categoryAccumulatorType(Statistic s)
Property: Statistic for combining category distances.
std::vector< double > computeDistances(const std::vector< Partitioner2::FunctionPtr > &list1, const std::vector< Partitioner2::FunctionPtr > &list2, size_t nThreads) const
Compute distances between sets of functions.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::FunctionPtr &needle, const boost::iterator_range< FunctionIterator > &haystack) const
Compare one function with many others.
const OrderedLists & lists(const Partitioner2::FunctionPtr &, CategoryId) const
Ordered lists contained in a category.
size_t categoryDimensionality(CategoryId) const
Dimensionality of Cartesian characteristic points.
CValKind categoryKind(CategoryId) const
Kind of category.
Statistic categoryAccumulatorType() const
Property: Statistic for combining category distances.
static double cartesianDistance(const FunctionSimilarity::CartesianPoint &, const FunctionSimilarity::CartesianPoint &)
Cartesian distance between two points.
CategoryId declarePointCategory(const std::string &name, size_t dimensionality, bool allowExisting=true)
Declare a new category of Cartesian points.
static double medianDistance(const DistanceMatrix &)
Median distance in the matrix.
const PointCloud & points(const Partitioner2::FunctionPtr &, CategoryId) const
Catesian points contained in a category.
std::vector< std::vector< double > > compareManyToMany(const std::vector< Partitioner2::FunctionPtr > &, const std::vector< Partitioner2::FunctionPtr > &) const
Compare many functions to many others.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::FunctionPtr &needle, const FunctionIterator &begin, const FunctionIterator &end) const
Compare one function with many others.
DistanceMatrix compareManyToManyMatrix(std::vector< Partitioner2::FunctionPtr >, std::vector< Partitioner2::FunctionPtr >) const
Compare many functions to many others.
std::pair< Partitioner2::FunctionPtr, Partitioner2::FunctionPtr > FunctionPair
Pair of functions.
std::vector< double > CartesianPoint
Characteristic value that's a Cartesian point.
CategoryId declareListCategory(const std::string &name, bool allowExisting=true)
Declare a new category of ordered lists of integers.
CategoryId declareCallGraphConnectivity(const std::string &categoryName)
Function calls.
CategoryId declareMnemonicStream(const std::string &categoryName)
Instruction mnemonic stream.
Analysis to test the similarity of two functions.
std::vector< FunctionDistancePair > compareOneToAll(const Partitioner2::FunctionPtr &) const
Compare one function with all others.
std::vector< int > OrderedList
Characteristic value that's an ordered list of integers.
static double averageDistance(const DistanceMatrix &)
Average distance in the matrix.
static double maximumDistance(const DistanceMatrix &)
Maximum value in the distance matrix.
static std::vector< size_t > findMinimumAssignment(const DistanceMatrix &)
Find minimum mapping from rows to columns.
double compare(const Partitioner2::FunctionPtr &, const Partitioner2::FunctionPtr &, double dflt=NAN) const
Compare two functions.
size_t CategoryId
ID number unique within this analysis context.
void insertList(const Partitioner2::FunctionPtr &, CategoryId, const OrderedList &)
Insert an ordered list characteristic value.
std::vector< CartesianPoint > PointCloud
Unordered collection of Cartesian points.
Base class for all ROSE exceptions.
Definition: Rose/Exception.h:9
Container associating values with keys.
Definition: Sawyer/Map.h:66
static bool sortByIncreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by ascending distance.
size_t size(const Partitioner2::FunctionPtr &, CategoryId) const
Number of characteristic points in a category.
std::vector< FunctionPair > findMinimumCostMapping(const std::vector< Partitioner2::FunctionPtr > &list1, const std::vector< Partitioner2::FunctionPtr > &list2) const
Minimum cost 1:1 mapping.
ProgressPtr Ptr
Progress objects are reference counted.
Definition: Progress.h:169
Matrix values.
Definition: Matrix.h:25
void measureCfgConnectivity(CategoryId, const Partitioner2::PartitionerConstPtr &, const Partitioner2::FunctionPtr &, size_t maxPoints=UNLIMITED)
Control flow graph connectivity.