ROSE  0.11.31.0
BinaryFunctionSimilarity.h
1 #ifndef ROSE_BinaryAnalysis_FunctionSimilarity_H
2 #define ROSE_BinaryAnalysis_FunctionSimilarity_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
5 
6 #include <BinaryMatrix.h>
7 #include <Partitioner2/Function.h>
8 #include <Progress.h>
9 #include <RoseException.h>
10 #include <Sawyer/Graph.h>
11 #include <Sawyer/Map.h>
12 
13 #ifdef ROSE_HAVE_DLIB
14  #include <dlib/optimization.h>
15 #endif
16 
17 namespace Rose {
18 namespace BinaryAnalysis {
19 
62  // Public types and data members
64 public:
65  /* Exceptions thrown by this analysis. */
66  class Exception: public Rose::Exception {
67  public:
68  Exception(const std::string &what): Rose::Exception(what) {}
69  ~Exception() throw () {}
70  };
71 
73  enum CValKind {
76  };
77  typedef std::vector<double> CartesianPoint;
78  typedef std::vector<int> OrderedList;
80  // Collections of characteristic values
81  typedef std::vector<CartesianPoint> PointCloud;
82  typedef std::vector<OrderedList> OrderedLists;
85  enum Statistic { AVERAGE, MEDIAN, MAXIMUM };
86 
87  typedef size_t CategoryId;
88  static const CategoryId NO_CATEGORY = -1;
91  typedef std::pair<Partitioner2::Function::Ptr, double /*distance*/> FunctionDistancePair;
92 
94  typedef std::pair<Partitioner2::Function::Ptr, Partitioner2::Function::Ptr> FunctionPair;
95 
98 
101 
103  // Private types and data members
105 private:
106  // Declaration for metric categories.
107  struct Category {
108  std::string name; // name of category (or metric for singleton categories)
109  CValKind kind; // kind of characteristic values stored here
110  double weight; // weight when combining category distances
111  size_t dimensionality; // dimensionality of Cartesian points; 0 => unknown
112  double dflt; // default value when one of the functions is null
113 
114  explicit Category(const std::string &name, CValKind kind, double weight = 1.0)
115  : name(name), kind(kind), weight(weight), dimensionality(0), dflt(1.0) {}
116  };
117 
118  std::vector<Category> categories_;
119  Sawyer::Container::Map<std::string /*category_name*/, size_t /* categories index */> categoryNames_;
120 
121  // Characteristic values for some function + category pair. Only one of these members is populated with data.
122  struct CharacteristicValues {
123  PointCloud pointCloud;
124  OrderedLists orderedLists;
125  };
126 
127  // Info about each function
128  struct FunctionInfo {
129  std::vector<CharacteristicValues> categories; // characteristic values orgnized by CategoryId
130  FunctionInfo(): categories() {} // LLVM's clang++ 3.5 and 3.7 don't generate this for const objs
131  };
132 
133  // Info about all functions
135  Functions functions_;
136 
137  // How to combine category distances to obtain a function distance
138  Statistic categoryAccumulatorType_;
139 
140  Progress::Ptr progress_;
141 
143  // Constructors
145 public:
146  FunctionSimilarity()
147  : categoryAccumulatorType_(AVERAGE), progress_(Progress::instance()) {}
148 
149  void clear() {
150  categories_.clear();
151  categoryNames_.clear();
152  functions_.clear();
153  categoryAccumulatorType_ = AVERAGE;
154  }
155 
157  // Properties
159 public:
167  Statistic categoryAccumulatorType() const { return categoryAccumulatorType_; }
168  void categoryAccumulatorType(Statistic s) { categoryAccumulatorType_ = s; }
172  Rose::Progress::Ptr progress() const { return progress_; }
173 
175  // Category declarations
177 public:
185  CategoryId declarePointCategory(const std::string &name, size_t dimensionality, bool allowExisting = true);
186 
193  CategoryId declareListCategory(const std::string &name, bool allowExisting = true);
194 
198  size_t nCategories() const { return categories_.size(); }
199 
204  CategoryId findCategory(const std::string &name) const { return categoryNames_.getOrElse(name, NO_CATEGORY); }
207  CValKind categoryKind(CategoryId) const;
208 
210  size_t categoryDimensionality(CategoryId) const;
211 
215  double categoryWeight(CategoryId) const;
216  void categoryWeight(CategoryId, double);
220  // Predefined metrics
223 public:
245  CategoryId declareCfgConnectivity(const std::string &categoryName);
246  void measureCfgConnectivity(CategoryId, const Partitioner2::Partitioner&, const Partitioner2::Function::Ptr&,
247  size_t maxPoints = UNLIMITED);
255  CategoryId declareCallGraphConnectivity(const std::string &categoryName);
256  void measureCallGraphConnectivity(CategoryId, const Partitioner2::Partitioner&, const Partitioner2::Function::Ptr&);
265  CategoryId declareMnemonicStream(const std::string &categoryName);
266  void measureMnemonicStream(CategoryId, const Partitioner2::Partitioner&, const Partitioner2::Function::Ptr&);
270  // Low level functions for manipulating characteristic values
273 public:
274 
282  void insertPoint(const Partitioner2::Function::Ptr&, CategoryId, const CartesianPoint&);
283 
292  void insertList(const Partitioner2::Function::Ptr&, CategoryId, const OrderedList&);
293 
295  size_t size(const Partitioner2::Function::Ptr&, CategoryId) const;
296 
298  const PointCloud& points(const Partitioner2::Function::Ptr&, CategoryId) const;
299 
301  const OrderedLists& lists(const Partitioner2::Function::Ptr&, CategoryId) const;
302 
303 
305  // Comparison interface
307 public:
331  double compare(const Partitioner2::Function::Ptr&, const Partitioner2::Function::Ptr&, double dflt = NAN) const;
332 
337  std::vector<FunctionDistancePair> compareOneToAll(const Partitioner2::Function::Ptr&) const;
338 
347  template<class FunctionIterator>
348  std::vector<FunctionDistancePair>
349  compareOneToMany(const Partitioner2::Function::Ptr &needle,
350  const boost::iterator_range<FunctionIterator> &haystack) const {
351  std::vector<Partitioner2::Function::Ptr> others;
352  BOOST_FOREACH (const Partitioner2::Function::Ptr &other, haystack)
353  others.push_back(other);
354  return compareOneToMany(needle, others);
355  }
356 
357  template<class FunctionIterator>
358  std::vector<FunctionDistancePair>
359  compareOneToMany(const Partitioner2::Function::Ptr &needle,
360  const FunctionIterator &begin, const FunctionIterator &end) const {
361  return compareOneToMany(needle, boost::iterator_range<FunctionIterator>(begin, end));
362  }
363 
364  std::vector<FunctionDistancePair>
365  compareOneToMany(const Partitioner2::Function::Ptr &needle,
366  const std::vector<Partitioner2::Function::Ptr> &haystack) const;
378  std::vector<std::vector<double> > compareManyToMany(const std::vector<Partitioner2::Function::Ptr>&,
379  const std::vector<Partitioner2::Function::Ptr>&) const;
380 
390  DistanceMatrix compareManyToManyMatrix(std::vector<Partitioner2::Function::Ptr>,
391  std::vector<Partitioner2::Function::Ptr>) const;
392 
402  std::vector<FunctionPair> findMinimumCostMapping(const std::vector<Partitioner2::Function::Ptr> &list1,
403  const std::vector<Partitioner2::Function::Ptr> &list2) const;
404 
410  std::vector<double> computeDistances(const std::vector<Partitioner2::Function::Ptr> &list1,
411  const std::vector<Partitioner2::Function::Ptr> &list2,
412  size_t nThreads) const;
413 
415  // Sorting
417 public:
419  static bool sortByIncreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b) {
420  return a.second < b.second;
421  }
422 
424  static bool sortByDecreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b) {
425  return b.second < a.second;
426  }
427 
429  static bool sortByAddress(const FunctionDistancePair &a, const FunctionDistancePair &b) {
430  if (a.first == NULL || b.first == NULL)
431  return a.first == NULL && b.first != NULL;
432  return a.first->address() < b.first->address();
433  }
434 
436  // Printing and debugging
438 public:
440  static void initDiagnostics();
441 
445  void printCharacteristicValues(std::ostream&) const;
446 
448  // Utility functions
450 public:
453 
460  static std::vector<size_t> findMinimumAssignment(const DistanceMatrix&);
461 
466  static double totalAssignmentCost(const DistanceMatrix&, const std::vector<size_t> &assignment);
467 
469  static double maximumDistance(const DistanceMatrix&);
470 
472  static double averageDistance(const DistanceMatrix&);
473 
475  static double medianDistance(const DistanceMatrix&);
476 
478  // Internal functions
480 private:
481  static double comparePointClouds(const PointCloud&, const PointCloud&);
482  static double compareOrderedLists(const OrderedLists&, const OrderedLists&);
483 };
484 
485 std::ostream& operator<<(std::ostream&, const FunctionSimilarity&);
486 
487 } // namespace
488 } // namespace
489 
490 #endif
491 #endif
static bool sortByDecreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by descending distance.
Statistic
Ways that values can be combined.
std::vector< FunctionPair > findMinimumCostMapping(const std::vector< Partitioner2::Function::Ptr > &list1, const std::vector< Partitioner2::Function::Ptr > &list2) const
Minimum cost 1:1 mapping.
Matrix< double > DistanceMatrix
Square matrix representing distances.
void measureMnemonicStream(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &)
Instruction mnemonic stream.
void measureCfgConnectivity(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &, size_t maxPoints=UNLIMITED)
Control flow graph connectivity.
std::pair< Partitioner2::Function::Ptr, double > FunctionDistancePair
Function and distance to some other function.
const size_t UNLIMITED(-1)
Effictively unlimited size.
CategoryId declareCfgConnectivity(const std::string &categoryName)
Control flow graph connectivity.
std::pair< Partitioner2::Function::Ptr, Partitioner2::Function::Ptr > FunctionPair
Pair of functions.
Collection of streams.
Definition: Message.h:1606
static void initDiagnostics()
Initializes and registers disassembler diagnostic streams.
void measureCallGraphConnectivity(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &)
Function calls.
void printCharacteristicValues(std::ostream &) const
Print characteristic values for this analysis.
std::vector< OrderedList > OrderedLists
Ordered collection of ordered lists of integers.
CategoryId findCategory(const std::string &name) const
Find a category by name.
static bool sortByAddress(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by function address.
size_t nCategories() const
Number of categories.
static double totalAssignmentCost(const DistanceMatrix &, const std::vector< size_t > &assignment)
Total cost of a mapping.
Main namespace for the ROSE library.
const OrderedLists & lists(const Partitioner2::Function::Ptr &, CategoryId) const
Ordered lists contained in a category.
static Sawyer::Message::Facility mlog
Diagnostic streams.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::Function::Ptr &needle, const boost::iterator_range< FunctionIterator > &haystack) const
Compare one function with many others.
std::vector< FunctionDistancePair > compareOneToAll(const Partitioner2::Function::Ptr &) const
Compare one function with all others.
double categoryWeight(CategoryId) const
Property: category weight.
Rose::Progress::Ptr progress() const
Property: Object to which progress reports are made.
static const CategoryId NO_CATEGORY
Invalid category ID.
void categoryAccumulatorType(Statistic s)
Property: Statistic for combining category distances.
const PointCloud & points(const Partitioner2::Function::Ptr &, CategoryId) const
Catesian points contained in a category.
size_t categoryDimensionality(CategoryId) const
Dimensionality of Cartesian characteristic points.
CValKind categoryKind(CategoryId) const
Kind of category.
Statistic categoryAccumulatorType() const
Property: Statistic for combining category distances.
double compare(const Partitioner2::Function::Ptr &, const Partitioner2::Function::Ptr &, double dflt=NAN) const
Compare two functions.
static double cartesianDistance(const FunctionSimilarity::CartesianPoint &, const FunctionSimilarity::CartesianPoint &)
Cartesian distance between two points.
CategoryId declarePointCategory(const std::string &name, size_t dimensionality, bool allowExisting=true)
Declare a new category of Cartesian points.
static double medianDistance(const DistanceMatrix &)
Median distance in the matrix.
DistanceMatrix compareManyToManyMatrix(std::vector< Partitioner2::Function::Ptr >, std::vector< Partitioner2::Function::Ptr >) const
Compare many functions to many others.
std::vector< double > CartesianPoint
Characteristic value that's a Cartesian point.
std::vector< double > computeDistances(const std::vector< Partitioner2::Function::Ptr > &list1, const std::vector< Partitioner2::Function::Ptr > &list2, size_t nThreads) const
Compute distances between sets of functions.
CategoryId declareListCategory(const std::string &name, bool allowExisting=true)
Declare a new category of ordered lists of integers.
CategoryId declareCallGraphConnectivity(const std::string &categoryName)
Function calls.
CategoryId declareMnemonicStream(const std::string &categoryName)
Instruction mnemonic stream.
Analysis to test the similarity of two functions.
size_t size(const Partitioner2::Function::Ptr &, CategoryId) const
Number of characteristic points in a category.
std::vector< int > OrderedList
Characteristic value that's an ordered list of integers.
static double averageDistance(const DistanceMatrix &)
Average distance in the matrix.
static double maximumDistance(const DistanceMatrix &)
Maximum value in the distance matrix.
static std::vector< size_t > findMinimumAssignment(const DistanceMatrix &)
Find minimum mapping from rows to columns.
size_t CategoryId
ID number unique within this analysis context.
void insertPoint(const Partitioner2::Function::Ptr &, CategoryId, const CartesianPoint &)
Insert a Cartesian point characteristic value.
std::vector< CartesianPoint > PointCloud
Unordered collection of Cartesian points.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::Function::Ptr &needle, const FunctionIterator &begin, const FunctionIterator &end) const
Compare one function with many others.
Partitions instructions into basic blocks and functions.
Definition: Partitioner.h:323
FunctionPtr Ptr
Shared-ownership pointer for function.
Definition: Function.h:52
Base class for all ROSE exceptions.
Definition: RoseException.h:9
void insertList(const Partitioner2::Function::Ptr &, CategoryId, const OrderedList &)
Insert an ordered list characteristic value.
Container associating values with keys.
Definition: Sawyer/Map.h:66
static bool sortByIncreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by ascending distance.
Sawyer::SharedPointer< Progress > Ptr
Progress objects are reference counted.
Definition: Progress.h:168
std::vector< std::vector< double > > compareManyToMany(const std::vector< Partitioner2::Function::Ptr > &, const std::vector< Partitioner2::Function::Ptr > &) const
Compare many functions to many others.