ROSE  0.9.10.220
BinaryFunctionSimilarity.h
1 #ifndef ROSE_BinaryAnalysis_FunctionSimilarity_H
2 #define ROSE_BinaryAnalysis_FunctionSimilarity_H
3 
4 #include <BinaryMatrix.h>
5 #include <Partitioner2/Function.h>
6 #include <Progress.h>
7 #include <Sawyer/Graph.h>
8 #include <Sawyer/Map.h>
9 
10 #ifdef ROSE_HAVE_DLIB
11  #include <dlib/optimization.h>
12 #endif
13 
14 namespace Rose {
15 namespace BinaryAnalysis {
16 
59  // Public types and data members
61 public:
62  /* Exceptions thrown by this analysis. */
63  class Exception: public std::runtime_error {
64  public:
65  Exception(const std::string &what): std::runtime_error(what) {}
66  ~Exception() throw () {}
67  };
68 
70  enum CValKind {
73  };
74  typedef std::vector<double> CartesianPoint;
75  typedef std::vector<int> OrderedList;
77  // Collections of characteristic values
78  typedef std::vector<CartesianPoint> PointCloud;
79  typedef std::vector<OrderedList> OrderedLists;
82  enum Statistic { AVERAGE, MEDIAN, MAXIMUM };
83 
84  typedef size_t CategoryId;
85  static const CategoryId NO_CATEGORY = -1;
88  typedef std::pair<Partitioner2::Function::Ptr, double /*distance*/> FunctionDistancePair;
89 
91  typedef std::pair<Partitioner2::Function::Ptr, Partitioner2::Function::Ptr> FunctionPair;
92 
95 
98 
100  // Private types and data members
102 private:
103  // Declaration for metric categories.
104  struct Category {
105  std::string name; // name of category (or metric for singleton categories)
106  CValKind kind; // kind of characteristic values stored here
107  double weight; // weight when combining category distances
108  size_t dimensionality; // dimensionality of Cartesian points; 0 => unknown
109  double dflt; // default value when one of the functions is null
110 
111  explicit Category(const std::string &name, CValKind kind, double weight = 1.0)
112  : name(name), kind(kind), weight(weight), dimensionality(0), dflt(1.0) {}
113  };
114 
115  std::vector<Category> categories_;
116  Sawyer::Container::Map<std::string /*category_name*/, size_t /* categories index */> categoryNames_;
117 
118  // Characteristic values for some function + category pair. Only one of these members is populated with data.
119  struct CharacteristicValues {
120  PointCloud pointCloud;
121  OrderedLists orderedLists;
122  };
123 
124  // Info about each function
125  struct FunctionInfo {
126  std::vector<CharacteristicValues> categories; // characteristic values orgnized by CategoryId
127  FunctionInfo(): categories() {} // LLVM's clang++ 3.5 and 3.7 don't generate this for const objs
128  };
129 
130  // Info about all functions
132  Functions functions_;
133 
134  // How to combine category distances to obtain a function distance
135  Statistic categoryAccumulatorType_;
136 
137  Progress::Ptr progress_;
138 
140  // Constructors
142 public:
143  FunctionSimilarity()
144  : categoryAccumulatorType_(AVERAGE), progress_(Progress::instance()) {}
145 
146  void clear() {
147  categories_.clear();
148  categoryNames_.clear();
149  functions_.clear();
150  categoryAccumulatorType_ = AVERAGE;
151  }
152 
154  // Properties
156 public:
164  Statistic categoryAccumulatorType() const { return categoryAccumulatorType_; }
165  void categoryAccumulatorType(Statistic s) { categoryAccumulatorType_ = s; }
169  Rose::Progress::Ptr progress() const { return progress_; }
170 
172  // Category declarations
174 public:
182  CategoryId declarePointCategory(const std::string &name, size_t dimensionality, bool allowExisting = true);
183 
190  CategoryId declareListCategory(const std::string &name, bool allowExisting = true);
191 
195  size_t nCategories() const { return categories_.size(); }
196 
201  CategoryId findCategory(const std::string &name) const { return categoryNames_.getOrElse(name, NO_CATEGORY); }
204  CValKind categoryKind(CategoryId) const;
205 
207  size_t categoryDimensionality(CategoryId) const;
208 
212  double categoryWeight(CategoryId) const;
213  void categoryWeight(CategoryId, double);
217  // Predefined metrics
220 public:
242  CategoryId declareCfgConnectivity(const std::string &categoryName);
243  void measureCfgConnectivity(CategoryId, const Partitioner2::Partitioner&, const Partitioner2::Function::Ptr&,
244  size_t maxPoints = (size_t)(-1));
252  CategoryId declareCallGraphConnectivity(const std::string &categoryName);
253  void measureCallGraphConnectivity(CategoryId, const Partitioner2::Partitioner&, const Partitioner2::Function::Ptr&);
262  CategoryId declareMnemonicStream(const std::string &categoryName);
263  void measureMnemonicStream(CategoryId, const Partitioner2::Partitioner&, const Partitioner2::Function::Ptr&);
267  // Low level functions for manipulating characteristic values
270 public:
271 
279  void insertPoint(const Partitioner2::Function::Ptr&, CategoryId, const CartesianPoint&);
280 
289  void insertList(const Partitioner2::Function::Ptr&, CategoryId, const OrderedList&);
290 
292  size_t size(const Partitioner2::Function::Ptr&, CategoryId) const;
293 
295  const PointCloud& points(const Partitioner2::Function::Ptr&, CategoryId) const;
296 
298  const OrderedLists& lists(const Partitioner2::Function::Ptr&, CategoryId) const;
299 
300 
302  // Comparison interface
304 public:
328  double compare(const Partitioner2::Function::Ptr&, const Partitioner2::Function::Ptr&, double dflt = NAN) const;
329 
334  std::vector<FunctionDistancePair> compareOneToAll(const Partitioner2::Function::Ptr&) const;
335 
344  template<class FunctionIterator>
345  std::vector<FunctionDistancePair>
346  compareOneToMany(const Partitioner2::Function::Ptr &needle,
347  const boost::iterator_range<FunctionIterator> &haystack) const {
348  std::vector<Partitioner2::Function::Ptr> others;
349  BOOST_FOREACH (const Partitioner2::Function::Ptr &other, haystack)
350  others.push_back(other);
351  return compareOneToMany(needle, others);
352  }
353 
354  template<class FunctionIterator>
355  std::vector<FunctionDistancePair>
356  compareOneToMany(const Partitioner2::Function::Ptr &needle,
357  const FunctionIterator &begin, const FunctionIterator &end) const {
358  return compareOneToMany(needle, boost::iterator_range<FunctionIterator>(begin, end));
359  }
360 
361  std::vector<FunctionDistancePair>
362  compareOneToMany(const Partitioner2::Function::Ptr &needle,
363  const std::vector<Partitioner2::Function::Ptr> &haystack) const;
375  std::vector<std::vector<double> > compareManyToMany(const std::vector<Partitioner2::Function::Ptr>&,
376  const std::vector<Partitioner2::Function::Ptr>&) const;
377 
387  DistanceMatrix compareManyToManyMatrix(std::vector<Partitioner2::Function::Ptr>,
388  std::vector<Partitioner2::Function::Ptr>) const;
389 
399  std::vector<FunctionPair> findMinimumCostMapping(const std::vector<Partitioner2::Function::Ptr> &list1,
400  const std::vector<Partitioner2::Function::Ptr> &list2) const;
401 
407  std::vector<double> computeDistances(const std::vector<Partitioner2::Function::Ptr> &list1,
408  const std::vector<Partitioner2::Function::Ptr> &list2,
409  size_t nThreads) const;
410 
412  // Sorting
414 public:
416  static bool sortByIncreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b) {
417  return a.second < b.second;
418  }
419 
421  static bool sortByDecreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b) {
422  return b.second < a.second;
423  }
424 
426  static bool sortByAddress(const FunctionDistancePair &a, const FunctionDistancePair &b) {
427  if (a.first == NULL || b.first == NULL)
428  return a.first == NULL && b.first != NULL;
429  return a.first->address() < b.first->address();
430  }
431 
433  // Printing and debugging
435 public:
437  static void initDiagnostics();
438 
442  void printCharacteristicValues(std::ostream&) const;
443 
445  // Utility functions
447 public:
450 
457  static std::vector<size_t> findMinimumAssignment(const DistanceMatrix&);
458 
463  static double totalAssignmentCost(const DistanceMatrix&, const std::vector<size_t> &assignment);
464 
466  static double maximumDistance(const DistanceMatrix&);
467 
469  static double averageDistance(const DistanceMatrix&);
470 
472  static double medianDistance(const DistanceMatrix&);
473 
475  // Internal functions
477 private:
478  static double comparePointClouds(const PointCloud&, const PointCloud&);
479  static double compareOrderedLists(const OrderedLists&, const OrderedLists&);
480 };
481 
482 std::ostream& operator<<(std::ostream&, const FunctionSimilarity&);
483 
484 } // namespace
485 } // namespace
486 
487 #endif
static bool sortByDecreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by descending distance.
Statistic
Ways that values can be combined.
std::vector< FunctionPair > findMinimumCostMapping(const std::vector< Partitioner2::Function::Ptr > &list1, const std::vector< Partitioner2::Function::Ptr > &list2) const
Minimum cost 1:1 mapping.
Matrix< double > DistanceMatrix
Square matrix representing distances.
void measureMnemonicStream(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &)
Instruction mnemonic stream.
std::pair< Partitioner2::Function::Ptr, double > FunctionDistancePair
Function and distance to some other function.
CategoryId declareCfgConnectivity(const std::string &categoryName)
Control flow graph connectivity.
std::pair< Partitioner2::Function::Ptr, Partitioner2::Function::Ptr > FunctionPair
Pair of functions.
Collection of streams.
Definition: Message.h:1579
static void initDiagnostics()
Initializes and registers disassembler diagnostic streams.
void measureCallGraphConnectivity(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &)
Function calls.
void printCharacteristicValues(std::ostream &) const
Print characteristic values for this analysis.
std::vector< OrderedList > OrderedLists
Ordered collection of ordered lists of integers.
void measureCfgConnectivity(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &, size_t maxPoints=(size_t)(-1))
Control flow graph connectivity.
CategoryId findCategory(const std::string &name) const
Find a category by name.
static bool sortByAddress(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by function address.
size_t nCategories() const
Number of categories.
static double totalAssignmentCost(const DistanceMatrix &, const std::vector< size_t > &assignment)
Total cost of a mapping.
Main namespace for the ROSE library.
const OrderedLists & lists(const Partitioner2::Function::Ptr &, CategoryId) const
Ordered lists contained in a category.
static Sawyer::Message::Facility mlog
Diagnostic streams.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::Function::Ptr &needle, const boost::iterator_range< FunctionIterator > &haystack) const
Compare one function with many others.
std::vector< FunctionDistancePair > compareOneToAll(const Partitioner2::Function::Ptr &) const
Compare one function with all others.
double categoryWeight(CategoryId) const
Property: category weight.
Rose::Progress::Ptr progress() const
Property: Object to which progress reports are made.
static const CategoryId NO_CATEGORY
Invalid category ID.
void categoryAccumulatorType(Statistic s)
Property: Statistic for combining category distances.
const PointCloud & points(const Partitioner2::Function::Ptr &, CategoryId) const
Catesian points contained in a category.
size_t categoryDimensionality(CategoryId) const
Dimensionality of Cartesian characteristic points.
CValKind categoryKind(CategoryId) const
Kind of category.
Statistic categoryAccumulatorType() const
Property: Statistic for combining category distances.
double compare(const Partitioner2::Function::Ptr &, const Partitioner2::Function::Ptr &, double dflt=NAN) const
Compare two functions.
static double cartesianDistance(const FunctionSimilarity::CartesianPoint &, const FunctionSimilarity::CartesianPoint &)
Cartesian distance between two points.
CategoryId declarePointCategory(const std::string &name, size_t dimensionality, bool allowExisting=true)
Declare a new category of Cartesian points.
static double medianDistance(const DistanceMatrix &)
Median distance in the matrix.
DistanceMatrix compareManyToManyMatrix(std::vector< Partitioner2::Function::Ptr >, std::vector< Partitioner2::Function::Ptr >) const
Compare many functions to many others.
std::vector< double > CartesianPoint
Characteristic value that's a Cartesian point.
std::vector< double > computeDistances(const std::vector< Partitioner2::Function::Ptr > &list1, const std::vector< Partitioner2::Function::Ptr > &list2, size_t nThreads) const
Compute distances between sets of functions.
CategoryId declareListCategory(const std::string &name, bool allowExisting=true)
Declare a new category of ordered lists of integers.
CategoryId declareCallGraphConnectivity(const std::string &categoryName)
Function calls.
CategoryId declareMnemonicStream(const std::string &categoryName)
Instruction mnemonic stream.
Analysis to test the similarity of two functions.
size_t size(const Partitioner2::Function::Ptr &, CategoryId) const
Number of characteristic points in a category.
std::vector< int > OrderedList
Characteristic value that's an ordered list of integers.
static double averageDistance(const DistanceMatrix &)
Average distance in the matrix.
static double maximumDistance(const DistanceMatrix &)
Maximum value in the distance matrix.
static std::vector< size_t > findMinimumAssignment(const DistanceMatrix &)
Find minimum mapping from rows to columns.
size_t CategoryId
ID number unique within this analysis context.
void insertPoint(const Partitioner2::Function::Ptr &, CategoryId, const CartesianPoint &)
Insert a Cartesian point characteristic value.
std::vector< CartesianPoint > PointCloud
Unordered collection of Cartesian points.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::Function::Ptr &needle, const FunctionIterator &begin, const FunctionIterator &end) const
Compare one function with many others.
Partitions instructions into basic blocks and functions.
Definition: Partitioner.h:293
FunctionPtr Ptr
Shared-ownership pointer for function.
Definition: Function.h:48
void insertList(const Partitioner2::Function::Ptr &, CategoryId, const OrderedList &)
Insert an ordered list characteristic value.
Container associating values with keys.
Definition: Sawyer/Map.h:66
static bool sortByIncreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by ascending distance.
Sawyer::SharedPointer< Progress > Ptr
Progress objects are reference counted.
Definition: Progress.h:167
std::vector< std::vector< double > > compareManyToMany(const std::vector< Partitioner2::Function::Ptr > &, const std::vector< Partitioner2::Function::Ptr > &) const
Compare many functions to many others.