ROSE  0.9.9.139
BinaryFunctionSimilarity.h
1 #ifndef ROSE_BinaryAnalysis_FunctionSimilarity_H
2 #define ROSE_BinaryAnalysis_FunctionSimilarity_H
3 
4 #include <BinaryMatrix.h>
5 #include <Partitioner2/Function.h>
6 #include <Progress.h>
7 #include <Sawyer/Graph.h>
8 #include <Sawyer/Map.h>
9 
10 #ifdef ROSE_HAVE_DLIB
11  #include <dlib/optimization.h>
12 #endif
13 
14 namespace Rose {
15 namespace BinaryAnalysis {
16 
59  // Public types and data members
61 public:
62  /* Exceptions thrown by this analysis. */
63  class Exception: public std::runtime_error {
64  public:
65  Exception(const std::string &what): std::runtime_error(what) {}
66  ~Exception() throw () {}
67  };
68 
70  enum CValKind {
73  };
74  typedef std::vector<double> CartesianPoint;
75  typedef std::vector<int> OrderedList;
77  // Collections of characteristic values
78  typedef std::vector<CartesianPoint> PointCloud;
79  typedef std::vector<OrderedList> OrderedLists;
82  enum Statistic { AVERAGE, MEDIAN, MAXIMUM };
83 
84  typedef size_t CategoryId;
85  static const CategoryId NO_CATEGORY = -1;
88  typedef std::pair<Partitioner2::Function::Ptr, double /*distance*/> FunctionDistancePair;
89 
91  typedef std::pair<Partitioner2::Function::Ptr, Partitioner2::Function::Ptr> FunctionPair;
92 
95 
98 
100  // Private types and data members
102 private:
103  // Declaration for metric categories.
104  struct Category {
105  std::string name; // name of category (or metric for singleton categories)
106  CValKind kind; // kind of characteristic values stored here
107  double weight; // weight when combining category distances
108  size_t dimensionality; // dimensionality of Cartesian points; 0 => unknown
109 
110  explicit Category(const std::string &name, CValKind kind, double weight = 1.0)
111  : name(name), kind(kind), weight(weight), dimensionality(0) {}
112  };
113 
114  std::vector<Category> categories_;
115  Sawyer::Container::Map<std::string /*category_name*/, size_t /* categories index */> categoryNames_;
116 
117  // Characteristic values for some function + category pair. Only one of these members is populated with data.
118  struct CharacteristicValues {
119  PointCloud pointCloud;
120  OrderedLists orderedLists;
121  };
122 
123  // Info about each function
124  struct FunctionInfo {
125  std::vector<CharacteristicValues> categories; // characteristic values orgnized by CategoryId
126  FunctionInfo(): categories() {} // LLVM's clang++ 3.5 and 3.7 don't generate this for const objs
127  };
128 
129  // Info about all functions
131  Functions functions_;
132 
133  // How to combine category distances to obtain a function distance
134  Statistic categoryAccumulatorType_;
135 
136  Progress::Ptr progress_;
137 
139  // Constructors
141 public:
142  FunctionSimilarity()
143  : categoryAccumulatorType_(AVERAGE), progress_(Progress::instance()) {}
144 
145  void clear() {
146  categories_.clear();
147  categoryNames_.clear();
148  functions_.clear();
149  categoryAccumulatorType_ = AVERAGE;
150  }
151 
153  // Properties
155 public:
163  Statistic categoryAccumulatorType() const { return categoryAccumulatorType_; }
164  void categoryAccumulatorType(Statistic s) { categoryAccumulatorType_ = s; }
168  Rose::Progress::Ptr progress() const { return progress_; }
169 
171  // Category declarations
173 public:
181  CategoryId declarePointCategory(const std::string &name, size_t dimensionality, bool allowExisting = true);
182 
189  CategoryId declareListCategory(const std::string &name, bool allowExisting = true);
190 
194  size_t nCategories() const { return categories_.size(); }
195 
200  CategoryId findCategory(const std::string &name) const { return categoryNames_.getOrElse(name, NO_CATEGORY); }
203  CValKind categoryKind(CategoryId) const;
204 
206  size_t categoryDimensionality(CategoryId) const;
207 
211  double categoryWeight(CategoryId) const;
212  void categoryWeight(CategoryId, double);
216  // Predefined metrics
219 public:
238  CategoryId declareCfgConnectivity(const std::string &categoryName);
239  void measureCfgConnectivity(CategoryId, const Partitioner2::Partitioner&, const Partitioner2::Function::Ptr&);
247  CategoryId declareCallGraphConnectivity(const std::string &categoryName);
248  void measureCallGraphConnectivity(CategoryId, const Partitioner2::Partitioner&, const Partitioner2::Function::Ptr&);
257  CategoryId declareMnemonicStream(const std::string &categoryName);
258  void measureMnemonicStream(CategoryId, const Partitioner2::Partitioner&, const Partitioner2::Function::Ptr&);
262  // Low level functions for manipulating characteristic values
265 public:
266 
274  void insertPoint(const Partitioner2::Function::Ptr&, CategoryId, const CartesianPoint&);
275 
284  void insertList(const Partitioner2::Function::Ptr&, CategoryId, const OrderedList&);
285 
287  size_t size(const Partitioner2::Function::Ptr&, CategoryId) const;
288 
290  const PointCloud& points(const Partitioner2::Function::Ptr&, CategoryId) const;
291 
293  const OrderedLists& lists(const Partitioner2::Function::Ptr&, CategoryId) const;
294 
295 
297  // Comparison interface
299 public:
323  double compare(const Partitioner2::Function::Ptr&, const Partitioner2::Function::Ptr&, double dflt = NAN) const;
324 
329  std::vector<FunctionDistancePair> compareOneToAll(const Partitioner2::Function::Ptr&) const;
330 
339  template<class FunctionIterator>
340  std::vector<FunctionDistancePair>
341  compareOneToMany(const Partitioner2::Function::Ptr &needle,
342  const boost::iterator_range<FunctionIterator> &haystack) const {
343  std::vector<Partitioner2::Function::Ptr> others;
344  BOOST_FOREACH (const Partitioner2::Function::Ptr &other, haystack)
345  others.push_back(other);
346  return compareOneToMany(needle, others);
347  }
348 
349  template<class FunctionIterator>
350  std::vector<FunctionDistancePair>
351  compareOneToMany(const Partitioner2::Function::Ptr &needle,
352  const FunctionIterator &begin, const FunctionIterator &end) const {
353  return compareOneToMany(needle, boost::iterator_range<FunctionIterator>(begin, end));
354  }
355 
356  std::vector<FunctionDistancePair>
357  compareOneToMany(const Partitioner2::Function::Ptr &needle,
358  const std::vector<Partitioner2::Function::Ptr> &haystack) const;
370  std::vector<std::vector<double> > compareManyToMany(const std::vector<Partitioner2::Function::Ptr>&,
371  const std::vector<Partitioner2::Function::Ptr>&) const;
372 
382  DistanceMatrix compareManyToManyMatrix(const std::vector<Partitioner2::Function::Ptr>&,
383  const std::vector<Partitioner2::Function::Ptr>&) const;
384 
394  std::vector<FunctionPair> findMinimumCostMapping(const std::vector<Partitioner2::Function::Ptr> &list1,
395  const std::vector<Partitioner2::Function::Ptr> &list2) const;
396 
398  // Sorting
400 public:
402  static bool sortByIncreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b) {
403  return a.second < b.second;
404  }
405 
407  static bool sortByDecreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b) {
408  return b.second < a.second;
409  }
410 
412  static bool sortByAddress(const FunctionDistancePair &a, const FunctionDistancePair &b) {
413  if (a.first == NULL || b.first == NULL)
414  return a.first == NULL && b.first != NULL;
415  return a.first->address() < b.first->address();
416  }
417 
419  // Printing and debugging
421 public:
423  static void initDiagnostics();
424 
428  void printCharacteristicValues(std::ostream&) const;
429 
431  // Utility functions
433 public:
436 
443  static std::vector<size_t> findMinimumAssignment(const DistanceMatrix&);
444 
449  static double totalAssignmentCost(const DistanceMatrix&, const std::vector<size_t> &assignment);
450 
452  static double maximumDistance(const DistanceMatrix&);
453 
455  static double averageDistance(const DistanceMatrix&);
456 
458  static double medianDistance(const DistanceMatrix&);
459 
461  // Internal functions
463 private:
464  static double comparePointClouds(const PointCloud&, const PointCloud&);
465  static double compareOrderedLists(const OrderedLists&, const OrderedLists&);
466 };
467 
468 std::ostream& operator<<(std::ostream&, const FunctionSimilarity&);
469 
470 } // namespace
471 } // namespace
472 
473 #endif
static bool sortByDecreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by descending distance.
Statistic
Ways that values can be combined.
std::vector< FunctionPair > findMinimumCostMapping(const std::vector< Partitioner2::Function::Ptr > &list1, const std::vector< Partitioner2::Function::Ptr > &list2) const
Minimum cost 1:1 mapping.
Matrix< double > DistanceMatrix
Square matrix representing distances.
void measureMnemonicStream(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &)
Instruction mnemonic stream.
std::pair< Partitioner2::Function::Ptr, double > FunctionDistancePair
Function and distance to some other function.
CategoryId declareCfgConnectivity(const std::string &categoryName)
Control flow graph connectivity.
std::pair< Partitioner2::Function::Ptr, Partitioner2::Function::Ptr > FunctionPair
Pair of functions.
Collection of streams.
Definition: Message.h:1579
static void initDiagnostics()
Initializes and registers disassembler diagnostic streams.
void measureCallGraphConnectivity(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &)
Function calls.
void printCharacteristicValues(std::ostream &) const
Print characteristic values for this analysis.
std::vector< OrderedList > OrderedLists
Ordered collection of ordered lists of integers.
CategoryId findCategory(const std::string &name) const
Find a category by name.
static bool sortByAddress(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by function address.
size_t nCategories() const
Number of categories.
static double totalAssignmentCost(const DistanceMatrix &, const std::vector< size_t > &assignment)
Total cost of a mapping.
Main namespace for the ROSE library.
const OrderedLists & lists(const Partitioner2::Function::Ptr &, CategoryId) const
Ordered lists contained in a category.
static Sawyer::Message::Facility mlog
Diagnostic streams.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::Function::Ptr &needle, const boost::iterator_range< FunctionIterator > &haystack) const
Compare one function with many others.
std::vector< FunctionDistancePair > compareOneToAll(const Partitioner2::Function::Ptr &) const
Compare one function with all others.
double categoryWeight(CategoryId) const
Property: category weight.
Rose::Progress::Ptr progress() const
Property: Object to which progress reports are made.
static const CategoryId NO_CATEGORY
Invalid category ID.
void categoryAccumulatorType(Statistic s)
Property: Statistic for combining category distances.
const PointCloud & points(const Partitioner2::Function::Ptr &, CategoryId) const
Catesian points contained in a category.
size_t categoryDimensionality(CategoryId) const
Dimensionality of Cartesian characteristic points.
CValKind categoryKind(CategoryId) const
Kind of category.
Statistic categoryAccumulatorType() const
Property: Statistic for combining category distances.
double compare(const Partitioner2::Function::Ptr &, const Partitioner2::Function::Ptr &, double dflt=NAN) const
Compare two functions.
static double cartesianDistance(const FunctionSimilarity::CartesianPoint &, const FunctionSimilarity::CartesianPoint &)
Cartesian distance between two points.
CategoryId declarePointCategory(const std::string &name, size_t dimensionality, bool allowExisting=true)
Declare a new category of Cartesian points.
static double medianDistance(const DistanceMatrix &)
Median distance in the matrix.
std::vector< double > CartesianPoint
Characteristic value that's a Cartesian point.
CategoryId declareListCategory(const std::string &name, bool allowExisting=true)
Declare a new category of ordered lists of integers.
DistanceMatrix compareManyToManyMatrix(const std::vector< Partitioner2::Function::Ptr > &, const std::vector< Partitioner2::Function::Ptr > &) const
Compare many functions to many others.
CategoryId declareCallGraphConnectivity(const std::string &categoryName)
Function calls.
CategoryId declareMnemonicStream(const std::string &categoryName)
Instruction mnemonic stream.
Analysis to test the similarity of two functions.
size_t size(const Partitioner2::Function::Ptr &, CategoryId) const
Number of characteristic points in a category.
std::vector< int > OrderedList
Characteristic value that's an ordered list of integers.
static double averageDistance(const DistanceMatrix &)
Average distance in the matrix.
static double maximumDistance(const DistanceMatrix &)
Maximum value in the distance matrix.
static std::vector< size_t > findMinimumAssignment(const DistanceMatrix &)
Find minimum mapping from rows to columns.
size_t CategoryId
ID number unique within this analysis context.
void insertPoint(const Partitioner2::Function::Ptr &, CategoryId, const CartesianPoint &)
Insert a Cartesian point characteristic value.
std::vector< CartesianPoint > PointCloud
Unordered collection of Cartesian points.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::Function::Ptr &needle, const FunctionIterator &begin, const FunctionIterator &end) const
Compare one function with many others.
Partitions instructions into basic blocks and functions.
Definition: Partitioner.h:290
FunctionPtr Ptr
Shared-ownership pointer for function.
Definition: Function.h:48
void insertList(const Partitioner2::Function::Ptr &, CategoryId, const OrderedList &)
Insert an ordered list characteristic value.
Container associating values with keys.
Definition: Sawyer/Map.h:64
static bool sortByIncreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by ascending distance.
void measureCfgConnectivity(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &)
Control flow graph connectivity.
Sawyer::SharedPointer< Progress > Ptr
Progress objects are reference counted.
Definition: Progress.h:167
std::vector< std::vector< double > > compareManyToMany(const std::vector< Partitioner2::Function::Ptr > &, const std::vector< Partitioner2::Function::Ptr > &) const
Compare many functions to many others.