1#ifndef ROSE_BinaryAnalysis_FunctionSimilarity_H
2#define ROSE_BinaryAnalysis_FunctionSimilarity_H
3#include <featureTests.h>
4#ifdef ROSE_ENABLE_BINARY_ANALYSIS
6#include <Rose/BinaryAnalysis/Matrix.h>
7#include <Rose/BinaryAnalysis/Partitioner2/Function.h>
8#include <Rose/Progress.h>
9#include <Rose/Exception.h>
10#include <Sawyer/Graph.h>
11#include <Sawyer/Map.h>
14 #include <dlib/optimization.h>
18namespace BinaryAnalysis {
/** Pair of functions.
 *
 *  Both members are @c Partitioner2::FunctionPtr values. */
94 typedef std::pair<Partitioner2::FunctionPtr, Partitioner2::FunctionPtr>
FunctionPair;
111 size_t dimensionality;
/** Construct a category from its name, kind, and weight.
 *
 *  @param name   Stored in the @c name member.
 *  @param kind   Stored in the @c kind member.
 *  @param weight Stored in the @c weight member; 1.0 when the argument is omitted.
 *
 *  The @c dimensionality member is initialized to zero and the @c dflt member to 1.0; neither can be supplied
 *  through this constructor. */
114 explicit Category(
const std::string &name,
CValKind kind,
double weight = 1.0)
115 : name(name), kind(kind), weight(weight), dimensionality(0), dflt(1.0) {}
118 std::vector<Category> categories_;
122 struct CharacteristicValues {
128 struct FunctionInfo {
129 std::vector<CharacteristicValues> categories;
/** Default constructor. Value-initializes the @c categories vector, so it starts out empty. */
130 FunctionInfo(): categories() {}
147 : categoryAccumulatorType_(AVERAGE), progress_(Progress::instance()) {}
151 categoryNames_.clear();
153 categoryAccumulatorType_ = AVERAGE;
347 template<
class FunctionIterator>
348 std::vector<FunctionDistancePair>
350 const boost::iterator_range<FunctionIterator> &haystack)
const {
351 std::vector<Partitioner2::FunctionPtr> others;
353 others.push_back(other);
357 template<
class FunctionIterator>
358 std::vector<FunctionDistancePair>
360 const FunctionIterator &begin,
const FunctionIterator &end)
const {
361 return compareOneToMany(needle, boost::iterator_range<FunctionIterator>(begin, end));
364 std::vector<FunctionDistancePair>
366 const std::vector<Partitioner2::FunctionPtr> &haystack)
const;
/** Compare many functions to many others.
 *
 *  Takes two lists of functions and returns a vector of vectors of doubles. This is a const member, so the call does
 *  not modify the analysis object.
 *
 *  NOTE(review): presumably the result is indexed first by position in the first list and then by position in the
 *  second list, each element being the distance between that pair of functions -- confirm against the implementation. */
378 std::vector<std::vector<double> >
compareManyToMany(
const std::vector<Partitioner2::FunctionPtr>&,
379 const std::vector<Partitioner2::FunctionPtr>&)
const;
391 std::vector<Partitioner2::FunctionPtr>)
const;
403 const std::vector<Partitioner2::FunctionPtr> &list2)
const;
411 const std::vector<Partitioner2::FunctionPtr> &list2,
412 size_t nThreads)
const;
420 return a.second < b.second;
425 return b.second < a.second;
430 if (a.first == NULL || b.first == NULL)
431 return a.first == NULL && b.first != NULL;
432 return a.first->address() < b.first->address();
Analysis to test the similarity of two functions.
std::vector< FunctionPair > findMinimumCostMapping(const std::vector< Partitioner2::FunctionPtr > &list1, const std::vector< Partitioner2::FunctionPtr > &list2) const
Minimum cost 1:1 mapping.
static bool sortByAddress(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by function address.
CValKind categoryKind(CategoryId) const
Kind of category.
std::pair< Partitioner2::FunctionPtr, double > FunctionDistancePair
Function and distance to some other function.
static void initDiagnostics()
Initializes and registers disassembler diagnostic streams.
double categoryWeight(CategoryId) const
Property: category weight.
CategoryId findCategory(const std::string &name) const
Find a category by name.
static double cartesianDistance(const FunctionSimilarity::CartesianPoint &, const FunctionSimilarity::CartesianPoint &)
Cartesian distance between two points.
static bool sortByDecreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by descending distance.
static double maximumDistance(const DistanceMatrix &)
Maximum value in the distance matrix.
std::vector< std::vector< double > > compareManyToMany(const std::vector< Partitioner2::FunctionPtr > &, const std::vector< Partitioner2::FunctionPtr > &) const
Compare many functions to many others.
size_t nCategories() const
Number of categories.
CategoryId declareListCategory(const std::string &name, bool allowExisting=true)
Declare a new category of ordered lists of integers.
std::vector< OrderedList > OrderedLists
Ordered collection of ordered lists of integers.
static const CategoryId NO_CATEGORY
Invalid category ID.
const PointCloud & points(const Partitioner2::FunctionPtr &, CategoryId) const
Cartesian points contained in a category.
static double medianDistance(const DistanceMatrix &)
Median distance in the matrix.
DistanceMatrix compareManyToManyMatrix(std::vector< Partitioner2::FunctionPtr >, std::vector< Partitioner2::FunctionPtr >) const
Compare many functions to many others.
void insertPoint(const Partitioner2::FunctionPtr &, CategoryId, const CartesianPoint &)
Insert a Cartesian point characteristic value.
double compare(const Partitioner2::FunctionPtr &, const Partitioner2::FunctionPtr &, double dflt=NAN) const
Compare two functions.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::FunctionPtr &needle, const std::vector< Partitioner2::FunctionPtr > &haystack) const
Compare one function with many others.
static double totalAssignmentCost(const DistanceMatrix &, const std::vector< size_t > &assignment)
Total cost of a mapping.
void categoryWeight(CategoryId, double)
Property: category weight.
static std::vector< size_t > findMinimumAssignment(const DistanceMatrix &)
Find minimum mapping from rows to columns.
void printCharacteristicValues(std::ostream &) const
Print characteristic values for this analysis.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::FunctionPtr &needle, const boost::iterator_range< FunctionIterator > &haystack) const
Compare one function with many others.
size_t categoryDimensionality(CategoryId) const
Dimensionality of Cartesian characteristic points.
std::vector< int > OrderedList
Characteristic value that's an ordered list of integers.
Rose::Progress::Ptr progress() const
Property: Object to which progress reports are made.
CategoryId declareCfgConnectivity(const std::string &categoryName)
Control flow graph connectivity.
void measureMnemonicStream(CategoryId, const Partitioner2::PartitionerConstPtr &, const Partitioner2::FunctionPtr &)
Instruction mnemonic stream.
static Sawyer::Message::Facility mlog
Diagnostic streams.
size_t CategoryId
ID number unique within this analysis context.
CategoryId declareMnemonicStream(const std::string &categoryName)
Instruction mnemonic stream.
std::vector< double > computeDistances(const std::vector< Partitioner2::FunctionPtr > &list1, const std::vector< Partitioner2::FunctionPtr > &list2, size_t nThreads) const
Compute distances between sets of functions.
Matrix< double > DistanceMatrix
Square matrix representing distances.
void categoryAccumulatorType(Statistic s)
Property: Statistic for combining category distances.
CValKind
Kinds of characteristic values.
@ ORDERED_LIST
Values are lists of integers.
@ CARTESIAN_POINT
Values are N-dimensional Cartesian points.
std::vector< double > CartesianPoint
Characteristic value that's a Cartesian point.
CategoryId declareCallGraphConnectivity(const std::string &categoryName)
Function calls.
void insertList(const Partitioner2::FunctionPtr &, CategoryId, const OrderedList &)
Insert an ordered list characteristic value.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::FunctionPtr &needle, const FunctionIterator &begin, const FunctionIterator &end) const
Compare one function with many others.
std::pair< Partitioner2::FunctionPtr, Partitioner2::FunctionPtr > FunctionPair
Pair of functions.
size_t size(const Partitioner2::FunctionPtr &, CategoryId) const
Number of characteristic points in a category.
static bool sortByIncreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by ascending distance.
Statistic
Ways that values can be combined.
const OrderedLists & lists(const Partitioner2::FunctionPtr &, CategoryId) const
Ordered lists contained in a category.
std::vector< FunctionDistancePair > compareOneToAll(const Partitioner2::FunctionPtr &) const
Compare one function with all others.
CategoryId declarePointCategory(const std::string &name, size_t dimensionality, bool allowExisting=true)
Declare a new category of Cartesian points.
void measureCfgConnectivity(CategoryId, const Partitioner2::PartitionerConstPtr &, const Partitioner2::FunctionPtr &, size_t maxPoints=UNLIMITED)
Control flow graph connectivity.
void measureCallGraphConnectivity(CategoryId, const Partitioner2::PartitionerConstPtr &, const Partitioner2::FunctionPtr &)
Function calls.
std::vector< CartesianPoint > PointCloud
Unordered collection of Cartesian points.
Statistic categoryAccumulatorType() const
Property: Statistic for combining category distances.
static double averageDistance(const DistanceMatrix &)
Average distance in the matrix.
Base class for all ROSE exceptions.
ProgressPtr Ptr
Progress objects are reference counted.
Container associating values with keys.
Sawyer::Container::Map< rose_addr_t, FunctionPtr > Functions
Mapping from address to function.
Sawyer::SharedPointer< Function > FunctionPtr
Shared-ownership pointer for Function.
const size_t UNLIMITED
Effectively unlimited size.