1 #ifndef ROSE_BinaryAnalysis_FunctionSimilarity_H
2 #define ROSE_BinaryAnalysis_FunctionSimilarity_H
3 #include <featureTests.h>
4 #ifdef ROSE_ENABLE_BINARY_ANALYSIS
6 #include <BinaryMatrix.h>
7 #include <Partitioner2/Function.h>
9 #include <RoseException.h>
10 #include <Sawyer/Graph.h>
11 #include <Sawyer/Map.h>
14 #include <dlib/optimization.h>
94 typedef std::pair<Partitioner2::Function::Ptr, Partitioner2::Function::Ptr>
FunctionPair;
111 size_t dimensionality;
114 explicit Category(
const std::string &name,
CValKind kind,
double weight = 1.0)
115 : name(name), kind(kind), weight(weight), dimensionality(0), dflt(1.0) {}
118 std::vector<Category> categories_;
122 struct CharacteristicValues {
123 PointCloud pointCloud;
124 OrderedLists orderedLists;
128 struct FunctionInfo {
129 std::vector<CharacteristicValues> categories;
130 FunctionInfo(): categories() {}
135 Functions functions_;
147 : categoryAccumulatorType_(AVERAGE), progress_(Progress::instance()) {}
151 categoryNames_.clear();
153 categoryAccumulatorType_ = AVERAGE;
185 CategoryId
declarePointCategory(
const std::string &name,
size_t dimensionality,
bool allowExisting =
true);
204 CategoryId
findCategory(
const std::string &name)
const {
return categoryNames_.getOrElse(name, NO_CATEGORY); }
282 void insertPoint(
const Partitioner2::Function::Ptr&, CategoryId,
const CartesianPoint&);
292 void insertList(
const Partitioner2::Function::Ptr&, CategoryId,
const OrderedList&);
295 size_t size(
const Partitioner2::Function::Ptr&, CategoryId)
const;
298 const PointCloud&
points(
const Partitioner2::Function::Ptr&, CategoryId)
const;
301 const OrderedLists&
lists(
const Partitioner2::Function::Ptr&, CategoryId)
const;
331 double compare(
const Partitioner2::Function::Ptr&,
const Partitioner2::Function::Ptr&,
double dflt = NAN)
const;
337 std::vector<FunctionDistancePair>
compareOneToAll(
const Partitioner2::Function::Ptr&)
const;
347 template<
class FunctionIterator>
348 std::vector<FunctionDistancePair>
350 const boost::iterator_range<FunctionIterator> &haystack)
const {
351 std::vector<Partitioner2::Function::Ptr> others;
352 BOOST_FOREACH (
const Partitioner2::Function::Ptr &other, haystack)
353 others.push_back(other);
357 template<
class FunctionIterator>
358 std::vector<FunctionDistancePair>
360 const FunctionIterator &begin,
const FunctionIterator &end)
const {
361 return compareOneToMany(needle, boost::iterator_range<FunctionIterator>(begin, end));
364 std::vector<FunctionDistancePair>
366 const std::vector<Partitioner2::Function::Ptr> &haystack)
const;
378 std::vector<std::vector<double> >
compareManyToMany(
const std::vector<Partitioner2::Function::Ptr>&,
379 const std::vector<Partitioner2::Function::Ptr>&)
const;
391 std::vector<Partitioner2::Function::Ptr>)
const;
403 const std::vector<Partitioner2::Function::Ptr> &list2)
const;
410 std::vector<double>
computeDistances(
const std::vector<Partitioner2::Function::Ptr> &list1,
411 const std::vector<Partitioner2::Function::Ptr> &list2,
412 size_t nThreads)
const;
420 return a.second < b.second;
425 return b.second < a.second;
429 static bool sortByAddress(
const FunctionDistancePair &a,
const FunctionDistancePair &b) {
430 if (a.first == NULL || b.first == NULL)
431 return a.first == NULL && b.first != NULL;
432 return a.first->address() < b.first->address();
466 static double totalAssignmentCost(
const DistanceMatrix&,
const std::vector<size_t> &assignment);
481 static double comparePointClouds(
const PointCloud&,
const PointCloud&);
482 static double compareOrderedLists(
const OrderedLists&,
const OrderedLists&);
485 std::ostream& operator<<(std::ostream&,
const FunctionSimilarity&);
static bool sortByDecreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by descending distance.
Values are N-dimensional Cartesian points.
Statistic
Ways that values can be combined.
std::vector< FunctionPair > findMinimumCostMapping(const std::vector< Partitioner2::Function::Ptr > &list1, const std::vector< Partitioner2::Function::Ptr > &list2) const
Minimum cost 1:1 mapping.
Matrix< double > DistanceMatrix
Square matrix representing distances.
void measureMnemonicStream(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &)
Instruction mnemonic stream.
void measureCfgConnectivity(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &, size_t maxPoints=UNLIMITED)
Control flow graph connectivity.
std::pair< Partitioner2::Function::Ptr, double > FunctionDistancePair
Function and distance to some other function.
const size_t UNLIMITED(-1)
Effictively unlimited size.
CategoryId declareCfgConnectivity(const std::string &categoryName)
Control flow graph connectivity.
std::pair< Partitioner2::Function::Ptr, Partitioner2::Function::Ptr > FunctionPair
Pair of functions.
static void initDiagnostics()
Initializes and registers disassembler diagnostic streams.
void measureCallGraphConnectivity(CategoryId, const Partitioner2::Partitioner &, const Partitioner2::Function::Ptr &)
Function calls.
void printCharacteristicValues(std::ostream &) const
Print characteristic values for this analysis.
std::vector< OrderedList > OrderedLists
Ordered collection of ordered lists of integers.
CategoryId findCategory(const std::string &name) const
Find a category by name.
static bool sortByAddress(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by function address.
size_t nCategories() const
Number of categories.
static double totalAssignmentCost(const DistanceMatrix &, const std::vector< size_t > &assignment)
Total cost of a mapping.
Main namespace for the ROSE library.
const OrderedLists & lists(const Partitioner2::Function::Ptr &, CategoryId) const
Ordered lists contained in a category.
static Sawyer::Message::Facility mlog
Diagnostic streams.
CValKind
Kinds of characteristic values.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::Function::Ptr &needle, const boost::iterator_range< FunctionIterator > &haystack) const
Compare one function with many others.
std::vector< FunctionDistancePair > compareOneToAll(const Partitioner2::Function::Ptr &) const
Compare one function with all others.
double categoryWeight(CategoryId) const
Property: category weight.
Values are lists of integers.
Rose::Progress::Ptr progress() const
Property: Object to which progress reports are made.
static const CategoryId NO_CATEGORY
Invalid category ID.
void categoryAccumulatorType(Statistic s)
Property: Statistic for combining category distances.
const PointCloud & points(const Partitioner2::Function::Ptr &, CategoryId) const
Catesian points contained in a category.
size_t categoryDimensionality(CategoryId) const
Dimensionality of Cartesian characteristic points.
CValKind categoryKind(CategoryId) const
Kind of category.
Statistic categoryAccumulatorType() const
Property: Statistic for combining category distances.
double compare(const Partitioner2::Function::Ptr &, const Partitioner2::Function::Ptr &, double dflt=NAN) const
Compare two functions.
static double cartesianDistance(const FunctionSimilarity::CartesianPoint &, const FunctionSimilarity::CartesianPoint &)
Cartesian distance between two points.
CategoryId declarePointCategory(const std::string &name, size_t dimensionality, bool allowExisting=true)
Declare a new category of Cartesian points.
static double medianDistance(const DistanceMatrix &)
Median distance in the matrix.
DistanceMatrix compareManyToManyMatrix(std::vector< Partitioner2::Function::Ptr >, std::vector< Partitioner2::Function::Ptr >) const
Compare many functions to many others.
std::vector< double > CartesianPoint
Characteristic value that's a Cartesian point.
std::vector< double > computeDistances(const std::vector< Partitioner2::Function::Ptr > &list1, const std::vector< Partitioner2::Function::Ptr > &list2, size_t nThreads) const
Compute distances between sets of functions.
CategoryId declareListCategory(const std::string &name, bool allowExisting=true)
Declare a new category of ordered lists of integers.
CategoryId declareCallGraphConnectivity(const std::string &categoryName)
Function calls.
CategoryId declareMnemonicStream(const std::string &categoryName)
Instruction mnemonic stream.
Analysis to test the similarity of two functions.
size_t size(const Partitioner2::Function::Ptr &, CategoryId) const
Number of characteristic points in a category.
std::vector< int > OrderedList
Characteristic value that's an ordered list of integers.
static double averageDistance(const DistanceMatrix &)
Average distance in the matrix.
static double maximumDistance(const DistanceMatrix &)
Maximum value in the distance matrix.
static std::vector< size_t > findMinimumAssignment(const DistanceMatrix &)
Find minimum mapping from rows to columns.
size_t CategoryId
ID number unique within this analysis context.
void insertPoint(const Partitioner2::Function::Ptr &, CategoryId, const CartesianPoint &)
Insert a Cartesian point characteristic value.
std::vector< CartesianPoint > PointCloud
Unordered collection of Cartesian points.
std::vector< FunctionDistancePair > compareOneToMany(const Partitioner2::Function::Ptr &needle, const FunctionIterator &begin, const FunctionIterator &end) const
Compare one function with many others.
Partitions instructions into basic blocks and functions.
FunctionPtr Ptr
Shared-ownership pointer for function.
Base class for all ROSE exceptions.
void insertList(const Partitioner2::Function::Ptr &, CategoryId, const OrderedList &)
Insert an ordered list characteristic value.
Container associating values with keys.
static bool sortByIncreasingDistance(const FunctionDistancePair &a, const FunctionDistancePair &b)
Predicate for sorting by ascending distance.
Sawyer::SharedPointer< Progress > Ptr
Progress objects are reference counted.
std::vector< std::vector< double > > compareManyToMany(const std::vector< Partitioner2::Function::Ptr > &, const std::vector< Partitioner2::Function::Ptr > &) const
Compare many functions to many others.