ROSE  0.11.98.0
DamerauLevenshtein.h
1 #ifndef ROSE_EditDistance_DamerauLevenshtein_H
2 #define ROSE_EditDistance_DamerauLevenshtein_H
3 
4 #include <EditDistance/Levenshtein.h>
5 
6 namespace Rose {
7 namespace EditDistance {
8 
13 template<typename T>
14 size_t
15 damerauLevenshteinDistance(const std::vector<T> &src, const std::vector<T> &tgt)
16 {
17  // Based on the C# implementation on the wikipedia page
18  if (src.empty() || tgt.empty())
19  return std::max(src.size(), tgt.size());
20 
21  const size_t x = src.size();
22  const size_t y = tgt.size();
23  std::vector<std::vector<size_t> > score(x+2, std::vector<size_t>(y+2, 0));
24  size_t score_ceil = x + y;
25  score[0][0] = score_ceil;
26  for (size_t i=0; i<=x; ++i) {
27  score[i+1][1] = i;
28  score[i+1][0] = score_ceil;
29  }
30  for (size_t j=0; j<=y; ++j) {
31  score[1][j+1] = j;
32  score[0][j+1] = score_ceil;
33  }
34 
36  for (size_t i=0; i<x; ++i)
37  dict.unique_push_zero(src[i]);
38  for (size_t j=0; j<y; ++j)
39  dict.unique_push_zero(tgt[j]);
40 
41  for (size_t i=1; i<=x; ++i) {
42  size_t db = 0;
43  for (size_t j=1; j<=y; ++j) {
44  size_t i1 = dict[tgt[j-1]];
45  size_t j1 = db;
46  if (src[i-1]==tgt[j-1]) {
47  score[i+1][j+1] = score[i][j];
48  db = j;
49  } else {
50  score[i+1][j+1] = std::min(score[i][j], std::min(score[i+1][j], score[i][j+1])) + 1;
51  }
52  // swaps
53  score[i+1][j+1] = std::min(score[i+1][j+1], score[i1][j1] + (i-i1-1) + 1 + (j-j1-1));
54  }
55  dict[src[i-1]] = i;
56  }
57 
58  return score[x+1][y+1];
59 }
60 
61 } // namespace
62 } // namespace
63 #endif
Main namespace for the ROSE library.
size_t damerauLevenshteinDistance(const std::vector< T > &src, const std::vector< T > &tgt)
Damerau-Levenshtein edit distance.