ROSE  0.11.145.0
GraphAlgorithm.h
1 // WARNING: Changes to this file must be contributed back to Sawyer or else they will
2 // be clobbered by the next update from Sawyer. The Sawyer repository is at
3 // https://github.com/matzke1/sawyer.
4 
5 
6 
7 
8 // Algorithms for Sawyer::Container::Graph
9 
10 #ifndef Sawyer_GraphAlgorithm_H
11 #define Sawyer_GraphAlgorithm_H
12 
13 #include <Sawyer/Sawyer.h>
14 #include <Sawyer/DenseIntegerSet.h>
15 #include <Sawyer/GraphIteratorMap.h>
16 #include <Sawyer/GraphTraversal.h>
17 #include <Sawyer/Set.h>
18 
19 #include <boost/foreach.hpp>
20 #include <boost/lexical_cast.hpp>
21 #include <iostream>
22 #include <list>
23 #include <set>
24 #include <vector>
25 
26 // If non-zero, then use a stack-based pool allocation strategy that tuned for multi-threading when searching for isomorphic
27 // subgraphs. The non-zero value defines the size of the large heap blocks requested from the standard runtime system and is a
28 // multiple of the size of the number of vertices in the second of the two graphs accessed by the search.
29 #ifndef SAWYER_VAM_STACK_ALLOCATOR
30 #define SAWYER_VAM_STACK_ALLOCATOR 2 // Use a per-thread, stack based allocation if non-zero
31 #endif
32 #if SAWYER_VAM_STACK_ALLOCATOR
33 #include <Sawyer/StackAllocator.h>
34 #endif
35 
36 // If defined, perform extra checks in the subgraph isomorphism "VAM" ragged array.
37 //#define SAWYER_VAM_EXTRA_CHECKS
38 
39 namespace Sawyer {
40 namespace Container {
41 namespace Algorithm {
42 
46 template<class Graph>
47 bool
49  std::vector<bool> visited(g.nVertices(), false); // have we seen this vertex already?
50  std::vector<bool> onPath(g.nVertices(), false); // is a vertex on the current path of edges?
51  for (size_t rootId = 0; rootId < g.nVertices(); ++rootId) {
52  if (visited[rootId])
53  continue;
54  visited[rootId] = true;
55  ASSERT_require(!onPath[rootId]);
56  onPath[rootId] = true;
58  size_t targetId = t.edge()->target()->id();
59  if (t.event() == ENTER_EDGE) {
60  if (onPath[targetId])
61  return true; // must be a back edge forming a cycle
62  onPath[targetId] = true;
63  if (visited[targetId]) {
64  t.skipChildren();
65  } else {
66  visited[targetId] = true;
67  }
68  } else {
69  ASSERT_require(t.event() == LEAVE_EDGE);
70  ASSERT_require(onPath[targetId]);
71  onPath[targetId] = false;
72  }
73  }
74  ASSERT_require(onPath[rootId]);
75  onPath[rootId] = false;
76  }
77  return false;
78 }
79 
84 template<class Graph>
85 size_t
87  std::vector<bool> visited(g.nVertices(), false); // have we seen this vertex already?
88  std::vector<unsigned char> onPath(g.nVertices(), false); // is a vertex on the current path of edges? 0, 1, or 2
90 
91  for (size_t rootId = 0; rootId < g.nVertices(); ++rootId) {
92  if (visited[rootId])
93  continue;
94  visited[rootId] = true;
95  ASSERT_require(!onPath[rootId]);
96  onPath[rootId] = true;
98  size_t targetId = t.edge()->target()->id();
99  if (t.event() == ENTER_EDGE) {
100  if (onPath[targetId]) {
101  edgesToErase.insert(t.edge()->id(), t.edge());
102  t.skipChildren();
103  }
104  ++onPath[targetId];
105  if (visited[targetId]) {
106  t.skipChildren();
107  } else {
108  visited[targetId] = true;
109  }
110  } else {
111  ASSERT_require(t.event() == LEAVE_EDGE);
112  ASSERT_require(onPath[targetId]);
113  --onPath[targetId];
114  }
115  }
116  ASSERT_require(onPath[rootId]==1);
117  onPath[rootId] = 0;
118  }
119 
120  BOOST_FOREACH (const typename Graph::ConstEdgeIterator &edge, edgesToErase.values())
121  g.eraseEdge(edge);
122  return edgesToErase.size();
123 }
124 
134 template<class Graph>
135 bool
137  if (g.isEmpty())
138  return true;
139  std::vector<bool> seen(g.nVertices(), false);
140  size_t nSeen = 0;
141  DenseIntegerSet<size_t> worklist(g.nVertices());
142  worklist.insert(0);
143  while (!worklist.isEmpty()) {
144  size_t id = *worklist.values().begin();
145  worklist.erase(id);
146 
147  if (seen[id])
148  continue;
149  seen[id] = true;
150  ++nSeen;
151 
152  typename Graph::ConstVertexIterator v = g.findVertex(id);
153  BOOST_FOREACH (const typename Graph::Edge &e, v->outEdges()) {
154  if (!seen[e.target()->id()]) // not necessary, but saves some work
155  worklist.insert(e.target()->id());
156  }
157  BOOST_FOREACH (const typename Graph::Edge &e, v->inEdges()) {
158  if (!seen[e.source()->id()]) // not necessary, but saves some work
159  worklist.insert(e.source()->id());
160  }
161  }
162  return nSeen == g.nVertices();
163 }
164 
174 template<class Graph>
175 size_t
176 graphFindConnectedComponents(const Graph &g, std::vector<size_t> &components /*out*/) {
177  static const size_t NOT_SEEN(-1);
178  size_t nComponents = 0;
179  components.clear();
180  components.resize(g.nVertices(), NOT_SEEN);
181  DenseIntegerSet<size_t> worklist(g.nVertices());
182  for (size_t rootId = 0; rootId < g.nVertices(); ++rootId) {
183  if (components[rootId] != NOT_SEEN)
184  continue;
185  ASSERT_require(worklist.isEmpty());
186  worklist.insert(rootId);
187  while (!worklist.isEmpty()) {
188  size_t id = *worklist.values().begin();
189  worklist.erase(id);
190 
191  ASSERT_require(components[id]==NOT_SEEN || components[id]==nComponents);
192  if (components[id] != NOT_SEEN)
193  continue;
194  components[id] = nComponents;
195 
196  typename Graph::ConstVertexIterator v = g.findVertex(id);
197  BOOST_FOREACH (const typename Graph::Edge &e, v->outEdges()) {
198  if (components[e.target()->id()] == NOT_SEEN) // not necessary, but saves some work
199  worklist.insert(e.target()->id());
200  }
201  BOOST_FOREACH (const typename Graph::Edge &e, v->inEdges()) {
202  if (components[e.source()->id()] == NOT_SEEN) // not necesssary, but saves some work
203  worklist.insert(e.source()->id());
204  }
205  }
206  ++nComponents;
207  }
208  return nComponents;
209 }
210 
219 template<class Graph>
220 Graph
221 graphCopySubgraph(const Graph &g, const std::vector<size_t> &vertexIdVector) {
222  Graph retval;
223 
224  // Insert vertices
225  typedef typename Graph::ConstVertexIterator VIter;
226  typedef Map<size_t, VIter> Id2Vertex;
227  Id2Vertex resultVertices;
228  for (size_t i=0; i<vertexIdVector.size(); ++i) {
229  ASSERT_forbid2(resultVertices.exists(vertexIdVector[i]), "duplicate vertices not allowed");
230  VIter rv = retval.insertVertex(g.findVertex(vertexIdVector[i])->value());
231  ASSERT_require(rv->id() == i); // some analyses depend on this
232  resultVertices.insert(vertexIdVector[i], rv);
233  }
234 
235  // Insert edges
236  for (size_t i=0; i<vertexIdVector.size(); ++i) {
237  typename Graph::ConstVertexIterator gSource = g.findVertex(vertexIdVector[i]);
238  typename Graph::ConstVertexIterator rSource = resultVertices[vertexIdVector[i]];
239  BOOST_FOREACH (const typename Graph::Edge &e, gSource->outEdges()) {
240  typename Graph::ConstVertexIterator rTarget = retval.vertices().end();
241  if (resultVertices.getOptional(e.target()->id()).assignTo(rTarget))
242  retval.insertEdge(rSource, rTarget, e.value());
243  }
244  }
245  return retval;
246 }
247 
253 template<class Graph>
254 void
256  BOOST_FOREACH (const typename Graph::Vertex &src, g.vertices()) {
257  if (src.nOutEdges() > 1) {
258  GraphIteratorMap<typename Graph::ConstVertexIterator /*target*/, std::vector<typename Graph::ConstEdgeIterator> > edgesByTarget;
259  typename Graph::ConstEdgeIterator nextEdge = src.outEdges().begin();
260  while (nextEdge != src.outEdges().end()) {
261  typename Graph::ConstEdgeIterator curEdge = nextEdge++;
262  std::vector<typename Graph::ConstEdgeIterator> &prevEdges = edgesByTarget.insertMaybeDefault(curEdge->target());
263  bool erased = false;
264  BOOST_FOREACH (typename Graph::ConstEdgeIterator prevEdge, prevEdges) {
265  if (curEdge->value() == prevEdge->value()) {
266  g.eraseEdge(curEdge);
267  erased = true;
268  break;
269  }
270  }
271  if (!erased)
272  prevEdges.push_back(curEdge);
273  }
274  }
275  }
276 }
277 
284 template<class Graph>
285 std::vector<size_t>
287  size_t height = 1;
288  std::vector<size_t> retval(g.nVertices(), 0);
289  for (size_t root = 0; root < retval.size(); ++root) {
290  if (0 == retval[root]) {
291  std::vector<size_t /*vertexId*/> stack;
292  stack.reserve(retval.size());
293  stack.push_back(root);
294  while (!stack.empty()) {
295  size_t vid = stack.back();
296  BOOST_FOREACH (const typename Graph::Edge &edge, g.findVertex(vid)->outEdges()) {
297  size_t target = edge.target()->id();
298  if (0 == retval[target]) {
299  retval[target] = 1;
300  stack.push_back(target);
301  }
302  }
303  if (stack.back() == vid) {
304  stack.pop_back();
305  retval[vid] = ++height;
306  }
307  }
308  }
309  }
310  return retval;
311 }
312 
314 // Common subgraph isomorphism (CSI)
315 // Loosely based on the algorithm presented by Evgeny B. Krissinel and Kim Henrick
316 // "Common subgraph isomorphism detection by backtracking search"
317 // European Bioinformatics Institute, Genome Campus, Hinxton, Cambridge CB10 1SD, UK
319 
325 template<class GraphA, class GraphB>
327 public:
332  bool mu(const GraphA &g1, const typename GraphA::ConstVertexIterator &v1,
333  const GraphB &g2, const typename GraphB::ConstVertexIterator &v2) const {
334  SAWYER_ARGUSED(g1); // Leave formal arg names in declaration because they're important
335  SAWYER_ARGUSED(v1); // documentation for this function.
336  SAWYER_ARGUSED(g2);
337  SAWYER_ARGUSED(v2);
338  return true;
339  }
340 
350  bool nu(const GraphA &g1, typename GraphA::ConstVertexIterator i1, typename GraphA::ConstVertexIterator i2,
351  const std::vector<typename GraphA::ConstEdgeIterator> &edges1,
352  const GraphB &g2, typename GraphB::ConstVertexIterator j1, typename GraphB::ConstVertexIterator j2,
353  const std::vector<typename GraphB::ConstEdgeIterator> &edges2) const {
354  SAWYER_ARGUSED(g1); // Leave formal argument names in declaration because they're
355  SAWYER_ARGUSED(i1); // important documentation for this function.
356  SAWYER_ARGUSED(i2);
357  SAWYER_ARGUSED(edges1);
358  SAWYER_ARGUSED(g2);
359  SAWYER_ARGUSED(j1);
360  SAWYER_ARGUSED(j2);
361  SAWYER_ARGUSED(edges2);
362  return true;
363  }
364 
370  void progress(size_t /*size*/) {}
371 };
372 
377 };
378 
390 template<class GraphA, class GraphB>
392  size_t n;
393 public:
394  CsiShowSolution(): n(0) {}
395 
401  CsiNextAction operator()(const GraphA &g1, const std::vector<size_t> &x, const GraphB &g2, const std::vector<size_t> &y) {
402  ASSERT_require(x.size() == y.size());
403  std::cout <<"Common subgraph isomorphism solution #" <<n <<" found:\n"
404  <<" x = [";
405  BOOST_FOREACH (size_t i, x)
406  std::cout <<" " <<i;
407  std::cout <<" ]\n"
408  <<" y = [";
409  BOOST_FOREACH (size_t j, y)
410  std::cout <<" " <<j;
411  std::cout <<" ]\n";
412  ++n;
413  return CSI_CONTINUE;
414  }
415 };
416 
440 template<class GraphA, class GraphB,
441  class SolutionProcessor = CsiShowSolution<GraphA, GraphB>,
442  class EquivalenceP = CsiEquivalence<GraphA, GraphB> >
444  const GraphA &g1; // The first graph being compared (i.e., needle)
445  const GraphB &g2; // the second graph being compared (i.e., haystack)
446  DenseIntegerSet<size_t> v, w; // available vertices of g1 and g2, respectively
447  std::vector<size_t> x, y; // selected vertices of g1 and g2, which defines vertex mapping
448  DenseIntegerSet<size_t> vNotX; // X erased from V
449 
450  SolutionProcessor solutionProcessor_; // functor to call for each solution
451  EquivalenceP equivalenceP_; // predicates to determine if two vertices can be equivalent
452  size_t minimumSolutionSize_; // size of smallest permitted solutions
453  size_t maximumSolutionSize_; // size of largest permitted solutions
454  bool monotonicallyIncreasing_; // size of solutions increases
455  bool findingCommonSubgraphs_; // solutions are subgraphs of both graphs or only second graph?
456 
457  // The Vam is a ragged 2d array, but using std::vector<std::vector<T>> is not efficient in a multithreaded environment
458  // because std::allocator<> and Boost pool allocators don't (yet) have a mode that allows each thread to be largely
459  // independent of other threads. The best we can do with std::allocator is about 30 threads running 100% on my box having
460  // 72 hardware threads. But we can use the following facts about the Vam:
461  //
462  // (1) VAMs are constructed and destroyed in a stack-like manner.
463  // (2) We always know the exact size of the major axis (number or rows) before the VAM is created.
464  // (3) The longest row of a new VAM will not be longer than the longest row of the VAM from which it is derived.
465  // (4) The number of rows will never be more than the number of vertices in g1.
466  // (5) The longest row will never be longer than the number of vertices in g2.
467  // (6) VAMs are never shared between threads
468 #if SAWYER_VAM_STACK_ALLOCATOR
470 #else
471  struct VamAllocator {
472  explicit VamAllocator(size_t) {}
473  };
474 #endif
475  VamAllocator vamAllocator_;
476 
477  // Essentially a ragged array having a fixed number of rows and each row can be a different length. The number of rows is
478  // known at construction time, and the rows are extended one at a time starting with the first and working toward the
479  // last. Accessing any element is a constant-time operation.
480  class Vam { // Vertex Availability Map
481  VamAllocator &allocator_;
482 #if SAWYER_VAM_STACK_ALLOCATOR
483  std::vector<size_t*> rows_;
484  std::vector<size_t> rowSize_;
485  size_t *lowWater_; // first element allocated
486 #else
487  std::vector<std::vector<size_t> > rows_;
488 #endif
489  size_t lastRowStarted_;
490 #ifdef SAWYER_VAM_EXTRA_CHECKS
491  size_t maxRows_, maxCols_;
492 #endif
493 
494  public:
495  // Construct the VAM and reserve enough space for the indicated number of rows.
496  explicit Vam(VamAllocator &allocator)
497  : allocator_(allocator),
498 #if SAWYER_VAM_STACK_ALLOCATOR
499  lowWater_(NULL),
500 #endif
501  lastRowStarted_((size_t)(-1)) {
502  }
503 
504  // Destructor assumes this is the top VAM in the allocator stack.
505  ~Vam() {
506 #if SAWYER_VAM_STACK_ALLOCATOR
507  if (lowWater_ != NULL) {
508  ASSERT_require(!rows_.empty());
509  allocator_.revert(lowWater_);
510  } else {
511  ASSERT_require((size_t)std::count(rowSize_.begin(), rowSize_.end(), 0) == rows_.size()); // all rows empty
512  }
513 #endif
514  }
515 
516  // Reserve space for specified number of rows.
517  void reserveRows(size_t nrows) {
518  rows_.reserve(nrows);
519 #if SAWYER_VAM_STACK_ALLOCATOR
520  rowSize_.reserve(nrows);
521 #endif
522 #ifdef SAWYER_VAM_EXTRA_CHECKS
523  maxRows_ = nrows;
524  maxCols_ = 0;
525 #endif
526  }
527 
528  // Start a new row. You can only insert elements into the most recent row.
529  void startNewRow(size_t i, size_t maxColumns) {
530 #ifdef SAWYER_VAM_EXTRA_CHECKS
531  ASSERT_require(i < maxRows_);
532  maxCols_ = maxColumns;
533 #endif
534 #if SAWYER_VAM_STACK_ALLOCATOR
535  if (i >= rows_.size()) {
536  rows_.resize(i+1, NULL);
537  rowSize_.resize(i+1, 0);
538  }
539  ASSERT_require(rows_[i] == NULL); // row was already started
540  rows_[i] = allocator_.reserve(maxColumns);
541 #else
542  if (i >= rows_.size())
543  rows_.resize(i+1);
544 #endif
545  lastRowStarted_ = i;
546  }
547 
548  // Push a new element onto the end of the current row.
549  void push(size_t i, size_t x) {
550  ASSERT_require(i == lastRowStarted_);
551  ASSERT_require(i < rows_.size());
552 #ifdef SAWYER_VAM_EXTRA_CHECKS
553  ASSERT_require(size(i) < maxCols_);
554 #endif
555 #if SAWYER_VAM_STACK_ALLOCATOR
556  size_t *ptr = allocator_.allocNext();
557  if (lowWater_ == NULL)
558  lowWater_ = ptr;
559 #ifdef SAWYER_VAM_EXTRA_CHECKS
560  ASSERT_require(ptr == rows_[i] + rowSize_[i]);
561 #endif
562  ++rowSize_[i];
563  *ptr = x;
564 #else
565  rows_[i].push_back(x);
566 #endif
567  }
568 
569  // Given a vertex i in G1, return the number of vertices j in G2 where i and j can be equivalent.
570  size_t size(size_t i) const {
571 #if SAWYER_VAM_STACK_ALLOCATOR
572  return i < rows_.size() ? rowSize_[i] : size_t(0);
573 #else
574  return i < rows_.size() ? rows_[i].size() : size_t(0);
575 #endif
576  }
577 
578  // Given a vertex i in G1, return those vertices j in G2 where i and j can be equivalent.
579  // This isn't really a proper iterator, but we can't return std::vector<size_t>::const_iterator on macOS because it's
580  // constructor-from-pointer is private.
581  boost::iterator_range<const size_t*> get(size_t i) const {
582  static const size_t empty = 911; /*arbitrary*/
583  if (i < rows_.size() && size(i) > 0) {
584 #if SAWYER_VAM_STACK_ALLOCATOR
585  return boost::iterator_range<const size_t*>(rows_[i], rows_[i] + rowSize_[i]);
586 #else
587  return boost::iterator_range<const size_t*>(&rows_[i][0], &rows_[i][0] + rows_[i].size());
588 #endif
589  }
590  return boost::iterator_range<const size_t*>(&empty, &empty);
591  }
592  };
593 
594 public:
608  CommonSubgraphIsomorphism(const GraphA &g1, const GraphB &g2,
609  SolutionProcessor solutionProcessor = SolutionProcessor(),
610  EquivalenceP equivalenceP = EquivalenceP())
611  : g1(g1), g2(g2), v(g1.nVertices()), w(g2.nVertices()), vNotX(g1.nVertices()), solutionProcessor_(solutionProcessor),
612  equivalenceP_(equivalenceP), minimumSolutionSize_(1), maximumSolutionSize_(-1), monotonicallyIncreasing_(false),
613  findingCommonSubgraphs_(true), vamAllocator_(SAWYER_VAM_STACK_ALLOCATOR * g2.nVertices()) {}
614 
615 private:
617  ASSERT_not_reachable("copy constructor makes no sense");
618  }
619 
621  ASSERT_not_reachable("assignment operator makes no sense");
622  }
623 
624 public:
641  size_t minimumSolutionSize() const { return minimumSolutionSize_; }
642  void minimumSolutionSize(size_t n) { minimumSolutionSize_ = n; }
659  size_t maximumSolutionSize() const { return maximumSolutionSize_; }
660  void maximumSolutionSize(size_t n) { maximumSolutionSize_ = n; }
671  bool monotonicallyIncreasing() const { return monotonicallyIncreasing_; }
672  void monotonicallyIncreasing(bool b) { monotonicallyIncreasing_ = b; }
681  SolutionProcessor& solutionProcessor() { return solutionProcessor_; }
682  const SolutionProcessor& solutionProcessor() const { return solutionProcessor_; }
694  EquivalenceP& equivalencePredicate() { return equivalenceP_; }
695  const EquivalenceP& equivalencePredicate() const { return equivalenceP_; }
708  bool findingCommonSubgraphs() const { return findingCommonSubgraphs_; }
709  void findingCommonSubgraphs(bool b) { findingCommonSubgraphs_ = b; }
727  void run() {
728  reset();
729  Vam vam = initializeVam();
730  recurse(vam);
731  }
732 
737  void reset() {
738  v.insertAll();
739  w.insertAll();
740  x.clear();
741  y.clear();
742  vNotX.insertAll();
743  }
744 
745 private:
746  // Maximum value+1 or zero.
747  template<typename Container>
748  static size_t maxPlusOneOrZero(const Container &container) {
749  if (container.isEmpty())
750  return 0;
751  size_t retval = 0;
752  BOOST_FOREACH (size_t val, container.values())
753  retval = std::max(retval, val);
754  return retval+1;
755  }
756 
757  // Initialize VAM so to indicate which source vertices (v of g1) map to which target vertices (w of g2) based only on the
758  // vertex comparator. We also handle self edges here.
759  Vam initializeVam() {
760  Vam vam(vamAllocator_);
761  vam.reserveRows(maxPlusOneOrZero(v));
762  BOOST_FOREACH (size_t i, v.values()) {
763  typename GraphA::ConstVertexIterator v1 = g1.findVertex(i);
764  vam.startNewRow(i, w.size());
765  BOOST_FOREACH (size_t j, w.values()) {
766  typename GraphB::ConstVertexIterator w1 = g2.findVertex(j);
767  std::vector<typename GraphA::ConstEdgeIterator> selfEdges1;
768  std::vector<typename GraphB::ConstEdgeIterator> selfEdges2;
769  findEdges(g1, i, i, selfEdges1 /*out*/);
770  findEdges(g2, j, j, selfEdges2 /*out*/);
771  if (selfEdges1.size() == selfEdges2.size() &&
772  equivalenceP_.mu(g1, g1.findVertex(i), g2, g2.findVertex(j)) &&
773  equivalenceP_.nu(g1, v1, v1, selfEdges1, g2, w1, w1, selfEdges2))
774  vam.push(i, j);
775  }
776  }
777  return vam;
778  }
779 
780  // Can the solution (stored in X and Y) be extended by adding another (i,j) pair of vertices where i is an element of the
781  // set of available vertices of graph1 (vNotX) and j is a vertex of graph2 that is equivalent to i according to the
782  // user-provided equivalence predicate. Furthermore, is it even possible to find a solution along this branch of discovery
783  // which is falls between the minimum and maximum sizes specified by the user? By eliminating entire branch of the search
784  // space we can drastically decrease the time it takes to search, and the larger the required solution the more branches we
785  // can eliminate.
786  bool isSolutionPossible(const Vam &vam) const {
787  if (findingCommonSubgraphs_ && x.size() >= maximumSolutionSize_)
788  return false; // any further soln on this path would be too large
789  size_t largestPossibleSolution = x.size();
790  BOOST_FOREACH (size_t i, vNotX.values()) {
791  if (vam.size(i) > 0) {
792  ++largestPossibleSolution;
793  if ((findingCommonSubgraphs_ && largestPossibleSolution >= minimumSolutionSize_) ||
794  (!findingCommonSubgraphs_ && largestPossibleSolution >= g1.nVertices()))
795  return true;
796  }
797  }
798  return false;
799  }
800 
801  // Choose some vertex i from G1 which is still available (i.e., i is a member of vNotX) and for which there is a vertex
802  // equivalence of i with some available j from G2 (i.e., the pair (i,j) is present in the VAM). The recursion terminates
803  // quickest if we return the row of the VAM that has the fewest vertices in G2.
804  size_t pickVertex(const Vam &vam) const {
805  // FIXME[Robb Matzke 2016-03-19]: Perhaps this can be made even faster. The step that initializes the VAM
806  // (initializeVam or refine) might be able to compute and store the shortest row so we can retrieve it here in constant
807  // time. Probably not worth the work though since loop this is small compared to the overall analysis.
808  size_t shortestRowLength(-1), retval(-1);
809  BOOST_FOREACH (size_t i, vNotX.values()) {
810  size_t vs = vam.size(i);
811  if (vs > 0 && vs < shortestRowLength) {
812  shortestRowLength = vs;
813  retval = i;
814  }
815  }
816  ASSERT_require2(retval != size_t(-1), "cannot be reached if isSolutionPossible returned true");
817  return retval;
818  }
819 
820  // Extend the current solution by adding vertex i from G1 and vertex j from G2. The VAM should be adjusted separately.
821  void extendSolution(size_t i, size_t j) {
822  ASSERT_require(x.size() == y.size());
823  ASSERT_require(std::find(x.begin(), x.end(), i) == x.end());
824  ASSERT_require(std::find(y.begin(), y.end(), j) == y.end());
825  ASSERT_require(vNotX.exists(i));
826  x.push_back(i);
827  y.push_back(j);
828  vNotX.erase(i);
829  }
830 
831  // Remove the last vertex mapping from a solution. The VAM should be adjusted separately.
832  void retractSolution() {
833  ASSERT_require(x.size() == y.size());
834  ASSERT_require(!x.empty());
835  size_t i = x.back();
836  ASSERT_forbid(vNotX.exists(i));
837  x.pop_back();
838  y.pop_back();
839  vNotX.insert(i);
840  }
841 
842  // Find all edges that have the specified source and target vertices. This is usually zero or one edge, but can be more if
843  // the graph contains parallel edges.
844  template<class Graph>
845  void
846  findEdges(const Graph &g, size_t sourceVertex, size_t targetVertex,
847  std::vector<typename Graph::ConstEdgeIterator> &result /*in,out*/) const {
848  BOOST_FOREACH (const typename Graph::Edge &candidate, g.findVertex(sourceVertex)->outEdges()) {
849  if (candidate.target()->id() == targetVertex)
850  result.push_back(g.findEdge(candidate.id()));
851  }
852  }
853 
854  // Given that we just extended a solution by adding the vertex pair (i, j), decide whether there's a
855  // possible equivalence between vertex iUnused of G1 and vertex jUnused of G2 based on the edge(s) between i and iUnused
856  // and between j and jUnused.
857  bool edgesAreSuitable(size_t i, size_t iUnused, size_t j, size_t jUnused) const {
858  ASSERT_require(i != iUnused);
859  ASSERT_require(j != jUnused);
860 
861  // The two subgraphs in a solution must have the same number of edges.
862  std::vector<typename GraphA::ConstEdgeIterator> edges1;
863  std::vector<typename GraphB::ConstEdgeIterator> edges2;
864  findEdges(g1, i, iUnused, edges1 /*out*/);
865  findEdges(g2, j, jUnused, edges2 /*out*/);
866  if (edges1.size() != edges2.size())
867  return false;
868  findEdges(g1, iUnused, i, edges1 /*out*/);
869  findEdges(g2, jUnused, j, edges2 /*out*/);
870  if (edges1.size() != edges2.size())
871  return false;
872 
873  // If there are no edges, then assume that iUnused and jUnused could be isomorphic. We already know they satisfy the mu
874  // constraint otherwise they wouldn't even be in the VAM.
875  if (edges1.empty() && edges2.empty())
876  return true;
877 
878  // Everything looks good to us, now let the user weed out certain pairs of vertices based on their incident edges.
879  typename GraphA::ConstVertexIterator v1 = g1.findVertex(i), v2 = g1.findVertex(iUnused);
880  typename GraphB::ConstVertexIterator w1 = g2.findVertex(j), w2 = g2.findVertex(jUnused);
881  return equivalenceP_.nu(g1, v1, v2, edges1, g2, w1, w2, edges2);
882  }
883 
884  // Create a new VAM from an existing one. The (i,j) pairs of the new VAM will form a subset of the specified VAM.
885  void refine(const Vam &vam, Vam &refined /*out*/) {
886  refined.reserveRows(maxPlusOneOrZero(vNotX));
887  BOOST_FOREACH (size_t i, vNotX.values()) {
888  size_t rowLength = vam.size(i);
889  refined.startNewRow(i, rowLength);
890  BOOST_FOREACH (size_t j, vam.get(i)) {
891  if (j != y.back() && edgesAreSuitable(x.back(), i, y.back(), j))
892  refined.push(i, j);
893  }
894  }
895  }
896 
897  // The Goldilocks predicate. Returns true if the solution is a valid size, false if it's too small or too big.
898  bool isSolutionValidSize() const {
899  if (findingCommonSubgraphs_) {
900  return x.size() >= minimumSolutionSize_ && x.size() <= maximumSolutionSize_;
901  } else {
902  return x.size() == g1.nVertices();
903  }
904  }
905 
906  // The main recursive function. It works by extending the current solution by one pair for all combinations of such pairs
907  // that are permissible according to the vertex equivalence predicate and not already part of the solution and then
908  // recursively searching the remaining space. This analysis class acts as a state machine whose data structures are
909  // advanced and retracted as the space is searched. The VAM is the only part of the state that needs to be stored on a
910  // stack since changes to it could not be easily undone during the retract phase.
911  CsiNextAction recurse(const Vam &vam, size_t level = 0) {
912  equivalenceP_.progress(level);
913  if (isSolutionPossible(vam)) {
914  size_t i = pickVertex(vam);
915  BOOST_FOREACH (size_t j, vam.get(i)) {
916  extendSolution(i, j);
917  Vam refined(vamAllocator_);
918  refine(vam, refined);
919  if (recurse(refined, level+1) == CSI_ABORT)
920  return CSI_ABORT;
921  retractSolution();
922  }
923 
924  // Try again after removing vertex i from consideration
925  if (findingCommonSubgraphs_) {
926  v.erase(i);
927  ASSERT_require(vNotX.exists(i));
928  vNotX.erase(i);
929  if (recurse(vam, level+1) == CSI_ABORT)
930  return CSI_ABORT;
931  v.insert(i);
932  vNotX.insert(i);
933  }
934  } else if (isSolutionValidSize()) {
935  ASSERT_require(x.size() == y.size());
936  if (monotonicallyIncreasing_)
937  minimumSolutionSize_ = x.size();
938  if (solutionProcessor_(g1, x, g2, y) == CSI_ABORT)
939  return CSI_ABORT;
940  }
941  return CSI_CONTINUE;
942  }
943 };
944 
965 template<class GraphA, class GraphB, class SolutionProcessor>
966 void findCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor) {
968  csi.run();
969 }
970 
971 template<class GraphA, class GraphB, class SolutionProcessor, class EquivalenceP>
972 void findCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2,
973  SolutionProcessor solutionProcessor, EquivalenceP equivalenceP) {
974  CommonSubgraphIsomorphism<GraphA, GraphB, SolutionProcessor, EquivalenceP> csi(g1, g2, solutionProcessor, equivalenceP);
975  csi.run();
976 }
979 // Used by findFirstCommonIsomorphicSubgraph
980 template<class GraphA, class GraphB>
982  std::pair<std::vector<size_t>, std::vector<size_t> > solution_;
983 public:
984  CsiNextAction operator()(const GraphA &/*g1*/, const std::vector<size_t> &x,
985  const GraphB &/*g2*/, const std::vector<size_t> &y) {
986  solution_ = std::make_pair(x, y);
987  return CSI_ABORT;
988  }
989 
990  const std::pair<std::vector<size_t>, std::vector<size_t> >& solution() const {
991  return solution_;
992  }
993 };
994 
1002 template<class GraphA, class GraphB>
1003 std::pair<std::vector<size_t>, std::vector<size_t> >
1004 findFirstCommonIsomorphicSubgraph(const GraphA &g1, const GraphB &g2, size_t minimumSize) {
1006  csi.minimumSolutionSize(minimumSize);
1007  csi.maximumSolutionSize(minimumSize); // to avoid going further than necessary
1008  csi.run();
1009  return csi.solutionProcessor().solution();
1010 }
1011 
1012 
1013 template<class GraphA, class GraphB, class EquivalenceP>
1014 std::pair<std::vector<size_t>, std::vector<size_t> >
1015 findFirstCommonIsomorphicSubgraph(const GraphA &g1, const GraphB &g2, size_t minimumSize, EquivalenceP equivalenceP) {
1017  csi(g1, g2, FirstIsomorphicSubgraph<GraphA, GraphB>(), equivalenceP);
1018  csi.minimumSolutionSize(minimumSize);
1019  csi.maximumSolutionSize(minimumSize); // to avoid going further than necessary
1020  csi.run();
1021  return csi.solutionProcessor().solution();
1022 }
1023 
1042 template<class GraphA, class GraphB, class SolutionProcessor>
1043 void findIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor) {
1044  CommonSubgraphIsomorphism<GraphA, GraphB, SolutionProcessor> csi(g1, g2, solutionProcessor);
1045  csi.findingCommonSubgraphs(false);
1046  csi.run();
1047 }
1048 
1049 template<class GraphA, class GraphB, class SolutionProcessor, class EquivalenceP>
1050 void findIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2,
1051  SolutionProcessor solutionProcessor, EquivalenceP equivalenceP) {
1052  CommonSubgraphIsomorphism<GraphA, GraphB, SolutionProcessor, EquivalenceP> csi(g1, g2, solutionProcessor, equivalenceP);
1053  csi.findingCommonSubgraphs(false);
1054  csi.run();
1055 }
1058 // Used internally by findMaximumCommonIsomorphicSubgraphs
1059 template<class GraphA, class GraphB>
1061  std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > > solutions_;
1062 public:
1063  CsiNextAction operator()(const GraphA &/*g1*/, const std::vector<size_t> &x,
1064  const GraphB &/*g2*/, const std::vector<size_t> &y) {
1065  if (!solutions_.empty() && x.size() > solutions_.front().first.size())
1066  solutions_.clear();
1067  solutions_.push_back(std::make_pair(x, y));
1068  return CSI_CONTINUE;
1069  }
1070 
1071  const std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > > &solutions() const {
1072  return solutions_;
1073  }
1074 };
1075 
1096 template<class GraphA, class GraphB>
1097 std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > >
1098 findMaximumCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2) {
1100  csi.monotonicallyIncreasing(true);
1101  csi.run();
1102  return csi.solutionProcessor().solutions();
1103 }
1104 
1105 template<class GraphA, class GraphB, class EquivalenceP>
1106 std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > >
1107 findMaximumCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, EquivalenceP equivalenceP) {
1109  csi(g1, g2, MaximumIsomorphicSubgraphs<GraphA, GraphB>(), equivalenceP);
1110  csi.monotonicallyIncreasing(true);
1111  csi.run();
1112  return csi.solutionProcessor().solutions();
1113 }
1157 template<class Direction, class Graph>
1158 std::vector<typename GraphTraits<Graph>::VertexIterator>
1160  typedef typename GraphTraits<Graph>::VertexIterator VertexIterator;
1161  typedef typename GraphTraits<Graph>::EdgeIterator EdgeIterator;
1162  //typedef typename GraphTraits<Graph>::Edge Edge;
1163  static const size_t NO_ID = (size_t)(-1);
1164 
1165  ASSERT_require(g.isValidVertex(root));
1166 
1167  // List of vertex IDs in the best order for data-flow. I.e., the reverse of the post-fix, depth-first traversal following
1168  // forwared or reverse edges (depending on the Direction template argument). A useful fact is that disregarding back
1169  // edges, the predecessors of the vertex represented by flowlist[i] all appear earlier in flowlist.
1171  traversal.start(root);
1172  std::vector<size_t> flowlist = graphReachableVertices(traversal);
1173  std::reverse(flowlist.begin(), flowlist.end());
1174 
1175  // The inverse mapping of the flowlist. I.e.., since flowlist maps an index to a vertex ID, rflowlist maps a vertex ID back
1176  // to the flowlist index. If vertex v is not reachable from the root, then rflowlist[v] == NO_ID.
1177  std::vector<size_t> rflowlist(g.nVertices(), NO_ID);
1178  for (size_t i=0; i<flowlist.size(); ++i)
1179  rflowlist[flowlist[i]] = i;
1180 
1181  // Initialize the idom vector. idom[i]==i implies idom[i] is unknown
1182  std::vector<size_t> idom(flowlist.size());
1183  for (size_t i=0; i<flowlist.size(); ++i)
1184  idom[i] = i;
1185 
1186  bool changed = true;
1187  while (changed) {
1188  changed = false; // assume no change, prove otherwise
1189  for (size_t vertex_i=0; vertex_i < flowlist.size(); ++vertex_i) {
1190  VertexIterator vertex = g.findVertex(flowlist[vertex_i]);
1191 
1192  // Test idom invariant
1193 #ifndef SAWYER_NDEBUG
1194  for (size_t i=0; i<idom.size(); ++i)
1195  ASSERT_require(idom[i] <= i);
1196 #endif
1197 
1198  // The root vertex has no immediate dominator.
1199  // FIXME[Robb P Matzke 2017-06-23]: why not just start iterating with vertex_i=1?
1200  if (vertex == root)
1201  continue;
1202 
1203  // Try to choose a new idom for this vertex by processing each of its predecessors. The dominator of the current
1204  // vertex is a function of the dominators of its predecessors.
1205  size_t newIdom = idom[vertex_i];
1206 
1207  boost::iterator_range<EdgeIterator> edges = previousEdges<EdgeIterator>(vertex, Direction());
1208  for (EdgeIterator edge=edges.begin(); edge!=edges.end(); ++edge) {
1209  VertexIterator predecessor = previousVertex<VertexIterator>(edge, Direction());
1210  size_t predecessor_i = rflowlist[predecessor->id()];
1211  if (NO_ID == predecessor_i)
1212  continue; // predecessor is not reachable from root, so does't contribute
1213  if (predecessor_i == vertex_i)
1214  continue; // ignore self edges: V cannot be its own immediate dominator
1215  if (predecessor_i > vertex_i)
1216  continue; // ignore back edges; the idom will also be found along a forward edge
1217 
1218  // The predecessor might be our dominator, so merge it with what we already have
1219  if (newIdom == vertex_i) {
1220  newIdom = predecessor_i;
1221  } else {
1222  size_t tmpIdom = predecessor_i;
1223  while (newIdom != tmpIdom) {
1224  while (newIdom > tmpIdom)
1225  newIdom = idom[newIdom];
1226  while (tmpIdom > newIdom)
1227  tmpIdom = idom[tmpIdom];
1228  }
1229  }
1230  }
1231 
1232  if (idom[vertex_i] != newIdom) {
1233  idom[vertex_i] = newIdom;
1234  changed = true;
1235  }
1236  }
1237  }
1238 
1239  // Build the result relation
1240  std::vector<VertexIterator> retval;
1241  retval.resize(g.nVertices(), g.vertices().end());
1242  for (size_t i=0; i<flowlist.size(); ++i) {
1243  if (idom[i] != i)
1244  retval[flowlist[i]] = g.findVertex(flowlist[idom[i]]);
1245  }
1246  return retval;
1247 }
1248 
1256 template<class Graph>
1257 std::vector<typename GraphTraits<Graph>::VertexIterator>
1259  return graphDirectedDominators<ForwardTraversalTag>(g, root);
1260 }
1261 
1269 template<class Graph>
1270 std::vector<typename GraphTraits<Graph>::VertexIterator>
1272  return graphDirectedDominators<ReverseTraversalTag>(g, root);
1273 }
1274 
1275 } // namespace
1276 } // namespace
1277 } // namespace
1278 
1279 #endif
void reset()
Releases memory used by the analysis.
size_t nVertices() const
Total number of vertices.
Definition: Graph.h:1685
T * reserve(size_t nElmts)
Reserve space for objects.
std::vector< size_t > graphDependentOrder(Graph &g)
Number vertices according to their height from the leaves.
const SolutionProcessor & solutionProcessor() const
Property: reference to the solution callback.
VertexIterator insertVertex(const VertexValue &value=VertexValue())
Insert a new vertex.
Definition: Graph.h:1721
const size_t & id() const
Unique vertex ID number.
Definition: Graph.h:1217
CsiNextAction operator()(const GraphA &g1, const std::vector< size_t > &x, const GraphB &g2, const std::vector< size_t > &y)
Functor.
Graph containing user-defined vertices and edges.
Definition: Graph.h:625
void minimumSolutionSize(size_t n)
Property: minimum allowed solution size.
size_t minimumSolutionSize() const
Property: minimum allowed solution size.
std::vector< typename GraphTraits< Graph >::VertexIterator > graphPostDominators(Graph &g, typename GraphTraits< Graph >::VertexIterator root)
Find immediate post-dominators.
Traits for graphs.
Definition: Graph.h:287
boost::iterator_range< EdgeIterator > inEdges()
List of incoming edges.
Definition: Graph.h:1228
Vertex equivalence for common subgraph isomorphism.
bool isValidVertex(const ConstVertexIterator &vertex) const
Determines whether the vertex iterator is valid.
Definition: Graph.h:1566
void progress(size_t)
Called at each step during the algorithm.
T * allocNext()
Allocate one element.
bool graphContainsCycle(const Graph &g)
Determines if the any edges of a graph form a cycle.
VertexValue & value()
User-defined value.
Definition: Graph.h:1293
size_t size() const
Number of members present.
boost::iterator_range< VertexIterator > vertices()
Iterators for all vertices.
Definition: Graph.h:1469
boost::iterator_range< ConstIterator > values() const
Iterator range for set members.
bool exists(const Value &value) const
Determines whether a value is stored.
EdgeIterator insertEdge(const VertexIterator &sourceVertex, const VertexIterator &targetVertex, const EdgeValue &value=EdgeValue())
Insert a new edge.
Definition: Graph.h:1751
Bidirectional vertex node iterator.
Definition: Graph.h:1048
void revert(T *ptr)
Free all elements back to the specified element.
Name space for the entire library.
Definition: FeasiblePath.h:767
void findCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor)
Find common isomorphic subgraphs.
Graph graphCopySubgraph(const Graph &g, const std::vector< size_t > &vertexIdVector)
Create a subgraph.
size_t graphFindConnectedComponents(const Graph &g, std::vector< size_t > &components)
Find all connected components of a graph.
boost::iterator_range< EdgeIterator > outEdges()
List of outgoing edges.
Definition: Graph.h:1249
std::pair< std::vector< size_t >, std::vector< size_t > > findFirstCommonIsomorphicSubgraph(const GraphA &g1, const GraphB &g2, size_t minimumSize)
Determine whether a common subgraph exists.
const VertexIterator & source()
Source vertex.
Definition: Graph.h:1154
Functor called for each common subgraph isomorphism solution.
void maximumSolutionSize(size_t n)
Property: maximum allowed solution size.
CsiNextAction
How the CSI algorith should proceed.
EdgeValue & value()
User-defined value.
Definition: Graph.h:1180
bool nu(const GraphA &g1, typename GraphA::ConstVertexIterator i1, typename GraphA::ConstVertexIterator i2, const std::vector< typename GraphA::ConstEdgeIterator > &edges1, const GraphB &g2, typename GraphB::ConstVertexIterator j1, typename GraphB::ConstVertexIterator j2, const std::vector< typename GraphB::ConstEdgeIterator > &edges2) const
Isomorphism of vertices based on incident edges.
bool insert(Value value)
Insert a value.
std::vector< std::pair< std::vector< size_t >, std::vector< size_t > > > findMaximumCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2)
Find maximum common isomorphic subgraphs.
Map of graph edge or vertex pointers to some other value.
const EquivalenceP & equivalencePredicate() const
Property: reference to the vertex equivalence predicate.
void monotonicallyIncreasing(bool b)
Property: monotonically increasing solution size.
void run()
Perform the common subgraph isomorphism analysis.
CommonSubgraphIsomorphism(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor=SolutionProcessor(), EquivalenceP equivalenceP=EquivalenceP())
Construct a solver.
std::vector< typename GraphTraits< Graph >::VertexIterator > graphDirectedDominators(Graph &g, typename GraphTraits< Graph >::VertexIterator root)
Find immediate pre- or post-dominators.
VertexIterator findVertex(size_t id)
Finds the vertex with specified ID number.
Definition: Graph.h:1516
void findIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor)
Find an isomorphic subgraph.
Continue searching for more solutions.
bool findingCommonSubgraphs() const
Property: find common subgraphs.
bool erase(Value value)
Erase a value.
SolutionProcessor & solutionProcessor()
Property: reference to the solution callback.
bool monotonicallyIncreasing() const
Property: monotonically increasing solution size.
Bidirectional edge node iterator.
Definition: Graph.h:945
size_t graphBreakCycles(Graph &g)
Break cycles of a graph arbitrarily.
void findingCommonSubgraphs(bool b)
Property: find common subgraphs.
size_t nOutEdges() const
Number of outgoing edges.
Definition: Graph.h:1271
bool mu(const GraphA &g1, const typename GraphA::ConstVertexIterator &v1, const GraphB &g2, const typename GraphB::ConstVertexIterator &v2) const
Isomorphism of two vertices.
Depth-first, forward traversal for all event types.
Base class for graph traversals.
void insertAll()
Insert all possible members.
bool isEmpty() const
True if graph is empty.
Definition: Graph.h:1704
Return to caller without further searching.
bool graphIsConnected(const Graph &g)
Test whether a graph is connected.
EquivalenceP & equivalencePredicate()
Property: reference to the vertex equivalence predicate.
EdgeIterator eraseEdge(const EdgeIterator &edge)
Erases an edge.
Definition: Graph.h:1811
void graphEraseParallelEdges(Graph &g)
Erase parallel edges.
const VertexIterator & target()
Target vertex.
Definition: Graph.h:1166
std::vector< typename GraphTraits< Graph >::VertexIterator > graphDominators(Graph &g, typename GraphTraits< Graph >::VertexIterator root)
Find immediate pre-dominators.
Container associating values with keys.
Definition: Sawyer/Map.h:66
size_t maximumSolutionSize() const
Property: maximum allowed solution size.
std::vector< size_t > graphReachableVertices(Traversal t)
IDs of vertices reachable from root.