ROSE  0.9.10.196
GraphAlgorithm.h
1 // WARNING: Changes to this file must be contributed back to Sawyer or else they will
2 // be clobbered by the next update from Sawyer. The Sawyer repository is at
3 // https://github.com/matzke1/sawyer.
4 
5 
6 
7 
8 // Algorithms for Sawyer::Container::Graph
9 
10 #ifndef Sawyer_GraphAlgorithm_H
11 #define Sawyer_GraphAlgorithm_H
12 
13 #include <Sawyer/Sawyer.h>
14 #include <Sawyer/DenseIntegerSet.h>
15 #include <Sawyer/GraphTraversal.h>
16 #include <Sawyer/Set.h>
17 
18 #include <boost/foreach.hpp>
19 #include <boost/lexical_cast.hpp>
20 #include <iostream>
21 #include <list>
22 #include <set>
23 #include <vector>
24 
25 // If non-zero, then use a stack-based pool allocation strategy that tuned for multi-threading when searching for isomorphic
26 // subgraphs. The non-zero value defines the size of the large heap blocks requested from the standard runtime system and is a
27 // multiple of the size of the number of vertices in the second of the two graphs accessed by the search.
28 #ifndef SAWYER_VAM_STACK_ALLOCATOR
29 #define SAWYER_VAM_STACK_ALLOCATOR 2 // Use a per-thread, stack based allocation if non-zero
30 #endif
31 #if SAWYER_VAM_STACK_ALLOCATOR
32 #include <Sawyer/StackAllocator.h>
33 #endif
34 
35 // If defined, perform extra checks in the subgraph isomorphism "VAM" ragged array.
36 //#define SAWYER_VAM_EXTRA_CHECKS
37 
38 namespace Sawyer {
39 namespace Container {
40 namespace Algorithm {
41 
45 template<class Graph>
46 bool
48  std::vector<bool> visited(g.nVertices(), false); // have we seen this vertex already?
49  std::vector<bool> onPath(g.nVertices(), false); // is a vertex on the current path of edges?
50  for (size_t rootId = 0; rootId < g.nVertices(); ++rootId) {
51  if (visited[rootId])
52  continue;
53  visited[rootId] = true;
54  ASSERT_require(!onPath[rootId]);
55  onPath[rootId] = true;
57  size_t targetId = t.edge()->target()->id();
58  if (t.event() == ENTER_EDGE) {
59  if (onPath[targetId])
60  return true; // must be a back edge forming a cycle
61  onPath[targetId] = true;
62  if (visited[targetId]) {
63  t.skipChildren();
64  } else {
65  visited[targetId] = true;
66  }
67  } else {
68  ASSERT_require(t.event() == LEAVE_EDGE);
69  ASSERT_require(onPath[targetId]);
70  onPath[targetId] = false;
71  }
72  }
73  ASSERT_require(onPath[rootId]);
74  onPath[rootId] = false;
75  }
76  return false;
77 }
78 
83 template<class Graph>
84 size_t
86  std::vector<bool> visited(g.nVertices(), false); // have we seen this vertex already?
87  std::vector<unsigned char> onPath(g.nVertices(), false); // is a vertex on the current path of edges? 0, 1, or 2
88  std::set<typename Graph::ConstEdgeIterator> edgesToErase;
89 
90  for (size_t rootId = 0; rootId < g.nVertices(); ++rootId) {
91  if (visited[rootId])
92  continue;
93  visited[rootId] = true;
94  ASSERT_require(!onPath[rootId]);
95  onPath[rootId] = true;
97  size_t targetId = t.edge()->target()->id();
98  if (t.event() == ENTER_EDGE) {
99  if (onPath[targetId]) {
100  edgesToErase.insert(t.edge());
101  t.skipChildren();
102  }
103  ++onPath[targetId];
104  if (visited[targetId]) {
105  t.skipChildren();
106  } else {
107  visited[targetId] = true;
108  }
109  } else {
110  ASSERT_require(t.event() == LEAVE_EDGE);
111  ASSERT_require(onPath[targetId]);
112  --onPath[targetId];
113  }
114  }
115  ASSERT_require(onPath[rootId]==1);
116  onPath[rootId] = 0;
117  }
118 
119  BOOST_FOREACH (const typename Graph::ConstEdgeIterator &edge, edgesToErase)
120  g.eraseEdge(edge);
121  return edgesToErase.size();
122 }
123 
133 template<class Graph>
134 bool
136  if (g.isEmpty())
137  return true;
138  std::vector<bool> seen(g.nVertices(), false);
139  size_t nSeen = 0;
140  DenseIntegerSet<size_t> worklist(g.nVertices());
141  worklist.insert(0);
142  while (!worklist.isEmpty()) {
143  size_t id = *worklist.values().begin();
144  worklist.erase(id);
145 
146  if (seen[id])
147  continue;
148  seen[id] = true;
149  ++nSeen;
150 
151  typename Graph::ConstVertexIterator v = g.findVertex(id);
152  BOOST_FOREACH (const typename Graph::Edge &e, v->outEdges()) {
153  if (!seen[e.target()->id()]) // not necessary, but saves some work
154  worklist.insert(e.target()->id());
155  }
156  BOOST_FOREACH (const typename Graph::Edge &e, v->inEdges()) {
157  if (!seen[e.source()->id()]) // not necessary, but saves some work
158  worklist.insert(e.source()->id());
159  }
160  }
161  return nSeen == g.nVertices();
162 }
163 
173 template<class Graph>
174 size_t
175 graphFindConnectedComponents(const Graph &g, std::vector<size_t> &components /*out*/) {
176  static const size_t NOT_SEEN(-1);
177  size_t nComponents = 0;
178  components.clear();
179  components.resize(g.nVertices(), NOT_SEEN);
180  DenseIntegerSet<size_t> worklist(g.nVertices());
181  for (size_t rootId = 0; rootId < g.nVertices(); ++rootId) {
182  if (components[rootId] != NOT_SEEN)
183  continue;
184  ASSERT_require(worklist.isEmpty());
185  worklist.insert(rootId);
186  while (!worklist.isEmpty()) {
187  size_t id = *worklist.values().begin();
188  worklist.erase(id);
189 
190  ASSERT_require(components[id]==NOT_SEEN || components[id]==nComponents);
191  if (components[id] != NOT_SEEN)
192  continue;
193  components[id] = nComponents;
194 
195  typename Graph::ConstVertexIterator v = g.findVertex(id);
196  BOOST_FOREACH (const typename Graph::Edge &e, v->outEdges()) {
197  if (components[e.target()->id()] == NOT_SEEN) // not necessary, but saves some work
198  worklist.insert(e.target()->id());
199  }
200  BOOST_FOREACH (const typename Graph::Edge &e, v->inEdges()) {
201  if (components[e.source()->id()] == NOT_SEEN) // not necesssary, but saves some work
202  worklist.insert(e.source()->id());
203  }
204  }
205  ++nComponents;
206  }
207  return nComponents;
208 }
209 
218 template<class Graph>
219 Graph
220 graphCopySubgraph(const Graph &g, const std::vector<size_t> &vertexIdVector) {
221  Graph retval;
222 
223  // Insert vertices
224  typedef typename Graph::ConstVertexIterator VIter;
225  typedef Map<size_t, VIter> Id2Vertex;
226  Id2Vertex resultVertices;
227  for (size_t i=0; i<vertexIdVector.size(); ++i) {
228  ASSERT_forbid2(resultVertices.exists(vertexIdVector[i]), "duplicate vertices not allowed");
229  VIter rv = retval.insertVertex(g.findVertex(vertexIdVector[i])->value());
230  ASSERT_require(rv->id() == i); // some analyses depend on this
231  resultVertices.insert(vertexIdVector[i], rv);
232  }
233 
234  // Insert edges
235  for (size_t i=0; i<vertexIdVector.size(); ++i) {
236  typename Graph::ConstVertexIterator gSource = g.findVertex(vertexIdVector[i]);
237  typename Graph::ConstVertexIterator rSource = resultVertices[vertexIdVector[i]];
238  BOOST_FOREACH (const typename Graph::Edge &e, gSource->outEdges()) {
239  typename Graph::ConstVertexIterator rTarget = retval.vertices().end();
240  if (resultVertices.getOptional(e.target()->id()).assignTo(rTarget))
241  retval.insertEdge(rSource, rTarget, e.value());
242  }
243  }
244  return retval;
245 }
246 
252 template<class Graph>
253 void
255  BOOST_FOREACH (const typename Graph::Vertex &src, g.vertices()) {
256  if (src.nOutEdges() > 1) {
257  Map<typename Graph::ConstVertexIterator /*target*/, std::vector<typename Graph::ConstEdgeIterator> > edgesByTarget;
258  typename Graph::ConstEdgeIterator nextEdge = src.outEdges().begin();
259  while (nextEdge != src.outEdges().end()) {
260  typename Graph::ConstEdgeIterator curEdge = nextEdge++;
261  std::vector<typename Graph::ConstEdgeIterator> &prevEdges = edgesByTarget.insertMaybeDefault(curEdge->target());
262  bool erased = false;
263  BOOST_FOREACH (typename Graph::ConstEdgeIterator prevEdge, prevEdges) {
264  if (curEdge->value() == prevEdge->value()) {
265  g.eraseEdge(curEdge);
266  erased = true;
267  break;
268  }
269  }
270  if (!erased)
271  prevEdges.push_back(curEdge);
272  }
273  }
274  }
275 }
276 
283 template<class Graph>
284 std::vector<size_t>
286  size_t height = 1;
287  std::vector<size_t> retval(g.nVertices(), 0);
288  for (size_t root = 0; root < retval.size(); ++root) {
289  if (0 == retval[root]) {
290  std::vector<size_t /*vertexId*/> stack;
291  stack.reserve(retval.size());
292  stack.push_back(root);
293  while (!stack.empty()) {
294  size_t vid = stack.back();
295  BOOST_FOREACH (const typename Graph::Edge &edge, g.findVertex(vid)->outEdges()) {
296  size_t target = edge.target()->id();
297  if (0 == retval[target]) {
298  retval[target] = 1;
299  stack.push_back(target);
300  }
301  }
302  if (stack.back() == vid) {
303  stack.pop_back();
304  retval[vid] = ++height;
305  }
306  }
307  }
308  }
309  return retval;
310 }
311 
313 // Common subgraph isomorphism (CSI)
314 // Loosely based on the algorithm presented by Evgeny B. Krissinel and Kim Henrick
315 // "Common subgraph isomorphism detection by backtracking search"
316 // European Bioinformatics Institute, Genome Campus, Hinxton, Cambridge CB10 1SD, UK
318 
324 template<class GraphA, class GraphB>
326 public:
331  bool mu(const GraphA &g1, const typename GraphA::ConstVertexIterator &v1,
332  const GraphB &g2, const typename GraphB::ConstVertexIterator &v2) const {
333  SAWYER_ARGUSED(g1); // Leave formal arg names in declaration because they're important
334  SAWYER_ARGUSED(v1); // documentation for this function.
335  SAWYER_ARGUSED(g2);
336  SAWYER_ARGUSED(v2);
337  return true;
338  }
339 
349  bool nu(const GraphA &g1, typename GraphA::ConstVertexIterator i1, typename GraphA::ConstVertexIterator i2,
350  const std::vector<typename GraphA::ConstEdgeIterator> &edges1,
351  const GraphB &g2, typename GraphB::ConstVertexIterator j1, typename GraphB::ConstVertexIterator j2,
352  const std::vector<typename GraphB::ConstEdgeIterator> &edges2) const {
353  SAWYER_ARGUSED(g1); // Leave formal argument names in declaration because they're
354  SAWYER_ARGUSED(i1); // important documentation for this function.
355  SAWYER_ARGUSED(i2);
356  SAWYER_ARGUSED(edges1);
357  SAWYER_ARGUSED(g2);
358  SAWYER_ARGUSED(j1);
359  SAWYER_ARGUSED(j2);
360  SAWYER_ARGUSED(edges2);
361  return true;
362  }
363 
369  void progress(size_t /*size*/) {}
370 };
371 
376 };
377 
389 template<class GraphA, class GraphB>
391  size_t n;
392 public:
393  CsiShowSolution(): n(0) {}
394 
400  CsiNextAction operator()(const GraphA &g1, const std::vector<size_t> &x, const GraphB &g2, const std::vector<size_t> &y) {
401  ASSERT_require(x.size() == y.size());
402  std::cout <<"Common subgraph isomorphism solution #" <<n <<" found:\n"
403  <<" x = [";
404  BOOST_FOREACH (size_t i, x)
405  std::cout <<" " <<i;
406  std::cout <<" ]\n"
407  <<" y = [";
408  BOOST_FOREACH (size_t j, y)
409  std::cout <<" " <<j;
410  std::cout <<" ]\n";
411  ++n;
412  return CSI_CONTINUE;
413  }
414 };
415 
439 template<class GraphA, class GraphB,
440  class SolutionProcessor = CsiShowSolution<GraphA, GraphB>,
441  class EquivalenceP = CsiEquivalence<GraphA, GraphB> >
443  const GraphA &g1; // The first graph being compared (i.e., needle)
444  const GraphB &g2; // the second graph being compared (i.e., haystack)
445  DenseIntegerSet<size_t> v, w; // available vertices of g1 and g2, respectively
446  std::vector<size_t> x, y; // selected vertices of g1 and g2, which defines vertex mapping
447  DenseIntegerSet<size_t> vNotX; // X erased from V
448 
449  SolutionProcessor solutionProcessor_; // functor to call for each solution
450  EquivalenceP equivalenceP_; // predicates to determine if two vertices can be equivalent
451  size_t minimumSolutionSize_; // size of smallest permitted solutions
452  size_t maximumSolutionSize_; // size of largest permitted solutions
453  bool monotonicallyIncreasing_; // size of solutions increases
454  bool findingCommonSubgraphs_; // solutions are subgraphs of both graphs or only second graph?
455 
456  // The Vam is a ragged 2d array, but using std::vector<std::vector<T>> is not efficient in a multithreaded environment
457  // because std::allocator<> and Boost pool allocators don't (yet) have a mode that allows each thread to be largely
458  // independent of other threads. The best we can do with std::allocator is about 30 threads running 100% on my box having
459  // 72 hardware threads. But we can use the following facts about the Vam:
460  //
461  // (1) VAMs are constructed and destroyed in a stack-like manner.
462  // (2) We always know the exact size of the major axis (number or rows) before the VAM is created.
463  // (3) The longest row of a new VAM will not be longer than the longest row of the VAM from which it is derived.
464  // (4) The number of rows will never be more than the number of vertices in g1.
465  // (5) The longest row will never be longer than the number of vertices in g2.
466  // (6) VAMs are never shared between threads
467 #if SAWYER_VAM_STACK_ALLOCATOR
469 #else
470  struct VamAllocator {
471  explicit VamAllocator(size_t) {}
472  };
473 #endif
474  VamAllocator vamAllocator_;
475 
476  // Essentially a ragged array having a fixed number of rows and each row can be a different length. The number of rows is
477  // known at construction time, and the rows are extended one at a time starting with the first and working toward the
478  // last. Accessing any element is a constant-time operation.
479  class Vam { // Vertex Availability Map
480  VamAllocator &allocator_;
481 #if SAWYER_VAM_STACK_ALLOCATOR
482  std::vector<size_t*> rows_;
483  std::vector<size_t> rowSize_;
484  size_t *lowWater_; // first element allocated
485 #else
486  std::vector<std::vector<size_t> > rows_;
487 #endif
488  size_t lastRowStarted_;
489 #ifdef SAWYER_VAM_EXTRA_CHECKS
490  size_t maxRows_, maxCols_;
491 #endif
492 
493  public:
494  // Construct the VAM and reserve enough space for the indicated number of rows.
495  explicit Vam(VamAllocator &allocator)
496  : allocator_(allocator),
497 #if SAWYER_VAM_STACK_ALLOCATOR
498  lowWater_(NULL),
499 #endif
500  lastRowStarted_((size_t)(-1)) {
501  }
502 
503  // Destructor assumes this is the top VAM in the allocator stack.
504  ~Vam() {
505 #if SAWYER_VAM_STACK_ALLOCATOR
506  if (lowWater_ != NULL) {
507  ASSERT_require(!rows_.empty());
508  allocator_.revert(lowWater_);
509  } else {
510  ASSERT_require((size_t)std::count(rowSize_.begin(), rowSize_.end(), 0) == rows_.size()); // all rows empty
511  }
512 #endif
513  }
514 
515  // Reserve space for specified number of rows.
516  void reserveRows(size_t nrows) {
517  rows_.reserve(nrows);
518 #if SAWYER_VAM_STACK_ALLOCATOR
519  rowSize_.reserve(nrows);
520 #endif
521 #ifdef SAWYER_VAM_EXTRA_CHECKS
522  maxRows_ = nrows;
523  maxCols_ = 0;
524 #endif
525  }
526 
527  // Start a new row. You can only insert elements into the most recent row.
528  void startNewRow(size_t i, size_t maxColumns) {
529 #ifdef SAWYER_VAM_EXTRA_CHECKS
530  ASSERT_require(i < maxRows_);
531  maxCols_ = maxColumns;
532 #endif
533 #if SAWYER_VAM_STACK_ALLOCATOR
534  if (i >= rows_.size()) {
535  rows_.resize(i+1, NULL);
536  rowSize_.resize(i+1, 0);
537  }
538  ASSERT_require(rows_[i] == NULL); // row was already started
539  rows_[i] = allocator_.reserve(maxColumns);
540 #else
541  if (i >= rows_.size())
542  rows_.resize(i+1);
543 #endif
544  lastRowStarted_ = i;
545  }
546 
547  // Push a new element onto the end of the current row.
548  void push(size_t i, size_t x) {
549  ASSERT_require(i == lastRowStarted_);
550  ASSERT_require(i < rows_.size());
551 #ifdef SAWYER_VAM_EXTRA_CHECKS
552  ASSERT_require(size(i) < maxCols_);
553 #endif
554 #if SAWYER_VAM_STACK_ALLOCATOR
555  size_t *ptr = allocator_.allocNext();
556  if (lowWater_ == NULL)
557  lowWater_ = ptr;
558 #ifdef SAWYER_VAM_EXTRA_CHECKS
559  ASSERT_require(ptr == rows_[i] + rowSize_[i]);
560 #endif
561  ++rowSize_[i];
562  *ptr = x;
563 #else
564  rows_[i].push_back(x);
565 #endif
566  }
567 
568  // Given a vertex i in G1, return the number of vertices j in G2 where i and j can be equivalent.
569  size_t size(size_t i) const {
570 #if SAWYER_VAM_STACK_ALLOCATOR
571  return i < rows_.size() ? rowSize_[i] : size_t(0);
572 #else
573  return i < rows_.size() ? rows_[i].size() : size_t(0);
574 #endif
575  }
576 
577  // Given a vertex i in G1, return those vertices j in G2 where i and j can be equivalent.
578  // This isn't really a proper iterator, but we can't return std::vector<size_t>::const_iterator on macOS because it's
579  // constructor-from-pointer is private.
580  boost::iterator_range<const size_t*> get(size_t i) const {
581  static const size_t empty = 911; /*arbitrary*/
582  if (i < rows_.size() && size(i) > 0) {
583 #if SAWYER_VAM_STACK_ALLOCATOR
584  return boost::iterator_range<const size_t*>(rows_[i], rows_[i] + rowSize_[i]);
585 #else
586  return boost::iterator_range<const size_t*>(&rows_[i][0], &rows_[i][0] + rows_[i].size());
587 #endif
588  }
589  return boost::iterator_range<const size_t*>(&empty, &empty);
590  }
591  };
592 
593 public:
607  CommonSubgraphIsomorphism(const GraphA &g1, const GraphB &g2,
608  SolutionProcessor solutionProcessor = SolutionProcessor(),
609  EquivalenceP equivalenceP = EquivalenceP())
610  : g1(g1), g2(g2), v(g1.nVertices()), w(g2.nVertices()), vNotX(g1.nVertices()), solutionProcessor_(solutionProcessor),
611  equivalenceP_(equivalenceP), minimumSolutionSize_(1), maximumSolutionSize_(-1), monotonicallyIncreasing_(false),
612  findingCommonSubgraphs_(true), vamAllocator_(SAWYER_VAM_STACK_ALLOCATOR * g2.nVertices()) {}
613 
614 private:
616  ASSERT_not_reachable("copy constructor makes no sense");
617  }
618 
620  ASSERT_not_reachable("assignment operator makes no sense");
621  }
622 
623 public:
640  size_t minimumSolutionSize() const { return minimumSolutionSize_; }
641  void minimumSolutionSize(size_t n) { minimumSolutionSize_ = n; }
658  size_t maximumSolutionSize() const { return maximumSolutionSize_; }
659  void maximumSolutionSize(size_t n) { maximumSolutionSize_ = n; }
670  bool monotonicallyIncreasing() const { return monotonicallyIncreasing_; }
671  void monotonicallyIncreasing(bool b) { monotonicallyIncreasing_ = b; }
680  SolutionProcessor& solutionProcessor() { return solutionProcessor_; }
681  const SolutionProcessor& solutionProcessor() const { return solutionProcessor_; }
693  EquivalenceP& equivalencePredicate() { return equivalenceP_; }
694  const EquivalenceP& equivalencePredicate() const { return equivalenceP_; }
707  bool findingCommonSubgraphs() const { return findingCommonSubgraphs_; }
708  void findingCommonSubgraphs(bool b) { findingCommonSubgraphs_ = b; }
726  void run() {
727  reset();
728  Vam vam = initializeVam();
729  recurse(vam);
730  }
731 
736  void reset() {
737  v.insertAll();
738  w.insertAll();
739  x.clear();
740  y.clear();
741  vNotX.insertAll();
742  }
743 
744 private:
745  // Maximum value+1 or zero.
746  template<typename Container>
747  static size_t maxPlusOneOrZero(const Container &container) {
748  if (container.isEmpty())
749  return 0;
750  size_t retval = 0;
751  BOOST_FOREACH (size_t val, container.values())
752  retval = std::max(retval, val);
753  return retval+1;
754  }
755 
756  // Initialize VAM so to indicate which source vertices (v of g1) map to which target vertices (w of g2) based only on the
757  // vertex comparator. We also handle self edges here.
758  Vam initializeVam() {
759  Vam vam(vamAllocator_);
760  vam.reserveRows(maxPlusOneOrZero(v));
761  BOOST_FOREACH (size_t i, v.values()) {
762  typename GraphA::ConstVertexIterator v1 = g1.findVertex(i);
763  vam.startNewRow(i, w.size());
764  BOOST_FOREACH (size_t j, w.values()) {
765  typename GraphB::ConstVertexIterator w1 = g2.findVertex(j);
766  std::vector<typename GraphA::ConstEdgeIterator> selfEdges1;
767  std::vector<typename GraphB::ConstEdgeIterator> selfEdges2;
768  findEdges(g1, i, i, selfEdges1 /*out*/);
769  findEdges(g2, j, j, selfEdges2 /*out*/);
770  if (selfEdges1.size() == selfEdges2.size() &&
771  equivalenceP_.mu(g1, g1.findVertex(i), g2, g2.findVertex(j)) &&
772  equivalenceP_.nu(g1, v1, v1, selfEdges1, g2, w1, w1, selfEdges2))
773  vam.push(i, j);
774  }
775  }
776  return vam;
777  }
778 
779  // Can the solution (stored in X and Y) be extended by adding another (i,j) pair of vertices where i is an element of the
780  // set of available vertices of graph1 (vNotX) and j is a vertex of graph2 that is equivalent to i according to the
781  // user-provided equivalence predicate. Furthermore, is it even possible to find a solution along this branch of discovery
782  // which is falls between the minimum and maximum sizes specified by the user? By eliminating entire branch of the search
783  // space we can drastically decrease the time it takes to search, and the larger the required solution the more branches we
784  // can eliminate.
785  bool isSolutionPossible(const Vam &vam) const {
786  if (findingCommonSubgraphs_ && x.size() >= maximumSolutionSize_)
787  return false; // any further soln on this path would be too large
788  size_t largestPossibleSolution = x.size();
789  BOOST_FOREACH (size_t i, vNotX.values()) {
790  if (vam.size(i) > 0) {
791  ++largestPossibleSolution;
792  if ((findingCommonSubgraphs_ && largestPossibleSolution >= minimumSolutionSize_) ||
793  (!findingCommonSubgraphs_ && largestPossibleSolution >= g1.nVertices()))
794  return true;
795  }
796  }
797  return false;
798  }
799 
800  // Choose some vertex i from G1 which is still available (i.e., i is a member of vNotX) and for which there is a vertex
801  // equivalence of i with some available j from G2 (i.e., the pair (i,j) is present in the VAM). The recursion terminates
802  // quickest if we return the row of the VAM that has the fewest vertices in G2.
803  size_t pickVertex(const Vam &vam) const {
804  // FIXME[Robb Matzke 2016-03-19]: Perhaps this can be made even faster. The step that initializes the VAM
805  // (initializeVam or refine) might be able to compute and store the shortest row so we can retrieve it here in constant
806  // time. Probably not worth the work though since loop this is small compared to the overall analysis.
807  size_t shortestRowLength(-1), retval(-1);
808  BOOST_FOREACH (size_t i, vNotX.values()) {
809  size_t vs = vam.size(i);
810  if (vs > 0 && vs < shortestRowLength) {
811  shortestRowLength = vs;
812  retval = i;
813  }
814  }
815  ASSERT_require2(retval != size_t(-1), "cannot be reached if isSolutionPossible returned true");
816  return retval;
817  }
818 
819  // Extend the current solution by adding vertex i from G1 and vertex j from G2. The VAM should be adjusted separately.
820  void extendSolution(size_t i, size_t j) {
821  ASSERT_require(x.size() == y.size());
822  ASSERT_require(std::find(x.begin(), x.end(), i) == x.end());
823  ASSERT_require(std::find(y.begin(), y.end(), j) == y.end());
824  ASSERT_require(vNotX.exists(i));
825  x.push_back(i);
826  y.push_back(j);
827  vNotX.erase(i);
828  }
829 
830  // Remove the last vertex mapping from a solution. The VAM should be adjusted separately.
831  void retractSolution() {
832  ASSERT_require(x.size() == y.size());
833  ASSERT_require(!x.empty());
834  size_t i = x.back();
835  ASSERT_forbid(vNotX.exists(i));
836  x.pop_back();
837  y.pop_back();
838  vNotX.insert(i);
839  }
840 
841  // Find all edges that have the specified source and target vertices. This is usually zero or one edge, but can be more if
842  // the graph contains parallel edges.
843  template<class Graph>
844  void
845  findEdges(const Graph &g, size_t sourceVertex, size_t targetVertex,
846  std::vector<typename Graph::ConstEdgeIterator> &result /*in,out*/) const {
847  BOOST_FOREACH (const typename Graph::Edge &candidate, g.findVertex(sourceVertex)->outEdges()) {
848  if (candidate.target()->id() == targetVertex)
849  result.push_back(g.findEdge(candidate.id()));
850  }
851  }
852 
853  // Given that we just extended a solution by adding the vertex pair (i, j), decide whether there's a
854  // possible equivalence between vertex iUnused of G1 and vertex jUnused of G2 based on the edge(s) between i and iUnused
855  // and between j and jUnused.
856  bool edgesAreSuitable(size_t i, size_t iUnused, size_t j, size_t jUnused) const {
857  ASSERT_require(i != iUnused);
858  ASSERT_require(j != jUnused);
859 
860  // The two subgraphs in a solution must have the same number of edges.
861  std::vector<typename GraphA::ConstEdgeIterator> edges1;
862  std::vector<typename GraphB::ConstEdgeIterator> edges2;
863  findEdges(g1, i, iUnused, edges1 /*out*/);
864  findEdges(g2, j, jUnused, edges2 /*out*/);
865  if (edges1.size() != edges2.size())
866  return false;
867  findEdges(g1, iUnused, i, edges1 /*out*/);
868  findEdges(g2, jUnused, j, edges2 /*out*/);
869  if (edges1.size() != edges2.size())
870  return false;
871 
872  // If there are no edges, then assume that iUnused and jUnused could be isomorphic. We already know they satisfy the mu
873  // constraint otherwise they wouldn't even be in the VAM.
874  if (edges1.empty() && edges2.empty())
875  return true;
876 
877  // Everything looks good to us, now let the user weed out certain pairs of vertices based on their incident edges.
878  typename GraphA::ConstVertexIterator v1 = g1.findVertex(i), v2 = g1.findVertex(iUnused);
879  typename GraphB::ConstVertexIterator w1 = g2.findVertex(j), w2 = g2.findVertex(jUnused);
880  return equivalenceP_.nu(g1, v1, v2, edges1, g2, w1, w2, edges2);
881  }
882 
883  // Create a new VAM from an existing one. The (i,j) pairs of the new VAM will form a subset of the specified VAM.
884  void refine(const Vam &vam, Vam &refined /*out*/) {
885  refined.reserveRows(maxPlusOneOrZero(vNotX));
886  BOOST_FOREACH (size_t i, vNotX.values()) {
887  size_t rowLength = vam.size(i);
888  refined.startNewRow(i, rowLength);
889  BOOST_FOREACH (size_t j, vam.get(i)) {
890  if (j != y.back() && edgesAreSuitable(x.back(), i, y.back(), j))
891  refined.push(i, j);
892  }
893  }
894  }
895 
896  // The Goldilocks predicate. Returns true if the solution is a valid size, false if it's too small or too big.
897  bool isSolutionValidSize() const {
898  if (findingCommonSubgraphs_) {
899  return x.size() >= minimumSolutionSize_ && x.size() <= maximumSolutionSize_;
900  } else {
901  return x.size() == g1.nVertices();
902  }
903  }
904 
905  // The main recursive function. It works by extending the current solution by one pair for all combinations of such pairs
906  // that are permissible according to the vertex equivalence predicate and not already part of the solution and then
907  // recursively searching the remaining space. This analysis class acts as a state machine whose data structures are
908  // advanced and retracted as the space is searched. The VAM is the only part of the state that needs to be stored on a
909  // stack since changes to it could not be easily undone during the retract phase.
910  CsiNextAction recurse(const Vam &vam, size_t level = 0) {
911  equivalenceP_.progress(level);
912  if (isSolutionPossible(vam)) {
913  size_t i = pickVertex(vam);
914  BOOST_FOREACH (size_t j, vam.get(i)) {
915  extendSolution(i, j);
916  Vam refined(vamAllocator_);
917  refine(vam, refined);
918  if (recurse(refined, level+1) == CSI_ABORT)
919  return CSI_ABORT;
920  retractSolution();
921  }
922 
923  // Try again after removing vertex i from consideration
924  if (findingCommonSubgraphs_) {
925  v.erase(i);
926  ASSERT_require(vNotX.exists(i));
927  vNotX.erase(i);
928  if (recurse(vam, level+1) == CSI_ABORT)
929  return CSI_ABORT;
930  v.insert(i);
931  vNotX.insert(i);
932  }
933  } else if (isSolutionValidSize()) {
934  ASSERT_require(x.size() == y.size());
935  if (monotonicallyIncreasing_)
936  minimumSolutionSize_ = x.size();
937  if (solutionProcessor_(g1, x, g2, y) == CSI_ABORT)
938  return CSI_ABORT;
939  }
940  return CSI_CONTINUE;
941  }
942 };
943 
964 template<class GraphA, class GraphB, class SolutionProcessor>
965 void findCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor) {
967  csi.run();
968 }
969 
970 template<class GraphA, class GraphB, class SolutionProcessor, class EquivalenceP>
971 void findCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2,
972  SolutionProcessor solutionProcessor, EquivalenceP equivalenceP) {
973  CommonSubgraphIsomorphism<GraphA, GraphB, SolutionProcessor, EquivalenceP> csi(g1, g2, solutionProcessor, equivalenceP);
974  csi.run();
975 }
978 // Used by findFirstCommonIsomorphicSubgraph
979 template<class GraphA, class GraphB>
981  std::pair<std::vector<size_t>, std::vector<size_t> > solution_;
982 public:
983  CsiNextAction operator()(const GraphA &/*g1*/, const std::vector<size_t> &x,
984  const GraphB &/*g2*/, const std::vector<size_t> &y) {
985  solution_ = std::make_pair(x, y);
986  return CSI_ABORT;
987  }
988 
989  const std::pair<std::vector<size_t>, std::vector<size_t> >& solution() const {
990  return solution_;
991  }
992 };
993 
1001 template<class GraphA, class GraphB>
1002 std::pair<std::vector<size_t>, std::vector<size_t> >
1003 findFirstCommonIsomorphicSubgraph(const GraphA &g1, const GraphB &g2, size_t minimumSize) {
1005  csi.minimumSolutionSize(minimumSize);
1006  csi.maximumSolutionSize(minimumSize); // to avoid going further than necessary
1007  csi.run();
1008  return csi.solutionProcessor().solution();
1009 }
1010 
1011 
1012 template<class GraphA, class GraphB, class EquivalenceP>
1013 std::pair<std::vector<size_t>, std::vector<size_t> >
1014 findFirstCommonIsomorphicSubgraph(const GraphA &g1, const GraphB &g2, size_t minimumSize, EquivalenceP equivalenceP) {
1016  csi(g1, g2, FirstIsomorphicSubgraph<GraphA, GraphB>(), equivalenceP);
1017  csi.minimumSolutionSize(minimumSize);
1018  csi.maximumSolutionSize(minimumSize); // to avoid going further than necessary
1019  csi.run();
1020  return csi.solutionProcessor().solution();
1021 }
1022 
1041 template<class GraphA, class GraphB, class SolutionProcessor>
1042 void findIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor) {
1043  CommonSubgraphIsomorphism<GraphA, GraphB, SolutionProcessor> csi(g1, g2, solutionProcessor);
1044  csi.findingCommonSubgraphs(false);
1045  csi.run();
1046 }
1047 
1048 template<class GraphA, class GraphB, class SolutionProcessor, class EquivalenceP>
1049 void findIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2,
1050  SolutionProcessor solutionProcessor, EquivalenceP equivalenceP) {
1051  CommonSubgraphIsomorphism<GraphA, GraphB, SolutionProcessor, EquivalenceP> csi(g1, g2, solutionProcessor, equivalenceP);
1052  csi.findingCommonSubgraphs(false);
1053  csi.run();
1054 }
1057 // Used internally by findMaximumCommonIsomorphicSubgraphs
1058 template<class GraphA, class GraphB>
1060  std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > > solutions_;
1061 public:
1062  CsiNextAction operator()(const GraphA &/*g1*/, const std::vector<size_t> &x,
1063  const GraphB &/*g2*/, const std::vector<size_t> &y) {
1064  if (!solutions_.empty() && x.size() > solutions_.front().first.size())
1065  solutions_.clear();
1066  solutions_.push_back(std::make_pair(x, y));
1067  return CSI_CONTINUE;
1068  }
1069 
1070  const std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > > &solutions() const {
1071  return solutions_;
1072  }
1073 };
1074 
1095 template<class GraphA, class GraphB>
1096 std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > >
1097 findMaximumCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2) {
1099  csi.monotonicallyIncreasing(true);
1100  csi.run();
1101  return csi.solutionProcessor().solutions();
1102 }
1103 
1104 template<class GraphA, class GraphB, class EquivalenceP>
1105 std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > >
1106 findMaximumCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, EquivalenceP equivalenceP) {
1108  csi(g1, g2, MaximumIsomorphicSubgraphs<GraphA, GraphB>(), equivalenceP);
1109  csi.monotonicallyIncreasing(true);
1110  csi.run();
1111  return csi.solutionProcessor().solutions();
1112 }
1156 template<class Direction, class Graph>
1157 std::vector<typename GraphTraits<Graph>::VertexIterator>
1159  typedef typename GraphTraits<Graph>::VertexIterator VertexIterator;
1160  typedef typename GraphTraits<Graph>::EdgeIterator EdgeIterator;
1161  typedef typename GraphTraits<Graph>::Edge Edge;
1162  static const size_t NO_ID = (size_t)(-1);
1163 
1164  ASSERT_require(g.isValidVertex(root));
1165 
1166  // List of vertex IDs in the best order for data-flow. I.e., the reverse of the post-fix, depth-first traversal following
1167  // forwared or reverse edges (depending on the Direction template argument). A useful fact is that disregarding back
1168  // edges, the predecessors of the vertex represented by flowlist[i] all appear earlier in flowlist.
1170  traversal.start(root);
1171  std::vector<size_t> flowlist = graphReachableVertices(traversal);
1172  std::reverse(flowlist.begin(), flowlist.end());
1173 
1174  // The inverse mapping of the flowlist. I.e.., since flowlist maps an index to a vertex ID, rflowlist maps a vertex ID back
1175  // to the flowlist index. If vertex v is not reachable from the root, then rflowlist[v] == NO_ID.
1176  std::vector<size_t> rflowlist(g.nVertices(), NO_ID);
1177  for (size_t i=0; i<flowlist.size(); ++i)
1178  rflowlist[flowlist[i]] = i;
1179 
1180  // Initialize the idom vector. idom[i]==i implies idom[i] is unknown
1181  std::vector<size_t> idom(flowlist.size());
1182  for (size_t i=0; i<flowlist.size(); ++i)
1183  idom[i] = i;
1184 
1185  bool changed = true;
1186  while (changed) {
1187  changed = false; // assume no change, prove otherwise
1188  for (size_t vertex_i=0; vertex_i < flowlist.size(); ++vertex_i) {
1189  VertexIterator vertex = g.findVertex(flowlist[vertex_i]);
1190 
1191  // Test idom invariant
1192 #ifndef SAWYER_NDEBUG
1193  for (size_t i=0; i<idom.size(); ++i)
1194  ASSERT_require(idom[i] <= i);
1195 #endif
1196 
1197  // The root vertex has no immediate dominator.
1198  // FIXME[Robb P Matzke 2017-06-23]: why not just start iterating with vertex_i=1?
1199  if (vertex == root)
1200  continue;
1201 
1202  // Try to choose a new idom for this vertex by processing each of its predecessors. The dominator of the current
1203  // vertex is a function of the dominators of its predecessors.
1204  size_t newIdom = idom[vertex_i];
1205 
1206  boost::iterator_range<EdgeIterator> edges = previousEdges<EdgeIterator>(vertex, Direction());
1207  for (EdgeIterator edge=edges.begin(); edge!=edges.end(); ++edge) {
1208  VertexIterator predecessor = previousVertex<VertexIterator>(edge, Direction());
1209  size_t predecessor_i = rflowlist[predecessor->id()];
1210  if (NO_ID == predecessor_i)
1211  continue; // predecessor is not reachable from root, so does't contribute
1212  if (predecessor_i == vertex_i)
1213  continue; // ignore self edges: V cannot be its own immediate dominator
1214  if (predecessor_i > vertex_i)
1215  continue; // ignore back edges; the idom will also be found along a forward edge
1216 
1217  // The predecessor might be our dominator, so merge it with what we already have
1218  if (newIdom == vertex_i) {
1219  newIdom = predecessor_i;
1220  } else {
1221  size_t tmpIdom = predecessor_i;
1222  while (newIdom != tmpIdom) {
1223  while (newIdom > tmpIdom)
1224  newIdom = idom[newIdom];
1225  while (tmpIdom > newIdom)
1226  tmpIdom = idom[tmpIdom];
1227  }
1228  }
1229  }
1230 
1231  if (idom[vertex_i] != newIdom) {
1232  idom[vertex_i] = newIdom;
1233  changed = true;
1234  }
1235  }
1236  }
1237 
1238  // Build the result relation
1239  std::vector<VertexIterator> retval;
1240  retval.resize(g.nVertices(), g.vertices().end());
1241  for (size_t i=0; i<flowlist.size(); ++i) {
1242  if (idom[i] != i)
1243  retval[flowlist[i]] = g.findVertex(flowlist[idom[i]]);
1244  }
1245  return retval;
1246 }
1247 
1255 template<class Graph>
1256 std::vector<typename GraphTraits<Graph>::VertexIterator>
1258  return graphDirectedDominators<ForwardTraversalTag>(g, root);
1259 }
1260 
1268 template<class Graph>
1269 std::vector<typename GraphTraits<Graph>::VertexIterator>
1271  return graphDirectedDominators<ReverseTraversalTag>(g, root);
1272 }
1273 
1274 } // namespace
1275 } // namespace
1276 } // namespace
1277 
1278 #endif
void reset()
Releases memory used by the analysis.
size_t nVertices() const
Total number of vertices.
Definition: Graph.h:1678
T * reserve(size_t nElmts)
Reserve space for objects.
std::vector< size_t > graphDependentOrder(Graph &g)
Number vertices according to their height from the leaves.
const SolutionProcessor & solutionProcessor() const
Property: reference to the solution callback.
VertexIterator insertVertex(const VertexValue &value=VertexValue())
Insert a new vertex.
Definition: Graph.h:1714
const size_t & id() const
Unique vertex ID number.
Definition: Graph.h:1210
CsiNextAction operator()(const GraphA &g1, const std::vector< size_t > &x, const GraphB &g2, const std::vector< size_t > &y)
Functor.
Graph containing user-defined vertices and edges.
Definition: Graph.h:625
void minimumSolutionSize(size_t n)
Property: minimum allowed solution size.
size_t minimumSolutionSize() const
Property: minimum allowed solution size.
std::vector< typename GraphTraits< Graph >::VertexIterator > graphPostDominators(Graph &g, typename GraphTraits< Graph >::VertexIterator root)
Find immediate post-dominators.
Traits for graphs.
Definition: Graph.h:287
boost::iterator_range< EdgeIterator > inEdges()
List of incoming edges.
Definition: Graph.h:1221
Vertex equivalence for common subgraph isomorphism.
bool isValidVertex(const ConstVertexIterator &vertex) const
Determines whether the vertex iterator is valid.
Definition: Graph.h:1559
void progress(size_t)
Called at each step during the algorithm.
T * allocNext()
Allocate one element.
bool graphContainsCycle(const Graph &g)
Determines if the any edges of a graph form a cycle.
VertexValue & value()
User-defined value.
Definition: Graph.h:1286
size_t size() const
Number of members present.
boost::iterator_range< VertexIterator > vertices()
Iterators for all vertices.
Definition: Graph.h:1462
boost::iterator_range< ConstIterator > values() const
Iterator range for set members.
bool exists(const Value &value) const
Determines whether a value is stored.
EdgeIterator insertEdge(const VertexIterator &sourceVertex, const VertexIterator &targetVertex, const EdgeValue &value=EdgeValue())
Insert a new edge.
Definition: Graph.h:1744
Bidirectional vertex node iterator.
Definition: Graph.h:1044
void revert(T *ptr)
Free all elements back to the specified element.
Name space for the entire library.
void findCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor)
Find common isomorphic subgraphs.
Graph graphCopySubgraph(const Graph &g, const std::vector< size_t > &vertexIdVector)
Create a subgraph.
size_t graphFindConnectedComponents(const Graph &g, std::vector< size_t > &components)
Find all connected components of a graph.
boost::iterator_range< EdgeIterator > outEdges()
List of outgoing edges.
Definition: Graph.h:1242
std::pair< std::vector< size_t >, std::vector< size_t > > findFirstCommonIsomorphicSubgraph(const GraphA &g1, const GraphB &g2, size_t minimumSize)
Determine whether a common subgraph exists.
const VertexIterator & source()
Source vertex.
Definition: Graph.h:1147
Functor called for each common subgraph isomorphism solution.
void maximumSolutionSize(size_t n)
Property: maximum allowed solution size.
CsiNextAction
How the CSI algorith should proceed.
EdgeValue & value()
User-defined value.
Definition: Graph.h:1173
bool nu(const GraphA &g1, typename GraphA::ConstVertexIterator i1, typename GraphA::ConstVertexIterator i2, const std::vector< typename GraphA::ConstEdgeIterator > &edges1, const GraphB &g2, typename GraphB::ConstVertexIterator j1, typename GraphB::ConstVertexIterator j2, const std::vector< typename GraphB::ConstEdgeIterator > &edges2) const
Isomorphism of vertices based on incident edges.
bool insert(Value value)
Insert a value.
std::vector< std::pair< std::vector< size_t >, std::vector< size_t > > > findMaximumCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2)
Find maximum common isomorphic subgraphs.
const EquivalenceP & equivalencePredicate() const
Property: reference to the vertex equivalence predicate.
void monotonicallyIncreasing(bool b)
Property: monotonically increasing solution size.
void run()
Perform the common subgraph isomorphism analysis.
CommonSubgraphIsomorphism(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor=SolutionProcessor(), EquivalenceP equivalenceP=EquivalenceP())
Construct a solver.
std::vector< typename GraphTraits< Graph >::VertexIterator > graphDirectedDominators(Graph &g, typename GraphTraits< Graph >::VertexIterator root)
Find immediate pre- or post-dominators.
VertexIterator findVertex(size_t id)
Finds the vertex with specified ID number.
Definition: Graph.h:1509
void findIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor)
Find an isomorphic subgraph.
Continue searching for more solutions.
bool findingCommonSubgraphs() const
Property: find common subgraphs.
bool erase(Value value)
Erase a value.
SolutionProcessor & solutionProcessor()
Property: reference to the solution callback.
bool monotonicallyIncreasing() const
Property: monotonically increasing solution size.
Bidirectional edge node iterator.
Definition: Graph.h:945
size_t graphBreakCycles(Graph &g)
Break cycles of a graph arbitrarily.
void findingCommonSubgraphs(bool b)
Property: find common subgraphs.
size_t nOutEdges() const
Number of outgoing edges.
Definition: Graph.h:1264
bool mu(const GraphA &g1, const typename GraphA::ConstVertexIterator &v1, const GraphB &g2, const typename GraphB::ConstVertexIterator &v2) const
Isomorphism of two vertices.
Depth-first, forward traversal for all event types.
Base class for graph traversals.
void insertAll()
Insert all possible members.
bool isEmpty() const
True if graph is empty.
Definition: Graph.h:1697
Return to caller without further searching.
bool graphIsConnected(const Graph &g)
Test whether a graph is connected.
EquivalenceP & equivalencePredicate()
Property: reference to the vertex equivalence predicate.
EdgeIterator eraseEdge(const EdgeIterator &edge)
Erases an edge.
Definition: Graph.h:1804
void graphEraseParallelEdges(Graph &g)
Erase parallel edges.
const VertexIterator & target()
Target vertex.
Definition: Graph.h:1159
std::vector< typename GraphTraits< Graph >::VertexIterator > graphDominators(Graph &g, typename GraphTraits< Graph >::VertexIterator root)
Find immediate pre-dominators.
Container associating values with keys.
Definition: Sawyer/Map.h:66
size_t maximumSolutionSize() const
Property: maximum allowed solution size.
std::vector< size_t > graphReachableVertices(Traversal t)
IDs of vertices reachable from root.