ROSE  0.9.9.139
GraphAlgorithm.h
1 // WARNING: Changes to this file must be contributed back to Sawyer or else they will
2 // be clobbered by the next update from Sawyer. The Sawyer repository is at
3 // https://github.com/matzke1/sawyer.
4 
5 
6 
7 
8 // Algorithms for Sawyer::Container::Graph
9 
10 #ifndef Sawyer_GraphAlgorithm_H
11 #define Sawyer_GraphAlgorithm_H
12 
13 #include <Sawyer/Sawyer.h>
14 #include <Sawyer/DenseIntegerSet.h>
15 #include <Sawyer/GraphTraversal.h>
16 #include <Sawyer/Set.h>
17 
18 #include <boost/foreach.hpp>
19 #include <boost/lexical_cast.hpp>
20 #include <iostream>
21 #include <list>
22 #include <set>
23 #include <vector>
24 
25 // If non-zero, then use a stack-based pool allocation strategy that tuned for multi-threading when searching for isomorphic
26 // subgraphs. The non-zero value defines the size of the large heap blocks requested from the standard runtime system and is a
27 // multiple of the size of the number of vertices in the second of the two graphs accessed by the search.
28 #ifndef SAWYER_VAM_STACK_ALLOCATOR
29 #define SAWYER_VAM_STACK_ALLOCATOR 2 // Use a per-thread, stack based allocation if non-zero
30 #endif
31 #if SAWYER_VAM_STACK_ALLOCATOR
32 #include <Sawyer/StackAllocator.h>
33 #endif
34 
35 // If defined, perform extra checks in the subgraph isomorphism "VAM" ragged array.
36 //#define SAWYER_VAM_EXTRA_CHECKS
37 
38 namespace Sawyer {
39 namespace Container {
40 namespace Algorithm {
41 
45 template<class Graph>
46 bool
48  std::vector<bool> visited(g.nVertices(), false); // have we seen this vertex already?
49  std::vector<bool> onPath(g.nVertices(), false); // is a vertex on the current path of edges?
50  for (size_t rootId = 0; rootId < g.nVertices(); ++rootId) {
51  if (visited[rootId])
52  continue;
53  visited[rootId] = true;
54  ASSERT_require(!onPath[rootId]);
55  onPath[rootId] = true;
57  size_t targetId = t.edge()->target()->id();
58  if (t.event() == ENTER_EDGE) {
59  if (onPath[targetId])
60  return true; // must be a back edge forming a cycle
61  onPath[targetId] = true;
62  if (visited[targetId]) {
63  t.skipChildren();
64  } else {
65  visited[targetId] = true;
66  }
67  } else {
68  ASSERT_require(t.event() == LEAVE_EDGE);
69  ASSERT_require(onPath[targetId]);
70  onPath[targetId] = false;
71  }
72  }
73  ASSERT_require(onPath[rootId]);
74  onPath[rootId] = false;
75  }
76  return false;
77 }
78 
83 template<class Graph>
84 size_t
86  std::vector<bool> visited(g.nVertices(), false); // have we seen this vertex already?
87  std::vector<unsigned char> onPath(g.nVertices(), false); // is a vertex on the current path of edges? 0, 1, or 2
88  std::set<typename Graph::ConstEdgeIterator> edgesToErase;
89 
90  for (size_t rootId = 0; rootId < g.nVertices(); ++rootId) {
91  if (visited[rootId])
92  continue;
93  visited[rootId] = true;
94  ASSERT_require(!onPath[rootId]);
95  onPath[rootId] = true;
97  size_t targetId = t.edge()->target()->id();
98  if (t.event() == ENTER_EDGE) {
99  if (onPath[targetId]) {
100  edgesToErase.insert(t.edge());
101  t.skipChildren();
102  }
103  ++onPath[targetId];
104  if (visited[targetId]) {
105  t.skipChildren();
106  } else {
107  visited[targetId] = true;
108  }
109  } else {
110  ASSERT_require(t.event() == LEAVE_EDGE);
111  ASSERT_require(onPath[targetId]);
112  --onPath[targetId];
113  }
114  }
115  ASSERT_require(onPath[rootId]==1);
116  onPath[rootId] = 0;
117  }
118 
119  BOOST_FOREACH (const typename Graph::ConstEdgeIterator &edge, edgesToErase)
120  g.eraseEdge(edge);
121  return edgesToErase.size();
122 }
123 
133 template<class Graph>
134 bool
136  if (g.isEmpty())
137  return true;
138  std::vector<bool> seen(g.nVertices(), false);
139  size_t nSeen = 0;
140  DenseIntegerSet<size_t> worklist(g.nVertices());
141  worklist.insert(0);
142  while (!worklist.isEmpty()) {
143  size_t id = *worklist.values().begin();
144  worklist.erase(id);
145 
146  if (seen[id])
147  continue;
148  seen[id] = true;
149  ++nSeen;
150 
151  typename Graph::ConstVertexIterator v = g.findVertex(id);
152  BOOST_FOREACH (const typename Graph::Edge &e, v->outEdges()) {
153  if (!seen[e.target()->id()]) // not necessary, but saves some work
154  worklist.insert(e.target()->id());
155  }
156  BOOST_FOREACH (const typename Graph::Edge &e, v->inEdges()) {
157  if (!seen[e.source()->id()]) // not necessary, but saves some work
158  worklist.insert(e.source()->id());
159  }
160  }
161  return nSeen == g.nVertices();
162 }
163 
173 template<class Graph>
174 size_t
175 graphFindConnectedComponents(const Graph &g, std::vector<size_t> &components /*out*/) {
176  static const size_t NOT_SEEN(-1);
177  size_t nComponents = 0;
178  components.clear();
179  components.resize(g.nVertices(), NOT_SEEN);
180  DenseIntegerSet<size_t> worklist(g.nVertices());
181  for (size_t rootId = 0; rootId < g.nVertices(); ++rootId) {
182  if (components[rootId] != NOT_SEEN)
183  continue;
184  ASSERT_require(worklist.isEmpty());
185  worklist.insert(rootId);
186  while (!worklist.isEmpty()) {
187  size_t id = *worklist.values().begin();
188  worklist.erase(id);
189 
190  ASSERT_require(components[id]==NOT_SEEN || components[id]==nComponents);
191  if (components[id] != NOT_SEEN)
192  continue;
193  components[id] = nComponents;
194 
195  typename Graph::ConstVertexIterator v = g.findVertex(id);
196  BOOST_FOREACH (const typename Graph::Edge &e, v->outEdges()) {
197  if (components[e.target()->id()] == NOT_SEEN) // not necessary, but saves some work
198  worklist.insert(e.target()->id());
199  }
200  BOOST_FOREACH (const typename Graph::Edge &e, v->inEdges()) {
201  if (components[e.source()->id()] == NOT_SEEN) // not necesssary, but saves some work
202  worklist.insert(e.source()->id());
203  }
204  }
205  ++nComponents;
206  }
207  return nComponents;
208 }
209 
218 template<class Graph>
219 Graph
220 graphCopySubgraph(const Graph &g, const std::vector<size_t> &vertexIdVector) {
221  Graph retval;
222 
223  // Insert vertices
224  typedef typename Graph::ConstVertexIterator VIter;
225  typedef Map<size_t, VIter> Id2Vertex;
226  Id2Vertex resultVertices;
227  for (size_t i=0; i<vertexIdVector.size(); ++i) {
228  ASSERT_forbid2(resultVertices.exists(vertexIdVector[i]), "duplicate vertices not allowed");
229  VIter rv = retval.insertVertex(g.findVertex(vertexIdVector[i])->value());
230  ASSERT_require(rv->id() == i); // some analyses depend on this
231  resultVertices.insert(vertexIdVector[i], rv);
232  }
233 
234  // Insert edges
235  for (size_t i=0; i<vertexIdVector.size(); ++i) {
236  typename Graph::ConstVertexIterator gSource = g.findVertex(vertexIdVector[i]);
237  typename Graph::ConstVertexIterator rSource = resultVertices[vertexIdVector[i]];
238  BOOST_FOREACH (const typename Graph::Edge &e, gSource->outEdges()) {
239  typename Graph::ConstVertexIterator rTarget = retval.vertices().end();
240  if (resultVertices.getOptional(e.target()->id()).assignTo(rTarget))
241  retval.insertEdge(rSource, rTarget, e.value());
242  }
243  }
244  return retval;
245 }
246 
252 template<class Graph>
253 void
255  BOOST_FOREACH (const typename Graph::Vertex &src, g.vertices()) {
256  if (src.nOutEdges() > 1) {
257  Map<typename Graph::ConstVertexIterator /*target*/, std::vector<typename Graph::ConstEdgeIterator> > edgesByTarget;
258  typename Graph::ConstEdgeIterator nextEdge = src.outEdges().begin();
259  while (nextEdge != src.outEdges().end()) {
260  typename Graph::ConstEdgeIterator curEdge = nextEdge++;
261  std::vector<typename Graph::ConstEdgeIterator> &prevEdges = edgesByTarget.insertMaybeDefault(curEdge->target());
262  bool erased = false;
263  BOOST_FOREACH (typename Graph::ConstEdgeIterator prevEdge, prevEdges) {
264  if (curEdge->value() == prevEdge->value()) {
265  g.eraseEdge(curEdge);
266  erased = true;
267  break;
268  }
269  }
270  if (!erased)
271  prevEdges.push_back(curEdge);
272  }
273  }
274  }
275 }
276 
278 // Common subgraph isomorphism (CSI)
279 // Loosely based on the algorithm presented by Evgeny B. Krissinel and Kim Henrick
280 // "Common subgraph isomorphism detection by backtracking search"
281 // European Bioinformatics Institute, Genome Campus, Hinxton, Cambridge CB10 1SD, UK
283 
289 template<class Graph>
291 public:
296  bool mu(const Graph &g1, const typename Graph::ConstVertexIterator &v1,
297  const Graph &g2, const typename Graph::ConstVertexIterator &v2) const {
298  SAWYER_ARGUSED(g1); // Leave formal arg names in declaration because they're important
299  SAWYER_ARGUSED(v1); // documentation for this function.
300  SAWYER_ARGUSED(g2);
301  SAWYER_ARGUSED(v2);
302  return true;
303  }
304 
314  bool nu(const Graph &g1, typename Graph::ConstVertexIterator i1, typename Graph::ConstVertexIterator i2,
315  const std::vector<typename Graph::ConstEdgeIterator> &edges1,
316  const Graph &g2, typename Graph::ConstVertexIterator j1, typename Graph::ConstVertexIterator j2,
317  const std::vector<typename Graph::ConstEdgeIterator> &edges2) const {
318  SAWYER_ARGUSED(g1); // Leave formal argument names in declaration because they're
319  SAWYER_ARGUSED(i1); // important documentation for this function.
320  SAWYER_ARGUSED(i2);
321  SAWYER_ARGUSED(edges1);
322  SAWYER_ARGUSED(g2);
323  SAWYER_ARGUSED(j1);
324  SAWYER_ARGUSED(j2);
325  SAWYER_ARGUSED(edges2);
326  return true;
327  }
328 
334  void progress(size_t /*size*/) {}
335 };
336 
341 };
342 
354 template<class Graph>
356  size_t n;
357 public:
358  CsiShowSolution(): n(0) {}
359 
365  CsiNextAction operator()(const Graph &g1, const std::vector<size_t> &x, const Graph &g2, const std::vector<size_t> &y) {
366  ASSERT_require(x.size() == y.size());
367  std::cout <<"Common subgraph isomorphism solution #" <<n <<" found:\n"
368  <<" x = [";
369  BOOST_FOREACH (size_t i, x)
370  std::cout <<" " <<i;
371  std::cout <<" ]\n"
372  <<" y = [";
373  BOOST_FOREACH (size_t j, y)
374  std::cout <<" " <<j;
375  std::cout <<" ]\n";
376  ++n;
377  return CSI_CONTINUE;
378  }
379 };
380 
404 template<class Graph,
405  class SolutionProcessor = CsiShowSolution<Graph>,
406  class EquivalenceP = CsiEquivalence<Graph> >
408  const Graph &g1, &g2; // the two whole graphs being compared
409  DenseIntegerSet<size_t> v, w; // available vertices of g1 and g2, respectively
410  std::vector<size_t> x, y; // selected vertices of g1 and g2, which defines vertex mapping
411  DenseIntegerSet<size_t> vNotX; // X erased from V
412 
413  SolutionProcessor solutionProcessor_; // functor to call for each solution
414  EquivalenceP equivalenceP_; // predicates to determine if two vertices can be equivalent
415  size_t minimumSolutionSize_; // size of smallest permitted solutions
416  size_t maximumSolutionSize_; // size of largest permitted solutions
417  bool monotonicallyIncreasing_; // size of solutions increases
418  bool findingCommonSubgraphs_; // solutions are subgraphs of both graphs or only second graph?
419 
420  // The Vam is a ragged 2d array, but using std::vector<std::vector<T>> is not efficient in a multithreaded environment
421  // because std::allocator<> and Boost pool allocators don't (yet) have a mode that allows each thread to be largely
422  // independent of other threads. The best we can do with std::allocator is about 30 threads running 100% on my box having
423  // 72 hardware threads. But we can use the following facts about the Vam:
424  //
425  // (1) VAMs are constructed and destroyed in a stack-like manner.
426  // (2) We always know the exact size of the major axis (number or rows) before the VAM is created.
427  // (3) The longest row of a new VAM will not be longer than the longest row of the VAM from which it is derived.
428  // (4) The number of rows will never be more than the number of vertices in g1.
429  // (5) The longest row will never be longer than the number of vertices in g2.
430  // (6) VAMs are never shared between threads
431 #if SAWYER_VAM_STACK_ALLOCATOR
433 #else
434  struct VamAllocator {
435  explicit VamAllocator(size_t) {}
436  };
437 #endif
438  VamAllocator vamAllocator_;
439 
440  // Essentially a ragged array having a fixed number of rows and each row can be a different length. The number of rows is
441  // known at construction time, and the rows are extended one at a time starting with the first and working toward the
442  // last. Accessing any element is a constant-time operation.
443  class Vam { // Vertex Availability Map
444  VamAllocator &allocator_;
445 #if SAWYER_VAM_STACK_ALLOCATOR
446  std::vector<size_t*> rows_;
447  std::vector<size_t> rowSize_;
448  size_t *lowWater_; // first element allocated
449 #else
450  std::vector<std::vector<size_t> > rows_;
451 #endif
452  size_t lastRowStarted_;
453 #ifdef SAWYER_VAM_EXTRA_CHECKS
454  size_t maxRows_, maxCols_;
455 #endif
456 
457  public:
458  // Construct the VAM and reserve enough space for the indicated number of rows.
459  explicit Vam(VamAllocator &allocator)
460  : allocator_(allocator),
461 #if SAWYER_VAM_STACK_ALLOCATOR
462  lowWater_(NULL),
463 #endif
464  lastRowStarted_((size_t)(-1)) {
465  }
466 
467  // Destructor assumes this is the top VAM in the allocator stack.
468  ~Vam() {
469 #if SAWYER_VAM_STACK_ALLOCATOR
470  if (lowWater_ != NULL) {
471  ASSERT_require(!rows_.empty());
472  allocator_.revert(lowWater_);
473  } else {
474  ASSERT_require((size_t)std::count(rowSize_.begin(), rowSize_.end(), 0) == rows_.size()); // all rows empty
475  }
476 #endif
477  }
478 
479  // Reserve space for specified number of rows.
480  void reserveRows(size_t nrows) {
481  rows_.reserve(nrows);
482 #if SAWYER_VAM_STACK_ALLOCATOR
483  rowSize_.reserve(nrows);
484 #endif
485 #ifdef SAWYER_VAM_EXTRA_CHECKS
486  maxRows_ = nrows;
487  maxCols_ = 0;
488 #endif
489  }
490 
491  // Start a new row. You can only insert elements into the most recent row.
492  void startNewRow(size_t i, size_t maxColumns) {
493 #ifdef SAWYER_VAM_EXTRA_CHECKS
494  ASSERT_require(i < maxRows_);
495  maxCols_ = maxColumns;
496 #endif
497 #if SAWYER_VAM_STACK_ALLOCATOR
498  if (i >= rows_.size()) {
499  rows_.resize(i+1, NULL);
500  rowSize_.resize(i+1, 0);
501  }
502  ASSERT_require(rows_[i] == NULL); // row was already started
503  rows_[i] = allocator_.reserve(maxColumns);
504 #else
505  if (i >= rows_.size())
506  rows_.resize(i+1);
507 #endif
508  lastRowStarted_ = i;
509  }
510 
511  // Push a new element onto the end of the current row.
512  void push(size_t i, size_t x) {
513  ASSERT_require(i == lastRowStarted_);
514  ASSERT_require(i < rows_.size());
515 #ifdef SAWYER_VAM_EXTRA_CHECKS
516  ASSERT_require(size(i) < maxCols_);
517 #endif
518 #if SAWYER_VAM_STACK_ALLOCATOR
519  size_t *ptr = allocator_.allocNext();
520  if (lowWater_ == NULL)
521  lowWater_ = ptr;
522 #ifdef SAWYER_VAM_EXTRA_CHECKS
523  ASSERT_require(ptr == rows_[i] + rowSize_[i]);
524 #endif
525  ++rowSize_[i];
526  *ptr = x;
527 #else
528  rows_[i].push_back(x);
529 #endif
530  }
531 
532  // Given a vertex i in G1, return the number of vertices j in G2 where i and j can be equivalent.
533  size_t size(size_t i) const {
534 #if SAWYER_VAM_STACK_ALLOCATOR
535  return i < rows_.size() ? rowSize_[i] : size_t(0);
536 #else
537  return i < rows_.size() ? rows_[i].size() : size_t(0);
538 #endif
539  }
540 
541  // Given a vertex i in G1, return those vertices j in G2 where i and j can be equivalent.
542  // This isn't really a proper iterator, but we can't return std::vector<size_t>::const_iterator on macOS because it's
543  // constructor-from-pointer is private.
544  boost::iterator_range<const size_t*> get(size_t i) const {
545  static const size_t empty = 911; /*arbitrary*/
546  if (i < rows_.size() && size(i) > 0) {
547 #if SAWYER_VAM_STACK_ALLOCATOR
548  return boost::iterator_range<const size_t*>(rows_[i], rows_[i] + rowSize_[i]);
549 #else
550  return boost::iterator_range<const size_t*>(&rows_[i][0], &rows_[i][0] + rows_[i].size());
551 #endif
552  }
553  return boost::iterator_range<const size_t*>(&empty, &empty);
554  }
555  };
556 
557 public:
571  CommonSubgraphIsomorphism(const Graph &g1, const Graph &g2,
572  SolutionProcessor solutionProcessor = SolutionProcessor(),
573  EquivalenceP equivalenceP = EquivalenceP())
574  : g1(g1), g2(g2), v(g1.nVertices()), w(g2.nVertices()), vNotX(g1.nVertices()), solutionProcessor_(solutionProcessor),
575  equivalenceP_(equivalenceP), minimumSolutionSize_(1), maximumSolutionSize_(-1), monotonicallyIncreasing_(false),
576  findingCommonSubgraphs_(true), vamAllocator_(SAWYER_VAM_STACK_ALLOCATOR * g2.nVertices()) {}
577 
578 private:
580  ASSERT_not_reachable("copy constructor makes no sense");
581  }
582 
584  ASSERT_not_reachable("assignment operator makes no sense");
585  }
586 
587 public:
604  size_t minimumSolutionSize() const { return minimumSolutionSize_; }
605  void minimumSolutionSize(size_t n) { minimumSolutionSize_ = n; }
622  size_t maximumSolutionSize() const { return maximumSolutionSize_; }
623  void maximumSolutionSize(size_t n) { maximumSolutionSize_ = n; }
634  bool monotonicallyIncreasing() const { return monotonicallyIncreasing_; }
635  void monotonicallyIncreasing(bool b) { monotonicallyIncreasing_ = b; }
644  SolutionProcessor& solutionProcessor() { return solutionProcessor_; }
645  const SolutionProcessor& solutionProcessor() const { return solutionProcessor_; }
657  EquivalenceP& equivalencePredicate() { return equivalenceP_; }
658  const EquivalenceP& equivalencePredicate() const { return equivalenceP_; }
671  bool findingCommonSubgraphs() const { return findingCommonSubgraphs_; }
672  void findingCommonSubgraphs(bool b) { findingCommonSubgraphs_ = b; }
690  void run() {
691  reset();
692  Vam vam = initializeVam();
693  recurse(vam);
694  }
695 
700  void reset() {
701  v.insertAll();
702  w.insertAll();
703  x.clear();
704  y.clear();
705  vNotX.insertAll();
706  }
707 
708 private:
709  // Maximum value+1 or zero.
710  template<typename Container>
711  static size_t maxPlusOneOrZero(const Container &container) {
712  if (container.isEmpty())
713  return 0;
714  size_t retval = 0;
715  BOOST_FOREACH (size_t val, container.values())
716  retval = std::max(retval, val);
717  return retval+1;
718  }
719 
720  // Initialize VAM so to indicate which source vertices (v of g1) map to which target vertices (w of g2) based only on the
721  // vertex comparator. We also handle self edges here.
722  Vam initializeVam() {
723  Vam vam(vamAllocator_);
724  vam.reserveRows(maxPlusOneOrZero(v));
725  BOOST_FOREACH (size_t i, v.values()) {
726  typename Graph::ConstVertexIterator v1 = g1.findVertex(i);
727  vam.startNewRow(i, w.size());
728  BOOST_FOREACH (size_t j, w.values()) {
729  typename Graph::ConstVertexIterator w1 = g2.findVertex(j);
730  std::vector<typename Graph::ConstEdgeIterator> selfEdges1, selfEdges2;
731  findEdges(g1, i, i, selfEdges1 /*out*/);
732  findEdges(g2, j, j, selfEdges2 /*out*/);
733  if (selfEdges1.size() == selfEdges2.size() &&
734  equivalenceP_.mu(g1, g1.findVertex(i), g2, g2.findVertex(j)) &&
735  equivalenceP_.nu(g1, v1, v1, selfEdges1, g2, w1, w1, selfEdges2))
736  vam.push(i, j);
737  }
738  }
739  return vam;
740  }
741 
742  // Can the solution (stored in X and Y) be extended by adding another (i,j) pair of vertices where i is an element of the
743  // set of available vertices of graph1 (vNotX) and j is a vertex of graph2 that is equivalent to i according to the
744  // user-provided equivalence predicate. Furthermore, is it even possible to find a solution along this branch of discovery
745  // which is falls between the minimum and maximum sizes specified by the user? By eliminating entire branch of the search
746  // space we can drastically decrease the time it takes to search, and the larger the required solution the more branches we
747  // can eliminate.
748  bool isSolutionPossible(const Vam &vam) const {
749  if (findingCommonSubgraphs_ && x.size() >= maximumSolutionSize_)
750  return false; // any further soln on this path would be too large
751  size_t largestPossibleSolution = x.size();
752  BOOST_FOREACH (size_t i, vNotX.values()) {
753  if (vam.size(i) > 0) {
754  ++largestPossibleSolution;
755  if ((findingCommonSubgraphs_ && largestPossibleSolution >= minimumSolutionSize_) ||
756  (!findingCommonSubgraphs_ && largestPossibleSolution >= g1.nVertices()))
757  return true;
758  }
759  }
760  return false;
761  }
762 
763  // Choose some vertex i from G1 which is still available (i.e., i is a member of vNotX) and for which there is a vertex
764  // equivalence of i with some available j from G2 (i.e., the pair (i,j) is present in the VAM). The recursion terminates
765  // quickest if we return the row of the VAM that has the fewest vertices in G2.
766  size_t pickVertex(const Vam &vam) const {
767  // FIXME[Robb Matzke 2016-03-19]: Perhaps this can be made even faster. The step that initializes the VAM
768  // (initializeVam or refine) might be able to compute and store the shortest row so we can retrieve it here in constant
769  // time. Probably not worth the work though since loop this is small compared to the overall analysis.
770  size_t shortestRowLength(-1), retval(-1);
771  BOOST_FOREACH (size_t i, vNotX.values()) {
772  size_t vs = vam.size(i);
773  if (vs > 0 && vs < shortestRowLength) {
774  shortestRowLength = vs;
775  retval = i;
776  }
777  }
778  ASSERT_require2(retval != size_t(-1), "cannot be reached if isSolutionPossible returned true");
779  return retval;
780  }
781 
782  // Extend the current solution by adding vertex i from G1 and vertex j from G2. The VAM should be adjusted separately.
783  void extendSolution(size_t i, size_t j) {
784  ASSERT_require(x.size() == y.size());
785  ASSERT_require(std::find(x.begin(), x.end(), i) == x.end());
786  ASSERT_require(std::find(y.begin(), y.end(), j) == y.end());
787  ASSERT_require(vNotX.exists(i));
788  x.push_back(i);
789  y.push_back(j);
790  vNotX.erase(i);
791  }
792 
793  // Remove the last vertex mapping from a solution. The VAM should be adjusted separately.
794  void retractSolution() {
795  ASSERT_require(x.size() == y.size());
796  ASSERT_require(!x.empty());
797  size_t i = x.back();
798  ASSERT_forbid(vNotX.exists(i));
799  x.pop_back();
800  y.pop_back();
801  vNotX.insert(i);
802  }
803 
804  // Find all edges that have the specified source and target vertices. This is usually zero or one edge, but can be more if
805  // the graph contains parallel edges.
806  void
807  findEdges(const Graph &g, size_t sourceVertex, size_t targetVertex,
808  std::vector<typename Graph::ConstEdgeIterator> &result /*in,out*/) const {
809  BOOST_FOREACH (const typename Graph::Edge &candidate, g.findVertex(sourceVertex)->outEdges()) {
810  if (candidate.target()->id() == targetVertex)
811  result.push_back(g.findEdge(candidate.id()));
812  }
813  }
814 
815  // Given that we just extended a solution by adding the vertex pair (i, j), decide whether there's a
816  // possible equivalence between vertex iUnused of G1 and vertex jUnused of G2 based on the edge(s) between i and iUnused
817  // and between j and jUnused.
818  bool edgesAreSuitable(size_t i, size_t iUnused, size_t j, size_t jUnused) const {
819  ASSERT_require(i != iUnused);
820  ASSERT_require(j != jUnused);
821 
822  // The two subgraphs in a solution must have the same number of edges.
823  std::vector<typename Graph::ConstEdgeIterator> edges1, edges2;
824  findEdges(g1, i, iUnused, edges1 /*out*/);
825  findEdges(g2, j, jUnused, edges2 /*out*/);
826  if (edges1.size() != edges2.size())
827  return false;
828  findEdges(g1, iUnused, i, edges1 /*out*/);
829  findEdges(g2, jUnused, j, edges2 /*out*/);
830  if (edges1.size() != edges2.size())
831  return false;
832 
833  // If there are no edges, then assume that iUnused and jUnused could be isomorphic. We already know they satisfy the mu
834  // constraint otherwise they wouldn't even be in the VAM.
835  if (edges1.empty() && edges2.empty())
836  return true;
837 
838  // Everything looks good to us, now let the user weed out certain pairs of vertices based on their incident edges.
839  typename Graph::ConstVertexIterator v1 = g1.findVertex(i), v2 = g1.findVertex(iUnused);
840  typename Graph::ConstVertexIterator w1 = g2.findVertex(j), w2 = g2.findVertex(jUnused);
841  return equivalenceP_.nu(g1, v1, v2, edges1, g2, w1, w2, edges2);
842  }
843 
844  // Create a new VAM from an existing one. The (i,j) pairs of the new VAM will form a subset of the specified VAM.
845  void refine(const Vam &vam, Vam &refined /*out*/) {
846  refined.reserveRows(maxPlusOneOrZero(vNotX));
847  BOOST_FOREACH (size_t i, vNotX.values()) {
848  size_t rowLength = vam.size(i);
849  refined.startNewRow(i, rowLength);
850  BOOST_FOREACH (size_t j, vam.get(i)) {
851  if (j != y.back() && edgesAreSuitable(x.back(), i, y.back(), j))
852  refined.push(i, j);
853  }
854  }
855  }
856 
857  // The Goldilocks predicate. Returns true if the solution is a valid size, false if it's too small or too big.
858  bool isSolutionValidSize() const {
859  if (findingCommonSubgraphs_) {
860  return x.size() >= minimumSolutionSize_ && x.size() <= maximumSolutionSize_;
861  } else {
862  return x.size() == g1.nVertices();
863  }
864  }
865 
866  // The main recursive function. It works by extending the current solution by one pair for all combinations of such pairs
867  // that are permissible according to the vertex equivalence predicate and not already part of the solution and then
868  // recursively searching the remaining space. This analysis class acts as a state machine whose data structures are
869  // advanced and retracted as the space is searched. The VAM is the only part of the state that needs to be stored on a
870  // stack since changes to it could not be easily undone during the retract phase.
871  CsiNextAction recurse(const Vam &vam, size_t level = 0) {
872  equivalenceP_.progress(level);
873  if (isSolutionPossible(vam)) {
874  size_t i = pickVertex(vam);
875  BOOST_FOREACH (size_t j, vam.get(i)) {
876  extendSolution(i, j);
877  Vam refined(vamAllocator_);
878  refine(vam, refined);
879  if (recurse(refined, level+1) == CSI_ABORT)
880  return CSI_ABORT;
881  retractSolution();
882  }
883 
884  // Try again after removing vertex i from consideration
885  if (findingCommonSubgraphs_) {
886  v.erase(i);
887  ASSERT_require(vNotX.exists(i));
888  vNotX.erase(i);
889  if (recurse(vam, level+1) == CSI_ABORT)
890  return CSI_ABORT;
891  v.insert(i);
892  vNotX.insert(i);
893  }
894  } else if (isSolutionValidSize()) {
895  ASSERT_require(x.size() == y.size());
896  if (monotonicallyIncreasing_)
897  minimumSolutionSize_ = x.size();
898  if (solutionProcessor_(g1, x, g2, y) == CSI_ABORT)
899  return CSI_ABORT;
900  }
901  return CSI_CONTINUE;
902  }
903 };
904 
925 template<class Graph, class SolutionProcessor>
926 void findCommonIsomorphicSubgraphs(const Graph &g1, const Graph &g2, SolutionProcessor solutionProcessor) {
927  CommonSubgraphIsomorphism<Graph, SolutionProcessor> csi(g1, g2, solutionProcessor);
928  csi.run();
929 }
930 
931 template<class Graph, class SolutionProcessor, class EquivalenceP>
932 void findCommonIsomorphicSubgraphs(const Graph &g1, const Graph &g2,
933  SolutionProcessor solutionProcessor, EquivalenceP equivalenceP) {
934  CommonSubgraphIsomorphism<Graph, SolutionProcessor, EquivalenceP> csi(g1, g2, solutionProcessor, equivalenceP);
935  csi.run();
936 }
939 // Used by findFirstCommonIsomorphicSubgraph
940 template<class Graph>
942  std::pair<std::vector<size_t>, std::vector<size_t> > solution_;
943 public:
944  CsiNextAction operator()(const Graph &/*g1*/, const std::vector<size_t> &x,
945  const Graph &/*g2*/, const std::vector<size_t> &y) {
946  solution_ = std::make_pair(x, y);
947  return CSI_ABORT;
948  }
949 
950  const std::pair<std::vector<size_t>, std::vector<size_t> >& solution() const {
951  return solution_;
952  }
953 };
954 
962 template<class Graph>
963 std::pair<std::vector<size_t>, std::vector<size_t> >
964 findFirstCommonIsomorphicSubgraph(const Graph &g1, const Graph &g2, size_t minimumSize) {
966  csi.minimumSolutionSize(minimumSize);
967  csi.maximumSolutionSize(minimumSize); // to avoid going further than necessary
968  csi.run();
969  return csi.solutionProcessor().solution();
970 }
971 
972 
973 template<class Graph, class EquivalenceP>
974 std::pair<std::vector<size_t>, std::vector<size_t> >
975 findFirstCommonIsomorphicSubgraph(const Graph &g1, const Graph &g2, size_t minimumSize, EquivalenceP equivalenceP) {
977  csi(g1, g2, FirstIsomorphicSubgraph<Graph>(), equivalenceP);
978  csi.minimumSolutionSize(minimumSize);
979  csi.maximumSolutionSize(minimumSize); // to avoid going further than necessary
980  csi.run();
981  return csi.solutionProcessor().solution();
982 }
983 
1002 template<class Graph, class SolutionProcessor>
1003 void findIsomorphicSubgraphs(const Graph &g1, const Graph &g2, SolutionProcessor solutionProcessor) {
1004  CommonSubgraphIsomorphism<Graph, SolutionProcessor> csi(g1, g2, solutionProcessor);
1005  csi.findingCommonSubgraphs(false);
1006  csi.run();
1007 }
1008 
1009 template<class Graph, class SolutionProcessor, class EquivalenceP>
1010 void findIsomorphicSubgraphs(const Graph &g1, const Graph &g2, SolutionProcessor solutionProcessor, EquivalenceP equivalenceP) {
1011  CommonSubgraphIsomorphism<Graph, SolutionProcessor, EquivalenceP> csi(g1, g2, solutionProcessor, equivalenceP);
1012  csi.findingCommonSubgraphs(false);
1013  csi.run();
1014 }
1017 // Used internally by findMaximumCommonIsomorphicSubgraphs
1018 template<class Graph>
1020  std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > > solutions_;
1021 public:
1022  CsiNextAction operator()(const Graph &/*g1*/, const std::vector<size_t> &x,
1023  const Graph &/*g2*/, const std::vector<size_t> &y) {
1024  if (!solutions_.empty() && x.size() > solutions_.front().first.size())
1025  solutions_.clear();
1026  solutions_.push_back(std::make_pair(x, y));
1027  return CSI_CONTINUE;
1028  }
1029 
1030  const std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > > &solutions() const {
1031  return solutions_;
1032  }
1033 };
1034 
1055 template<class Graph>
1056 std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > >
1059  csi.monotonicallyIncreasing(true);
1060  csi.run();
1061  return csi.solutionProcessor().solutions();
1062 }
1063 
1064 template<class Graph, class EquivalenceP>
1065 std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > >
1066 findMaximumCommonIsomorphicSubgraphs(const Graph &g1, const Graph &g2, EquivalenceP equivalenceP) {
1068  csi(g1, g2, MaximumIsomorphicSubgraphs<Graph>(), equivalenceP);
1069  csi.monotonicallyIncreasing(true);
1070  csi.run();
1071  return csi.solutionProcessor().solutions();
1072 }
1116 template<class Direction, class Graph>
1117 std::vector<typename GraphTraits<Graph>::VertexIterator>
1119  typedef typename GraphTraits<Graph>::VertexIterator VertexIterator;
1120  typedef typename GraphTraits<Graph>::EdgeIterator EdgeIterator;
1121  typedef typename GraphTraits<Graph>::Edge Edge;
1122  static const size_t NO_ID = (size_t)(-1);
1123 
1124  ASSERT_require(g.isValidVertex(root));
1125 
1126  // List of vertex IDs in the best order for data-flow. I.e., the reverse of the post-fix, depth-first traversal following
1127  // forwared or reverse edges (depending on the Direction template argument). A useful fact is that disregarding back
1128  // edges, the predecessors of the vertex represented by flowlist[i] all appear earlier in flowlist.
1130  traversal.start(root);
1131  std::vector<size_t> flowlist = graphReachableVertices(traversal);
1132  std::reverse(flowlist.begin(), flowlist.end());
1133 
1134  // The inverse mapping of the flowlist. I.e.., since flowlist maps an index to a vertex ID, rflowlist maps a vertex ID back
1135  // to the flowlist index. If vertex v is not reachable from the root, then rflowlist[v] == NO_ID.
1136  std::vector<size_t> rflowlist(g.nVertices(), NO_ID);
1137  for (size_t i=0; i<flowlist.size(); ++i)
1138  rflowlist[flowlist[i]] = i;
1139 
1140  // Initialize the idom vector. idom[i]==i implies idom[i] is unknown
1141  std::vector<size_t> idom(flowlist.size());
1142  for (size_t i=0; i<flowlist.size(); ++i)
1143  idom[i] = i;
1144 
1145  bool changed = true;
1146  while (changed) {
1147  changed = false; // assume no change, prove otherwise
1148  for (size_t vertex_i=0; vertex_i < flowlist.size(); ++vertex_i) {
1149  VertexIterator vertex = g.findVertex(flowlist[vertex_i]);
1150 
1151  // Test idom invariant
1152 #ifndef SAWYER_NDEBUG
1153  for (size_t i=0; i<idom.size(); ++i)
1154  ASSERT_require(idom[i] <= i);
1155 #endif
1156 
1157  // The root vertex has no immediate dominator.
1158  // FIXME[Robb P Matzke 2017-06-23]: why not just start iterating with vertex_i=1?
1159  if (vertex == root)
1160  continue;
1161 
1162  // Try to choose a new idom for this vertex by processing each of its predecessors. The dominator of the current
1163  // vertex is a function of the dominators of its predecessors.
1164  size_t newIdom = idom[vertex_i];
1165 
1166  boost::iterator_range<EdgeIterator> edges = previousEdges<EdgeIterator>(vertex, Direction());
1167  for (EdgeIterator edge=edges.begin(); edge!=edges.end(); ++edge) {
1168  VertexIterator predecessor = previousVertex<VertexIterator>(edge, Direction());
1169  size_t predecessor_i = rflowlist[predecessor->id()];
1170  if (NO_ID == predecessor_i)
1171  continue; // predecessor is not reachable from root, so does't contribute
1172  if (predecessor_i == vertex_i)
1173  continue; // ignore self edges: V cannot be its own immediate dominator
1174  if (predecessor_i > vertex_i)
1175  continue; // ignore back edges; the idom will also be found along a forward edge
1176 
1177  // The predecessor might be our dominator, so merge it with what we already have
1178  if (newIdom == vertex_i) {
1179  newIdom = predecessor_i;
1180  } else {
1181  size_t tmpIdom = predecessor_i;
1182  while (newIdom != tmpIdom) {
1183  while (newIdom > tmpIdom)
1184  newIdom = idom[newIdom];
1185  while (tmpIdom > newIdom)
1186  tmpIdom = idom[tmpIdom];
1187  }
1188  }
1189  }
1190 
1191  if (idom[vertex_i] != newIdom) {
1192  idom[vertex_i] = newIdom;
1193  changed = true;
1194  }
1195  }
1196  }
1197 
1198  // Build the result relation
1199  std::vector<VertexIterator> retval;
1200  retval.resize(g.nVertices(), g.vertices().end());
1201  for (size_t i=0; i<flowlist.size(); ++i) {
1202  if (idom[i] != i)
1203  retval[flowlist[i]] = g.findVertex(flowlist[idom[i]]);
1204  }
1205  return retval;
1206 }
1207 
1215 template<class Graph>
1216 std::vector<typename GraphTraits<Graph>::VertexIterator>
1218  return graphDirectedDominators<ForwardTraversalTag>(g, root);
1219 }
1220 
1228 template<class Graph>
1229 std::vector<typename GraphTraits<Graph>::VertexIterator>
1231  return graphDirectedDominators<ReverseTraversalTag>(g, root);
1232 }
1233 
1234 } // namespace
1235 } // namespace
1236 } // namespace
1237 
1238 #endif
size_t nVertices() const
Total number of vertices.
Definition: Graph.h:1670
T * reserve(size_t nElmts)
Reserve space for objects.
void run()
Perform the common subgraph isomorphism analysis.
VertexIterator insertVertex(const VertexValue &value=VertexValue())
Insert a new vertex.
Definition: Graph.h:1706
const size_t & id() const
Unique vertex ID number.
Definition: Graph.h:1202
Graph containing user-defined vertices and edges.
Definition: Graph.h:625
bool mu(const Graph &g1, const typename Graph::ConstVertexIterator &v1, const Graph &g2, const typename Graph::ConstVertexIterator &v2) const
Isomorphism of two vertices.
std::vector< typename GraphTraits< Graph >::VertexIterator > graphPostDominators(Graph &g, typename GraphTraits< Graph >::VertexIterator root)
Find immediate post-dominators.
Traits for graphs.
Definition: Graph.h:287
boost::iterator_range< EdgeIterator > inEdges()
List of incoming edges.
Definition: Graph.h:1213
EquivalenceP & equivalencePredicate()
Property: reference to the vertex equivalence predicate.
Vertex equivalence for common subgraph isomorphism.
bool isValidVertex(const ConstVertexIterator &vertex) const
Determines whether the vertex iterator is valid.
Definition: Graph.h:1551
std::vector< std::pair< std::vector< size_t >, std::vector< size_t > > > findMaximumCommonIsomorphicSubgraphs(const Graph &g1, const Graph &g2)
Find maximum common isomorphic subgraphs.
T * allocNext()
Allocate one element.
bool graphContainsCycle(const Graph &g)
Determines if the any edges of a graph form a cycle.
VertexValue & value()
User-defined value.
Definition: Graph.h:1278
size_t size() const
Number of members present.
size_t minimumSolutionSize() const
Property: minimum allowed solution size.
const EquivalenceP & equivalencePredicate() const
Property: reference to the vertex equivalence predicate.
boost::iterator_range< VertexIterator > vertices()
Iterators for all vertices.
Definition: Graph.h:1454
void minimumSolutionSize(size_t n)
Property: minimum allowed solution size.
boost::iterator_range< ConstIterator > values() const
Iterator range for set members.
bool exists(const Value &value) const
Determines whether a value is stored.
EdgeIterator insertEdge(const VertexIterator &sourceVertex, const VertexIterator &targetVertex, const EdgeValue &value=EdgeValue())
Insert a new edge.
Definition: Graph.h:1736
Bidirectional vertex node iterator.
Definition: Graph.h:1036
void revert(T *ptr)
Free all elements back to the specified element.
Name space for the entire library.
Definition: Access.h:11
Graph graphCopySubgraph(const Graph &g, const std::vector< size_t > &vertexIdVector)
Create a subgraph.
size_t graphFindConnectedComponents(const Graph &g, std::vector< size_t > &components)
Find all connected components of a graph.
bool nu(const Graph &g1, typename Graph::ConstVertexIterator i1, typename Graph::ConstVertexIterator i2, const std::vector< typename Graph::ConstEdgeIterator > &edges1, const Graph &g2, typename Graph::ConstVertexIterator j1, typename Graph::ConstVertexIterator j2, const std::vector< typename Graph::ConstEdgeIterator > &edges2) const
Isomorphism of vertices based on incident edges.
bool monotonicallyIncreasing() const
Property: monotonically increasing solution size.
boost::iterator_range< EdgeIterator > outEdges()
List of outgoing edges.
Definition: Graph.h:1234
const VertexIterator & source()
Source vertex.
Definition: Graph.h:1139
Functor called for each common subgraph isomorphism solution.
CsiNextAction
How the CSI algorith should proceed.
EdgeValue & value()
User-defined value.
Definition: Graph.h:1165
bool insert(Value value)
Insert a value.
bool findingCommonSubgraphs() const
Property: find common subgraphs.
void progress(size_t)
Called at each step during the algorithm.
CsiNextAction operator()(const Graph &g1, const std::vector< size_t > &x, const Graph &g2, const std::vector< size_t > &y)
Functor.
std::vector< typename GraphTraits< Graph >::VertexIterator > graphDirectedDominators(Graph &g, typename GraphTraits< Graph >::VertexIterator root)
Find immediate pre- or post-dominators.
VertexIterator findVertex(size_t id)
Finds the vertex with specified ID number.
Definition: Graph.h:1501
Continue searching for more solutions.
void findCommonIsomorphicSubgraphs(const Graph &g1, const Graph &g2, SolutionProcessor solutionProcessor)
Find common isomorphic subgraphs.
std::pair< std::vector< size_t >, std::vector< size_t > > findFirstCommonIsomorphicSubgraph(const Graph &g1, const Graph &g2, size_t minimumSize)
Determine whether a common subgraph exists.
void findingCommonSubgraphs(bool b)
Property: find common subgraphs.
bool erase(Value value)
Erase a value.
void maximumSolutionSize(size_t n)
Property: maximum allowed solution size.
size_t maximumSolutionSize() const
Property: maximum allowed solution size.
Bidirectional edge node iterator.
Definition: Graph.h:937
size_t graphBreakCycles(Graph &g)
Break cycles of a graph arbitrarily.
size_t nOutEdges() const
Number of outgoing edges.
Definition: Graph.h:1256
Depth-first, forward traversal for all event types.
Base class for graph traversals.
void insertAll()
Insert all possible members.
void monotonicallyIncreasing(bool b)
Property: monotonically increasing solution size.
bool isEmpty() const
True if graph is empty.
Definition: Graph.h:1689
Return to caller without further searching.
bool graphIsConnected(const Graph &g)
Test whether a graph is connected.
EdgeIterator eraseEdge(const EdgeIterator &edge)
Erases an edge.
Definition: Graph.h:1796
void graphEraseParallelEdges(Graph &g)
Erase parallel edges.
SolutionProcessor & solutionProcessor()
Property: reference to the solution callback.
const SolutionProcessor & solutionProcessor() const
Property: reference to the solution callback.
void reset()
Releases memory used by the analysis.
const VertexIterator & target()
Target vertex.
Definition: Graph.h:1151
std::vector< typename GraphTraits< Graph >::VertexIterator > graphDominators(Graph &g, typename GraphTraits< Graph >::VertexIterator root)
Find immediate pre-dominators.
Container associating values with keys.
Definition: Sawyer/Map.h:64
void findIsomorphicSubgraphs(const Graph &g1, const Graph &g2, SolutionProcessor solutionProcessor)
Find an isomorphic subgraph.
CommonSubgraphIsomorphism(const Graph &g1, const Graph &g2, SolutionProcessor solutionProcessor=SolutionProcessor(), EquivalenceP equivalenceP=EquivalenceP())
Construct a solver.
std::vector< size_t > graphReachableVertices(Traversal t)
IDs of vertices reachable from root.