ROSE  0.9.10.75
GraphAlgorithm.h
1 // WARNING: Changes to this file must be contributed back to Sawyer or else they will
2 // be clobbered by the next update from Sawyer. The Sawyer repository is at
3 // https://github.com/matzke1/sawyer.
4 
5 
6 
7 
8 // Algorithms for Sawyer::Container::Graph
9 
10 #ifndef Sawyer_GraphAlgorithm_H
11 #define Sawyer_GraphAlgorithm_H
12 
13 #include <Sawyer/Sawyer.h>
14 #include <Sawyer/DenseIntegerSet.h>
15 #include <Sawyer/GraphTraversal.h>
16 #include <Sawyer/Set.h>
17 
18 #include <boost/foreach.hpp>
19 #include <boost/lexical_cast.hpp>
20 #include <iostream>
21 #include <list>
22 #include <set>
23 #include <vector>
24 
25 // If non-zero, then use a stack-based pool allocation strategy that tuned for multi-threading when searching for isomorphic
26 // subgraphs. The non-zero value defines the size of the large heap blocks requested from the standard runtime system and is a
27 // multiple of the size of the number of vertices in the second of the two graphs accessed by the search.
28 #ifndef SAWYER_VAM_STACK_ALLOCATOR
29 #define SAWYER_VAM_STACK_ALLOCATOR 2 // Use a per-thread, stack based allocation if non-zero
30 #endif
31 #if SAWYER_VAM_STACK_ALLOCATOR
32 #include <Sawyer/StackAllocator.h>
33 #endif
34 
35 // If defined, perform extra checks in the subgraph isomorphism "VAM" ragged array.
36 //#define SAWYER_VAM_EXTRA_CHECKS
37 
38 namespace Sawyer {
39 namespace Container {
40 namespace Algorithm {
41 
45 template<class Graph>
46 bool
48  std::vector<bool> visited(g.nVertices(), false); // have we seen this vertex already?
49  std::vector<bool> onPath(g.nVertices(), false); // is a vertex on the current path of edges?
50  for (size_t rootId = 0; rootId < g.nVertices(); ++rootId) {
51  if (visited[rootId])
52  continue;
53  visited[rootId] = true;
54  ASSERT_require(!onPath[rootId]);
55  onPath[rootId] = true;
57  size_t targetId = t.edge()->target()->id();
58  if (t.event() == ENTER_EDGE) {
59  if (onPath[targetId])
60  return true; // must be a back edge forming a cycle
61  onPath[targetId] = true;
62  if (visited[targetId]) {
63  t.skipChildren();
64  } else {
65  visited[targetId] = true;
66  }
67  } else {
68  ASSERT_require(t.event() == LEAVE_EDGE);
69  ASSERT_require(onPath[targetId]);
70  onPath[targetId] = false;
71  }
72  }
73  ASSERT_require(onPath[rootId]);
74  onPath[rootId] = false;
75  }
76  return false;
77 }
78 
83 template<class Graph>
84 size_t
86  std::vector<bool> visited(g.nVertices(), false); // have we seen this vertex already?
87  std::vector<unsigned char> onPath(g.nVertices(), false); // is a vertex on the current path of edges? 0, 1, or 2
88  std::set<typename Graph::ConstEdgeIterator> edgesToErase;
89 
90  for (size_t rootId = 0; rootId < g.nVertices(); ++rootId) {
91  if (visited[rootId])
92  continue;
93  visited[rootId] = true;
94  ASSERT_require(!onPath[rootId]);
95  onPath[rootId] = true;
97  size_t targetId = t.edge()->target()->id();
98  if (t.event() == ENTER_EDGE) {
99  if (onPath[targetId]) {
100  edgesToErase.insert(t.edge());
101  t.skipChildren();
102  }
103  ++onPath[targetId];
104  if (visited[targetId]) {
105  t.skipChildren();
106  } else {
107  visited[targetId] = true;
108  }
109  } else {
110  ASSERT_require(t.event() == LEAVE_EDGE);
111  ASSERT_require(onPath[targetId]);
112  --onPath[targetId];
113  }
114  }
115  ASSERT_require(onPath[rootId]==1);
116  onPath[rootId] = 0;
117  }
118 
119  BOOST_FOREACH (const typename Graph::ConstEdgeIterator &edge, edgesToErase)
120  g.eraseEdge(edge);
121  return edgesToErase.size();
122 }
123 
133 template<class Graph>
134 bool
136  if (g.isEmpty())
137  return true;
138  std::vector<bool> seen(g.nVertices(), false);
139  size_t nSeen = 0;
140  DenseIntegerSet<size_t> worklist(g.nVertices());
141  worklist.insert(0);
142  while (!worklist.isEmpty()) {
143  size_t id = *worklist.values().begin();
144  worklist.erase(id);
145 
146  if (seen[id])
147  continue;
148  seen[id] = true;
149  ++nSeen;
150 
151  typename Graph::ConstVertexIterator v = g.findVertex(id);
152  BOOST_FOREACH (const typename Graph::Edge &e, v->outEdges()) {
153  if (!seen[e.target()->id()]) // not necessary, but saves some work
154  worklist.insert(e.target()->id());
155  }
156  BOOST_FOREACH (const typename Graph::Edge &e, v->inEdges()) {
157  if (!seen[e.source()->id()]) // not necessary, but saves some work
158  worklist.insert(e.source()->id());
159  }
160  }
161  return nSeen == g.nVertices();
162 }
163 
173 template<class Graph>
174 size_t
175 graphFindConnectedComponents(const Graph &g, std::vector<size_t> &components /*out*/) {
176  static const size_t NOT_SEEN(-1);
177  size_t nComponents = 0;
178  components.clear();
179  components.resize(g.nVertices(), NOT_SEEN);
180  DenseIntegerSet<size_t> worklist(g.nVertices());
181  for (size_t rootId = 0; rootId < g.nVertices(); ++rootId) {
182  if (components[rootId] != NOT_SEEN)
183  continue;
184  ASSERT_require(worklist.isEmpty());
185  worklist.insert(rootId);
186  while (!worklist.isEmpty()) {
187  size_t id = *worklist.values().begin();
188  worklist.erase(id);
189 
190  ASSERT_require(components[id]==NOT_SEEN || components[id]==nComponents);
191  if (components[id] != NOT_SEEN)
192  continue;
193  components[id] = nComponents;
194 
195  typename Graph::ConstVertexIterator v = g.findVertex(id);
196  BOOST_FOREACH (const typename Graph::Edge &e, v->outEdges()) {
197  if (components[e.target()->id()] == NOT_SEEN) // not necessary, but saves some work
198  worklist.insert(e.target()->id());
199  }
200  BOOST_FOREACH (const typename Graph::Edge &e, v->inEdges()) {
201  if (components[e.source()->id()] == NOT_SEEN) // not necesssary, but saves some work
202  worklist.insert(e.source()->id());
203  }
204  }
205  ++nComponents;
206  }
207  return nComponents;
208 }
209 
218 template<class Graph>
219 Graph
220 graphCopySubgraph(const Graph &g, const std::vector<size_t> &vertexIdVector) {
221  Graph retval;
222 
223  // Insert vertices
224  typedef typename Graph::ConstVertexIterator VIter;
225  typedef Map<size_t, VIter> Id2Vertex;
226  Id2Vertex resultVertices;
227  for (size_t i=0; i<vertexIdVector.size(); ++i) {
228  ASSERT_forbid2(resultVertices.exists(vertexIdVector[i]), "duplicate vertices not allowed");
229  VIter rv = retval.insertVertex(g.findVertex(vertexIdVector[i])->value());
230  ASSERT_require(rv->id() == i); // some analyses depend on this
231  resultVertices.insert(vertexIdVector[i], rv);
232  }
233 
234  // Insert edges
235  for (size_t i=0; i<vertexIdVector.size(); ++i) {
236  typename Graph::ConstVertexIterator gSource = g.findVertex(vertexIdVector[i]);
237  typename Graph::ConstVertexIterator rSource = resultVertices[vertexIdVector[i]];
238  BOOST_FOREACH (const typename Graph::Edge &e, gSource->outEdges()) {
239  typename Graph::ConstVertexIterator rTarget = retval.vertices().end();
240  if (resultVertices.getOptional(e.target()->id()).assignTo(rTarget))
241  retval.insertEdge(rSource, rTarget, e.value());
242  }
243  }
244  return retval;
245 }
246 
252 template<class Graph>
253 void
255  BOOST_FOREACH (const typename Graph::Vertex &src, g.vertices()) {
256  if (src.nOutEdges() > 1) {
257  Map<typename Graph::ConstVertexIterator /*target*/, std::vector<typename Graph::ConstEdgeIterator> > edgesByTarget;
258  typename Graph::ConstEdgeIterator nextEdge = src.outEdges().begin();
259  while (nextEdge != src.outEdges().end()) {
260  typename Graph::ConstEdgeIterator curEdge = nextEdge++;
261  std::vector<typename Graph::ConstEdgeIterator> &prevEdges = edgesByTarget.insertMaybeDefault(curEdge->target());
262  bool erased = false;
263  BOOST_FOREACH (typename Graph::ConstEdgeIterator prevEdge, prevEdges) {
264  if (curEdge->value() == prevEdge->value()) {
265  g.eraseEdge(curEdge);
266  erased = true;
267  break;
268  }
269  }
270  if (!erased)
271  prevEdges.push_back(curEdge);
272  }
273  }
274  }
275 }
276 
278 // Common subgraph isomorphism (CSI)
279 // Loosely based on the algorithm presented by Evgeny B. Krissinel and Kim Henrick
280 // "Common subgraph isomorphism detection by backtracking search"
281 // European Bioinformatics Institute, Genome Campus, Hinxton, Cambridge CB10 1SD, UK
283 
289 template<class GraphA, class GraphB>
291 public:
296  bool mu(const GraphA &g1, const typename GraphA::ConstVertexIterator &v1,
297  const GraphB &g2, const typename GraphB::ConstVertexIterator &v2) const {
298  SAWYER_ARGUSED(g1); // Leave formal arg names in declaration because they're important
299  SAWYER_ARGUSED(v1); // documentation for this function.
300  SAWYER_ARGUSED(g2);
301  SAWYER_ARGUSED(v2);
302  return true;
303  }
304 
314  bool nu(const GraphA &g1, typename GraphA::ConstVertexIterator i1, typename GraphA::ConstVertexIterator i2,
315  const std::vector<typename GraphA::ConstEdgeIterator> &edges1,
316  const GraphB &g2, typename GraphB::ConstVertexIterator j1, typename GraphB::ConstVertexIterator j2,
317  const std::vector<typename GraphB::ConstEdgeIterator> &edges2) const {
318  SAWYER_ARGUSED(g1); // Leave formal argument names in declaration because they're
319  SAWYER_ARGUSED(i1); // important documentation for this function.
320  SAWYER_ARGUSED(i2);
321  SAWYER_ARGUSED(edges1);
322  SAWYER_ARGUSED(g2);
323  SAWYER_ARGUSED(j1);
324  SAWYER_ARGUSED(j2);
325  SAWYER_ARGUSED(edges2);
326  return true;
327  }
328 
334  void progress(size_t /*size*/) {}
335 };
336 
341 };
342 
354 template<class GraphA, class GraphB>
356  size_t n;
357 public:
358  CsiShowSolution(): n(0) {}
359 
365  CsiNextAction operator()(const GraphA &g1, const std::vector<size_t> &x, const GraphB &g2, const std::vector<size_t> &y) {
366  ASSERT_require(x.size() == y.size());
367  std::cout <<"Common subgraph isomorphism solution #" <<n <<" found:\n"
368  <<" x = [";
369  BOOST_FOREACH (size_t i, x)
370  std::cout <<" " <<i;
371  std::cout <<" ]\n"
372  <<" y = [";
373  BOOST_FOREACH (size_t j, y)
374  std::cout <<" " <<j;
375  std::cout <<" ]\n";
376  ++n;
377  return CSI_CONTINUE;
378  }
379 };
380 
404 template<class GraphA, class GraphB,
405  class SolutionProcessor = CsiShowSolution<GraphA, GraphB>,
406  class EquivalenceP = CsiEquivalence<GraphA, GraphB> >
408  const GraphA &g1; // The first graph being compared (i.e., needle)
409  const GraphB &g2; // the second graph being compared (i.e., haystack)
410  DenseIntegerSet<size_t> v, w; // available vertices of g1 and g2, respectively
411  std::vector<size_t> x, y; // selected vertices of g1 and g2, which defines vertex mapping
412  DenseIntegerSet<size_t> vNotX; // X erased from V
413 
414  SolutionProcessor solutionProcessor_; // functor to call for each solution
415  EquivalenceP equivalenceP_; // predicates to determine if two vertices can be equivalent
416  size_t minimumSolutionSize_; // size of smallest permitted solutions
417  size_t maximumSolutionSize_; // size of largest permitted solutions
418  bool monotonicallyIncreasing_; // size of solutions increases
419  bool findingCommonSubgraphs_; // solutions are subgraphs of both graphs or only second graph?
420 
421  // The Vam is a ragged 2d array, but using std::vector<std::vector<T>> is not efficient in a multithreaded environment
422  // because std::allocator<> and Boost pool allocators don't (yet) have a mode that allows each thread to be largely
423  // independent of other threads. The best we can do with std::allocator is about 30 threads running 100% on my box having
424  // 72 hardware threads. But we can use the following facts about the Vam:
425  //
426  // (1) VAMs are constructed and destroyed in a stack-like manner.
427  // (2) We always know the exact size of the major axis (number or rows) before the VAM is created.
428  // (3) The longest row of a new VAM will not be longer than the longest row of the VAM from which it is derived.
429  // (4) The number of rows will never be more than the number of vertices in g1.
430  // (5) The longest row will never be longer than the number of vertices in g2.
431  // (6) VAMs are never shared between threads
432 #if SAWYER_VAM_STACK_ALLOCATOR
434 #else
435  struct VamAllocator {
436  explicit VamAllocator(size_t) {}
437  };
438 #endif
439  VamAllocator vamAllocator_;
440 
441  // Essentially a ragged array having a fixed number of rows and each row can be a different length. The number of rows is
442  // known at construction time, and the rows are extended one at a time starting with the first and working toward the
443  // last. Accessing any element is a constant-time operation.
444  class Vam { // Vertex Availability Map
445  VamAllocator &allocator_;
446 #if SAWYER_VAM_STACK_ALLOCATOR
447  std::vector<size_t*> rows_;
448  std::vector<size_t> rowSize_;
449  size_t *lowWater_; // first element allocated
450 #else
451  std::vector<std::vector<size_t> > rows_;
452 #endif
453  size_t lastRowStarted_;
454 #ifdef SAWYER_VAM_EXTRA_CHECKS
455  size_t maxRows_, maxCols_;
456 #endif
457 
458  public:
459  // Construct the VAM and reserve enough space for the indicated number of rows.
460  explicit Vam(VamAllocator &allocator)
461  : allocator_(allocator),
462 #if SAWYER_VAM_STACK_ALLOCATOR
463  lowWater_(NULL),
464 #endif
465  lastRowStarted_((size_t)(-1)) {
466  }
467 
468  // Destructor assumes this is the top VAM in the allocator stack.
469  ~Vam() {
470 #if SAWYER_VAM_STACK_ALLOCATOR
471  if (lowWater_ != NULL) {
472  ASSERT_require(!rows_.empty());
473  allocator_.revert(lowWater_);
474  } else {
475  ASSERT_require((size_t)std::count(rowSize_.begin(), rowSize_.end(), 0) == rows_.size()); // all rows empty
476  }
477 #endif
478  }
479 
480  // Reserve space for specified number of rows.
481  void reserveRows(size_t nrows) {
482  rows_.reserve(nrows);
483 #if SAWYER_VAM_STACK_ALLOCATOR
484  rowSize_.reserve(nrows);
485 #endif
486 #ifdef SAWYER_VAM_EXTRA_CHECKS
487  maxRows_ = nrows;
488  maxCols_ = 0;
489 #endif
490  }
491 
492  // Start a new row. You can only insert elements into the most recent row.
493  void startNewRow(size_t i, size_t maxColumns) {
494 #ifdef SAWYER_VAM_EXTRA_CHECKS
495  ASSERT_require(i < maxRows_);
496  maxCols_ = maxColumns;
497 #endif
498 #if SAWYER_VAM_STACK_ALLOCATOR
499  if (i >= rows_.size()) {
500  rows_.resize(i+1, NULL);
501  rowSize_.resize(i+1, 0);
502  }
503  ASSERT_require(rows_[i] == NULL); // row was already started
504  rows_[i] = allocator_.reserve(maxColumns);
505 #else
506  if (i >= rows_.size())
507  rows_.resize(i+1);
508 #endif
509  lastRowStarted_ = i;
510  }
511 
512  // Push a new element onto the end of the current row.
513  void push(size_t i, size_t x) {
514  ASSERT_require(i == lastRowStarted_);
515  ASSERT_require(i < rows_.size());
516 #ifdef SAWYER_VAM_EXTRA_CHECKS
517  ASSERT_require(size(i) < maxCols_);
518 #endif
519 #if SAWYER_VAM_STACK_ALLOCATOR
520  size_t *ptr = allocator_.allocNext();
521  if (lowWater_ == NULL)
522  lowWater_ = ptr;
523 #ifdef SAWYER_VAM_EXTRA_CHECKS
524  ASSERT_require(ptr == rows_[i] + rowSize_[i]);
525 #endif
526  ++rowSize_[i];
527  *ptr = x;
528 #else
529  rows_[i].push_back(x);
530 #endif
531  }
532 
533  // Given a vertex i in G1, return the number of vertices j in G2 where i and j can be equivalent.
534  size_t size(size_t i) const {
535 #if SAWYER_VAM_STACK_ALLOCATOR
536  return i < rows_.size() ? rowSize_[i] : size_t(0);
537 #else
538  return i < rows_.size() ? rows_[i].size() : size_t(0);
539 #endif
540  }
541 
542  // Given a vertex i in G1, return those vertices j in G2 where i and j can be equivalent.
543  // This isn't really a proper iterator, but we can't return std::vector<size_t>::const_iterator on macOS because it's
544  // constructor-from-pointer is private.
545  boost::iterator_range<const size_t*> get(size_t i) const {
546  static const size_t empty = 911; /*arbitrary*/
547  if (i < rows_.size() && size(i) > 0) {
548 #if SAWYER_VAM_STACK_ALLOCATOR
549  return boost::iterator_range<const size_t*>(rows_[i], rows_[i] + rowSize_[i]);
550 #else
551  return boost::iterator_range<const size_t*>(&rows_[i][0], &rows_[i][0] + rows_[i].size());
552 #endif
553  }
554  return boost::iterator_range<const size_t*>(&empty, &empty);
555  }
556  };
557 
558 public:
572  CommonSubgraphIsomorphism(const GraphA &g1, const GraphB &g2,
573  SolutionProcessor solutionProcessor = SolutionProcessor(),
574  EquivalenceP equivalenceP = EquivalenceP())
575  : g1(g1), g2(g2), v(g1.nVertices()), w(g2.nVertices()), vNotX(g1.nVertices()), solutionProcessor_(solutionProcessor),
576  equivalenceP_(equivalenceP), minimumSolutionSize_(1), maximumSolutionSize_(-1), monotonicallyIncreasing_(false),
577  findingCommonSubgraphs_(true), vamAllocator_(SAWYER_VAM_STACK_ALLOCATOR * g2.nVertices()) {}
578 
579 private:
581  ASSERT_not_reachable("copy constructor makes no sense");
582  }
583 
585  ASSERT_not_reachable("assignment operator makes no sense");
586  }
587 
588 public:
605  size_t minimumSolutionSize() const { return minimumSolutionSize_; }
606  void minimumSolutionSize(size_t n) { minimumSolutionSize_ = n; }
623  size_t maximumSolutionSize() const { return maximumSolutionSize_; }
624  void maximumSolutionSize(size_t n) { maximumSolutionSize_ = n; }
635  bool monotonicallyIncreasing() const { return monotonicallyIncreasing_; }
636  void monotonicallyIncreasing(bool b) { monotonicallyIncreasing_ = b; }
645  SolutionProcessor& solutionProcessor() { return solutionProcessor_; }
646  const SolutionProcessor& solutionProcessor() const { return solutionProcessor_; }
658  EquivalenceP& equivalencePredicate() { return equivalenceP_; }
659  const EquivalenceP& equivalencePredicate() const { return equivalenceP_; }
672  bool findingCommonSubgraphs() const { return findingCommonSubgraphs_; }
673  void findingCommonSubgraphs(bool b) { findingCommonSubgraphs_ = b; }
691  void run() {
692  reset();
693  Vam vam = initializeVam();
694  recurse(vam);
695  }
696 
701  void reset() {
702  v.insertAll();
703  w.insertAll();
704  x.clear();
705  y.clear();
706  vNotX.insertAll();
707  }
708 
709 private:
710  // Maximum value+1 or zero.
711  template<typename Container>
712  static size_t maxPlusOneOrZero(const Container &container) {
713  if (container.isEmpty())
714  return 0;
715  size_t retval = 0;
716  BOOST_FOREACH (size_t val, container.values())
717  retval = std::max(retval, val);
718  return retval+1;
719  }
720 
721  // Initialize VAM so to indicate which source vertices (v of g1) map to which target vertices (w of g2) based only on the
722  // vertex comparator. We also handle self edges here.
723  Vam initializeVam() {
724  Vam vam(vamAllocator_);
725  vam.reserveRows(maxPlusOneOrZero(v));
726  BOOST_FOREACH (size_t i, v.values()) {
727  typename GraphA::ConstVertexIterator v1 = g1.findVertex(i);
728  vam.startNewRow(i, w.size());
729  BOOST_FOREACH (size_t j, w.values()) {
730  typename GraphB::ConstVertexIterator w1 = g2.findVertex(j);
731  std::vector<typename GraphA::ConstEdgeIterator> selfEdges1;
732  std::vector<typename GraphB::ConstEdgeIterator> selfEdges2;
733  findEdges(g1, i, i, selfEdges1 /*out*/);
734  findEdges(g2, j, j, selfEdges2 /*out*/);
735  if (selfEdges1.size() == selfEdges2.size() &&
736  equivalenceP_.mu(g1, g1.findVertex(i), g2, g2.findVertex(j)) &&
737  equivalenceP_.nu(g1, v1, v1, selfEdges1, g2, w1, w1, selfEdges2))
738  vam.push(i, j);
739  }
740  }
741  return vam;
742  }
743 
744  // Can the solution (stored in X and Y) be extended by adding another (i,j) pair of vertices where i is an element of the
745  // set of available vertices of graph1 (vNotX) and j is a vertex of graph2 that is equivalent to i according to the
746  // user-provided equivalence predicate. Furthermore, is it even possible to find a solution along this branch of discovery
747  // which is falls between the minimum and maximum sizes specified by the user? By eliminating entire branch of the search
748  // space we can drastically decrease the time it takes to search, and the larger the required solution the more branches we
749  // can eliminate.
750  bool isSolutionPossible(const Vam &vam) const {
751  if (findingCommonSubgraphs_ && x.size() >= maximumSolutionSize_)
752  return false; // any further soln on this path would be too large
753  size_t largestPossibleSolution = x.size();
754  BOOST_FOREACH (size_t i, vNotX.values()) {
755  if (vam.size(i) > 0) {
756  ++largestPossibleSolution;
757  if ((findingCommonSubgraphs_ && largestPossibleSolution >= minimumSolutionSize_) ||
758  (!findingCommonSubgraphs_ && largestPossibleSolution >= g1.nVertices()))
759  return true;
760  }
761  }
762  return false;
763  }
764 
765  // Choose some vertex i from G1 which is still available (i.e., i is a member of vNotX) and for which there is a vertex
766  // equivalence of i with some available j from G2 (i.e., the pair (i,j) is present in the VAM). The recursion terminates
767  // quickest if we return the row of the VAM that has the fewest vertices in G2.
768  size_t pickVertex(const Vam &vam) const {
769  // FIXME[Robb Matzke 2016-03-19]: Perhaps this can be made even faster. The step that initializes the VAM
770  // (initializeVam or refine) might be able to compute and store the shortest row so we can retrieve it here in constant
771  // time. Probably not worth the work though since loop this is small compared to the overall analysis.
772  size_t shortestRowLength(-1), retval(-1);
773  BOOST_FOREACH (size_t i, vNotX.values()) {
774  size_t vs = vam.size(i);
775  if (vs > 0 && vs < shortestRowLength) {
776  shortestRowLength = vs;
777  retval = i;
778  }
779  }
780  ASSERT_require2(retval != size_t(-1), "cannot be reached if isSolutionPossible returned true");
781  return retval;
782  }
783 
784  // Extend the current solution by adding vertex i from G1 and vertex j from G2. The VAM should be adjusted separately.
785  void extendSolution(size_t i, size_t j) {
786  ASSERT_require(x.size() == y.size());
787  ASSERT_require(std::find(x.begin(), x.end(), i) == x.end());
788  ASSERT_require(std::find(y.begin(), y.end(), j) == y.end());
789  ASSERT_require(vNotX.exists(i));
790  x.push_back(i);
791  y.push_back(j);
792  vNotX.erase(i);
793  }
794 
795  // Remove the last vertex mapping from a solution. The VAM should be adjusted separately.
796  void retractSolution() {
797  ASSERT_require(x.size() == y.size());
798  ASSERT_require(!x.empty());
799  size_t i = x.back();
800  ASSERT_forbid(vNotX.exists(i));
801  x.pop_back();
802  y.pop_back();
803  vNotX.insert(i);
804  }
805 
806  // Find all edges that have the specified source and target vertices. This is usually zero or one edge, but can be more if
807  // the graph contains parallel edges.
808  template<class Graph>
809  void
810  findEdges(const Graph &g, size_t sourceVertex, size_t targetVertex,
811  std::vector<typename Graph::ConstEdgeIterator> &result /*in,out*/) const {
812  BOOST_FOREACH (const typename Graph::Edge &candidate, g.findVertex(sourceVertex)->outEdges()) {
813  if (candidate.target()->id() == targetVertex)
814  result.push_back(g.findEdge(candidate.id()));
815  }
816  }
817 
818  // Given that we just extended a solution by adding the vertex pair (i, j), decide whether there's a
819  // possible equivalence between vertex iUnused of G1 and vertex jUnused of G2 based on the edge(s) between i and iUnused
820  // and between j and jUnused.
821  bool edgesAreSuitable(size_t i, size_t iUnused, size_t j, size_t jUnused) const {
822  ASSERT_require(i != iUnused);
823  ASSERT_require(j != jUnused);
824 
825  // The two subgraphs in a solution must have the same number of edges.
826  std::vector<typename GraphA::ConstEdgeIterator> edges1;
827  std::vector<typename GraphB::ConstEdgeIterator> edges2;
828  findEdges(g1, i, iUnused, edges1 /*out*/);
829  findEdges(g2, j, jUnused, edges2 /*out*/);
830  if (edges1.size() != edges2.size())
831  return false;
832  findEdges(g1, iUnused, i, edges1 /*out*/);
833  findEdges(g2, jUnused, j, edges2 /*out*/);
834  if (edges1.size() != edges2.size())
835  return false;
836 
837  // If there are no edges, then assume that iUnused and jUnused could be isomorphic. We already know they satisfy the mu
838  // constraint otherwise they wouldn't even be in the VAM.
839  if (edges1.empty() && edges2.empty())
840  return true;
841 
842  // Everything looks good to us, now let the user weed out certain pairs of vertices based on their incident edges.
843  typename GraphA::ConstVertexIterator v1 = g1.findVertex(i), v2 = g1.findVertex(iUnused);
844  typename GraphB::ConstVertexIterator w1 = g2.findVertex(j), w2 = g2.findVertex(jUnused);
845  return equivalenceP_.nu(g1, v1, v2, edges1, g2, w1, w2, edges2);
846  }
847 
848  // Create a new VAM from an existing one. The (i,j) pairs of the new VAM will form a subset of the specified VAM.
849  void refine(const Vam &vam, Vam &refined /*out*/) {
850  refined.reserveRows(maxPlusOneOrZero(vNotX));
851  BOOST_FOREACH (size_t i, vNotX.values()) {
852  size_t rowLength = vam.size(i);
853  refined.startNewRow(i, rowLength);
854  BOOST_FOREACH (size_t j, vam.get(i)) {
855  if (j != y.back() && edgesAreSuitable(x.back(), i, y.back(), j))
856  refined.push(i, j);
857  }
858  }
859  }
860 
861  // The Goldilocks predicate. Returns true if the solution is a valid size, false if it's too small or too big.
862  bool isSolutionValidSize() const {
863  if (findingCommonSubgraphs_) {
864  return x.size() >= minimumSolutionSize_ && x.size() <= maximumSolutionSize_;
865  } else {
866  return x.size() == g1.nVertices();
867  }
868  }
869 
870  // The main recursive function. It works by extending the current solution by one pair for all combinations of such pairs
871  // that are permissible according to the vertex equivalence predicate and not already part of the solution and then
872  // recursively searching the remaining space. This analysis class acts as a state machine whose data structures are
873  // advanced and retracted as the space is searched. The VAM is the only part of the state that needs to be stored on a
874  // stack since changes to it could not be easily undone during the retract phase.
875  CsiNextAction recurse(const Vam &vam, size_t level = 0) {
876  equivalenceP_.progress(level);
877  if (isSolutionPossible(vam)) {
878  size_t i = pickVertex(vam);
879  BOOST_FOREACH (size_t j, vam.get(i)) {
880  extendSolution(i, j);
881  Vam refined(vamAllocator_);
882  refine(vam, refined);
883  if (recurse(refined, level+1) == CSI_ABORT)
884  return CSI_ABORT;
885  retractSolution();
886  }
887 
888  // Try again after removing vertex i from consideration
889  if (findingCommonSubgraphs_) {
890  v.erase(i);
891  ASSERT_require(vNotX.exists(i));
892  vNotX.erase(i);
893  if (recurse(vam, level+1) == CSI_ABORT)
894  return CSI_ABORT;
895  v.insert(i);
896  vNotX.insert(i);
897  }
898  } else if (isSolutionValidSize()) {
899  ASSERT_require(x.size() == y.size());
900  if (monotonicallyIncreasing_)
901  minimumSolutionSize_ = x.size();
902  if (solutionProcessor_(g1, x, g2, y) == CSI_ABORT)
903  return CSI_ABORT;
904  }
905  return CSI_CONTINUE;
906  }
907 };
908 
929 template<class GraphA, class GraphB, class SolutionProcessor>
930 void findCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor) {
932  csi.run();
933 }
934 
935 template<class GraphA, class GraphB, class SolutionProcessor, class EquivalenceP>
936 void findCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2,
937  SolutionProcessor solutionProcessor, EquivalenceP equivalenceP) {
938  CommonSubgraphIsomorphism<GraphA, GraphB, SolutionProcessor, EquivalenceP> csi(g1, g2, solutionProcessor, equivalenceP);
939  csi.run();
940 }
943 // Used by findFirstCommonIsomorphicSubgraph
944 template<class GraphA, class GraphB>
946  std::pair<std::vector<size_t>, std::vector<size_t> > solution_;
947 public:
948  CsiNextAction operator()(const GraphA &/*g1*/, const std::vector<size_t> &x,
949  const GraphB &/*g2*/, const std::vector<size_t> &y) {
950  solution_ = std::make_pair(x, y);
951  return CSI_ABORT;
952  }
953 
954  const std::pair<std::vector<size_t>, std::vector<size_t> >& solution() const {
955  return solution_;
956  }
957 };
958 
966 template<class GraphA, class GraphB>
967 std::pair<std::vector<size_t>, std::vector<size_t> >
968 findFirstCommonIsomorphicSubgraph(const GraphA &g1, const GraphB &g2, size_t minimumSize) {
970  csi.minimumSolutionSize(minimumSize);
971  csi.maximumSolutionSize(minimumSize); // to avoid going further than necessary
972  csi.run();
973  return csi.solutionProcessor().solution();
974 }
975 
976 
977 template<class GraphA, class GraphB, class EquivalenceP>
978 std::pair<std::vector<size_t>, std::vector<size_t> >
979 findFirstCommonIsomorphicSubgraph(const GraphA &g1, const GraphB &g2, size_t minimumSize, EquivalenceP equivalenceP) {
981  csi(g1, g2, FirstIsomorphicSubgraph<GraphA, GraphB>(), equivalenceP);
982  csi.minimumSolutionSize(minimumSize);
983  csi.maximumSolutionSize(minimumSize); // to avoid going further than necessary
984  csi.run();
985  return csi.solutionProcessor().solution();
986 }
987 
1006 template<class GraphA, class GraphB, class SolutionProcessor>
1007 void findIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor) {
1008  CommonSubgraphIsomorphism<GraphA, GraphB, SolutionProcessor> csi(g1, g2, solutionProcessor);
1009  csi.findingCommonSubgraphs(false);
1010  csi.run();
1011 }
1012 
1013 template<class GraphA, class GraphB, class SolutionProcessor, class EquivalenceP>
1014 void findIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2,
1015  SolutionProcessor solutionProcessor, EquivalenceP equivalenceP) {
1016  CommonSubgraphIsomorphism<GraphA, GraphB, SolutionProcessor, EquivalenceP> csi(g1, g2, solutionProcessor, equivalenceP);
1017  csi.findingCommonSubgraphs(false);
1018  csi.run();
1019 }
1022 // Used internally by findMaximumCommonIsomorphicSubgraphs
1023 template<class GraphA, class GraphB>
1025  std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > > solutions_;
1026 public:
1027  CsiNextAction operator()(const GraphA &/*g1*/, const std::vector<size_t> &x,
1028  const GraphB &/*g2*/, const std::vector<size_t> &y) {
1029  if (!solutions_.empty() && x.size() > solutions_.front().first.size())
1030  solutions_.clear();
1031  solutions_.push_back(std::make_pair(x, y));
1032  return CSI_CONTINUE;
1033  }
1034 
1035  const std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > > &solutions() const {
1036  return solutions_;
1037  }
1038 };
1039 
1060 template<class GraphA, class GraphB>
1061 std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > >
1062 findMaximumCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2) {
1064  csi.monotonicallyIncreasing(true);
1065  csi.run();
1066  return csi.solutionProcessor().solutions();
1067 }
1068 
1069 template<class GraphA, class GraphB, class EquivalenceP>
1070 std::vector<std::pair<std::vector<size_t>, std::vector<size_t> > >
1071 findMaximumCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, EquivalenceP equivalenceP) {
1073  csi(g1, g2, MaximumIsomorphicSubgraphs<GraphA, GraphB>(), equivalenceP);
1074  csi.monotonicallyIncreasing(true);
1075  csi.run();
1076  return csi.solutionProcessor().solutions();
1077 }
1121 template<class Direction, class Graph>
1122 std::vector<typename GraphTraits<Graph>::VertexIterator>
1124  typedef typename GraphTraits<Graph>::VertexIterator VertexIterator;
1125  typedef typename GraphTraits<Graph>::EdgeIterator EdgeIterator;
1126  typedef typename GraphTraits<Graph>::Edge Edge;
1127  static const size_t NO_ID = (size_t)(-1);
1128 
1129  ASSERT_require(g.isValidVertex(root));
1130 
1131  // List of vertex IDs in the best order for data-flow. I.e., the reverse of the post-fix, depth-first traversal following
1132  // forwared or reverse edges (depending on the Direction template argument). A useful fact is that disregarding back
1133  // edges, the predecessors of the vertex represented by flowlist[i] all appear earlier in flowlist.
1135  traversal.start(root);
1136  std::vector<size_t> flowlist = graphReachableVertices(traversal);
1137  std::reverse(flowlist.begin(), flowlist.end());
1138 
1139  // The inverse mapping of the flowlist. I.e.., since flowlist maps an index to a vertex ID, rflowlist maps a vertex ID back
1140  // to the flowlist index. If vertex v is not reachable from the root, then rflowlist[v] == NO_ID.
1141  std::vector<size_t> rflowlist(g.nVertices(), NO_ID);
1142  for (size_t i=0; i<flowlist.size(); ++i)
1143  rflowlist[flowlist[i]] = i;
1144 
1145  // Initialize the idom vector. idom[i]==i implies idom[i] is unknown
1146  std::vector<size_t> idom(flowlist.size());
1147  for (size_t i=0; i<flowlist.size(); ++i)
1148  idom[i] = i;
1149 
1150  bool changed = true;
1151  while (changed) {
1152  changed = false; // assume no change, prove otherwise
1153  for (size_t vertex_i=0; vertex_i < flowlist.size(); ++vertex_i) {
1154  VertexIterator vertex = g.findVertex(flowlist[vertex_i]);
1155 
1156  // Test idom invariant
1157 #ifndef SAWYER_NDEBUG
1158  for (size_t i=0; i<idom.size(); ++i)
1159  ASSERT_require(idom[i] <= i);
1160 #endif
1161 
1162  // The root vertex has no immediate dominator.
1163  // FIXME[Robb P Matzke 2017-06-23]: why not just start iterating with vertex_i=1?
1164  if (vertex == root)
1165  continue;
1166 
1167  // Try to choose a new idom for this vertex by processing each of its predecessors. The dominator of the current
1168  // vertex is a function of the dominators of its predecessors.
1169  size_t newIdom = idom[vertex_i];
1170 
1171  boost::iterator_range<EdgeIterator> edges = previousEdges<EdgeIterator>(vertex, Direction());
1172  for (EdgeIterator edge=edges.begin(); edge!=edges.end(); ++edge) {
1173  VertexIterator predecessor = previousVertex<VertexIterator>(edge, Direction());
1174  size_t predecessor_i = rflowlist[predecessor->id()];
1175  if (NO_ID == predecessor_i)
1176  continue; // predecessor is not reachable from root, so does't contribute
1177  if (predecessor_i == vertex_i)
1178  continue; // ignore self edges: V cannot be its own immediate dominator
1179  if (predecessor_i > vertex_i)
1180  continue; // ignore back edges; the idom will also be found along a forward edge
1181 
1182  // The predecessor might be our dominator, so merge it with what we already have
1183  if (newIdom == vertex_i) {
1184  newIdom = predecessor_i;
1185  } else {
1186  size_t tmpIdom = predecessor_i;
1187  while (newIdom != tmpIdom) {
1188  while (newIdom > tmpIdom)
1189  newIdom = idom[newIdom];
1190  while (tmpIdom > newIdom)
1191  tmpIdom = idom[tmpIdom];
1192  }
1193  }
1194  }
1195 
1196  if (idom[vertex_i] != newIdom) {
1197  idom[vertex_i] = newIdom;
1198  changed = true;
1199  }
1200  }
1201  }
1202 
1203  // Build the result relation
1204  std::vector<VertexIterator> retval;
1205  retval.resize(g.nVertices(), g.vertices().end());
1206  for (size_t i=0; i<flowlist.size(); ++i) {
1207  if (idom[i] != i)
1208  retval[flowlist[i]] = g.findVertex(flowlist[idom[i]]);
1209  }
1210  return retval;
1211 }
1212 
1220 template<class Graph>
1221 std::vector<typename GraphTraits<Graph>::VertexIterator>
1223  return graphDirectedDominators<ForwardTraversalTag>(g, root);
1224 }
1225 
1233 template<class Graph>
1234 std::vector<typename GraphTraits<Graph>::VertexIterator>
1236  return graphDirectedDominators<ReverseTraversalTag>(g, root);
1237 }
1238 
1239 } // namespace
1240 } // namespace
1241 } // namespace
1242 
1243 #endif
void reset()
Releases memory used by the analysis.
size_t nVertices() const
Total number of vertices.
Definition: Graph.h:1664
T * reserve(size_t nElmts)
Reserve space for objects.
const SolutionProcessor & solutionProcessor() const
Property: reference to the solution callback.
VertexIterator insertVertex(const VertexValue &value=VertexValue())
Insert a new vertex.
Definition: Graph.h:1700
const size_t & id() const
Unique vertex ID number.
Definition: Graph.h:1196
CsiNextAction operator()(const GraphA &g1, const std::vector< size_t > &x, const GraphB &g2, const std::vector< size_t > &y)
Functor.
Graph containing user-defined vertices and edges.
Definition: Graph.h:625
void minimumSolutionSize(size_t n)
Property: minimum allowed solution size.
size_t minimumSolutionSize() const
Property: minimum allowed solution size.
std::vector< typename GraphTraits< Graph >::VertexIterator > graphPostDominators(Graph &g, typename GraphTraits< Graph >::VertexIterator root)
Find immediate post-dominators.
Traits for graphs.
Definition: Graph.h:287
boost::iterator_range< EdgeIterator > inEdges()
List of incoming edges.
Definition: Graph.h:1207
Vertex equivalence for common subgraph isomorphism.
bool isValidVertex(const ConstVertexIterator &vertex) const
Determines whether the vertex iterator is valid.
Definition: Graph.h:1545
void progress(size_t)
Called at each step during the algorithm.
T * allocNext()
Allocate one element.
bool graphContainsCycle(const Graph &g)
Determines if the any edges of a graph form a cycle.
VertexValue & value()
User-defined value.
Definition: Graph.h:1272
size_t size() const
Number of members present.
boost::iterator_range< VertexIterator > vertices()
Iterators for all vertices.
Definition: Graph.h:1448
boost::iterator_range< ConstIterator > values() const
Iterator range for set members.
bool exists(const Value &value) const
Determines whether a value is stored.
EdgeIterator insertEdge(const VertexIterator &sourceVertex, const VertexIterator &targetVertex, const EdgeValue &value=EdgeValue())
Insert a new edge.
Definition: Graph.h:1730
Bidirectional vertex node iterator.
Definition: Graph.h:1030
void revert(T *ptr)
Free all elements back to the specified element.
Name space for the entire library.
Definition: Access.h:13
void findCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor)
Find common isomorphic subgraphs.
Graph graphCopySubgraph(const Graph &g, const std::vector< size_t > &vertexIdVector)
Create a subgraph.
size_t graphFindConnectedComponents(const Graph &g, std::vector< size_t > &components)
Find all connected components of a graph.
boost::iterator_range< EdgeIterator > outEdges()
List of outgoing edges.
Definition: Graph.h:1228
std::pair< std::vector< size_t >, std::vector< size_t > > findFirstCommonIsomorphicSubgraph(const GraphA &g1, const GraphB &g2, size_t minimumSize)
Determine whether a common subgraph exists.
const VertexIterator & source()
Source vertex.
Definition: Graph.h:1133
Functor called for each common subgraph isomorphism solution.
void maximumSolutionSize(size_t n)
Property: maximum allowed solution size.
CsiNextAction
How the CSI algorith should proceed.
EdgeValue & value()
User-defined value.
Definition: Graph.h:1159
bool nu(const GraphA &g1, typename GraphA::ConstVertexIterator i1, typename GraphA::ConstVertexIterator i2, const std::vector< typename GraphA::ConstEdgeIterator > &edges1, const GraphB &g2, typename GraphB::ConstVertexIterator j1, typename GraphB::ConstVertexIterator j2, const std::vector< typename GraphB::ConstEdgeIterator > &edges2) const
Isomorphism of vertices based on incident edges.
bool insert(Value value)
Insert a value.
std::vector< std::pair< std::vector< size_t >, std::vector< size_t > > > findMaximumCommonIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2)
Find maximum common isomorphic subgraphs.
const EquivalenceP & equivalencePredicate() const
Property: reference to the vertex equivalence predicate.
void monotonicallyIncreasing(bool b)
Property: monotonically increasing solution size.
void run()
Perform the common subgraph isomorphism analysis.
CommonSubgraphIsomorphism(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor=SolutionProcessor(), EquivalenceP equivalenceP=EquivalenceP())
Construct a solver.
std::vector< typename GraphTraits< Graph >::VertexIterator > graphDirectedDominators(Graph &g, typename GraphTraits< Graph >::VertexIterator root)
Find immediate pre- or post-dominators.
VertexIterator findVertex(size_t id)
Finds the vertex with specified ID number.
Definition: Graph.h:1495
void findIsomorphicSubgraphs(const GraphA &g1, const GraphB &g2, SolutionProcessor solutionProcessor)
Find an isomorphic subgraph.
Continue searching for more solutions.
bool findingCommonSubgraphs() const
Property: find common subgraphs.
bool erase(Value value)
Erase a value.
SolutionProcessor & solutionProcessor()
Property: reference to the solution callback.
bool monotonicallyIncreasing() const
Property: monotonically increasing solution size.
Bidirectional edge node iterator.
Definition: Graph.h:931
size_t graphBreakCycles(Graph &g)
Break cycles of a graph arbitrarily.
void findingCommonSubgraphs(bool b)
Property: find common subgraphs.
size_t nOutEdges() const
Number of outgoing edges.
Definition: Graph.h:1250
bool mu(const GraphA &g1, const typename GraphA::ConstVertexIterator &v1, const GraphB &g2, const typename GraphB::ConstVertexIterator &v2) const
Isomorphism of two vertices.
Depth-first, forward traversal for all event types.
Base class for graph traversals.
void insertAll()
Insert all possible members.
bool isEmpty() const
True if graph is empty.
Definition: Graph.h:1683
Return to caller without further searching.
bool graphIsConnected(const Graph &g)
Test whether a graph is connected.
EquivalenceP & equivalencePredicate()
Property: reference to the vertex equivalence predicate.
EdgeIterator eraseEdge(const EdgeIterator &edge)
Erases an edge.
Definition: Graph.h:1790
void graphEraseParallelEdges(Graph &g)
Erase parallel edges.
const VertexIterator & target()
Target vertex.
Definition: Graph.h:1145
std::vector< typename GraphTraits< Graph >::VertexIterator > graphDominators(Graph &g, typename GraphTraits< Graph >::VertexIterator root)
Find immediate pre-dominators.
Container associating values with keys.
Definition: Sawyer/Map.h:66
size_t maximumSolutionSize() const
Property: maximum allowed solution size.
std::vector< size_t > graphReachableVertices(Traversal t)
IDs of vertices reachable from root.