libabigail
abg-diff-utils.h
Go to the documentation of this file.
1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2013-2023 Red Hat, Inc.
5 
6 /// @file
7 ///
8 /// This file declares types and operations implementing the "O(ND)
9 /// Difference Algorithm" (aka diff2) from Eugene W. Myers, to compute
10 /// the difference between two sequences.
11 ///
12 /// To understand what is going on here, one must read the paper at
13 /// http://www.xmailserver.org/diff2.pdf. Throughout this file, that
14 /// paper is referred to as "the paper".
15 ///
16 /// The implementations goes as far as calculating the shortest edit
17 /// script (the set of insertions and deletions) for transforming a
18 /// sequence into another. The main entry point for that is the
19 /// compute_diff() function.
20 
21 #ifndef __ABG_DIFF_UTILS_H__
22 #define __ABG_DIFF_UTILS_H__
23 
24 #include <cassert>
25 #include <cstdlib>
26 #include <memory>
27 #include <ostream>
28 #include <sstream>
29 #include <stdexcept>
30 #include <string>
31 #include <vector>
32 #include "abg-fwd.h"
33 
34 namespace abigail
35 {
36 
37 /// @brief Libabigail's core diffing algorithms
38 ///
39 /// This is the namespace defining the core diffing algorithm used by
40 /// libabigail @ref comparison tools. This based on the diff
41 /// algorithm from Eugene Myers.
42 namespace diff_utils
43 {
44 
45 using std::shared_ptr;
46 
47 // Inject the names from std:: below into this namespace
48 using std::string;
49 using std::ostream;
50 using std::vector;
51 using std::abs;
52 using std::ostringstream;
53 
54 /// A class representing a vertex in an edit graph, as explained in
55 /// the paper. A vertex is a basically a pair of coordinates
56 /// (abscissa and ordinate).
57 class point
58 {
59  int x_;
60  int y_;
61  bool empty_;
62 
63 public:
64 
65  point()
66  : x_(-1), y_(-1),empty_(true)
67  {}
68 
69  point(int x, int y)
70  : x_(x), y_(y), empty_(false)
71  {}
72 
73  point(const point& p)
74  : x_(p.x()), y_(p.y()), empty_(p.is_empty())
75  {}
76 
77  int
78  x() const
79  {return x_;}
80 
81  void
82  x(int x)
83  {
84  x_ = x;
85  empty_ = false;
86  }
87 
88  int
89  y() const
90  {return y_;}
91 
92  void
93  y(int y)
94  {
95  y_ = y;
96  empty_ = false;
97  }
98 
99  void
100  set(int x, int y)
101  {
102  x_ = x;
103  y_ = y;
104  empty_ = false;
105  }
106 
107  void
108  set(int x, int y, bool empty)
109  {
110  x_ = x;
111  y_ = y;
112  empty_ = empty;
113  }
114 
115  void
116  add(int ax, int ay)
117  {set (x() + ax, y() + ay);}
118 
119  bool
120  operator!=(const point& o) const
121  {return (x() != o.x() || y() != o.y() || is_empty() != o.is_empty());}
122 
123  bool
124  operator==(const point& o) const
125  {return !(operator!=(o));}
126 
127  bool
128  operator<(const point& o) const
129  {return (x() < o.x() && y() < o.y());}
130 
131  bool
132  operator>(const point& o) const
133  {return (x() > o.x() && y() > o.y());}
134 
135  bool
136  operator<=(const point& o) const
137  {return (x() <= o.x() && y() <= o.y());}
138 
139  bool
140  operator>=(const point& o) const
141  {return (x() >= o.x() && y() >= o.y());}
142 
143  point
144  operator+(int val) const
145  {return point(x() + val, y() + val);}
146 
147  point
148  operator-(int val) const
149  {return point(x() - val, y() - val);}
150 
151  point&
152  operator+= (int val)
153  {
154  set(x_ + val, y_ + val);
155  return *this;
156  }
157 
158  point&
159  operator-= (int val)
160  {return (*this) += (-val);}
161 
162  point&
163  operator--()
164  {return (*this) -= 1;}
165 
166  point&
167  operator++()
168  {return (*this) += 1;}
169 
170  point
171  operator--(int)
172  {
173  point tmp(*this);
174  --(*this);
175  return tmp;
176  }
177 
178  point
179  operator++(int)
180  {
181  point tmp(*this);
182  ++(*this);
183  return tmp;
184  }
185 
186  point&
187  operator=(int val)
188  {
189  set(val, val);
190  return *this;
191  }
192 
193  point&
194  operator=(const point& p)
195  {
196  set(p.x(), p.y(), p.is_empty());
197  return *this;
198  }
199 
200  bool
201  is_empty() const
202  {return empty_;}
203 
204  operator bool () const
205  {return !is_empty();}
206 
207  bool
208  operator!() const
209  {return is_empty();}
210 
211  void
212  clear()
213  {
214  x_ = -1;
215  y_ = -1;
216  empty_ = true;
217  }
218 };// end point
219 
220 /// The abstraction of the Snake concept, from the paper.
221 ///
222 /// In a given path from the edit graph, a snake is a non-diagonal
223 /// edge followed by zero or more diagonal edges.
224 ///
225 /// The starting poing of the non-diagonal edge is the beginning of
226 /// the snake. This is given by the snake::begin() method. This point
227 /// is not explicitely referenced in the paper, but we need it for
228 /// some grunt implementation details of the algorithm.
229 ///
230 /// The end point of the non-diagonal edge is the intermediate point
231 /// of the snake; it's given by the snake::intermediate() method.
232 /// This point is what is referred to as "the begining of the snake"
233 /// in the paper.
234 ///
235 /// The end point of the first diagonal edge is given by the
236 /// snake::diagonal_start() method.
237 ///
238 /// The end point of the last diagonal edge is given by the
239 /// snake::end() method. Note that when the snake contains no
240 /// diagonal edge, snake::intermediate(), and snake::end() return the
241 /// same point; snake::diagonal_start() contains an empty point (i.e,
242 /// a point for which point::is_empty() returns true).
243 class snake
244 {
245  point begin_, intermediate_, diagonal_start_, end_;
246  bool forward_;
247 
248 public:
249 
250  /// Default constructor for snake.
252  : forward_(false)
253  {}
254 
255  /// Constructor from the beginning, intermediate and end points.
256  ///
257  /// @param b the beginning point of the snake. That is, the
258  /// starting point of the non-diagonal edge.
259  ///
260  /// @param i the intermediate point of the snake. That is, the end
261  /// point of the non-diagonal edge.
262  ///
263  /// @param e the end point of the snake. That is the end point of
264  /// the last diagonal edge.
265  snake(const point& b,
266  const point& i,
267  const point& e)
268  : begin_(b), intermediate_(i),
269  end_(e), forward_(false)
270  {}
271 
272  /// Constructor from the beginning, intermediate and end points.
273  ///
274  /// @param b the beginning point of the snake. That is, the
275  /// starting point of the non-diagonal edge.
276  ///
277  /// @param i the intermediate point of the snake. That is, the end
278  /// point of the non-diagonal edge.
279  ///
280  /// @param d the beginning of the diagonal edge. That is the end of
281  /// the first diagonal edge of the snake.
282  ///
283  /// @param e the end point of the snake. That is the end point of
284  /// the last diagonal edge.
285  snake(const point& b,
286  const point& i,
287  const point& d,
288  const point& e)
289  : begin_(b), intermediate_(i),
290  diagonal_start_(d), end_(e),
291  forward_(false)
292  {}
293 
294  /// Getter for the starting point of the non-diagonal edge of the
295  /// snake.
296  ///
297  /// @return the starting point of the non-diagonal edge of the snake
298  const point&
299  begin() const
300  {return begin_;}
301 
302  /// Getter for the starting point of the non-diagonal edge of the
303  /// snake, aka begin point.
304  ///
305  ///@param p the new begin point.
306  void
307  begin(const point& p)
308  {begin_ = p;}
309 
310  /// Getter for the end point of the non-diagonal edge of the snake.
311  ///
312  /// @return the end point of the non-diagonal edge of the snake
313  const point&
314  intermediate() const
315  {return intermediate_;}
316 
317  /// Setter for the end point of the non-diagonal edge of the snake,
318  /// aka intermediate point.
319  ///
320  /// @param p the new intermediate point.
321  void
322  intermediate(const point& p)
323  {intermediate_ = p;}
324 
325  /// Getter for the end point of the first diagonal edge, aka
326  /// diagonal start point. Note that if the snake has no diagonal
327  /// edge, this point is empty.
328  ///
329  /// @return the end point of the first diagonal edge.
330  const point&
332  {return diagonal_start_;}
333 
334  /// Setter for the end point of the first diagonal edge, aka
335  /// diagonal start point.
336  ///
337  /// @param p the new diagonal start.d
338  void
340  {diagonal_start_ = p;}
341 
342  /// Getter for the end point of the last diagonal edge, aka snake
343  /// end point. Note that if the snake has no diagonal edge, this
344  /// point is equal to the intermediate point.
345  ///
346  /// @return the end point of the last diagonal edge
347  const point&
348  end() const
349  {return end_;}
350 
351  /// Setter for the end point of the last diagonal edge, aka snake
352  /// end point. Note that if the snake has no diagonal edge, this
353  /// point is equal to the intermediate point.
354  void
355  end(const point& p)
356  {end_ = p;}
357 
358  /// Setter for the begin, intermediate and end points of the snake.
359  ///
360  /// @param b the new snake begin point
361  ///
362  /// @param i the new snake intermediate point
363  ///
364  /// @param e the new snake end point
365  void
366  set(const point& b, const point&i, const point&e)
367  {
368  begin(b);
369  intermediate(i);
370  end(e);
371  }
372 
373  /// Setter for the begin, intermediate, diagonal start and end points
374  /// of the snake.
375  ///
376  /// @param b the new snake begin point
377  ///
378  /// @param i the new snake intermediate point
379  ///
380  /// @param d the new diagonal start point
381  ///
382  /// @param e the new snake end point
383  void
384  set(const point& b, const point&i, const point& d, const point&e)
385  {
386  begin(b);
387  intermediate(i);
388  diagonal_start(d);
389  end(e);
390  }
391 
392  /// @return true iff the snake is a forward snake. That is, if it
393  /// was built while walking the edit graph going forward (from the
394  /// top left corner to the right bottom corner.
395  bool
396  is_forward() const
397  {return forward_;}
398 
399  /// Set to true if the snake is a forward snake; that is, if it was
400  /// built while walking the edit graph going forward (from the top
401  /// left corner to the right bottom corner. Set to false otherwise.
402  ///
403  /// @param f whether the snake is a forward snake or not.
404  void
405  set_forward(bool f)
406  {forward_ = f;}
407 
408  /// Add an offset to the abscissas of the points of the snake, and
409  /// add another offset to the ordinates of these same points.
410  ///
411  /// @param x_offset the offset to add to the abscissas of all the
412  /// points of the snake.
413  ///
414  /// @param y_offset the offset to add to the ordinates of all the
415  /// points of the snake.
416  void
417  add(int x_offset, int y_offset)
418  {
419  if (is_empty())
420  return;
421 
422  begin_.add(x_offset, y_offset);
423  intermediate_.add(x_offset, y_offset);
424  if (diagonal_start_)
425  diagonal_start_.add(x_offset, y_offset);
426  end_.add(x_offset, y_offset);
427  }
428 
429  /// @return true iff the snake has at least one diagonal edge.
430  bool
432  {return !diagonal_start().is_empty();}
433 
434  /// @return true iff the non-diagonal edge is horizontal.
435  bool
437  {return (begin().y() == intermediate().y());}
438 
439  /// @return true iff the non-diagonal edge is vertical.
440  bool
442  {return (begin().x() == intermediate().x());}
443 
444  /// @return true iff the snake is empty, that is, if all the points
445  /// it contains are empty.
446  bool is_empty() const
447  {return begin().is_empty() && intermediate().is_empty() && end().is_empty();}
448 };// end class snake
449 
450 /// The array containing the furthest D-path end-points, for each value
451 /// of K. MAX_D is the maximum value of the D-Path. That is, M+N if
452 /// M is the size of the first input string, and N is the size of the
453 /// second.
454 class d_path_vec : public std::vector<int>
455 {
456 private:
457 
458  unsigned a_size_;
459  unsigned b_size_;
460 
461  /// Forbid vector size modifications
462  void
463  push_back(const vector<int>::value_type&);
464 
465  /// Forbid default constructor.
466  d_path_vec();
467 
468  bool
469  over_bounds(long long index) const
470  {return (index + offset()) >= (long long) size();}
471 
472  void
473  check_index_against_bound(int index) const
474  {
475  if (over_bounds(index))
476  {
477  ostringstream o;
478  o << "index '" << index
479  << "' out of range [-" << max_d() << ", " << max_d() << "]";
480  throw std::out_of_range(o.str());
481  }
482  }
483 
484 public:
485 
486  /// Constructor of the d_path_vec.
487  ///
488  /// For forward vectors, the underlying vector allocates 2 *
489  /// [MAX_D+1].
490  /// space, so that one can address elements in the index range
491  /// [-MAX_D, MAX_D]. And MAX_D is the sum of the two sequence
492  /// sizes. delta is the difference.
493  ///
494  /// For reverse vectors, note that we need to be able to address
495  /// [-MAX_D - delta, MAX_D + delta], with delta being the (signed)
496  /// difference between the size of the two sequences. We consider
497  /// delta being bounded by MAX_D itself; so we say we need to be
498  /// able to address [-2MAX_D, 2MAX_D].
499  ///
500  /// @param size1 the size of the first sequence we are interested
501  /// in.
502  ///
503  /// @param size2 the size of the second sequence we are interested
504  /// in.
505  d_path_vec(unsigned size1, unsigned size2)
506  : vector<int>(2 * (size1 + size2 + 1 + (size1 + size2)) + 1, 0),
507  a_size_(size1), b_size_(size2)
508  {
509  }
510 
511  std::vector<int>::const_reference
512  operator[](int index) const
513  {return at(index);}
514 
515  std::vector<int>::reference
516  operator[](int index)
517  {return at(index);}
518 
519  std::vector<int>::reference
520  at(long long index)
521  {
522  //check_index_against_bound(index);
523  long long i = index + offset();
524  return vector<int>::operator[](i);
525  }
526 
527  std::vector<int>::const_reference
528  at(long long index) const
529  {
530  check_index_against_bound(index);
531  long long i = offset() + index;
532  return static_cast<const vector<int>* >(this)->at(i);
533  }
534 
535  unsigned
536  a_size() const
537  {return a_size_;}
538 
539  unsigned
540  b_size() const
541  {return b_size_;}
542 
543  unsigned
544  max_d() const
545  {return a_size_ + b_size_;}
546 
547  unsigned
548  offset() const
549  {return max_d() + abs((long long) a_size() - (long long) b_size());}
550 }; // end class d_path_vec
551 
552 /// The abstration of an insertion of elements of a sequence B into a
553 /// sequence A. This is used to represent the edit script for
554 /// transforming a sequence A into a sequence B.
555 ///
556 /// And insertion mainly encapsulates two components:
557 ///
558 /// - An insertion point: this is the index (starting at 0) of the
559 /// element of the sequence A after which the insertion occurs.
560 ///
561 /// - Inserted elements: this is a vector of indexes of elements of
562 /// sequence B (starting at 0) that got inserted into sequence A,
563 /// after the insertion point.
565 {
566  int insertion_point_;
567  vector<unsigned> inserted_;
568 
569 public:
570 
571  insertion(int insertion_point,
572  const vector<unsigned>& inserted_indexes)
573  : insertion_point_(insertion_point),
574  inserted_(inserted_indexes)
575  {}
576 
577  insertion(int insertion_point = 0)
578  : insertion_point_(insertion_point)
579  {}
580 
581  int
582  insertion_point_index() const
583  {return insertion_point_;}
584 
585  void
586  insertion_point_index(int i)
587  {insertion_point_ = i;}
588 
589  const vector<unsigned>&
590  inserted_indexes() const
591  {return inserted_;}
592 
593  vector<unsigned>&
594  inserted_indexes()
595  {return inserted_;}
596 };// end class insertion
597 
598 /// The abstraction of the deletion of one element of a sequence A.
599 ///
600 /// This encapsulates the index of the element A that got deleted.
601 class deletion
602 {
603  int index_;
604 
605 public:
606 
607  deletion(int i)
608  : index_(i)
609  {}
610 
611  int
612  index() const
613  {return index_;}
614 
615  void
616  index(int i)
617  {index_ = i;}
618 };// end class deletion
619 
620 /// The abstraction of an edit script for transforming a sequence A
621 /// into a sequence B.
622 ///
623 /// It encapsulates the insertions and deletions for transforming A
624 /// into B.
626 {
627  vector<insertion> insertions_;
628  vector<deletion> deletions_;
629 
630 public:
631 
632  edit_script()
633  {}
634 
635  const vector<insertion>&
636  insertions() const
637  {return insertions_;}
638 
639  vector<insertion>&
640  insertions()
641  {return insertions_;}
642 
643  const vector<deletion>&
644  deletions() const
645  {return deletions_;}
646 
647  vector<deletion>&
648  deletions()
649  {return deletions_;}
650 
651  void
652  append(const edit_script& es)
653  {
654  insertions().insert(insertions().end(),
655  es.insertions().begin(),
656  es.insertions().end());
657  deletions().insert(deletions().end(),
658  es.deletions().begin(),
659  es.deletions().end());
660  }
661 
662  void
663  prepend(const edit_script& es)
664  {
665  insertions().insert(insertions().begin(),
666  es.insertions().begin(),
667  es.insertions().end());
668  deletions().insert(deletions().begin(),
669  es.deletions().begin(),
670  es.deletions().end());
671  }
672 
673  void
674  clear()
675  {
676  insertions().clear();
677  deletions().clear();
678  }
679 
680  bool
681  is_empty() const
682  {return insertions().empty() && deletions().empty();}
683 
684  operator bool() const
685  {return !is_empty();}
686 
687  int
688  num_insertions() const
689  {
690  int l = 0;
691  for (vector<insertion>::const_iterator i = insertions().begin();
692  i != insertions().end();
693  ++i)
694  l += i->inserted_indexes().size();
695  return l;
696  }
697 
698  int
699  num_deletions() const
700  {return deletions().size();}
701 
702  int
703  length() const
704  {return num_insertions() + num_deletions();}
705 };//end class edit_script
706 
707 bool
709  unsigned a_size,
710  unsigned b_size);
711 
712 bool
713 ends_of_furthest_d_paths_overlap(const point& forward_d_path_end,
714  const point& reverse_d_path_end);
715 
716 /// The default equality functor used by the core diffing algorithms.
718 {
719  /// This equality operator uses the default "==" to compare its
720  /// arguments.
721  ///
722  /// @param a the first comparison argument.
723  ///
724  /// @param b the second comparison argument.
725  ///
726  /// @return true if the two arguments are equal, false otherwise.
727  template<typename T>
728  bool
729  operator()(const T a, const T b) const
730  {return a == b;}
731 };
732 
733 
734 /// An equality functor to deeply compare pointers.
736 {
737  /// This equality operator compares pointers by comparing the
738  /// pointed-to objects.
739  ///
740  /// @param first the first comparison argument.
741  ///
742  /// @param second the second comparison argument.
743  ///
744  /// @return true if the objects pointed to by the pointers are
745  /// equal, false otherwise.
746  template<typename T>
747  bool
748  operator()(const T* first,
749  const T* second) const
750  {
751  if (!!first != !!second)
752  return false;
753 
754  if (!first)
755  return true;
756 
757  return *first == *second;
758  }
759 
760  /// This equality operator compares pointers by comparing the
761  /// pointed-to objects.
762  ///
763  /// @param first the first comparison argument.
764  ///
765  /// @param second the second comparison argument.
766  ///
767  /// @return true if the objects pointed to by the pointers are
768  /// equal, false otherwise.
769  template<typename T>
770  bool
771  operator()(const shared_ptr<T> first,
772  const shared_ptr<T> second) const
773  {return operator()(first.get(), second.get());}
774 
775  /// This equality operator compares pointers by comparing the
776  /// pointed-to objects.
777  ///
778  /// @param first the first comparison argument.
779  ///
780  /// @param second the second comparison argument.
781  ///
782  /// @return true if the objects pointed to by the pointers are
783  /// equal, false otherwise.
784  template<typename T>
785  bool
786  operator()(const weak_ptr<T> first,
787  const weak_ptr<T> second) const
788  {return operator()(shared_ptr<T>(first), shared_ptr<T>(second));}
789 }; // end struct deep_ptr_eq_functor
790 
791 /// Find the end of the furthest reaching d-path on diagonal k, for
792 /// two sequences. In the paper This is referred to as "the basic
793 /// algorithm".
794 ///
795 /// Unlike in the paper, the coordinates of the edit graph start at
796 /// (-1,-1), rather than (0,0), and they end at (M-1, N-1), rather
797 /// than (M,N).
798 ///
799 /// @tparm RandomAccessOutputIterator the type of iterators passed to
800 /// this function. It must be a random access output iterator kind.
801 ///
802 /// @tparm EqualityFunctor this must be a class that declares a public
803 /// call operator member returning a boolean and taking two arguments
804 /// that must be of the same type as the one pointed to by the @ref
805 /// RandomAccessOutputIterator template parameter. This functor is
806 /// used to compare the elements referred to by the iterators passed in
807 /// argument to this function.
808 ///
809 /// @param k the number of the diagonal on which we want to find the
810 /// end of the furthest reaching D-path.
811 ///
812 /// @param d the D in D-Path. That's the number of insertions/deletions
813 /// (the number of changes, in other words) in the changeset. That is
814 /// also the number of non-diagonals in the D-Path.
815 ///
816 /// @param a_begin an iterator to the beginning of the first sequence
817 ///
818 /// @param a_end an iterator that points right after the last element
819 /// of the second sequence to consider.
820 ///
821 /// @param b_begin an iterator to the beginning of the second sequence.
822 ///
823 /// @param b_end an iterator that points right after the last element
824 /// of the second sequence to consider.
825 ///
826 /// @param v the vector of furthest end points of d_paths, at (d-1).
827 /// It contains the abscissas of the furthest end points for different
828 /// values of k, at (d-1). That is, for k in [-D + 1, -D + 3, -D + 5,
829 /// ..., D - 1], v[k] is the abscissa of the end of the furthest
830 /// reaching (D-1)-path on diagonal k.
831 ///
832 /// @param snak the last snake of the furthest path found. The end
833 /// point of the snake is the end point of the furthest path.
834 ///
835 /// @return true if the end of the furthest reaching path that was
836 /// found was inside the boundaries of the edit graph, false
837 /// otherwise.
838 template<typename RandomAccessOutputIterator,
839  typename EqualityFunctor>
840 bool
842  RandomAccessOutputIterator a_begin,
843  RandomAccessOutputIterator a_end,
844  RandomAccessOutputIterator b_start,
845  RandomAccessOutputIterator b_end,
846  d_path_vec& v, snake& snak)
847 {
848  int x = -1, y = -1;
849  point begin, intermediate, diag_start, end;
850  snake s;
851  EqualityFunctor eq;
852 
853  // Let's pick the end point of the furthest reaching
854  // (D-1)-path. It's either v[k-1] or v[k+1]; the word
855  // "furthest" means we choose the one which abscissa is the
856  // greatest (that is, furthest from abscissa zero).
857  if (k == -d || ((k != d) && (v[k-1] < v[k + 1])))
858  // So, the abscissa of the end point of the furthest
859  // reaching (D-1)-path is v[k+1]. That is a diagonal that
860  // is above the current (k) diagonal, and on the right.
861  // To move to the current k diagonal, one has to move
862  // "down" from the diagonal k+1. So the abscissa won't
863  // change. Only the ordinate will. It will be given by y
864  // = x - k (a bit below); as k has changed from k - 1 (it
865  // has increased), y is going to be the new y that is
866  // 'down' from the previous y in k - 1.
867  {
868  x = v[k+1];
869  begin.set(x, x - (k + 1));
870  }
871  else
872  {
873  // So the abscissa of the end point of the furthest
874  // (D-1)-path is v[k-1]. That is on the left of the
875  // current k diagonal. To move to the current k diagonal,
876  // one has to move "right" from diagonal k - 1. That is,
877  // the y stays constant and x is incremented.
878  x = v[k-1];
879  begin.set(x, x - (k - 1));
880  ++x;
881  }
882 
883  // Now get the value of y from the equation k = x -y.
884  // This is the point where we first touch K, when we move
885  // from the end of the furthest reaching (D-1)-path.
886  y = x - k;
887 
888  intermediate.x(x);
889  intermediate.y(y);
890 
891  int last_x_index = a_end - a_begin - 1;
892  int last_y_index = b_end - b_start - 1;
893  // Now, follow the snake (aka, zero or more consecutive
894  // diagonals). Note that we stay on the k diagonal when we
895  // do this.
896  while ((x < last_x_index) && (y < last_y_index))
897  if (eq(a_begin[x + 1], b_start[y + 1]))
898  {
899  x = x + 1;
900  y = y + 1;
901  if (!diag_start)
902  diag_start.set(x, y);
903  }
904  else
905  break;
906 
907  end.x(x);
908  end.y(y);
909 
910  // Note the point that we store in v here might be outside the
911  // bounds of the edit graph. But we store it at this step (for a
912  // given D) anyway, because out of bound or not, we need this value
913  // at this step to be able to compute the value of the point on the
914  // "next" diagonal for the next D.
915  v[k] = x;
916 
917  if (x >= (int) v.a_size()
918  || y >= (int) v.b_size()
919  || x < -1 || y < -1)
920  return false;
921 
922  s.set(begin, intermediate, diag_start, end);
923  s.set_forward(true);
924  snak = s;
925 
926  return true;
927 }
928 
929 /// Find the end of the furthest reaching reverse d-path on diagonal k
930 /// + delta. Delta is abs(M - N), with M being the size of a and N
931 /// being the size of b. This is the "basic algorithm", run backward.
932 /// That is, starting from the point (M,N) of the edit graph.
933 ///
934 /// Unlike in the paper, the coordinates of the edit graph start at
935 /// (-1,-1), rather than (0,0), and they end at (M-1, N-1), rather
936 /// than (M,N).
937 ///
938 /// @tparm RandomAccessOutputIterator the type of iterators passed to
939 /// this function. It must be a random access output iterator kind.
940 ///
941 /// @tparm EqualityFunctor this must be a class that declares a public
942 /// call operator member returning a boolean and taking two arguments
943 /// that must be of the same type as the one pointed to by the @ref
944 /// RandomAccessOutputIterator template parameter. This functor is
945 /// used to compare the elements referred to by the iterators passed in
946 /// argument to this function.
947 ///
948 /// @param k the number of the diagonal on which we want to find the
949 /// end of the furthest reaching reverse D-path. Actually, we want to
950 /// find the end of the furthest reaching reverse D-path on diagonal (k
951 /// - delta).
952 ///
953 /// @param d the D in D-path. That's the number of insertions/deletions
954 /// (the number of changes, in other words) in the changeset. That is
955 /// also the number of non-diagonals in the D-Path.
956 ///
957 /// @param a_begin an iterator to the beginning of the first sequence
958 ///
959 /// @param a_end an iterator that points right after the last element
960 /// of the second sequence to consider.
961 ///
962 /// @param b_begin an iterator to the beginning of the second sequence.
963 ///
964 /// @param b_end an iterator that points right after the last element
965 /// of the second sequence to consider.
966 ///
967 /// @param v the vector of furthest end points of d_paths, at (d-1).
968 /// It contains the abscissae of the furthest end points for different
969 /// values of k - delta, at (d-1). That is, for k in [-D + 1, -D + 3,
970 /// -D + 5, ..., D - 1], v[k - delta] is the abscissa of the end of the
971 /// furthest reaching (D-1)-path on diagonal k - delta.
972 ///
973 /// @param snak the last snake of the furthest path found. The end
974 /// point of the snake is the end point of the furthest path.
975 ///
976 /// @return true iff the end of the furthest reaching path that was
977 /// found was inside the boundaries of the edit graph, false
978 /// otherwise.
979 template<typename RandomAccessOutputIterator,
980  typename EqualityFunctor>
981 bool
983  RandomAccessOutputIterator a_begin,
984  RandomAccessOutputIterator a_end,
985  RandomAccessOutputIterator b_begin,
986  RandomAccessOutputIterator b_end,
987  d_path_vec& v, snake& snak)
988 {
989  int a_size = a_end - a_begin;
990  int b_size = b_end - b_begin;
991  int delta = a_size - b_size;
992  int k_plus_delta = k + delta;
993  int x = -1, y = -1;
994  point begin, intermediate, diag_start, end;
995  snake s;
996  EqualityFunctor eq;
997 
998  // Let's pick the end point of the furthest reaching (D-1)-path and
999  // move from there to reach the current k_plus_delta-line. That end
1000  // point of the furthest reaching (D-1)-path is either on
1001  // v[k_plus_delta-1] or on v[k_plus_delta+1]; the word "furthest"
1002  // means we choose the one which abscissa is the lowest (that is,
1003  // furthest from abscissa M).
1004  if (k_plus_delta == -d + delta
1005  || ((k_plus_delta != d + delta)
1006  && (v[k_plus_delta + 1] <= v[k_plus_delta - 1])))
1007  {
1008  // We move left, that means ordinate won't change ...
1009  x = v[k_plus_delta + 1];
1010  y = x - (k_plus_delta + 1);
1011  begin.set(x, y);
1012  // ... and abscissa decreases.
1013  x = x - 1;
1014  }
1015  else
1016  {
1017  // So the furthest end point is on the k_plus_delta - 1
1018  // diagonal. That is a diagonal that is 'below' the
1019  // k_plus_delta current diagonal. So to join the current
1020  // diagonal from the k_plus_delta - 1 one, we need to move up.
1021 
1022  // So moving up means abscissa won't change ...
1023  x = v[k_plus_delta - 1];
1024  begin.set(x, x - (k_plus_delta - 1));
1025  // ... and that ordinate decreases.
1026  y = x - (k_plus_delta - 1) - 1;
1027  }
1028 
1029  intermediate.set(x, y);
1030 
1031  // Now, follow the snake. Note that we stay on the k_plus_delta
1032  // diagonal when we do this.
1033  while (x >= 0 && y >= 0)
1034  if (eq(a_begin[x], b_begin[y]))
1035  {
1036  if (!diag_start)
1037  diag_start.set(x, y);
1038  x = x - 1;
1039  y = y - 1;
1040  }
1041  else
1042  break;
1043 
1044  end.set(x, y);
1045 
1046  // Note the point that we store in v here might be outside the
1047  // bounds of the edit graph. But we store it at this step (for a
1048  // given D) anyway, because out of bound or not, we need this value
1049  // at this step to be able to compute the value of the point on the
1050  // "next" diagonal for the next D.
1051  v[k_plus_delta] = x;
1052 
1053  if (x == -1 && y == -1)
1054  ;
1055  else if (x < -1 || y < -1)
1056  return false;
1057 
1058  s.set(begin, intermediate, diag_start, end);
1059  s.set_forward(false);
1060  snak = s;
1061 
1062  return true;
1063 }
1064 
1065 /// Tests if a given point is a match point in an edit graph.
1066 ///
1067 /// @param a_begin the begin iterator of the first input sequence of
1068 /// the edit graph.
1069 ///
1070 /// @param a_end the end iterator of the first input sequence of the
1071 /// edit graph. This points to one element passed the end of the
1072 /// sequence.
1073 ///
1074 /// @param b_begin the begin iterator of the second input sequence of
1075 /// the edit graph.
1076 ///
1077 /// @param b_end the end iterator of the second input sequence of the
1078 /// edit graph. This points the one element passed the end of the
1079 /// sequence.
1080 ///
1081 /// @param point the point to test for being a match point.
1082 ///
1083 /// @return true iff \a point is a match point.
1084 template<typename RandomAccessOutputIterator>
1085 bool
1086 is_match_point(RandomAccessOutputIterator a_begin,
1087  RandomAccessOutputIterator a_end,
1088  RandomAccessOutputIterator b_begin,
1089  RandomAccessOutputIterator b_end,
1090  const point& point)
1091 {
1092  int a_size = a_end - a_begin, b_size = b_end - b_begin;
1093 
1094  if (point.x() < 0
1095  || point.x () >= a_size
1096  || point.y() < 0
1097  || point.y() >= b_size)
1098  return false;
1099 
1100  return (a_begin[point.x()] == b_begin[point.y()]);
1101 }
1102 
1103 /// Returns the middle snake of two sequences A and B, as well as the
1104 /// length of their shortest editing script.
1105 ///
1106 /// This uses the "linear space refinement" algorithm presented in
1107 /// section 4b in the paper. As the paper says, "The idea for doing
1108 /// so is to simultaneously run the basic algorithm in both the
1109 /// forward and reverse directions until furthest reaching forward and
1110 /// reverse paths starting at opposing corners 'overlap'."
1111 ///
1112 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1113 /// this function. It must be a random access output iterator kind.
1114 ///
1115 /// @tparm EqualityFunctor this must be a class that declares a public
1116 /// call operator member returning a boolean and taking two arguments
1117 /// that must be of the same type as the one pointed to by the @ref
1118 /// RandomAccessOutputIterator template parameter. This functor is
1119 /// used to compare the elements referred to by the iterators passed in
1120 /// argument to this function.
1121 ///
1122 /// @param a_begin an iterator pointing to the begining of sequence A.
1123 ///
1124 /// @param a_end an iterator pointing to the end of sequence A. Note
1125 /// that this points right /after/ the end of vector A.
1126 ///
1127 /// @param b_begin an iterator pointing to the begining of sequence B.
1128 ///
1129 /// @param b_end an iterator pointing to the end of sequence B. Note
1130 /// that this points right /after/ the end of vector B
1131 ///
1132 /// @param snak out parameter. This is the snake current when the two
1133 /// paths overlapped. This is set iff the function returns true;
1134 /// otherwise, this is not touched.
1135 ///
1136 /// @return true is the snake was found, false otherwise.
1137 template<typename RandomAccessOutputIterator,
1138  typename EqualityFunctor>
1139 bool
1140 compute_middle_snake(RandomAccessOutputIterator a_begin,
1141  RandomAccessOutputIterator a_end,
1142  RandomAccessOutputIterator b_begin,
1143  RandomAccessOutputIterator b_end,
1144  snake& snak, int& ses_len)
1145 {
1146  int a_size = a_end - a_begin;
1147  int N = a_size;
1148  int b_size = b_end - b_begin;
1149  int M = b_size;
1150  int delta = N - M;
1151  d_path_vec forward_d_paths(a_size, b_size);
1152  d_path_vec reverse_d_paths(a_size, b_size);
1153  // These points below are the top leftmost point and bottom
1154  // right-most points of the edit graph.
1155  point first_point(-1, -1), last_point(a_size -1, b_size -1), point_zero(0, 0);
1156 
1157  // We want the initial step (D = 0, k = 0 in the paper) to find a
1158  // furthest reaching point on diagonal k == 0; For that, we need the
1159  // value of x for k == 1; So let's set that value to -1; that is for
1160  // k == 1 (diagonal 1), the point in the edit graph is (-1,-2).
1161  // That way, to get the furthest reaching point on diagonal 0 (k ==
1162  // 0), we go down from (-1,-2) on diagonal 1 and we hit diagonal 0
1163  // on (-1,-1); that is the starting value that the algorithm expects
1164  // for k == 0.
1165  forward_d_paths[1] = -1;
1166 
1167  // Similarly for the reverse paths, for diagonal delta + 1 (note
1168  // that diagonals are centered on delta, unlike for forward paths
1169  // where they are centered on zero), we set the initial point to
1170  // (a_size, b_size - 1). That way, at step D == 0 and k == delta,
1171  // to reach diagonal delta from the point (a_size, b_size - 1) on
1172  // diagonal delta + 1, we just have to move left, and we hit
1173  // diagonal delta on (a_size - 1, b_size -1); that is the starting
1174  // point value the algorithm expects for k == 0 in the reverse case.
1175  reverse_d_paths[delta + 1] = a_size;
1176 
1177  int d_max = (M + N) / 2 + 1;
1178  for (int d = 0; d <= d_max; ++d)
1179  {
1180  // First build forward paths.
1181  for (int k = -d; k <= d; k += 2)
1182  {
1183  snake s;
1184  bool found =
1185  end_of_fr_d_path_in_k<RandomAccessOutputIterator,
1186  EqualityFunctor>(k, d,
1187  a_begin, a_end,
1188  b_begin, b_end,
1189  forward_d_paths, s);
1190  if (!found)
1191  continue;
1192 
1193  // As the paper says in 4b while explaining the middle snake
1194  // algorithm:
1195  //
1196  // "Thus when delta is odd, check for overlap only while
1197  // extending forward paths ..."
1198  if ((delta % 2)
1199  && (k >= (delta - (d - 1))) && (k <= (delta + (d - 1))))
1200  {
1201  point reverse_end;
1202  reverse_end.x(reverse_d_paths[k]);
1203  reverse_end.y(reverse_end.x() - k);
1204  if (ends_of_furthest_d_paths_overlap(s.end(), reverse_end))
1205  {
1206  ses_len = 2 * d - 1;
1207  snak = s;
1208  return true;
1209  }
1210  }
1211  }
1212 
1213  // Now build reverse paths.
1214  for (int k = -d; k <= d; k += 2)
1215  {
1216  snake s;
1217  bool found =
1218  end_of_frr_d_path_in_k_plus_delta<RandomAccessOutputIterator,
1219  EqualityFunctor>(k, d,
1220  a_begin, a_end,
1221  b_begin, b_end,
1222  reverse_d_paths,
1223  s);
1224 
1225  if (!found)
1226  continue;
1227 
1228  // And the paper continues by saying:
1229  //
1230  // "... and when delta is even, check for overlap only while
1231  // extending reverse paths."
1232  int k_plus_delta = k + delta;
1233  if (!(delta % 2)
1234  && (k_plus_delta >= -d) && (k_plus_delta <= d))
1235  {
1236  point forward_end;
1237  forward_end.x(forward_d_paths[k_plus_delta]);
1238  forward_end.y(forward_end.x() - k_plus_delta);
1239  if (ends_of_furthest_d_paths_overlap(forward_end, s.end()))
1240  {
1241  ses_len = 2 * d;
1242  snak = s;
1243  return true;
1244  }
1245  }
1246  }
1247  }
1248  return false;
1249 }
1250 
1251 bool
1252 compute_middle_snake(const char* str1, const char* str2,
1253  snake& s, int& ses_len);
1254 
1255 /// This prints the middle snake of two strings.
1256 ///
1257 /// @param a_begin the beginning of the first string.
1258 ///
1259 /// @param b_begin the beginning of the second string.
1260 ///
1261 /// @param s the snake to print.
1262 ///
1263 /// @param out the output stream to print the snake to.
1264 template<typename RandomAccessOutputIterator>
1265 void
1266 print_snake(RandomAccessOutputIterator a_begin,
1267  RandomAccessOutputIterator b_begin,
1268  const snake &s, ostream& out)
1269 {
1270  if (s.is_empty())
1271  return;
1272 
1273  out << "snake start: ";
1274  out << "(" << s.begin().x() << ", " << s.end().y() << ")\n";
1275 
1276  out << "snake intermediate: ";
1277  out << "(" << s.intermediate().x() << ", " << s.intermediate().y() << ")\n";
1278 
1279  out << "diagonal point(s): ";
1280  if (s.has_diagonal_edge())
1281  for (int x = s.intermediate().x(), y = s.intermediate().y();
1282  x <= s.end().x() && y <= s.end().y();
1283  ++x, ++y)
1284  {
1285  ABG_ASSERT(a_begin[x] == b_begin[y]);
1286  out << "(" << x << "," << y << ") ";
1287  }
1288  out << "\n";
1289 
1290  out << "snake end: ";
1291  out << "(" << s.end().x() << ", " << s.end().y() << ")\n";
1292 }
1293 
1294 /// Compute the length of the shortest edit script for two sequences a
1295 /// and b. This is done using the "Greedy LCS/SES" of figure 2 in the
1296 /// paper. It can walk the edit graph either foward (when reverse is
1297 /// false) or backward starting from the end (when reverse is true).
1298 ///
1299 /// Here, note that the real content of a and b should start at index
1300 /// 1, for this implementatikon algorithm to match the paper's
1301 /// algorithm in a straightforward manner. So pleast make sure that
1302 /// at index 0, we just get some non-used value.
1303 ///
1304 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1305 /// this function. It must be a random access output iterator kind.
1306 ///
1307 /// @tparm EqualityFunctor this must be a class that declares a public
1308 /// call operator member returning a boolean and taking two arguments
1309 /// that must be of the same type as the one pointed to by the @ref
1310 /// RandomAccessOutputIterator template parameter. This functor is
1311 /// used to compare the elements referred to by the iterators passed in
1312 /// argument to this function.
1313 ///
1314 /// @param a the first sequence we care about.
1315 ///
1316 /// @param b the second sequence we care about.
1317 ///
1318 /// @param v the vector that contains the end points of the furthest
1319 /// reaching d-path and (d-1)-path.
1320 template<typename RandomAccessOutputIterator,
1321  typename EqualityFunctor>
1322 int
1323 ses_len(RandomAccessOutputIterator a_begin,
1324  RandomAccessOutputIterator a_end,
1325  RandomAccessOutputIterator b_begin,
1326  RandomAccessOutputIterator b_end,
1327  d_path_vec& v, bool reverse)
1328 {
1329  unsigned a_size = a_end - a_begin;
1330  unsigned b_size = b_end - b_begin;
1331  snake snak;
1332 
1333  ABG_ASSERT(v.max_d() == a_size + b_size);
1334 
1335  int delta = a_size - b_size;
1336 
1337  if (reverse)
1338  // Set a fictitious (M, N-1) into v[1], to find the furthest
1339  // reaching reverse 0-path (i.e, when we are at d == 0 and k == 0).
1340  v[delta + 1] = a_size - 1;
1341  else
1342  // Set a fictitious (-1,-2) point into v[1], to find the furthest
1343  // reaching forward 0-path (i.e, when we are at d == 0 and k == 0).
1344  v[1] = -1;
1345 
1346  for (unsigned d = 0; d <= v.max_d(); ++d)
1347  {
1348  for (int k = -d; k <= (int) d; k += 2)
1349  {
1350  point end;
1351  if (reverse)
1352  {
1353  bool found =
1354  end_of_frr_d_path_in_k_plus_delta<RandomAccessOutputIterator,
1355  EqualityFunctor>(k, d,
1356  a_begin, a_end,
1357  b_begin, b_end,
1358  v, snak);
1359  // If we reached the upper left corner of the edit graph then
1360  // we are done.
1361  if (found && snak.end().x() == -1 && snak.end().y() == -1)
1362  return d;
1363  }
1364  else
1365  {
1366  end_of_fr_d_path_in_k<RandomAccessOutputIterator,
1367  EqualityFunctor>(k, d,
1368  a_begin, a_end,
1369  b_begin, b_end,
1370  v, snak);
1371  // If we reached the lower right corner of the edit
1372  // graph then we are done.
1373  if ((snak.end().x() == (int) a_size - 1)
1374  && (snak.end().y() == (int) b_size - 1))
1375  return d;
1376  }
1377  }
1378  }
1379  return 0;
1380 }
1381 
1382 /// Compute the length of the shortest edit script for two sequences a
1383 /// and b. This is done using the "Greedy LCS/SES" of figure 2 in the
1384 /// paper. It can walk the edit graph either foward (when reverse is
1385 /// false) or backward starting from the end (when reverse is true).
1386 ///
1387 /// Here, note that the real content of a and b should start at index
1388 /// 1, for this implementatikon algorithm to match the paper's
1389 /// algorithm in a straightforward manner. So pleast make sure that
1390 /// at index 0, we just get some non-used value.
1391 ///
1392 /// Note that the equality operator used to compare the elements
1393 /// passed in argument to this function is the default "==" operator.
1394 ///
1395 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1396 /// this function. It must be a random access output iterator kind.
1397 ///
1398 /// @param a the first sequence we care about.
1399 ///
1400 /// @param b the second sequence we care about.
1401 ///
1402 /// @param v the vector that contains the end points of the furthest
1403 /// reaching d-path and (d-1)-path.
1404 template<typename RandomAccessOutputIterator>
1405 int
1406 ses_len(RandomAccessOutputIterator a_begin,
1407  RandomAccessOutputIterator a_end,
1408  RandomAccessOutputIterator b_begin,
1409  RandomAccessOutputIterator b_end,
1410  d_path_vec& v, bool reverse)
1411 {
1412  return ses_len<RandomAccessOutputIterator, default_eq_functor>(a_begin, a_end,
1413  b_begin, b_end,
1414  v, reverse);
1415 }
1416 
1417 int
1418 ses_len(const char* str1,
1419  const char* str2,
1420  bool reverse = false);
1421 
1422 bool
1423 snake_end_points(const snake& s, point&, point&);
1424 
1425 /// Compute the longest common subsequence of two (sub-regions of)
1426 /// sequences as well as the shortest edit script from transforming
1427 /// the first (sub-region of) sequence into the second (sub-region of)
1428 /// sequence.
1429 ///
1430 /// A sequence is determined by a base, a beginning offset and an end
1431 /// offset. The base always points to the container that contains the
1432 /// sequence to consider. The beginning offset is an iterator that
1433 /// points the beginning of the sub-region of the sequence that we
1434 /// actually want to consider. The end offset is an iterator that
1435 /// points to the end of the sub-region of the sequence that we
1436 /// actually want to consider.
1437 ///
1438 /// This uses the LCS algorithm of the paper at section 4b.
1439 ///
1440 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1441 /// this function. It must be a random access output iterator kind.
1442 ///
1443 /// @tparm EqualityFunctor this must be a class that declares a public
1444 /// call operator member returning a boolean and taking two arguments
1445 /// that must be of the same type as the one pointed to by the @ref
1446 /// RandomAccessOutputIterator template parameter. This functor is
1447 /// used to compare the elements referred to by the iterators passed in
1448 /// argument to this function.
1449 ///
1450 /// @param a_base the iterator to the base of the first sequence.
1451 ///
1452 /// @param a_start an iterator to the beginning of the sub-region
1453 /// of the first sequence to actually consider.
1454 ///
1455 /// @param a_end an iterator to the end of the sub-region of the first
1456 /// sequence to consider.
1457 ///
1458 ///@param b_base an iterator to the base of the second sequence to
1459 ///consider.
1460 ///
1461 /// @param b_start an iterator to the beginning of the sub-region
1462 /// of the second sequence to actually consider.
1463 ///
1464 /// @param b_end an iterator to the end of the sub-region of the
1465 /// second sequence to actually consider.
1466 ///
1467 /// @param lcs the resulting lcs. This is set iff the function
1468 /// returns true.
1469 ///
1470 /// @param ses the resulting shortest editing script.
1471 ///
1472 /// @param ses_len the length of the ses above. Normally this can be
1473 /// retrieved from ses.length(), but this parameter is here for sanity
1474 /// check purposes. The function computes the length of the ses in
1475 /// two redundant ways and ensures that both methods lead to the same
1476 /// result.
1477 ///
1478 /// @return true upon successful completion, false otherwise.
1479 template<typename RandomAccessOutputIterator,
1480  typename EqualityFunctor>
1481 void
1482 compute_diff(RandomAccessOutputIterator a_base,
1483  RandomAccessOutputIterator a_begin,
1484  RandomAccessOutputIterator a_end,
1485  RandomAccessOutputIterator b_base,
1486  RandomAccessOutputIterator b_begin,
1487  RandomAccessOutputIterator b_end,
1488  vector<point>& lcs,
1489  edit_script& ses,
1490  int& ses_len)
1491 {
1492  int a_size = a_end - a_begin;
1493  int b_size = b_end - b_begin;
1494  unsigned a_offset = a_begin - a_base, b_offset = b_begin - b_base;
1495 
1496  if (a_size == 0 || b_size == 0)
1497  {
1498  if (a_size > 0 && b_size == 0)
1499  // All elements of the first sequences have been deleted. So add
1500  // the relevant deletions to the edit script.
1501  for (RandomAccessOutputIterator i = a_begin; i < a_end; ++i)
1502  ses.deletions().push_back(deletion(i - a_base));
1503 
1504  if (b_size > 0 && a_size == 0)
1505  {
1506  // All elements present in the second sequence are part of
1507  // an insertion into the first sequence at a_end. So add
1508  // that insertion to the edit script.
1509  int a_full_size = a_end - a_base;
1510  int insertion_index = a_full_size ? a_full_size - 1 : -1;
1511  insertion ins(insertion_index);
1512  for (RandomAccessOutputIterator i = b_begin; i < b_end; ++i)
1513  ins.inserted_indexes().push_back(i - b_base);
1514 
1515  ses.insertions().push_back(ins);
1516  }
1517 
1518  ses_len = a_size + b_size;
1519  return;
1520  }
1521 
1522  int d = 0;
1523  snake snak;
1524  vector<point> trace; // the trace of the edit graph. Read the paper
1525  // to understand what a trace is.
1526  bool has_snake =
1527  compute_middle_snake<RandomAccessOutputIterator,
1528  EqualityFunctor>(a_begin, a_end,
1529  b_begin, b_end,
1530  snak, d);
1531  if (has_snake)
1532  {
1533  // So middle_{begin,end} are expressed wrt a_begin and b_begin.
1534  // Let's express them wrt a_base and b_base.
1535  snak.add(a_offset, b_offset);
1536  ses_len = d;
1537  }
1538 
1539  if (has_snake)
1540  {
1541  if ( snak.has_diagonal_edge())
1542  for (int x = snak.diagonal_start().x(), y = snak.diagonal_start().y();
1543  x <= snak.end().x() && y <= snak.end().y();
1544  ++x, ++y)
1545  {
1546  point p(x, y);
1547  trace.push_back(p);
1548  }
1549  }
1550  else
1551  {
1552  // So there is no middle snake. That means there is no lcs, so
1553  // the two sequences are different.
1554 
1555  // In other words, all the elements of the first sequence have
1556  // been deleted ...
1557  for (RandomAccessOutputIterator i = a_begin; i < a_end; ++i)
1558  ses.deletions().push_back(deletion(i - a_base));
1559 
1560  // ... and all the elements of the second sequence are insertions
1561  // that happen at the beginning of the first sequence.
1562  insertion ins(a_begin - a_base);
1563  for (RandomAccessOutputIterator i = b_begin; i < b_end; ++i)
1564  ins.inserted_indexes().push_back(i - b_base);
1565  ses.insertions().push_back(ins);
1566 
1567  ses_len = a_size + b_size;
1568  ABG_ASSERT(ses_len == ses.length());
1569  return;
1570  }
1571 
1572  if (d > 1)
1573  {
1574  int tmp_ses_len0 = 0;
1575  edit_script tmp_ses0;
1576  point px, pu;
1577  snake_end_points(snak, px, pu);
1578  compute_diff<RandomAccessOutputIterator,
1579  EqualityFunctor>(a_base, a_begin, a_base + (px.x() + 1),
1580  b_base, b_begin, b_base + (px.y() + 1),
1581  lcs, tmp_ses0, tmp_ses_len0);
1582 
1583  lcs.insert(lcs.end(), trace.begin(), trace.end());
1584 
1585  int tmp_ses_len1 = 0;
1586  edit_script tmp_ses1;
1587  compute_diff<RandomAccessOutputIterator,
1588  EqualityFunctor>(a_base, a_base + (pu.x() + 1), a_end,
1589  b_base, b_base + (pu.y() + 1), b_end,
1590  lcs, tmp_ses1, tmp_ses_len1);
1591  ABG_ASSERT(tmp_ses0.length() + tmp_ses1.length() == d);
1592  ABG_ASSERT(tmp_ses_len0 + tmp_ses_len1 == d);
1593  ses.append(tmp_ses0);
1594  ses.append(tmp_ses1);
1595  }
1596  else if (d == 1)
1597  {
1598  if (snak.has_diagonal_edge())
1599  {
1600  for (int x = snak.diagonal_start().x(), y = snak.diagonal_start().y();
1601  x <= snak.end().x() && y <= snak.end().y();
1602  ++x, ++y)
1603  {
1604  point p(x, y);
1605  trace.push_back(p);
1606  }
1607  }
1608 
1609  if (snak.has_vertical_edge())
1610  {
1611  point p = snak.intermediate();
1612  insertion ins(p.x());
1613  ins.inserted_indexes().push_back(p.y());
1614  ses.insertions().push_back(ins);
1615  }
1616  else if (snak.has_horizontal_edge())
1617  {
1618  if (snak.is_forward())
1619  {
1620  deletion del(snak.intermediate().x());
1621  ses.deletions().push_back(del);
1622  }
1623  else
1624  {
1625  deletion del(snak.begin().x());
1626  ses.deletions().push_back(del);
1627  }
1628  }
1629  }
1630  else if (d == 0)
1631  {
1632  // Obviously on the middle snake is part of the solution, as
1633  // there is no edit script; iow, the two sequences are
1634  // identical.
1635  lcs.insert(lcs.end(), trace.begin(), trace.end());
1636  ses_len = 0;
1637  }
1638 
1639  ABG_ASSERT(ses_len == ses.length());
1640 }
1641 
1642 /// Compute the longest common subsequence of two (sub-regions of)
1643 /// sequences as well as the shortest edit script from transforming
1644 /// the first (sub-region of) sequence into the second (sub-region of)
1645 /// sequence.
1646 ///
1647 /// This uses the LCS algorithm of the paper at section 4b.
1648 ///
1649 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1650 /// this function. It must be a random access output iterator kind.
1651 ///
1652 /// @tparm EqualityFunctor this must be a class that declares a public
1653 /// call operator member returning a boolean and taking two arguments
1654 /// that must be of the same type as the one pointed to by the @ref
1655 /// RandomAccessOutputIterator template parameter. This functor is
1656 /// used to compare the elements referred to by the iterators passed in
1657 /// argument to this function.
1658 ///
1659 /// @param a_start an iterator to the beginning of the first sequence
1660 /// to consider.
1661 ///
1662 /// @param a_end an iterator to the end of the first sequence to
1663 /// consider.
1664 ///
1665 /// @param b_start an iterator to the beginning of the second sequence
1666 /// to consider.
1667 ///
1668 /// @param b_end an iterator to the end of the second sequence to
1669 /// consider.
1670 ///
1671 /// @param lcs the resulting lcs. This is set iff the function
1672 /// returns true.
1673 ///
1674 /// @param ses the resulting shortest editing script.
1675 ///
1676 /// @param ses_len the length of the ses above. Normally this can be
1677 /// retrieved from ses.length(), but this parameter is here for sanity
1678 /// check purposes. The function computes the length of the ses in
1679 /// two redundant ways and ensures that both methods lead to the same
1680 /// result.
1681 ///
1682 /// @return true upon successful completion, false otherwise.
1683 template<typename RandomAccessOutputIterator,
1684  typename EqualityFunctor>
1685 void
1686 compute_diff(RandomAccessOutputIterator a_begin,
1687  RandomAccessOutputIterator a_end,
1688  RandomAccessOutputIterator b_begin,
1689  RandomAccessOutputIterator b_end,
1690  vector<point>& lcs,
1691  edit_script& ses,
1692  int& ses_len)
1693 {
1694  compute_diff<RandomAccessOutputIterator,
1695  EqualityFunctor>(a_begin, a_begin, a_end,
1696  b_begin, b_begin, b_end,
1697  lcs, ses, ses_len);
1698 }
1699 
1700 /// Compute the longest common subsequence of two (sub-regions of)
1701 /// sequences as well as the shortest edit script from transforming
1702 /// the first (sub-region of) sequence into the second (sub-region of)
1703 /// sequence.
1704 ///
1705 /// A sequence is determined by a base, a beginning offset and an end
1706 /// offset. The base always points to the container that contains the
1707 /// sequence to consider. The beginning offset is an iterator that
1708 /// points the beginning of the sub-region of the sequence that we
1709 /// actually want to consider. The end offset is an iterator that
1710 /// points to the end of the sub-region of the sequence that we
1711 /// actually want to consider.
1712 ///
1713 /// This uses the LCS algorithm of the paper at section 4b.
1714 ///
1715 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1716 /// this function. It must be a random access output iterator kind.
1717 ///
1718 /// @tparm EqualityFunctor this must be a class that declares a public
1719 /// call operator member returning a boolean and taking two arguments
1720 /// that must be of the same type as the one pointed to by the @ref
1721 /// RandomAccessOutputIterator template parameter. This functor is
1722 /// used to compare the elements referred to by the iterators passed in
1723 /// argument to this function.
1724 ///
1725 /// @param a_base the iterator to the base of the first sequence.
1726 ///
1727 /// @param a_start an iterator to the beginning of the sub-region
1728 /// of the first sequence to actually consider.
1729 ///
1730 /// @param a_end an iterator to the end of the sub-region of the first
1731 /// sequence to consider.
1732 ///
1733 ///@param b_base an iterator to the base of the second sequence to
1734 ///consider.
1735 ///
1736 /// @param b_start an iterator to the beginning of the sub-region
1737 /// of the second sequence to actually consider.
1738 ///
1739 /// @param b_end an iterator to the end of the sub-region of the
1740 /// second sequence to actually consider.
1741 ///
1742 /// @param lcs the resulting lcs. This is set iff the function
1743 /// returns true.
1744 ///
1745 /// @param ses the resulting shortest editing script.
1746 ///
1747 /// @return true upon successful completion, false otherwise.
1748 template<typename RandomAccessOutputIterator,
1749  typename EqualityFunctor>
1750 void
1751 compute_diff(RandomAccessOutputIterator a_base,
1752  RandomAccessOutputIterator a_begin,
1753  RandomAccessOutputIterator a_end,
1754  RandomAccessOutputIterator b_base,
1755  RandomAccessOutputIterator b_begin,
1756  RandomAccessOutputIterator b_end,
1757  vector<point>& lcs,
1758  edit_script& ses)
1759 {
1760  int ses_len = 0;
1761 
1762  compute_diff<RandomAccessOutputIterator,
1763  EqualityFunctor>(a_base, a_begin, a_end,
1764  b_base, b_begin, b_end,
1765  lcs, ses, ses_len);
1766 }
1767 
1768 /// Compute the longest common subsequence of two (sub-regions of)
1769 /// sequences as well as the shortest edit script from transforming
1770 /// the first (sub-region of) sequence into the second (sub-region of)
1771 /// sequence.
1772 ///
1773 /// This uses the LCS algorithm of the paper at section 4b.
1774 ///
1775 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1776 /// this function. It must be a random access output iterator kind.
1777 ///
1778 /// @tparm EqualityFunctor this must be a class that declares a public
1779 /// call operator member returning a boolean and taking two arguments
1780 /// that must be of the same type as the one pointed to by the @ref
1781 /// RandomAccessOutputIterator template parameter. This functor is
1782 /// used to compare the elements referred to by the iterators passed in
1783 /// argument to this function.
1784 ///
1785 /// @param a_start an iterator to the beginning of the first sequence
1786 /// to consider.
1787 ///
1788 /// @param a_end an iterator to the end of the first sequence to
1789 /// consider.
1790 ///
1791 /// @param b_start an iterator to the beginning of the sequence to
1792 /// actually consider.
1793 ///
1794 /// @param b_end an iterator to the end of second sequence to
1795 /// consider.
1796 ///
1797 /// @param lcs the resulting lcs. This is set iff the function
1798 /// returns true.
1799 ///
1800 /// @param ses the resulting shortest editing script.
1801 ///
1802 /// @return true upon successful completion, false otherwise.
1803 template<typename RandomAccessOutputIterator,
1804  typename EqualityFunctor>
1805 void
1806 compute_diff(RandomAccessOutputIterator a_begin,
1807  RandomAccessOutputIterator a_end,
1808  RandomAccessOutputIterator b_begin,
1809  RandomAccessOutputIterator b_end,
1810  vector<point>& lcs,
1811  edit_script& ses)
1812 {
1813  compute_diff<RandomAccessOutputIterator,
1814  EqualityFunctor>(a_begin, a_begin, a_end,
1815  b_begin, b_begin, b_end,
1816  lcs, ses);
1817 }
1818 
1819 /// Compute the longest common subsequence of two (sub-regions of)
1820 /// sequences as well as the shortest edit script from transforming
1821 /// the first (sub-region of) sequence into the second (sub-region of)
1822 /// sequence.
1823 ///
1824 /// This uses the LCS algorithm of the paper at section 4b.
1825 ///
1826 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1827 /// this function. It must be a random access output iterator kind.
1828 ///
1829 /// @param a_start an iterator to the beginning of the first sequence
1830 /// to consider.
1831 ///
1832 /// @param a_end an iterator to the end of the first sequence to
1833 /// consider.
1834 ///
1835 /// @param b_start an iterator to the beginning of the sequence to
1836 /// actually consider.
1837 ///
1838 /// @param b_end an iterator to the end of second sequence to
1839 /// consider.
1840 ///
1841 /// @param lcs the resulting lcs. This is set iff the function
1842 /// returns true.
1843 ///
1844 /// @param ses the resulting shortest editing script.
1845 ///
1846 /// @return true upon successful completion, false otherwise.
1847 template<typename RandomAccessOutputIterator>
1848 void
1849 compute_diff(RandomAccessOutputIterator a_begin,
1850  RandomAccessOutputIterator a_end,
1851  RandomAccessOutputIterator b_begin,
1852  RandomAccessOutputIterator b_end,
1853  vector<point>& lcs,
1854  edit_script& ses)
1855 {
1856  compute_diff<RandomAccessOutputIterator,
1857  default_eq_functor>(a_begin, a_end, b_begin, b_end, lcs, ses);
1858 }
1859 
1860 /// Compute the longest common subsequence of two (sub-regions of)
1861 /// sequences as well as the shortest edit script from transforming
1862 /// the first (sub-region of) sequence into the second (sub-region of)
1863 /// sequence.
1864 ///
1865 /// A sequence is determined by a base, a beginning offset and an end
1866 /// offset. The base always points to the container that contains the
1867 /// sequence to consider. The beginning offset is an iterator that
1868 /// points the beginning of the sub-region of the sequence that we
1869 /// actually want to consider. The end offset is an iterator that
1870 /// points to the end of the sub-region of the sequence that we
1871 /// actually want to consider.
1872 ///
1873 /// This uses the LCS algorithm of the paper at section 4b.
1874 ///
1875 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1876 /// this function. It must be a random access output iterator kind.
1877 ///
1878 /// @tparm EqualityFunctor this must be a class that declares a public
1879 /// call operator member returning a boolean and taking two arguments
1880 /// that must be of the same type as the one pointed to by the @ref
1881 /// RandomAccessOutputIterator template parameter. This functor is
1882 /// used to compare the elements referred to by the iterators passed in
1883 /// argument to this function.
1884 ///
1885 /// @param a_base the iterator to the base of the first sequence.
1886 ///
1887 /// @param a_start an iterator to the beginning of the sub-region
1888 /// of the first sequence to actually consider.
1889 ///
1890 /// @param a_end an iterator to the end of the sub-region of the first
1891 /// sequence to consider.
1892 ///
1893 /// @param b_base an iterator to the base of the second sequence to
1894 /// consider.
1895 ///
1896 /// @param b_start an iterator to the beginning of the sub-region
1897 /// of the second sequence to actually consider.
1898 ///
1899 /// @param b_end an iterator to the end of the sub-region of the
1900 /// second sequence to actually consider.
1901 ///
1902 /// @param ses the resulting shortest editing script.
1903 ///
1904 /// @return true upon successful completion, false otherwise.
1905 template<typename RandomAccessOutputIterator,
1906  typename EqualityFunctor>
1907 void
1908 compute_diff(RandomAccessOutputIterator a_base,
1909  RandomAccessOutputIterator a_begin,
1910  RandomAccessOutputIterator a_end,
1911  RandomAccessOutputIterator b_base,
1912  RandomAccessOutputIterator b_begin,
1913  RandomAccessOutputIterator b_end,
1914  edit_script& ses)
1915 {
1916  vector<point> lcs;
1917 
1918  compute_diff<RandomAccessOutputIterator,
1919  EqualityFunctor>(a_base, a_begin, a_end,
1920  b_base, b_begin, b_end,
1921  lcs, ses);
1922 }
1923 
1924 /// Compute the longest common subsequence of two (sub-regions of)
1925 /// sequences as well as the shortest edit script from transforming
1926 /// the first (sub-region of) sequence into the second (sub-region of)
1927 /// sequence.
1928 ///
1929 /// This uses the LCS algorithm of the paper at section 4b.
1930 ///
1931 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1932 /// this function. It must be a random access output iterator kind.
1933 ///
1934 /// @tparm EqualityFunctor this must be a class that declares a public
1935 /// call operator member returning a boolean and taking two arguments
1936 /// that must be of the same type as the one pointed to by the @ref
1937 /// RandomAccessOutputIterator template parameter. This functor is
1938 /// used to compare the elements referred to by the iterators passed in
1939 /// argument to this function.
1940 ///
1941 /// @param a_start an iterator to the beginning of the first sequence
1942 /// to consider.
1943 ///
1944 /// @param a_end an iterator to the end of the first sequence to
1945 /// consider.
1946 ///
1947 /// @param b_start an iterator to the beginning of the second sequence
1948 /// to consider.
1949 ///
1950 /// @param b_end an iterator to the end of the second sequence to
1951 /// consider.
1952 ///
1953 /// @param ses the resulting shortest editing script.
1954 ///
1955 /// @return true upon successful completion, false otherwise.
1956 template<typename RandomAccessOutputIterator,
1957  typename EqualityFunctor>
1958 void
1959 compute_diff(RandomAccessOutputIterator a_begin,
1960  RandomAccessOutputIterator a_end,
1961  RandomAccessOutputIterator b_begin,
1962  RandomAccessOutputIterator b_end,
1963  edit_script& ses)
1964 {
1965  compute_diff<RandomAccessOutputIterator,
1966  EqualityFunctor>(a_begin, a_begin, a_end,
1967  b_begin, b_begin, b_end,
1968  ses);
1969 }
1970 
1971 /// Compute the longest common subsequence of two (sub-regions of)
1972 /// sequences as well as the shortest edit script from transforming
1973 /// the first (sub-region of) sequence into the second (sub-region of)
1974 /// sequence.
1975 ///
1976 /// This uses the LCS algorithm of the paper at section 4b.
1977 ///
1978 /// @tparm RandomAccessOutputIterator the type of iterators passed to
1979 /// this function. It must be a random access output iterator kind.
1980 ///
1981 /// @param a_start an iterator to the beginning of the first sequence
1982 /// to consider.
1983 ///
1984 /// @param a_end an iterator to the end of the first sequence to
1985 /// consider.
1986 ///
1987 /// @param b_start an iterator to the beginning of the second sequence
1988 /// to consider.
1989 ///
1990 /// @param b_end an iterator to the end of the second sequence to
1991 /// consider.
1992 ///
1993 /// @param ses the resulting shortest editing script.
1994 ///
1995 /// @return true upon successful completion, false otherwise.
1996 template<typename RandomAccessOutputIterator>
1997 void
1998 compute_diff(RandomAccessOutputIterator a_begin,
1999  RandomAccessOutputIterator a_end,
2000  RandomAccessOutputIterator b_begin,
2001  RandomAccessOutputIterator b_end,
2002  edit_script& ses)
2003 {
2004  compute_diff<RandomAccessOutputIterator, default_eq_functor>(a_begin, a_end,
2005  b_begin, b_end,
2006  ses);
2007 }
2008 
2009 void
2010 compute_lcs(const char* str1, const char* str2, int &ses_len, string& lcs);
2011 
2012 void
2013 compute_ses(const char* str1, const char* str2, edit_script& ses);
2014 
2015 /// Display an edit script on standard output.
2016 ///
2017 /// @param es the edit script to display
2018 ///
2019 /// @param str1_base the first string the edit script is about.
2020 ///
2021 /// @pram str2_base the second string the edit script is about.
2022 template<typename RandomAccessOutputIterator>
2023 void
2025  const RandomAccessOutputIterator str1_base,
2026  const RandomAccessOutputIterator str2_base,
2027  ostream& out)
2028 {
2029  if (es.num_deletions() == 0)
2030  out << "no deletion:\n";
2031  else if (es.num_deletions() == 1)
2032  {
2033  out << "1 deletion:\n"
2034  << "\t happened at index: ";
2035  }
2036  else
2037  {
2038  out << es.num_deletions() << " deletions:\n"
2039  << "\t happened at indexes: ";
2040  }
2041 
2042  for (vector<deletion>::const_iterator i = es.deletions().begin();
2043  i != es.deletions().end();
2044  ++i)
2045  {
2046  if (i != es.deletions().begin())
2047  out << ", ";
2048  out << i->index() << " (" << str1_base[i->index()] << ")";
2049  }
2050  out << "\n\n";
2051 
2052  if (es.num_insertions() == 0)
2053  out << "no insertion\n";
2054  else if (es.num_insertions() == 1)
2055  out << "1 insertion\n";
2056  else
2057  out << es.num_insertions() << " insertions:\n";
2058  for (vector<insertion>::const_iterator i = es.insertions().begin();
2059  i != es.insertions().end();
2060  ++i)
2061  {
2062  int idx = i->insertion_point_index();
2063  if (idx < 0)
2064  out << "\t before index of first sequence: " << idx + 1
2065  << " (" << str1_base[idx + 1] << ")\n";
2066  else
2067  out << "\t after index of first sequence: " << idx
2068  << " (" << str1_base[idx] << ")\n";
2069 
2070  if (!i->inserted_indexes().empty())
2071  out << "\t\t inserted indexes from second sequence: ";
2072 
2073  for (vector<unsigned>::const_iterator j = i->inserted_indexes().begin();
2074  j != i->inserted_indexes().end();
2075  ++j)
2076  {
2077  if (j != i->inserted_indexes().begin())
2078  out << ", ";
2079  out << *j << " (" << str2_base[*j] << ")";
2080  }
2081  out << "\n";
2082  }
2083  out << "\n\n";
2084 }
2085 
2086 }//end namespace diff_utils
2087 
2088 }//end namespace abigail
2089 #endif // __ABG_DIFF_UTILS_H__
snake()
Default constructor for snake.
const point & intermediate() const
Getter for the end point of the non-diagonal edge of the snake.
The abstraction of the Snake concept, from the paper.
A class representing a vertex in an edit graph, as explained in the paper. A vertex is a basically a ...
bool point_is_valid_in_graph(point &p, unsigned a_size, unsigned b_size)
bool compute_middle_snake(const char *str1, const char *str2, snake &s, int &ses_len)
Returns the middle snake of two strings, as well as the length of their shortest editing script...
void diagonal_start(const point &p)
Setter for the end point of the first diagonal edge, aka diagonal start point.
The abstration of an insertion of elements of a sequence B into a sequence A. This is used to represe...
bool end_of_fr_d_path_in_k(int k, int d, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_start, RandomAccessOutputIterator b_end, d_path_vec &v, snake &snak)
Find the end of the furthest reaching d-path on diagonal k, for two sequences. In the paper This is r...
bool operator()(const T a, const T b) const
This equality operator uses the default "==" to compare its arguments.
The abstraction of the deletion of one element of a sequence A.
void set(const point &b, const point &i, const point &e)
Setter for the begin, intermediate and end points of the snake.
void print_snake(RandomAccessOutputIterator a_begin, RandomAccessOutputIterator b_begin, const snake &s, ostream &out)
This prints the middle snake of two strings.
Toplevel namespace for libabigail.
void set_forward(bool f)
Set to true if the snake is a forward snake; that is, if it was built while walking the edit graph go...
void add(int x_offset, int y_offset)
Add an offset to the abscissas of the points of the snake, and add another offset to the ordinates of...
The default equality functor used by the core diffing algorithms.
int ses_len(const char *str1, const char *str2, bool reverse)
Compute the length of the shortest edit script for two strings. This is done using the "Greedy LCS/SE...
void compute_lcs(const char *str1, const char *str2, int &ses_len, string &lcs)
Compute the longest common subsequence of two strings and return the length of the shortest edit scri...
void compute_ses(const char *str1, const char *str2, edit_script &ses)
Compute the shortest edit script for transforming a string into another.
bool operator()(const T *first, const T *second) const
This equality operator compares pointers by comparing the pointed-to objects.
void set(const point &b, const point &i, const point &d, const point &e)
Setter for the begin, intermediate, diagonal start and end points of the snake.
#define ABG_ASSERT(cond)
This is a wrapper around the 'assert' glibc call. It allows for its argument to have side effects...
Definition: abg-fwd.h:1659
bool has_horizontal_edge() const
d_path_vec(unsigned size1, unsigned size2)
Constructor of the d_path_vec.
snake(const point &b, const point &i, const point &d, const point &e)
Constructor from the beginning, intermediate and end points.
bool end_of_frr_d_path_in_k_plus_delta(int k, int d, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, d_path_vec &v, snake &snak)
Find the end of the furthest reaching reverse d-path on diagonal k + delta. Delta is abs(M - N)...
void intermediate(const point &p)
Setter for the end point of the non-diagonal edge of the snake, aka intermediate point.
The abstraction of an edit script for transforming a sequence A into a sequence B.
void begin(const point &p)
Getter for the starting point of the non-diagonal edge of the snake, aka begin point.
bool operator()(const weak_ptr< T > first, const weak_ptr< T > second) const
This equality operator compares pointers by comparing the pointed-to objects.
bool operator()(const shared_ptr< T > first, const shared_ptr< T > second) const
This equality operator compares pointers by comparing the pointed-to objects.
void end(const point &p)
Setter for the end point of the last diagonal edge, aka snake end point. Note that if the snake has n...
const point & end() const
Getter for the end point of the last diagonal edge, aka snake end point. Note that if the snake has n...
An equality functor to deeply compare pointers.
bool is_match_point(RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, const point &point)
Tests if a given point is a match point in an edit graph.
void display_edit_script(const edit_script &es, const RandomAccessOutputIterator str1_base, const RandomAccessOutputIterator str2_base, ostream &out)
Display an edit script on standard output.
snake(const point &b, const point &i, const point &e)
Constructor from the beginning, intermediate and end points.
const point & begin() const
Getter for the starting point of the non-diagonal edge of the snake.
const point & diagonal_start() const
Getter for the end point of the first diagonal edge, aka diagonal start point. Note that if the snake...
bool snake_end_points(const snake &s, point &x, point &u)
Get the end points of the snake, as intended by the paper.
bool ends_of_furthest_d_paths_overlap(const point &forward_d_path_end, const point &reverse_d_path_end)
void compute_diff(RandomAccessOutputIterator a_base, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_base, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, vector< point > &lcs, edit_script &ses, int &ses_len)
Compute the longest common subsequence of two (sub-regions of) sequences as well as the shortest edit...
The array containing the furthest D-path end-points, for each value of K. MAX_D is the maximum value ...