21 #ifndef __ABG_DIFF_UTILS_H__
22 #define __ABG_DIFF_UTILS_H__
45 using std::shared_ptr;
52 using std::ostringstream;
66 : x_(-1), y_(-1),empty_(
true)
70 : x_(x), y_(y), empty_(
false)
74 : x_(p.x()), y_(p.y()), empty_(p.is_empty())
108 set(
int x,
int y,
bool empty)
117 {set (x() + ax, y() + ay);}
120 operator!=(
const point& o)
const
121 {
return (x() != o.x() || y() != o.y() || is_empty() != o.is_empty());}
124 operator==(
const point& o)
const
125 {
return !(operator!=(o));}
128 operator<(
const point& o)
const
129 {
return (x() < o.x() && y() < o.y());}
132 operator>(
const point& o)
const
133 {
return (x() > o.x() && y() > o.y());}
136 operator<=(
const point& o)
const
137 {
return (x() <= o.x() && y() <= o.y());}
140 operator>=(
const point& o)
const
141 {
return (x() >= o.x() && y() >= o.y());}
144 operator+(
int val)
const
145 {
return point(x() + val, y() + val);}
148 operator-(
int val)
const
149 {
return point(x() - val, y() - val);}
154 set(x_ + val, y_ + val);
160 {
return (*
this) += (-val);}
164 {
return (*
this) -= 1;}
168 {
return (*
this) += 1;}
194 operator=(
const point& p)
196 set(p.x(), p.y(), p.is_empty());
204 operator bool ()
const
205 {
return !is_empty();}
245 point begin_, intermediate_, diagonal_start_, end_;
268 : begin_(b), intermediate_(i),
269 end_(e), forward_(false)
289 : begin_(b), intermediate_(i),
290 diagonal_start_(d), end_(e),
315 {
return intermediate_;}
332 {
return diagonal_start_;}
340 {diagonal_start_ = p;}
417 add(
int x_offset,
int y_offset)
422 begin_.add(x_offset, y_offset);
423 intermediate_.add(x_offset, y_offset);
425 diagonal_start_.add(x_offset, y_offset);
426 end_.add(x_offset, y_offset);
463 push_back(
const vector<int>::value_type&);
469 over_bounds(
long long index)
const
470 {
return (index + offset()) >= (
long long) size();}
473 check_index_against_bound(
int index)
const
475 if (over_bounds(index))
478 o <<
"index '" << index
479 <<
"' out of range [-" << max_d() <<
", " << max_d() <<
"]";
480 throw std::out_of_range(o.str());
506 : vector<int>(2 * (size1 + size2 + 1 + (size1 + size2)) + 1, 0),
507 a_size_(size1), b_size_(size2)
511 std::vector<int>::const_reference
512 operator[](
int index)
const
515 std::vector<int>::reference
516 operator[](
int index)
519 std::vector<int>::reference
523 long long i = index + offset();
524 return vector<int>::operator[](i);
527 std::vector<int>::const_reference
528 at(
long long index)
const
530 check_index_against_bound(index);
531 long long i = offset() + index;
532 return static_cast<const vector<int>*
>(
this)->at(i);
545 {
return a_size_ + b_size_;}
549 {
return max_d() + abs((
long long) a_size() - (
long long) b_size());}
566 int insertion_point_;
567 vector<unsigned> inserted_;
572 const vector<unsigned>& inserted_indexes)
573 : insertion_point_(insertion_point),
574 inserted_(inserted_indexes)
578 : insertion_point_(insertion_point)
582 insertion_point_index()
const
583 {
return insertion_point_;}
586 insertion_point_index(
int i)
587 {insertion_point_ = i;}
589 const vector<unsigned>&
590 inserted_indexes()
const
627 vector<insertion> insertions_;
628 vector<deletion> deletions_;
635 const vector<insertion>&
637 {
return insertions_;}
641 {
return insertions_;}
643 const vector<deletion>&
654 insertions().insert(insertions().end(),
655 es.insertions().begin(),
656 es.insertions().end());
657 deletions().insert(deletions().end(),
658 es.deletions().begin(),
659 es.deletions().end());
665 insertions().insert(insertions().begin(),
666 es.insertions().begin(),
667 es.insertions().end());
668 deletions().insert(deletions().begin(),
669 es.deletions().begin(),
670 es.deletions().end());
676 insertions().clear();
682 {
return insertions().empty() && deletions().empty();}
684 operator bool()
const
685 {
return !is_empty();}
688 num_insertions()
const
691 for (vector<insertion>::const_iterator i = insertions().begin();
692 i != insertions().end();
694 l += i->inserted_indexes().size();
699 num_deletions()
const
700 {
return deletions().size();}
704 {
return num_insertions() + num_deletions();}
714 const point& reverse_d_path_end);
749 const T* second)
const
751 if (!!first != !!second)
757 return *first == *second;
772 const shared_ptr<T> second)
const
773 {
return operator()(first.get(), second.get());}
787 const weak_ptr<T> second)
const
788 {
return operator()(shared_ptr<T>(first), shared_ptr<T>(second));}
838 template<
typename RandomAccessOutputIterator,
839 typename EqualityFunctor>
842 RandomAccessOutputIterator a_begin,
843 RandomAccessOutputIterator a_end,
844 RandomAccessOutputIterator b_start,
845 RandomAccessOutputIterator b_end,
849 point begin, intermediate, diag_start, end;
857 if (k == -d || ((k != d) && (v[k-1] < v[k + 1])))
869 begin.set(x, x - (k + 1));
879 begin.set(x, x - (k - 1));
891 int last_x_index = a_end - a_begin - 1;
892 int last_y_index = b_end - b_start - 1;
896 while ((x < last_x_index) && (y < last_y_index))
897 if (eq(a_begin[x + 1], b_start[y + 1]))
902 diag_start.set(x, y);
917 if (x >= (
int) v.a_size()
918 || y >= (int) v.b_size()
922 s.
set(begin, intermediate, diag_start, end);
979 template<
typename RandomAccessOutputIterator,
980 typename EqualityFunctor>
983 RandomAccessOutputIterator a_begin,
984 RandomAccessOutputIterator a_end,
985 RandomAccessOutputIterator b_begin,
986 RandomAccessOutputIterator b_end,
989 int a_size = a_end - a_begin;
990 int b_size = b_end - b_begin;
991 int delta = a_size - b_size;
992 int k_plus_delta = k + delta;
994 point begin, intermediate, diag_start, end;
1004 if (k_plus_delta == -d + delta
1005 || ((k_plus_delta != d + delta)
1006 && (v[k_plus_delta + 1] <= v[k_plus_delta - 1])))
1009 x = v[k_plus_delta + 1];
1010 y = x - (k_plus_delta + 1);
1023 x = v[k_plus_delta - 1];
1024 begin.set(x, x - (k_plus_delta - 1));
1026 y = x - (k_plus_delta - 1) - 1;
1029 intermediate.set(x, y);
1033 while (x >= 0 && y >= 0)
1034 if (eq(a_begin[x], b_begin[y]))
1037 diag_start.set(x, y);
1051 v[k_plus_delta] = x;
1053 if (x == -1 && y == -1)
1055 else if (x < -1 || y < -1)
1058 s.
set(begin, intermediate, diag_start, end);
1084 template<
typename RandomAccessOutputIterator>
1087 RandomAccessOutputIterator a_end,
1088 RandomAccessOutputIterator b_begin,
1089 RandomAccessOutputIterator b_end,
1092 int a_size = a_end - a_begin, b_size = b_end - b_begin;
1095 || point.x () >= a_size
1097 || point.y() >= b_size)
1100 return (a_begin[point.x()] == b_begin[point.y()]);
1137 template<
typename RandomAccessOutputIterator,
1138 typename EqualityFunctor>
1141 RandomAccessOutputIterator a_end,
1142 RandomAccessOutputIterator b_begin,
1143 RandomAccessOutputIterator b_end,
1146 int a_size = a_end - a_begin;
1148 int b_size = b_end - b_begin;
1155 point first_point(-1, -1), last_point(a_size -1, b_size -1), point_zero(0, 0);
1165 forward_d_paths[1] = -1;
1175 reverse_d_paths[delta + 1] = a_size;
1177 int d_max = (M + N) / 2 + 1;
1178 for (
int d = 0; d <= d_max; ++d)
1181 for (
int k = -d; k <= d; k += 2)
1186 EqualityFunctor>(k, d,
1189 forward_d_paths, s);
1199 && (k >= (delta - (d - 1))) && (k <= (delta + (d - 1))))
1202 reverse_end.x(reverse_d_paths[k]);
1203 reverse_end.y(reverse_end.x() - k);
1206 ses_len = 2 * d - 1;
1214 for (
int k = -d; k <= d; k += 2)
1219 EqualityFunctor>(k, d,
1232 int k_plus_delta = k + delta;
1234 && (k_plus_delta >= -d) && (k_plus_delta <= d))
1237 forward_end.x(forward_d_paths[k_plus_delta]);
1238 forward_end.y(forward_end.x() - k_plus_delta);
1264 template<
typename RandomAccessOutputIterator>
1267 RandomAccessOutputIterator b_begin,
1268 const snake &s, ostream& out)
1273 out <<
"snake start: ";
1274 out <<
"(" << s.
begin().x() <<
", " << s.
end().y() <<
")\n";
1276 out <<
"snake intermediate: ";
1279 out <<
"diagonal point(s): ";
1282 x <= s.
end().x() && y <= s.
end().y();
1286 out <<
"(" << x <<
"," << y <<
") ";
1290 out <<
"snake end: ";
1291 out <<
"(" << s.
end().x() <<
", " << s.
end().y() <<
")\n";
1320 template<
typename RandomAccessOutputIterator,
1321 typename EqualityFunctor>
1324 RandomAccessOutputIterator a_end,
1325 RandomAccessOutputIterator b_begin,
1326 RandomAccessOutputIterator b_end,
1329 unsigned a_size = a_end - a_begin;
1330 unsigned b_size = b_end - b_begin;
1335 int delta = a_size - b_size;
1340 v[delta + 1] = a_size - 1;
1346 for (
unsigned d = 0; d <= v.max_d(); ++d)
1348 for (
int k = -d; k <= (int) d; k += 2)
1355 EqualityFunctor>(k, d,
1361 if (found && snak.
end().x() == -1 && snak.
end().y() == -1)
1367 EqualityFunctor>(k, d,
1373 if ((snak.
end().x() == (int) a_size - 1)
1374 && (snak.
end().y() == (int) b_size - 1))
1404 template<
typename RandomAccessOutputIterator>
1407 RandomAccessOutputIterator a_end,
1408 RandomAccessOutputIterator b_begin,
1409 RandomAccessOutputIterator b_end,
1412 return ses_len<RandomAccessOutputIterator, default_eq_functor>(a_begin, a_end,
1420 bool reverse =
false);
1479 template<
typename RandomAccessOutputIterator,
1480 typename EqualityFunctor>
1483 RandomAccessOutputIterator a_begin,
1484 RandomAccessOutputIterator a_end,
1485 RandomAccessOutputIterator b_base,
1486 RandomAccessOutputIterator b_begin,
1487 RandomAccessOutputIterator b_end,
1492 int a_size = a_end - a_begin;
1493 int b_size = b_end - b_begin;
1494 unsigned a_offset = a_begin - a_base, b_offset = b_begin - b_base;
1496 if (a_size == 0 || b_size == 0)
1498 if (a_size > 0 && b_size == 0)
1501 for (RandomAccessOutputIterator i = a_begin; i < a_end; ++i)
1502 ses.deletions().push_back(
deletion(i - a_base));
1504 if (b_size > 0 && a_size == 0)
1509 int a_full_size = a_end - a_base;
1510 int insertion_index = a_full_size ? a_full_size - 1 : -1;
1512 for (RandomAccessOutputIterator i = b_begin; i < b_end; ++i)
1513 ins.inserted_indexes().push_back(i - b_base);
1515 ses.insertions().push_back(ins);
1518 ses_len = a_size + b_size;
1524 vector<point> trace;
1528 EqualityFunctor>(a_begin, a_end,
1535 snak.add(a_offset, b_offset);
1541 if ( snak.has_diagonal_edge())
1542 for (
int x = snak.diagonal_start().x(), y = snak.diagonal_start().y();
1543 x <= snak.end().x() && y <= snak.end().y();
1557 for (RandomAccessOutputIterator i = a_begin; i < a_end; ++i)
1558 ses.deletions().push_back(
deletion(i - a_base));
1563 for (RandomAccessOutputIterator i = b_begin; i < b_end; ++i)
1564 ins.inserted_indexes().push_back(i - b_base);
1565 ses.insertions().push_back(ins);
1567 ses_len = a_size + b_size;
1574 int tmp_ses_len0 = 0;
1579 EqualityFunctor>(a_base, a_begin, a_base + (px.x() + 1),
1580 b_base, b_begin, b_base + (px.y() + 1),
1581 lcs, tmp_ses0, tmp_ses_len0);
1583 lcs.insert(lcs.end(), trace.begin(), trace.end());
1585 int tmp_ses_len1 = 0;
1588 EqualityFunctor>(a_base, a_base + (pu.x() + 1), a_end,
1589 b_base, b_base + (pu.y() + 1), b_end,
1590 lcs, tmp_ses1, tmp_ses_len1);
1591 ABG_ASSERT(tmp_ses0.length() + tmp_ses1.length() == d);
1592 ABG_ASSERT(tmp_ses_len0 + tmp_ses_len1 == d);
1593 ses.append(tmp_ses0);
1594 ses.append(tmp_ses1);
1598 if (snak.has_diagonal_edge())
1600 for (
int x = snak.diagonal_start().x(), y = snak.diagonal_start().y();
1601 x <= snak.end().x() && y <= snak.end().y();
1609 if (snak.has_vertical_edge())
1611 point p = snak.intermediate();
1613 ins.inserted_indexes().push_back(p.y());
1614 ses.insertions().push_back(ins);
1616 else if (snak.has_horizontal_edge())
1618 if (snak.is_forward())
1620 deletion del(snak.intermediate().x());
1621 ses.deletions().push_back(del);
1626 ses.deletions().push_back(del);
1635 lcs.insert(lcs.end(), trace.begin(), trace.end());
1683 template<
typename RandomAccessOutputIterator,
1684 typename EqualityFunctor>
1687 RandomAccessOutputIterator a_end,
1688 RandomAccessOutputIterator b_begin,
1689 RandomAccessOutputIterator b_end,
1695 EqualityFunctor>(a_begin, a_begin, a_end,
1696 b_begin, b_begin, b_end,
1748 template<
typename RandomAccessOutputIterator,
1749 typename EqualityFunctor>
1752 RandomAccessOutputIterator a_begin,
1753 RandomAccessOutputIterator a_end,
1754 RandomAccessOutputIterator b_base,
1755 RandomAccessOutputIterator b_begin,
1756 RandomAccessOutputIterator b_end,
1763 EqualityFunctor>(a_base, a_begin, a_end,
1764 b_base, b_begin, b_end,
1803 template<
typename RandomAccessOutputIterator,
1804 typename EqualityFunctor>
1807 RandomAccessOutputIterator a_end,
1808 RandomAccessOutputIterator b_begin,
1809 RandomAccessOutputIterator b_end,
1814 EqualityFunctor>(a_begin, a_begin, a_end,
1815 b_begin, b_begin, b_end,
1847 template<
typename RandomAccessOutputIterator>
1850 RandomAccessOutputIterator a_end,
1851 RandomAccessOutputIterator b_begin,
1852 RandomAccessOutputIterator b_end,
1905 template<
typename RandomAccessOutputIterator,
1906 typename EqualityFunctor>
1909 RandomAccessOutputIterator a_begin,
1910 RandomAccessOutputIterator a_end,
1911 RandomAccessOutputIterator b_base,
1912 RandomAccessOutputIterator b_begin,
1913 RandomAccessOutputIterator b_end,
1919 EqualityFunctor>(a_base, a_begin, a_end,
1920 b_base, b_begin, b_end,
1956 template<
typename RandomAccessOutputIterator,
1957 typename EqualityFunctor>
1960 RandomAccessOutputIterator a_end,
1961 RandomAccessOutputIterator b_begin,
1962 RandomAccessOutputIterator b_end,
1966 EqualityFunctor>(a_begin, a_begin, a_end,
1967 b_begin, b_begin, b_end,
1996 template<
typename RandomAccessOutputIterator>
1999 RandomAccessOutputIterator a_end,
2000 RandomAccessOutputIterator b_begin,
2001 RandomAccessOutputIterator b_end,
2004 compute_diff<RandomAccessOutputIterator, default_eq_functor>(a_begin, a_end,
2013 compute_ses(
const char* str1,
const char* str2, edit_script& ses);
2022 template<
typename RandomAccessOutputIterator>
2025 const RandomAccessOutputIterator str1_base,
2026 const RandomAccessOutputIterator str2_base,
2029 if (es.num_deletions() == 0)
2030 out <<
"no deletion:\n";
2031 else if (es.num_deletions() == 1)
2033 out <<
"1 deletion:\n"
2034 <<
"\t happened at index: ";
2038 out << es.num_deletions() <<
" deletions:\n"
2039 <<
"\t happened at indexes: ";
2042 for (vector<deletion>::const_iterator i = es.deletions().begin();
2043 i != es.deletions().end();
2046 if (i != es.deletions().begin())
2048 out << i->index() <<
" (" << str1_base[i->index()] <<
")";
2052 if (es.num_insertions() == 0)
2053 out <<
"no insertion\n";
2054 else if (es.num_insertions() == 1)
2055 out <<
"1 insertion\n";
2057 out << es.num_insertions() <<
" insertions:\n";
2058 for (vector<insertion>::const_iterator i = es.insertions().begin();
2059 i != es.insertions().end();
2062 int idx = i->insertion_point_index();
2064 out <<
"\t before index of first sequence: " << idx + 1
2065 <<
" (" << str1_base[idx + 1] <<
")\n";
2067 out <<
"\t after index of first sequence: " << idx
2068 <<
" (" << str1_base[idx] <<
")\n";
2070 if (!i->inserted_indexes().empty())
2071 out <<
"\t\t inserted indexes from second sequence: ";
2073 for (vector<unsigned>::const_iterator j = i->inserted_indexes().begin();
2074 j != i->inserted_indexes().end();
2077 if (j != i->inserted_indexes().begin())
2079 out << *j <<
" (" << str2_base[*j] <<
")";
2089 #endif // __ABG_DIFF_UTILS_H__
snake()
Default constructor for snake.
const point & intermediate() const
Getter for the end point of the non-diagonal edge of the snake.
The abstraction of the Snake concept, from the paper.
A class representing a vertex in an edit graph, as explained in the paper. A vertex is a basically a ...
bool has_diagonal_edge() const
bool point_is_valid_in_graph(point &p, unsigned a_size, unsigned b_size)
bool compute_middle_snake(const char *str1, const char *str2, snake &s, int &ses_len)
Returns the middle snake of two strings, as well as the length of their shortest editing script...
void diagonal_start(const point &p)
Setter for the end point of the first diagonal edge, aka diagonal start point.
The abstration of an insertion of elements of a sequence B into a sequence A. This is used to represe...
bool end_of_fr_d_path_in_k(int k, int d, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_start, RandomAccessOutputIterator b_end, d_path_vec &v, snake &snak)
Find the end of the furthest reaching d-path on diagonal k, for two sequences. In the paper This is r...
bool operator()(const T a, const T b) const
This equality operator uses the default "==" to compare its arguments.
The abstraction of the deletion of one element of a sequence A.
void set(const point &b, const point &i, const point &e)
Setter for the begin, intermediate and end points of the snake.
void print_snake(RandomAccessOutputIterator a_begin, RandomAccessOutputIterator b_begin, const snake &s, ostream &out)
This prints the middle snake of two strings.
Toplevel namespace for libabigail.
void set_forward(bool f)
Set to true if the snake is a forward snake; that is, if it was built while walking the edit graph go...
void add(int x_offset, int y_offset)
Add an offset to the abscissas of the points of the snake, and add another offset to the ordinates of...
The default equality functor used by the core diffing algorithms.
int ses_len(const char *str1, const char *str2, bool reverse)
Compute the length of the shortest edit script for two strings. This is done using the "Greedy LCS/SE...
void compute_lcs(const char *str1, const char *str2, int &ses_len, string &lcs)
Compute the longest common subsequence of two strings and return the length of the shortest edit scri...
void compute_ses(const char *str1, const char *str2, edit_script &ses)
Compute the shortest edit script for transforming a string into another.
bool operator()(const T *first, const T *second) const
This equality operator compares pointers by comparing the pointed-to objects.
void set(const point &b, const point &i, const point &d, const point &e)
Setter for the begin, intermediate, diagonal start and end points of the snake.
#define ABG_ASSERT(cond)
This is a wrapper around the 'assert' glibc call. It allows for its argument to have side effects...
bool has_horizontal_edge() const
d_path_vec(unsigned size1, unsigned size2)
Constructor of the d_path_vec.
snake(const point &b, const point &i, const point &d, const point &e)
Constructor from the beginning, intermediate and end points.
bool end_of_frr_d_path_in_k_plus_delta(int k, int d, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, d_path_vec &v, snake &snak)
Find the end of the furthest reaching reverse d-path on diagonal k + delta. Delta is abs(M - N)...
void intermediate(const point &p)
Setter for the end point of the non-diagonal edge of the snake, aka intermediate point.
The abstraction of an edit script for transforming a sequence A into a sequence B.
void begin(const point &p)
Getter for the starting point of the non-diagonal edge of the snake, aka begin point.
bool operator()(const weak_ptr< T > first, const weak_ptr< T > second) const
This equality operator compares pointers by comparing the pointed-to objects.
bool operator()(const shared_ptr< T > first, const shared_ptr< T > second) const
This equality operator compares pointers by comparing the pointed-to objects.
void end(const point &p)
Setter for the end point of the last diagonal edge, aka snake end point. Note that if the snake has n...
const point & end() const
Getter for the end point of the last diagonal edge, aka snake end point. Note that if the snake has n...
An equality functor to deeply compare pointers.
bool is_match_point(RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, const point &point)
Tests if a given point is a match point in an edit graph.
void display_edit_script(const edit_script &es, const RandomAccessOutputIterator str1_base, const RandomAccessOutputIterator str2_base, ostream &out)
Display an edit script on standard output.
snake(const point &b, const point &i, const point &e)
Constructor from the beginning, intermediate and end points.
const point & begin() const
Getter for the starting point of the non-diagonal edge of the snake.
const point & diagonal_start() const
Getter for the end point of the first diagonal edge, aka diagonal start point. Note that if the snake...
bool snake_end_points(const snake &s, point &x, point &u)
Get the end points of the snake, as intended by the paper.
bool ends_of_furthest_d_paths_overlap(const point &forward_d_path_end, const point &reverse_d_path_end)
void compute_diff(RandomAccessOutputIterator a_base, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_base, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, vector< point > &lcs, edit_script &ses, int &ses_len)
Compute the longest common subsequence of two (sub-regions of) sequences as well as the shortest edit...
bool has_vertical_edge() const
The array containing the furthest D-path end-points, for each value of K. MAX_D is the maximum value ...