libabigail
abg-diff-utils.cc
Go to the documentation of this file.
1// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2// -*- Mode: C++ -*-
3//
4// Copyright (C) 2013-2025 Red Hat, Inc.
5
6#include <cstring>
7
8#include "abg-fwd.h"
9#include "abg-internal.h"
10// <headers defining libabigail's API go under here>
11ABG_BEGIN_EXPORT_DECLARATIONS
12
13#include "abg-diff-utils.h"
14
15ABG_END_EXPORT_DECLARATIONS
16// </headers defining libabigail's API>
17
18/// @file
19///
20/// This file defines the declarations found in abg-diff-utils.h
21namespace abigail
22{
23
24namespace diff_utils
25{
26/// @return true iff the end of a furthest forward d_path overlaps the
27/// end of a furtherst reverse d_path on the same diagonal. This is
28/// defined by the lemma 3 of the paper.
29///
30/// @param forward_d_path_end
31bool
32ends_of_furthest_d_paths_overlap(const point& forward_d_path_end,
33 const point& reverse_d_path_end)
34{
35 return ((forward_d_path_end.x() - forward_d_path_end.y())
36 == (reverse_d_path_end.x() - reverse_d_path_end.y())
37 && forward_d_path_end.x() >= reverse_d_path_end.x());
38}
39//// Test if a point is within the boundaries of the edit graph.
40///
41/// @param p the point to test.
42///
43/// @param a_size the size of abscissas. That is, the size of the
44/// first input to consider.
45///
46/// @param b_size the of ordinates. That is, the size of the second
47/// input to consider.
48///
49/// @return true iff a point is within the boundaries of the edit
50/// graph.
51bool
53 unsigned a_size,
54 unsigned b_size)
55{
56 return (p.x() > -1
57 && p.x() < (int) a_size
58 && p.y() > -1
59 && p.y() < (int) b_size);
60}
61
62/// Get the end points of the snake, as intended by the paper.
63///
64/// @param s the snake to consider.
65///
66/// @param x the starting point of the snake.
67///
68/// @param u the end point of the snake.
69bool
71{
72 if (s.is_empty())
73 return false;
74
75 if (s.is_forward())
76 {
77 x = s.intermediate();
78 u = s.end();
79 }
80 else
81 {
82 x = s.end();
83 u = s.intermediate();
84 }
85 return true;
86}
87
88/// Returns the middle snake of two strings, as well as the length of
89/// their shortest editing script.
90///
91/// @param str1 the first string to consider.
92///
93/// @param str2 the second string to consider.
94///
95/// @param snake_begin the point representing the beginning
96bool
97compute_middle_snake(const char* str1, const char* str2,
98 snake& s, int& ses_len)
99{
100 bool has_snake = false;
101 int str1_size = strlen(str1), str2_size = strlen(str2);
102
103 if (compute_middle_snake<const char*,
104 default_eq_functor>(str1, str1 + str1_size,
105 str2 , str2 + str2_size,
106 s, ses_len))
107 has_snake = true;
108
109 return has_snake;
110}
111
112/// Compute the length of the shortest edit script for two strings.
113/// This is done using the "Greedy LCS/SES" of figure 2 in the paper.
114/// It can walk the edit graph either foward (when reverse is false)
115/// or backward starting from the end (when reverse is true).
116///
117/// @param str1 the first string to consider.
118///
119/// @param str2 the second string to consider.
120///
121/// @param reverse when true, walk the edit graph backward, starting
122/// from the point (M,N) (with M and N being the length of str1 and
123/// str2). When false, walk the edit graph forward, starting from the
124/// point (0,0).
125///
126/// @return the length of the shortest edit script.
127int
128ses_len(const char* str1,
129 const char* str2,
130 bool reverse)
131{
132 int str1_size = strlen(str1), str2_size = strlen(str2);
133
134 d_path_vec v(str1_size, str2_size);
135 return ses_len(str1, str1 + str1_size,
136 str2, str2 + str2_size,
137 v, reverse);
138}
139
140/// Compute the longest common subsequence of two strings and return
141/// the length of the shortest edit script for transforming the first
142/// string into the second.
143///
144/// @param str1 the first string to consider.
145///
146/// @param str2 the second string to consider.
147///
148/// @param ses_len the length of the shortest edit script. This is
149/// set by the function iff it returns true.
150///
151/// @param the longest common subsequence of the two strings. This is
152/// set the function iff it returns true.
153///
154/// @return true if the function could compute an lcs, false
155/// otherwise.
156void
157compute_lcs(const char* str1, const char* str2, int &ses_len, string& lcs)
158{
159 vector<point> result;
160 edit_script ses;
161
162 compute_diff(str1, str1 + strlen(str1),
163 str2, str2 + strlen(str2),
164 result, ses);
165
166 ses_len = ses.length();
167
168 for (unsigned i = 0; i < result.size(); ++i)
169 {
170 int x = result[i].x(), y = result[i].y();
171 ABG_ASSERT(str1[x] == str2[y]);
172 lcs.push_back(str1[x]);
173 }
174}
175
176/// Compute the shortest edit script for transforming a string into
177/// another.
178///
179/// @param str1 the first string to consider.
180///
181/// @param str2 the second string to consider.
182///
183/// @param ses the resulting edit script.
184///
185/// @pram ses_len the length of the edit script.
186void
187compute_ses(const char* str1, const char* str2, edit_script& ses)
188{
189 vector<point> lcs;
190 compute_diff(str1, str1 + strlen(str1),
191 str2, str2 + strlen(str2),
192 lcs, ses);
193}
194
195}//end namespace diff_utils
196
197}//end namespace abigail
This file declares types and operations implementing the "O(ND) Difference Algorithm" (aka diff2) fro...
#define ABG_ASSERT(cond)
This is a wrapper around the 'assert' glibc call. It allows for its argument to have side effects,...
Definition: abg-fwd.h:1737
The array containing the furthest D-path end-points, for each value of K. MAX_D is the maximum value ...
The abstraction of an edit script for transforming a sequence A into a sequence B.
A class representing a vertex in an edit graph, as explained in the paper. A vertex is a basically a ...
The abstraction of the Snake concept, from the paper.
const point & intermediate() const
Getter for the end point of the non-diagonal edge of the snake.
const point & end() const
Getter for the end point of the last diagonal edge, aka snake end point. Note that if the snake has n...
bool compute_middle_snake(const char *str1, const char *str2, snake &s, int &ses_len)
Returns the middle snake of two strings, as well as the length of their shortest editing script.
bool snake_end_points(const snake &s, point &x, point &u)
Get the end points of the snake, as intended by the paper.
void compute_diff(RandomAccessOutputIterator a_base, RandomAccessOutputIterator a_begin, RandomAccessOutputIterator a_end, RandomAccessOutputIterator b_base, RandomAccessOutputIterator b_begin, RandomAccessOutputIterator b_end, vector< point > &lcs, edit_script &ses, int &ses_len)
Compute the longest common subsequence of two (sub-regions of) sequences as well as the shortest edit...
bool ends_of_furthest_d_paths_overlap(const point &forward_d_path_end, const point &reverse_d_path_end)
void compute_lcs(const char *str1, const char *str2, int &ses_len, string &lcs)
Compute the longest common subsequence of two strings and return the length of the shortest edit scri...
void compute_ses(const char *str1, const char *str2, edit_script &ses)
Compute the shortest edit script for transforming a string into another.
int ses_len(const char *str1, const char *str2, bool reverse)
Compute the length of the shortest edit script for two strings. This is done using the "Greedy LCS/SE...
bool point_is_valid_in_graph(point &p, unsigned a_size, unsigned b_size)
Toplevel namespace for libabigail.
The default equality functor used by the core diffing algorithms.