Add NoFinishHook and new Myer's implementation (#11)
This commit is contained in:
parent
a1d78c9be6
commit
0341515380
11 changed files with 407 additions and 150 deletions
|
|
@ -5,7 +5,11 @@ All notable changes to similar are documented here.
|
|||
## 1.2.0
|
||||
|
||||
* Make the unicode feature optional for inline diffing.
|
||||
* Added Hunt–McIlroy LCS algorithm.
|
||||
* Added Hunt–McIlroy LCS algorithm (`lcs`).
|
||||
* Changed the implementation of Myers' diff. This has slightly changed the
|
||||
behavior but resulted in significantly improved performance and more
|
||||
readable code.
|
||||
* Added `NoFinishHook` to aid composing of diff hooks.
|
||||
|
||||
## 1.1.0
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
[package]
|
||||
name = "similar"
|
||||
version = "1.1.0"
|
||||
authors = ["Armin Ronacher <armin.ronacher@active-4.com>", "Pierre-Étienne Meunier <pe@pijul.org>"]
|
||||
version = "1.2.0"
|
||||
authors = ["Armin Ronacher <armin.ronacher@active-4.com>", "Pierre-Étienne Meunier <pe@pijul.org>", "Brandon Williams <bwilliams.eng@gmail.com>"]
|
||||
edition = "2018"
|
||||
license = "Apache-2.0"
|
||||
description = "A diff library for Rust"
|
||||
|
|
@ -56,3 +56,6 @@ required-features = ["text", "bytes"]
|
|||
[[example]]
|
||||
name = "close-matches"
|
||||
required-features = ["text"]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ impl Capture {
|
|||
impl DiffHook for Capture {
|
||||
type Error = Infallible;
|
||||
|
||||
#[inline(always)]
|
||||
fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Equal {
|
||||
old_index,
|
||||
|
|
@ -43,6 +44,7 @@ impl DiffHook for Capture {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn delete(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
|
|
@ -57,6 +59,7 @@ impl DiffHook for Capture {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn insert(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
|
|
@ -71,6 +74,7 @@ impl DiffHook for Capture {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn replace(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ pub trait DiffHook: Sized {
|
|||
///
|
||||
/// You can use the [`Replace`](crate::algorithms::Replace) hook to
|
||||
/// automatically generate these.
|
||||
#[inline(always)]
|
||||
fn replace(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
|
|
@ -63,6 +64,7 @@ pub trait DiffHook: Sized {
|
|||
}
|
||||
|
||||
/// Always called at the end of the algorithm.
|
||||
#[inline(always)]
|
||||
fn finish(&mut self) -> Result<(), Self::Error> {
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -71,10 +73,12 @@ pub trait DiffHook: Sized {
|
|||
impl<'a, D: DiffHook + 'a> DiffHook for &'a mut D {
|
||||
type Error = D::Error;
|
||||
|
||||
#[inline(always)]
|
||||
fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
|
||||
(*self).equal(old_index, new_index, len)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn delete(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
|
|
@ -84,6 +88,7 @@ impl<'a, D: DiffHook + 'a> DiffHook for &'a mut D {
|
|||
(*self).delete(old_index, old_len, new_index)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn insert(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
|
|
@ -93,6 +98,7 @@ impl<'a, D: DiffHook + 'a> DiffHook for &'a mut D {
|
|||
(*self).insert(old_index, new_index, new_len)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn replace(
|
||||
&mut self,
|
||||
old: usize,
|
||||
|
|
@ -103,7 +109,70 @@ impl<'a, D: DiffHook + 'a> DiffHook for &'a mut D {
|
|||
(*self).replace(old, old_len, new, new_len)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn finish(&mut self) -> Result<(), Self::Error> {
|
||||
(*self).finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper [`DiffHook`] that prevents calls to [`DiffHook::finish`].
|
||||
///
|
||||
/// This hook is useful in situations where diff hooks are composed but you
|
||||
/// want to prevent that the finish hook method is called.
|
||||
pub struct NoFinishHook<D: DiffHook>(D);
|
||||
|
||||
impl<D: DiffHook> NoFinishHook<D> {
|
||||
/// Wraps another hook.
|
||||
pub fn new(d: D) -> NoFinishHook<D> {
|
||||
NoFinishHook(d)
|
||||
}
|
||||
|
||||
/// Extracts the inner hook.
|
||||
pub fn into_inner(self) -> D {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<D: DiffHook> DiffHook for NoFinishHook<D> {
|
||||
type Error = D::Error;
|
||||
|
||||
#[inline(always)]
|
||||
fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
|
||||
self.0.equal(old_index, new_index, len)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn delete(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
new_index: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.delete(old_index, old_len, new_index)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn insert(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.insert(old_index, new_index, new_len)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn replace(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.replace(old_index, old_len, new_index, new_len)
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<(), Self::Error> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ use std::hash::Hash;
|
|||
use std::ops::{Index, Range};
|
||||
|
||||
pub use capture::Capture;
|
||||
pub use hook::DiffHook;
|
||||
pub use hook::{DiffHook, NoFinishHook};
|
||||
pub use replace::Replace;
|
||||
|
||||
#[doc(no_inline)]
|
||||
|
|
|
|||
|
|
@ -5,9 +5,11 @@
|
|||
//!
|
||||
//! See [the original article by Eugene W. Myers](http://www.xmailserver.org/diff2.pdf)
|
||||
//! describing it.
|
||||
//!
|
||||
//! The implementation of this algorithm is based on the implementation by
|
||||
//! Brandon Williams.
|
||||
|
||||
use std::cmp::{max, min};
|
||||
use std::ops::{Index, Range};
|
||||
use std::ops::{Index, IndexMut, Range};
|
||||
|
||||
use crate::algorithms::DiffHook;
|
||||
|
||||
|
|
@ -27,15 +29,10 @@ where
|
|||
D: DiffHook,
|
||||
New::Output: PartialEq<Old::Output>,
|
||||
{
|
||||
diff_offsets(
|
||||
d,
|
||||
old,
|
||||
old_range.start,
|
||||
old_range.end,
|
||||
new,
|
||||
new_range.start,
|
||||
new_range.end,
|
||||
)?;
|
||||
let max_d = max_d(old_range.len(), new_range.len());
|
||||
let mut vf = V::new(max_d);
|
||||
let mut vb = V::new(max_d);
|
||||
conquer(d, old, old_range, new, new_range, &mut vf, &mut vb)?;
|
||||
d.finish()
|
||||
}
|
||||
|
||||
|
|
@ -48,135 +45,312 @@ where
|
|||
diff(d, old, 0..old.len(), new, 0..new.len())
|
||||
}
|
||||
|
||||
fn modulo(a: isize, b: usize) -> usize {
|
||||
a.rem_euclid(b as isize) as usize
|
||||
// A D-path is a path which starts at (0,0) that has exactly D non-diagonal
|
||||
// edges. All D-paths consist of a (D - 1)-path followed by a non-diagonal edge
|
||||
// and then a possibly empty sequence of diagonal edges called a snake.
|
||||
|
||||
/// `V` contains the endpoints of the furthest reaching `D-paths`. For each
/// recorded endpoint `(x,y)` in diagonal `k`, we only need to retain `x` because
/// `y` can be computed from `x - k`. In other words, `V` is an array of integers
/// where `V[k]` contains the row index of the endpoint of the furthest reaching
/// path in diagonal `k`.
///
/// We can't use a traditional Vec to represent `V` since we use `k` as an index
/// and it can take on negative values. So instead `V` is represented as a
/// light-weight wrapper around a Vec plus an `offset` which is the maximum value
/// `k` can take on in order to map negative `k`'s back to a value >= 0.
#[derive(Debug)]
struct V {
    /// Added to a diagonal `k` to obtain the position inside `v`.
    offset: isize,
    /// Backing storage holding one `x` value per diagonal.
    v: Vec<usize>, // Look into initializing this to -1 and storing isize
}
|
||||
|
||||
pub(crate) fn diff_offsets<D, Old, New>(
|
||||
diff: &mut D,
|
||||
impl V {
|
||||
fn new(max_d: usize) -> Self {
|
||||
Self {
|
||||
offset: max_d as isize,
|
||||
v: vec![0; 2 * max_d],
|
||||
}
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.v.len()
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<isize> for V {
|
||||
type Output = usize;
|
||||
|
||||
fn index(&self, index: isize) -> &Self::Output {
|
||||
&self.v[(index + self.offset) as usize]
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexMut<isize> for V {
|
||||
fn index_mut(&mut self, index: isize) -> &mut Self::Output {
|
||||
&mut self.v[(index + self.offset) as usize]
|
||||
}
|
||||
}
|
||||
|
||||
/// A `Snake` is a sequence of diagonal edges in the edit graph. Normally
/// a snake has a start and end point (and it is possible for a snake to have
/// a length of zero, meaning the start and end points are the same) however
/// we do not need the end point which is why it's not implemented here.
#[derive(Debug)]
struct Snake {
    /// Position in the old sequence at which the snake starts.
    x_start: usize,
    /// Position in the new sequence at which the snake starts.
    y_start: usize,
}
|
||||
|
||||
/// Computes the bound used both to size `V` and to limit the number of
/// `d` iterations in the middle snake search.
///
/// The result is half of the combined sequence length, rounded up, plus one.
fn max_d(len1: usize, len2: usize) -> usize {
    // XXX look into reducing the need to have the additional '+ 1'
    (len1 + len2 + 1) / 2 + 1
}
|
||||
|
||||
fn common_prefix_len<Old, New>(
|
||||
old: &Old,
|
||||
old_current: usize,
|
||||
old_end: usize,
|
||||
old_range: Range<usize>,
|
||||
new: &New,
|
||||
new_current: usize,
|
||||
new_end: usize,
|
||||
) -> Result<(), D::Error>
|
||||
new_range: Range<usize>,
|
||||
) -> usize
|
||||
where
|
||||
D: DiffHook + ?Sized,
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
New::Output: PartialEq<Old::Output>,
|
||||
{
|
||||
#![allow(clippy::many_single_char_names)]
|
||||
if old_end > old_current && new_end > new_current {
|
||||
let old_span = old_end - old_current;
|
||||
let new_span = new_end - new_current;
|
||||
let total_span = (old_span + new_span) as isize;
|
||||
let vec_size = (2 * min(old_span, new_span) + 2) as usize;
|
||||
let w = old_span as isize - new_span as isize;
|
||||
let mut vec_down = vec![0; vec_size as usize];
|
||||
let mut vec_up = vec![0; vec_size as usize];
|
||||
for i in 0..=(total_span / 2 + total_span % 2) {
|
||||
for &inverse in &[true, false][..] {
|
||||
let (v1, v2) = if inverse {
|
||||
(&mut vec_down, &mut vec_up)
|
||||
} else {
|
||||
(&mut vec_up, &mut vec_down)
|
||||
};
|
||||
let j_start = -(i - 2 * max(0, i - new_span as isize));
|
||||
let j_end = i - 2 * max(0, i - old_span as isize) + 1;
|
||||
for j in (j_start..j_end).step_by(2) {
|
||||
let mut a: usize = if j == -i
|
||||
|| j != i && v1[modulo(j - 1, vec_size)] < v1[modulo(j + 1, vec_size)]
|
||||
{
|
||||
v1[modulo(j + 1, vec_size)]
|
||||
} else {
|
||||
v1[modulo(j - 1, vec_size)] + 1
|
||||
if old_range.is_empty() || new_range.is_empty() {
|
||||
return 0;
|
||||
}
|
||||
new_range
|
||||
.zip(old_range)
|
||||
.take_while(
|
||||
#[inline(always)]
|
||||
|x| new[x.0] == old[x.1],
|
||||
)
|
||||
.count()
|
||||
}
|
||||
|
||||
/// Returns the number of trailing elements that `old[old_range]` and
/// `new[new_range]` have in common.
///
/// Both ranges are walked backwards in lockstep and the walk stops at the
/// first pair that compares unequal or when the shorter range is exhausted.
/// Returns `0` if either range is empty.
fn common_suffix_len<Old, New>(
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> usize
where
    Old: Index<usize> + ?Sized,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output>,
{
    if old_range.is_empty() || new_range.is_empty() {
        return 0;
    }
    new_range
        .rev()
        .zip(old_range.rev())
        .take_while(
            #[inline(always)]
            |&(new_idx, old_idx)| new[new_idx] == old[old_idx],
        )
        .count()
}
|
||||
|
||||
/// Splits `range` into the part before `at` and the part starting at `at`.
///
/// No bounds checking is performed; `at` is used verbatim as the end of the
/// first range and the start of the second.
#[inline(always)]
fn split_at(range: Range<usize>, at: usize) -> (Range<usize>, Range<usize>) {
    (range.start..at, at..range.end)
}
|
||||
|
||||
// The divide part of a divide-and-conquer strategy. A D-path has D+1 snakes
|
||||
// some of which may be empty. The divide step requires finding the ceil(D/2) +
|
||||
// 1 or middle snake of an optimal D-path. The idea for doing so is to
|
||||
// simultaneously run the basic algorithm in both the forward and reverse
|
||||
// directions until furthest reaching forward and reverse paths starting at
|
||||
// opposing corners 'overlap'.
|
||||
fn find_middle_snake<Old, New>(
|
||||
old: &Old,
|
||||
old_range: Range<usize>,
|
||||
new: &New,
|
||||
new_range: Range<usize>,
|
||||
vf: &mut V,
|
||||
vb: &mut V,
|
||||
) -> Snake
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
New::Output: PartialEq<Old::Output>,
|
||||
{
|
||||
let n = old_range.len();
|
||||
let m = new_range.len();
|
||||
|
||||
// By Lemma 1 in the paper, the optimal edit script length is odd or even as
|
||||
// `delta` is odd or even.
|
||||
let delta = n as isize - m as isize;
|
||||
let odd = delta & 1 == 1;
|
||||
|
||||
// The initial point at (0, -1)
|
||||
vf[1] = 0;
|
||||
// The initial point at (N, M+1)
|
||||
vb[1] = 0;
|
||||
|
||||
// We only need to explore ceil(D/2) + 1
|
||||
let d_max = max_d(n, m);
|
||||
assert!(vf.len() >= d_max);
|
||||
assert!(vb.len() >= d_max);
|
||||
|
||||
for d in 0..d_max as isize {
|
||||
// Forward path
|
||||
for k in (-d..=d).rev().step_by(2) {
|
||||
let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) {
|
||||
vf[k + 1]
|
||||
} else {
|
||||
vf[k - 1] + 1
|
||||
};
|
||||
let y = (x as isize - k) as usize;
|
||||
|
||||
// The coordinate of the start of a snake
|
||||
let (x0, y0) = (x, y);
|
||||
// While these sequences are identical, keep moving through the
|
||||
// graph with no cost
|
||||
if x < old_range.len() && y < new_range.len() {
|
||||
let advance = common_prefix_len(
|
||||
old,
|
||||
old_range.start + x..old_range.end,
|
||||
new,
|
||||
new_range.start + y..new_range.end,
|
||||
);
|
||||
x += advance;
|
||||
}
|
||||
|
||||
// This is the new best x value
|
||||
vf[k] = x;
|
||||
|
||||
// Only check for connections from the forward search when N - M is
|
||||
// odd and when there is a reciprocal k line coming from the other
|
||||
// direction.
|
||||
if odd && (k - delta).abs() <= (d - 1) {
|
||||
// TODO optimize this so we don't have to compare against n
|
||||
if vf[k] + vb[-(k - delta)] >= n {
|
||||
// Return the snake
|
||||
return Snake {
|
||||
x_start: x0 + old_range.start,
|
||||
y_start: y0 + new_range.start,
|
||||
};
|
||||
let mut b = (a as isize - j) as usize;
|
||||
let (s, t) = (a, b);
|
||||
while a < old_span && b < new_span && {
|
||||
let (e_i, f_i) = if inverse {
|
||||
(a, b)
|
||||
} else {
|
||||
(old_span - a - 1, new_span - b - 1)
|
||||
};
|
||||
new[new_current + f_i] == old[old_current + e_i]
|
||||
} {
|
||||
a += 1;
|
||||
b += 1;
|
||||
}
|
||||
v1[modulo(j, vec_size)] = a;
|
||||
let bound = if inverse { i - 1 } else { i };
|
||||
if (total_span % 2 == 1) == inverse
|
||||
&& w - j >= -bound
|
||||
&& w - j <= bound
|
||||
&& v1[modulo(j, vec_size)] + v2[modulo(w - j, vec_size)] >= old_span
|
||||
{
|
||||
let (x, y, u, v) = if inverse {
|
||||
(s, t, a, b)
|
||||
} else {
|
||||
(old_span - a, new_span - b, old_span - s, new_span - t)
|
||||
};
|
||||
if i + bound > 1 || (x != u && y != v) {
|
||||
diff_offsets(
|
||||
diff,
|
||||
old,
|
||||
old_current,
|
||||
old_current + x,
|
||||
new,
|
||||
new_current,
|
||||
new_current + y,
|
||||
)?;
|
||||
if x != u {
|
||||
diff.equal(old_current + x, new_current + y, u - x)?;
|
||||
}
|
||||
diff_offsets(
|
||||
diff,
|
||||
old,
|
||||
old_current + u,
|
||||
old_end,
|
||||
new,
|
||||
new_current + v,
|
||||
new_end,
|
||||
)?;
|
||||
return Ok(());
|
||||
} else if new_span > old_span {
|
||||
diff.equal(old_current, new_current, old_span)?;
|
||||
diff.insert(
|
||||
old_current + old_span,
|
||||
new_current + old_span,
|
||||
new_span - old_span,
|
||||
)?;
|
||||
return Ok(());
|
||||
} else if new_span < old_span {
|
||||
diff.equal(old_current, new_current, new_span)?;
|
||||
diff.delete(
|
||||
old_current + new_span,
|
||||
old_span - new_span,
|
||||
new_current + new_span,
|
||||
)?;
|
||||
return Ok(());
|
||||
} else {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if old_end > old_current {
|
||||
diff.delete(old_current, old_end - old_current, new_current)?
|
||||
} else if new_end > new_current {
|
||||
diff.insert(old_current, new_current, new_end - new_current)?
|
||||
|
||||
// Backward path
|
||||
for k in (-d..=d).rev().step_by(2) {
|
||||
let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) {
|
||||
vb[k + 1]
|
||||
} else {
|
||||
vb[k - 1] + 1
|
||||
};
|
||||
let mut y = (x as isize - k) as usize;
|
||||
|
||||
// The coordinate of the start of a snake
|
||||
if x < n && y < m {
|
||||
let advance = common_suffix_len(
|
||||
old,
|
||||
old_range.start..old_range.start + n - x,
|
||||
new,
|
||||
new_range.start..new_range.start + m - y,
|
||||
);
|
||||
x += advance;
|
||||
y += advance;
|
||||
}
|
||||
|
||||
// This is the new best x value
|
||||
vb[k] = x;
|
||||
|
||||
if !odd && (k - delta).abs() <= d {
|
||||
// TODO optimize this so we don't have to compare against n
|
||||
if vb[k] + vf[-(k - delta)] >= n {
|
||||
// Return the snake
|
||||
return Snake {
|
||||
x_start: n - x + old_range.start,
|
||||
y_start: m - y + new_range.start,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: Maybe there's an opportunity to optimize and bail early?
|
||||
}
|
||||
|
||||
unreachable!("unable to find a middle snake");
|
||||
}
|
||||
|
||||
fn conquer<Old, New, D>(
|
||||
d: &mut D,
|
||||
old: &Old,
|
||||
mut old_range: Range<usize>,
|
||||
new: &New,
|
||||
mut new_range: Range<usize>,
|
||||
vf: &mut V,
|
||||
vb: &mut V,
|
||||
) -> Result<(), D::Error>
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
D: DiffHook,
|
||||
New::Output: PartialEq<Old::Output>,
|
||||
{
|
||||
// Check for common prefix
|
||||
let common_prefix_len = common_prefix_len(old, old_range.clone(), new, new_range.clone());
|
||||
if common_prefix_len > 0 {
|
||||
d.equal(old_range.start, new_range.start, common_prefix_len)?;
|
||||
}
|
||||
old_range.start += common_prefix_len;
|
||||
new_range.start += common_prefix_len;
|
||||
|
||||
// Check for common suffix
|
||||
let common_suffix_len = common_suffix_len(old, old_range.clone(), new, new_range.clone());
|
||||
let common_suffix = (
|
||||
old_range.end - common_suffix_len,
|
||||
new_range.end - common_suffix_len,
|
||||
);
|
||||
old_range.end -= common_suffix_len;
|
||||
new_range.end -= common_suffix_len;
|
||||
|
||||
if old_range.is_empty() && new_range.is_empty() {
|
||||
// Do nothing
|
||||
} else if new_range.is_empty() {
|
||||
d.delete(
|
||||
old_range.start,
|
||||
old_range.end - old_range.start,
|
||||
new_range.start,
|
||||
)?;
|
||||
} else if old_range.is_empty() {
|
||||
d.insert(
|
||||
old_range.start,
|
||||
new_range.start,
|
||||
new_range.end - new_range.start,
|
||||
)?;
|
||||
} else {
|
||||
let snake = find_middle_snake(old, old_range.clone(), new, new_range.clone(), vf, vb);
|
||||
let (old_a, old_b) = split_at(old_range, snake.x_start);
|
||||
let (new_a, new_b) = split_at(new_range, snake.y_start);
|
||||
conquer(d, old, old_a, new, new_a, vf, vb)?;
|
||||
conquer(d, old, old_b, new, new_b, vf, vb)?;
|
||||
}
|
||||
|
||||
if common_suffix_len > 0 {
|
||||
d.equal(common_suffix.0, common_suffix.1, common_suffix_len)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_modulo() {
|
||||
assert_eq!(modulo(-11, 10), 9);
|
||||
assert_eq!(modulo(23, 7), 2);
|
||||
assert_eq!(modulo(-12, 6), 0);
|
||||
fn test_find_middle_snake() {
|
||||
let a = &b"ABCABBA"[..];
|
||||
let b = &b"CBABAC"[..];
|
||||
let max_d = max_d(a.len(), b.len());
|
||||
let mut vf = V::new(max_d);
|
||||
let mut vb = V::new(max_d);
|
||||
let snake = find_middle_snake(a, 0..a.len(), b, 0..b.len(), &mut vf, &mut vb);
|
||||
assert_eq!(snake.x_start, 4);
|
||||
assert_eq!(snake.y_start, 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -5,12 +5,15 @@
|
|||
//!
|
||||
//! Tends to give more human-readable outputs. See [Bram Cohen's blog
|
||||
//! post](https://bramcohen.livejournal.com/73318.html) describing it.
|
||||
//!
|
||||
//! This is based on the patience implementation of [pijul](https://pijul.org/)
|
||||
//! by Pierre-Étienne Meunier.
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::HashMap;
|
||||
use std::hash::Hash;
|
||||
use std::ops::{Index, Range};
|
||||
|
||||
use crate::algorithms::{myers, DiffHook, Replace};
|
||||
use crate::algorithms::{myers, DiffHook, NoFinishHook, Replace};
|
||||
|
||||
/// Patience diff algorithm.
|
||||
///
|
||||
|
|
@ -147,16 +150,15 @@ where
|
|||
self.new_current += 1;
|
||||
}
|
||||
if self.old_current > a0 {
|
||||
self.d.equal(a0, b0, self.old_current - a0)?
|
||||
self.d.equal(a0, b0, self.old_current - a0)?;
|
||||
}
|
||||
myers::diff_offsets(
|
||||
self.d,
|
||||
let mut no_finish_d = NoFinishHook::new(&mut self.d);
|
||||
myers::diff(
|
||||
&mut no_finish_d,
|
||||
self.old,
|
||||
self.old_current,
|
||||
self.old_indexes[old].index,
|
||||
self.old_current..self.old_indexes[old].index,
|
||||
self.new,
|
||||
self.new_current,
|
||||
self.new_indexes[new].index,
|
||||
self.new_current..self.new_indexes[new].index,
|
||||
)?;
|
||||
self.old_current = self.old_indexes[old].index;
|
||||
self.new_current = self.new_indexes[new].index;
|
||||
|
|
|
|||
|
|
@ -10,18 +10,18 @@ expression: d.into_inner().ops()
|
|||
},
|
||||
Replace {
|
||||
old_index: 3,
|
||||
old_len: 2,
|
||||
old_len: 1,
|
||||
new_index: 3,
|
||||
new_len: 2,
|
||||
},
|
||||
Equal {
|
||||
old_index: 5,
|
||||
old_index: 4,
|
||||
new_index: 5,
|
||||
len: 2,
|
||||
},
|
||||
Replace {
|
||||
old_index: 7,
|
||||
old_len: 1,
|
||||
old_index: 6,
|
||||
old_len: 2,
|
||||
new_index: 7,
|
||||
new_len: 1,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -16,18 +16,18 @@ expression: d.into_inner().ops()
|
|||
},
|
||||
Replace {
|
||||
old_index: 4,
|
||||
old_len: 2,
|
||||
old_len: 1,
|
||||
new_index: 4,
|
||||
new_len: 2,
|
||||
},
|
||||
Equal {
|
||||
old_index: 6,
|
||||
old_index: 5,
|
||||
new_index: 6,
|
||||
len: 2,
|
||||
},
|
||||
Replace {
|
||||
old_index: 8,
|
||||
old_len: 1,
|
||||
old_index: 7,
|
||||
old_len: 2,
|
||||
new_index: 8,
|
||||
new_len: 1,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
//! This crate implements diffing utilities. It attempts to provide an abstraction
|
||||
//! interface over different types of diffing algorithms. It's based on the
|
||||
//! the diff algorithm implementations of [pijul](https://pijul.org/).
|
||||
//! interface over different types of diffing algorithms. The design of the
|
||||
//! library is inspired by pijul's diff library by Pierre-Étienne Meunier and
|
||||
//! also inherits the patience diff algorithm from there.
|
||||
//!
|
||||
//! The API of the crate is split into high and low level functionality. Most
|
||||
//! of what you probably want to use is available top level. Additionally the
|
||||
|
|
|
|||
|
|
@ -54,8 +54,10 @@ expression: "&changes"
|
|||
value: "some",
|
||||
},
|
||||
Change {
|
||||
tag: Insert,
|
||||
old_index: None,
|
||||
tag: Equal,
|
||||
old_index: Some(
|
||||
5,
|
||||
),
|
||||
new_index: Some(
|
||||
5,
|
||||
),
|
||||
|
|
@ -70,10 +72,8 @@ expression: "&changes"
|
|||
value: "amazing",
|
||||
},
|
||||
Change {
|
||||
tag: Equal,
|
||||
old_index: Some(
|
||||
5,
|
||||
),
|
||||
tag: Insert,
|
||||
old_index: None,
|
||||
new_index: Some(
|
||||
7,
|
||||
),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue