Add simple Hunt–McIlroy LCS algorithm (#10)
This commit is contained in:
parent
df78bdca1c
commit
bee5d88b02
7 changed files with 267 additions and 0 deletions
|
|
@ -5,6 +5,7 @@ All notable changes to similar are documented here.
|
||||||
## 1.2.0
|
## 1.2.0
|
||||||
|
|
||||||
* Make the unicode feature optional for inline diffing.
|
* Make the unicode feature optional for inline diffing.
|
||||||
|
* Added Hunt–McIlroy LCS algorithm.
|
||||||
|
|
||||||
## 1.1.0
|
## 1.1.0
|
||||||
|
|
||||||
|
|
|
||||||
181
src/algorithms/lcs.rs
Normal file
181
src/algorithms/lcs.rs
Normal file
|
|
@ -0,0 +1,181 @@
|
||||||
|
//! Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm.
|
||||||
|
//!
|
||||||
|
//! * time: `O(MN)`
|
||||||
|
//! * space: `O(MN)`
|
||||||
|
use std::ops::{Index, Range};
|
||||||
|
|
||||||
|
use crate::algorithms::DiffHook;
|
||||||
|
|
||||||
|
/// Myers' diff algorithm.
|
||||||
|
///
|
||||||
|
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
|
||||||
|
pub fn diff<Old, New, D>(
|
||||||
|
d: &mut D,
|
||||||
|
old: &Old,
|
||||||
|
old_range: Range<usize>,
|
||||||
|
new: &New,
|
||||||
|
new_range: Range<usize>,
|
||||||
|
) -> Result<(), D::Error>
|
||||||
|
where
|
||||||
|
Old: Index<usize> + ?Sized,
|
||||||
|
New: Index<usize> + ?Sized,
|
||||||
|
D: DiffHook,
|
||||||
|
New::Output: PartialEq<Old::Output>,
|
||||||
|
{
|
||||||
|
if new_range.is_empty() {
|
||||||
|
d.delete(old_range.start, old_range.len(), new_range.start)?;
|
||||||
|
return Ok(());
|
||||||
|
} else if old_range.is_empty() {
|
||||||
|
d.insert(old_range.start, new_range.start, new_range.len())?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let prefix_len = old_range
|
||||||
|
.clone()
|
||||||
|
.zip(new_range.clone())
|
||||||
|
.take_while(|x| new[x.1] == old[x.0])
|
||||||
|
.count();
|
||||||
|
let suffix_len = old_range
|
||||||
|
.clone()
|
||||||
|
.rev()
|
||||||
|
.zip(new_range.clone().rev())
|
||||||
|
.take(old_range.len().min(new_range.len()) - prefix_len)
|
||||||
|
.take_while(|x| new[x.1] == old[x.0])
|
||||||
|
.count();
|
||||||
|
|
||||||
|
let table = make_table(
|
||||||
|
old,
|
||||||
|
prefix_len..(old_range.len() - suffix_len),
|
||||||
|
new,
|
||||||
|
prefix_len..(new_range.len() - suffix_len),
|
||||||
|
);
|
||||||
|
let mut old_idx = 0;
|
||||||
|
let mut new_idx = 0;
|
||||||
|
let new_len = new_range.len() - prefix_len - suffix_len;
|
||||||
|
let old_len = old_range.len() - prefix_len - suffix_len;
|
||||||
|
|
||||||
|
if prefix_len > 0 {
|
||||||
|
d.equal(old_range.start, new_range.start, prefix_len)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
while new_idx < new_len && old_idx < old_len {
|
||||||
|
let old_orig_idx = old_range.start + prefix_len + old_idx;
|
||||||
|
let new_orig_idx = new_range.start + prefix_len + new_idx;
|
||||||
|
|
||||||
|
if new[new_orig_idx] == old[old_orig_idx] {
|
||||||
|
d.equal(old_orig_idx, new_orig_idx, 1)?;
|
||||||
|
old_idx += 1;
|
||||||
|
new_idx += 1;
|
||||||
|
} else if table[new_idx][old_idx + 1] >= table[new_idx + 1][old_idx] {
|
||||||
|
d.delete(old_orig_idx, 1, new_orig_idx)?;
|
||||||
|
old_idx += 1;
|
||||||
|
} else {
|
||||||
|
d.insert(old_orig_idx, new_orig_idx, 1)?;
|
||||||
|
new_idx += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if old_idx < old_len {
|
||||||
|
d.delete(
|
||||||
|
old_range.start + prefix_len + old_idx,
|
||||||
|
old_len - old_idx,
|
||||||
|
new_range.start + prefix_len + new_idx,
|
||||||
|
)?;
|
||||||
|
old_idx += old_len - old_idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
if new_idx < new_len {
|
||||||
|
d.insert(
|
||||||
|
old_range.start + prefix_len + old_idx,
|
||||||
|
new_range.start + prefix_len + new_idx,
|
||||||
|
new_len - new_idx,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
if suffix_len > 0 {
|
||||||
|
d.equal(
|
||||||
|
old_range.start + old_len + prefix_len,
|
||||||
|
new_range.start + new_len + prefix_len,
|
||||||
|
suffix_len,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
d.finish()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shortcut for diffing slices.
|
||||||
|
pub fn diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
|
||||||
|
where
|
||||||
|
D: DiffHook,
|
||||||
|
T: PartialEq,
|
||||||
|
{
|
||||||
|
diff(d, old, 0..old.len(), new, 0..new.len())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds the LCS length table for `old[old_range]` and `new[new_range]`.
///
/// The result has `new_range.len() + 1` rows and `old_range.len() + 1`
/// columns.  `table[i][j]` is the length of the longest common subsequence
/// of the elements of `new` from `new_range.start + i` to the end of
/// `new_range` and the elements of `old` from `old_range.start + j` to the
/// end of `old_range`.  The last row and the last column are all zero.
///
/// Both ranges are absolute indices into their sequences; row and column
/// indices of the returned table are relative to the range starts.
fn make_table<Old, New>(
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> Vec<Vec<u32>>
where
    Old: Index<usize> + ?Sized,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output>,
{
    let old_len = old_range.len();
    let new_len = new_range.len();
    // Zero-initialised, so the sentinel last row / last column need no
    // further setup.
    let mut table = vec![vec![0; old_len + 1]; new_len + 1];

    // Fill from the bottom-right corner: every cell only depends on the
    // cells below it, to its right, and diagonally below-right.
    for i in (0..new_len).rev() {
        for j in (0..old_len).rev() {
            // Offset by the range starts so the elements compared are the
            // ones actually inside the requested ranges.
            table[i][j] = if new[new_range.start + i] == old[old_range.start + j] {
                table[i + 1][j + 1] + 1
            } else {
                table[i + 1][j].max(table[i][j + 1])
            };
        }
    }

    table
}
|
||||||
|
|
||||||
|
#[test]
fn test_table() {
    // Only the trailing `2` of the new sequence matches the leading `2` of
    // the old one, so the first column holds 1 for every non-sentinel row.
    let old_items = vec![2, 3];
    let new_items = vec![0, 1, 2];

    let expected = vec![vec![1, 0, 0], vec![1, 0, 0], vec![1, 0, 0], vec![0, 0, 0]];
    let actual = make_table(&old_items, 0..2, &new_items, 0..3);

    assert_eq!(actual, expected);
}
|
||||||
|
|
||||||
|
#[test]
fn test_diff() {
    // The two inputs differ in exactly one element (index 3).
    let old_items: &[usize] = &[0, 1, 2, 3, 4];
    let new_items: &[usize] = &[0, 1, 2, 9, 4];

    // The hook binding and the snapshot expression are kept as `d` /
    // `d.into_inner().ops()` because the stored snapshot records that text.
    let capture = crate::algorithms::Capture::new();
    let mut d = crate::algorithms::Replace::new(capture);
    diff_slices(&mut d, old_items, new_items).unwrap();
    insta::assert_debug_snapshot!(d.into_inner().ops());
}
|
||||||
|
|
||||||
|
#[test]
fn test_contiguous() {
    // Shared prefix `0, 1, 2`, a run of repeated `4`s in the middle and a
    // differing last element.
    let old_items: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5];
    let new_items: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];

    // The hook binding and the snapshot expression are kept as `d` /
    // `d.into_inner().ops()` because the stored snapshot records that text.
    let capture = crate::algorithms::Capture::new();
    let mut d = crate::algorithms::Replace::new(capture);
    diff_slices(&mut d, old_items, new_items).unwrap();
    insta::assert_debug_snapshot!(d.into_inner().ops());
}
|
||||||
|
|
||||||
|
#[test]
fn test_pat() {
    // One element removed from the middle and two appended at the end.
    let old_items: &[usize] = &[0, 1, 3, 4, 5];
    let new_items: &[usize] = &[0, 1, 4, 5, 8, 9];

    // Captured directly, without the `Replace` wrapper; the binding stays
    // `d` because the stored snapshot records the expression `d.ops()`.
    let mut d = crate::algorithms::Capture::new();
    diff_slices(&mut d, old_items, new_items).unwrap();
    insta::assert_debug_snapshot!(d.ops());
}
|
||||||
|
|
@ -47,6 +47,7 @@ pub use replace::Replace;
|
||||||
#[doc(no_inline)]
|
#[doc(no_inline)]
|
||||||
pub use crate::Algorithm;
|
pub use crate::Algorithm;
|
||||||
|
|
||||||
|
pub mod lcs;
|
||||||
pub mod myers;
|
pub mod myers;
|
||||||
pub mod patience;
|
pub mod patience;
|
||||||
|
|
||||||
|
|
@ -71,6 +72,7 @@ where
|
||||||
match alg {
|
match alg {
|
||||||
Algorithm::Myers => myers::diff(d, old, old_range, new, new_range),
|
Algorithm::Myers => myers::diff(d, old, old_range, new, new_range),
|
||||||
Algorithm::Patience => patience::diff(d, old, old_range, new, new_range),
|
Algorithm::Patience => patience::diff(d, old, old_range, new, new_range),
|
||||||
|
Algorithm::Lcs => lcs::diff(d, old, old_range, new, new_range),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,28 @@
|
||||||
|
---
|
||||||
|
source: src/algorithms/lcs.rs
|
||||||
|
expression: d.into_inner().ops()
|
||||||
|
---
|
||||||
|
[
|
||||||
|
Equal {
|
||||||
|
old_index: 0,
|
||||||
|
new_index: 0,
|
||||||
|
len: 3,
|
||||||
|
},
|
||||||
|
Replace {
|
||||||
|
old_index: 3,
|
||||||
|
old_len: 2,
|
||||||
|
new_index: 3,
|
||||||
|
new_len: 2,
|
||||||
|
},
|
||||||
|
Equal {
|
||||||
|
old_index: 5,
|
||||||
|
new_index: 5,
|
||||||
|
len: 2,
|
||||||
|
},
|
||||||
|
Replace {
|
||||||
|
old_index: 7,
|
||||||
|
old_len: 1,
|
||||||
|
new_index: 7,
|
||||||
|
new_len: 1,
|
||||||
|
},
|
||||||
|
]
|
||||||
22
src/algorithms/snapshots/similar__algorithms__lcs__diff.snap
Normal file
22
src/algorithms/snapshots/similar__algorithms__lcs__diff.snap
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
---
|
||||||
|
source: src/algorithms/lcs.rs
|
||||||
|
expression: d.into_inner().ops()
|
||||||
|
---
|
||||||
|
[
|
||||||
|
Equal {
|
||||||
|
old_index: 0,
|
||||||
|
new_index: 0,
|
||||||
|
len: 3,
|
||||||
|
},
|
||||||
|
Replace {
|
||||||
|
old_index: 3,
|
||||||
|
old_len: 1,
|
||||||
|
new_index: 3,
|
||||||
|
new_len: 1,
|
||||||
|
},
|
||||||
|
Equal {
|
||||||
|
old_index: 4,
|
||||||
|
new_index: 4,
|
||||||
|
len: 1,
|
||||||
|
},
|
||||||
|
]
|
||||||
31
src/algorithms/snapshots/similar__algorithms__lcs__pat.snap
Normal file
31
src/algorithms/snapshots/similar__algorithms__lcs__pat.snap
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
---
|
||||||
|
source: src/algorithms/lcs.rs
|
||||||
|
expression: d.ops()
|
||||||
|
---
|
||||||
|
[
|
||||||
|
Equal {
|
||||||
|
old_index: 0,
|
||||||
|
new_index: 0,
|
||||||
|
len: 2,
|
||||||
|
},
|
||||||
|
Delete {
|
||||||
|
old_index: 2,
|
||||||
|
old_len: 1,
|
||||||
|
new_index: 2,
|
||||||
|
},
|
||||||
|
Equal {
|
||||||
|
old_index: 3,
|
||||||
|
new_index: 2,
|
||||||
|
len: 1,
|
||||||
|
},
|
||||||
|
Equal {
|
||||||
|
old_index: 4,
|
||||||
|
new_index: 3,
|
||||||
|
len: 1,
|
||||||
|
},
|
||||||
|
Insert {
|
||||||
|
old_index: 5,
|
||||||
|
new_index: 4,
|
||||||
|
new_len: 2,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
@ -10,6 +10,8 @@ pub enum Algorithm {
|
||||||
Myers,
|
Myers,
|
||||||
/// Picks the patience algorithm from [`crate::algorithms::patience`]
|
/// Picks the patience algorithm from [`crate::algorithms::patience`]
|
||||||
Patience,
|
Patience,
|
||||||
|
/// Picks the LCS algorithm from [`crate::algorithms::lcs`]
|
||||||
|
Lcs,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Algorithm {
|
impl Default for Algorithm {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue