From bee5d88b0296ba0b2811d001fcec0c146fb439b8 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sun, 14 Feb 2021 23:11:29 +0100 Subject: [PATCH] =?UTF-8?q?Add=20simple=20Hunt=E2=80=93McIlroy=20LCS=20alg?= =?UTF-8?q?orithm=20(#10)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 1 + src/algorithms/lcs.rs | 181 ++++++++++++++++++ src/algorithms/mod.rs | 2 + .../similar__algorithms__lcs__contiguous.snap | 28 +++ .../similar__algorithms__lcs__diff.snap | 22 +++ .../similar__algorithms__lcs__pat.snap | 31 +++ src/types.rs | 2 + 7 files changed, 267 insertions(+) create mode 100644 src/algorithms/lcs.rs create mode 100644 src/algorithms/snapshots/similar__algorithms__lcs__contiguous.snap create mode 100644 src/algorithms/snapshots/similar__algorithms__lcs__diff.snap create mode 100644 src/algorithms/snapshots/similar__algorithms__lcs__pat.snap diff --git a/CHANGELOG.md b/CHANGELOG.md index 421268b..bd38c4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ All notable changes to similar are documented here. ## 1.2.0 * Make the unicode feature optional for inline diffing. +* Added Hunt–McIlroy LCS algorithm. ## 1.1.0 diff --git a/src/algorithms/lcs.rs b/src/algorithms/lcs.rs new file mode 100644 index 0000000..8f23500 --- /dev/null +++ b/src/algorithms/lcs.rs @@ -0,0 +1,181 @@ +//! Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm. +//! +//! * time: `O((NM)D log (M)D)` +//! * space: `O(MN)` +use std::ops::{Index, Range}; + +use crate::algorithms::DiffHook; + +/// Hunt–McIlroy LCS diff algorithm. +/// +/// Diff `old` between indices `old_range` and `new` between indices `new_range`. 
+pub fn diff( + d: &mut D, + old: &Old, + old_range: Range, + new: &New, + new_range: Range, +) -> Result<(), D::Error> +where + Old: Index + ?Sized, + New: Index + ?Sized, + D: DiffHook, + New::Output: PartialEq, +{ + if new_range.is_empty() { + d.delete(old_range.start, old_range.len(), new_range.start)?; + return Ok(()); + } else if old_range.is_empty() { + d.insert(old_range.start, new_range.start, new_range.len())?; + return Ok(()); + } + + let prefix_len = old_range + .clone() + .zip(new_range.clone()) + .take_while(|x| new[x.1] == old[x.0]) + .count(); + let suffix_len = old_range + .clone() + .rev() + .zip(new_range.clone().rev()) + .take(old_range.len().min(new_range.len()) - prefix_len) + .take_while(|x| new[x.1] == old[x.0]) + .count(); + + let table = make_table( + old, + prefix_len..(old_range.len() - suffix_len), + new, + prefix_len..(new_range.len() - suffix_len), + ); + let mut old_idx = 0; + let mut new_idx = 0; + let new_len = new_range.len() - prefix_len - suffix_len; + let old_len = old_range.len() - prefix_len - suffix_len; + + if prefix_len > 0 { + d.equal(old_range.start, new_range.start, prefix_len)?; + } + + while new_idx < new_len && old_idx < old_len { + let old_orig_idx = old_range.start + prefix_len + old_idx; + let new_orig_idx = new_range.start + prefix_len + new_idx; + + if new[new_orig_idx] == old[old_orig_idx] { + d.equal(old_orig_idx, new_orig_idx, 1)?; + old_idx += 1; + new_idx += 1; + } else if table[new_idx][old_idx + 1] >= table[new_idx + 1][old_idx] { + d.delete(old_orig_idx, 1, new_orig_idx)?; + old_idx += 1; + } else { + d.insert(old_orig_idx, new_orig_idx, 1)?; + new_idx += 1; + } + } + + if old_idx < old_len { + d.delete( + old_range.start + prefix_len + old_idx, + old_len - old_idx, + new_range.start + prefix_len + new_idx, + )?; + old_idx += old_len - old_idx; + } + + if new_idx < new_len { + d.insert( + old_range.start + prefix_len + old_idx, + new_range.start + prefix_len + new_idx, + new_len - new_idx, + )?; + } + + 
if suffix_len > 0 { + d.equal( + old_range.start + old_len + prefix_len, + new_range.start + new_len + prefix_len, + suffix_len, + )?; + } + + d.finish() +} + +/// Shortcut for diffing slices. +pub fn diff_slices(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error> +where + D: DiffHook, + T: PartialEq, +{ + diff(d, old, 0..old.len(), new, 0..new.len()) +} + +fn make_table( + old: &Old, + old_range: Range, + new: &New, + new_range: Range, +) -> Vec> +where + Old: Index + ?Sized, + New: Index + ?Sized, + New::Output: PartialEq, +{ + let old_len = old_range.len(); + let new_len = new_range.len(); + let mut table = vec![vec![0; old_len + 1]; new_len + 1]; + + for i in 0..new_len { + let i = new_len - i - 1; + table[i][old_len] = 0; + for j in 0..old_len { + let j = old_len - j - 1; + table[i][j] = if new[i] == old[j] { + table[i + 1][j + 1] + 1 + } else { + table[i + 1][j].max(table[i][j + 1]) + } + } + } + + table +} + +#[test] +fn test_table() { + let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3); + let expected = vec![vec![1, 0, 0], vec![1, 0, 0], vec![1, 0, 0], vec![0, 0, 0]]; + assert_eq!(table, expected); +} + +#[test] +fn test_diff() { + let a: &[usize] = &[0, 1, 2, 3, 4]; + let b: &[usize] = &[0, 1, 2, 9, 4]; + + let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); + diff_slices(&mut d, a, b).unwrap(); + insta::assert_debug_snapshot!(d.into_inner().ops()); +} + +#[test] +fn test_contiguous() { + let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5]; + let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7]; + + let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new()); + diff_slices(&mut d, a, b).unwrap(); + insta::assert_debug_snapshot!(d.into_inner().ops()); +} + +#[test] +fn test_pat() { + let a: &[usize] = &[0, 1, 3, 4, 5]; + let b: &[usize] = &[0, 1, 4, 5, 8, 9]; + + let mut d = crate::algorithms::Capture::new(); + diff_slices(&mut d, a, b).unwrap(); + insta::assert_debug_snapshot!(d.ops()); +} diff --git 
a/src/algorithms/mod.rs b/src/algorithms/mod.rs index ab2d7a9..c9c47f0 100644 --- a/src/algorithms/mod.rs +++ b/src/algorithms/mod.rs @@ -47,6 +47,7 @@ pub use replace::Replace; #[doc(no_inline)] pub use crate::Algorithm; +pub mod lcs; pub mod myers; pub mod patience; @@ -71,6 +72,7 @@ where match alg { Algorithm::Myers => myers::diff(d, old, old_range, new, new_range), Algorithm::Patience => patience::diff(d, old, old_range, new, new_range), + Algorithm::Lcs => lcs::diff(d, old, old_range, new, new_range), } } diff --git a/src/algorithms/snapshots/similar__algorithms__lcs__contiguous.snap b/src/algorithms/snapshots/similar__algorithms__lcs__contiguous.snap new file mode 100644 index 0000000..77e5afa --- /dev/null +++ b/src/algorithms/snapshots/similar__algorithms__lcs__contiguous.snap @@ -0,0 +1,28 @@ +--- +source: src/algorithms/lcs.rs +expression: d.into_inner().ops() +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 3, + }, + Replace { + old_index: 3, + old_len: 2, + new_index: 3, + new_len: 2, + }, + Equal { + old_index: 5, + new_index: 5, + len: 2, + }, + Replace { + old_index: 7, + old_len: 1, + new_index: 7, + new_len: 1, + }, +] diff --git a/src/algorithms/snapshots/similar__algorithms__lcs__diff.snap b/src/algorithms/snapshots/similar__algorithms__lcs__diff.snap new file mode 100644 index 0000000..d43706d --- /dev/null +++ b/src/algorithms/snapshots/similar__algorithms__lcs__diff.snap @@ -0,0 +1,22 @@ +--- +source: src/algorithms/lcs.rs +expression: d.into_inner().ops() +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 3, + }, + Replace { + old_index: 3, + old_len: 1, + new_index: 3, + new_len: 1, + }, + Equal { + old_index: 4, + new_index: 4, + len: 1, + }, +] diff --git a/src/algorithms/snapshots/similar__algorithms__lcs__pat.snap b/src/algorithms/snapshots/similar__algorithms__lcs__pat.snap new file mode 100644 index 0000000..9ed25ae --- /dev/null +++ b/src/algorithms/snapshots/similar__algorithms__lcs__pat.snap @@ -0,0 +1,31 @@ +--- 
+source: src/algorithms/lcs.rs +expression: d.ops() +--- +[ + Equal { + old_index: 0, + new_index: 0, + len: 2, + }, + Delete { + old_index: 2, + old_len: 1, + new_index: 2, + }, + Equal { + old_index: 3, + new_index: 2, + len: 1, + }, + Equal { + old_index: 4, + new_index: 3, + len: 1, + }, + Insert { + old_index: 5, + new_index: 4, + new_len: 2, + }, +] diff --git a/src/types.rs b/src/types.rs index 9998840..100d09c 100644 --- a/src/types.rs +++ b/src/types.rs @@ -10,6 +10,8 @@ pub enum Algorithm { Myers, /// Picks the patience algorithm from [`crate::algorithms::patience`] Patience, + /// Picks the LCS algorithm from [`crate::algorithms::lcs`] + Lcs, } impl Default for Algorithm {