Added text diff utilities

This commit is contained in:
Armin Ronacher 2021-01-18 21:37:04 +01:00
parent bc97614946
commit 34dd260dbb
9 changed files with 729 additions and 121 deletions

112
src/algorithms/capture.rs Normal file
View file

@ -0,0 +1,112 @@
use crate::algorithms::hook::DiffHook;
use std::convert::Infallible;
/// A single captured diff operation.
///
/// Every variant stores the arguments of the [`DiffHook`] callback of the
/// same name. Values of this type are what
/// [`Capture`](crate::algorithms::Capture) collects.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum DiffOp {
    /// A segment is equal (see [`DiffHook::equal`])
    Equal {
        /// The `old_index` argument of the callback.
        old_index: usize,
        /// The `new_index` argument of the callback.
        new_index: usize,
        /// The `len` argument of the callback.
        len: usize,
    },
    /// A segment was deleted (see [`DiffHook::delete`])
    Delete {
        /// The `old_index` argument of the callback.
        old_index: usize,
        /// The `old_len` argument of the callback.
        old_len: usize,
        /// The `new_index` argument of the callback.
        new_index: usize,
    },
    /// A segment was inserted (see [`DiffHook::insert`])
    Insert {
        /// The `old_index` argument of the callback.
        old_index: usize,
        /// The `new_index` argument of the callback.
        new_index: usize,
        /// The `new_len` argument of the callback.
        new_len: usize,
    },
    /// A segment was replaced (see [`DiffHook::replace`])
    Replace {
        /// The `old_index` argument of the callback.
        old_index: usize,
        /// The `old_len` argument of the callback.
        old_len: usize,
        /// The `new_index` argument of the callback.
        new_index: usize,
        /// The `new_len` argument of the callback.
        new_len: usize,
    },
}
/// A [`DiffHook`] that captures all diff operations.
///
/// The captured [`DiffOp`] values are kept in an internal vector that can be
/// borrowed with [`Capture::ops`] or taken out with [`Capture::into_vec`].
// `Debug` is derived so the public type can be printed in assertions and logs.
#[derive(Debug, Default, Clone)]
pub struct Capture(Vec<DiffOp>);

impl Capture {
    /// Creates a new capture hook with no captured operations.
    pub fn new() -> Capture {
        Capture::default()
    }

    /// Consumes the capture hook and returns the captured operations.
    pub fn into_vec(self) -> Vec<DiffOp> {
        self.0
    }

    /// Borrows the captured operations as a slice.
    pub fn ops(&self) -> &[DiffOp] {
        &self.0
    }
}
/// Appends one [`DiffOp`] to the internal vector for every callback received.
impl DiffHook for Capture {
    /// Every callback below returns `Ok(())`, so no error is ever produced.
    type Error = Infallible;

    /// Stores the arguments as a [`DiffOp::Equal`].
    fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
        let op = DiffOp::Equal {
            old_index,
            new_index,
            len,
        };
        self.0.push(op);
        Ok(())
    }

    /// Stores the arguments as a [`DiffOp::Delete`].
    fn delete(
        &mut self,
        old_index: usize,
        old_len: usize,
        new_index: usize,
    ) -> Result<(), Self::Error> {
        let op = DiffOp::Delete {
            old_index,
            old_len,
            new_index,
        };
        self.0.push(op);
        Ok(())
    }

    /// Stores the arguments as a [`DiffOp::Insert`].
    fn insert(
        &mut self,
        old_index: usize,
        new_index: usize,
        new_len: usize,
    ) -> Result<(), Self::Error> {
        let op = DiffOp::Insert {
            old_index,
            new_index,
            new_len,
        };
        self.0.push(op);
        Ok(())
    }

    /// Stores the arguments as a [`DiffOp::Replace`].
    fn replace(
        &mut self,
        old_index: usize,
        old_len: usize,
        new_index: usize,
        new_len: usize,
    ) -> Result<(), Self::Error> {
        let op = DiffOp::Replace {
            old_index,
            old_len,
            new_index,
            new_len,
        };
        self.0.push(op);
        Ok(())
    }
}

View file

@ -1,5 +1,3 @@
use std::convert::Infallible;
/// A trait for reacting to an edit script from the "old" version to
/// the "new" version.
pub trait DiffHook: Sized {
@ -108,113 +106,3 @@ impl<'a, D: DiffHook + 'a> DiffHook for &'a mut D {
(*self).finish()
}
}
/// A single captured diff operation.
///
/// Every variant stores the arguments of the [`DiffHook`] callback of the
/// same name. Values of this type are what [`CaptureHook`] collects.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum DiffOp {
    /// A segment is equal (see [`DiffHook::equal`])
    Equal {
        /// The `old_index` argument of the callback.
        old_index: usize,
        /// The `new_index` argument of the callback.
        new_index: usize,
        /// The `len` argument of the callback.
        len: usize,
    },
    /// A segment was deleted (see [`DiffHook::delete`])
    Delete {
        /// The `old_index` argument of the callback.
        old_index: usize,
        /// The `old_len` argument of the callback.
        old_len: usize,
        /// The `new_index` argument of the callback.
        new_index: usize,
    },
    /// A segment was inserted (see [`DiffHook::insert`])
    Insert {
        /// The `old_index` argument of the callback.
        old_index: usize,
        /// The `new_index` argument of the callback.
        new_index: usize,
        /// The `new_len` argument of the callback.
        new_len: usize,
    },
    /// A segment was replaced (see [`DiffHook::replace`])
    Replace {
        /// The `old_index` argument of the callback.
        old_index: usize,
        /// The `old_len` argument of the callback.
        old_len: usize,
        /// The `new_index` argument of the callback.
        new_index: usize,
        /// The `new_len` argument of the callback.
        new_len: usize,
    },
}
/// A [`DiffHook`] that captures all diff operations.
///
/// The captured [`DiffOp`] values are kept in an internal vector that can be
/// borrowed with [`CaptureHook::ops`] or taken out with
/// [`CaptureHook::into_vec`].
// `Debug` is derived so the public type can be printed in assertions and logs.
#[derive(Debug, Default, Clone)]
pub struct CaptureHook(Vec<DiffOp>);

impl CaptureHook {
    /// Creates a new capture hook with no captured operations.
    pub fn new() -> CaptureHook {
        CaptureHook::default()
    }

    /// Consumes the capture hook and returns the captured operations.
    pub fn into_vec(self) -> Vec<DiffOp> {
        self.0
    }

    /// Borrows the captured operations as a slice.
    pub fn ops(&self) -> &[DiffOp] {
        &self.0
    }
}
/// Appends one [`DiffOp`] to the internal vector for every callback received.
impl DiffHook for CaptureHook {
    /// Every callback below returns `Ok(())`, so no error is ever produced.
    type Error = Infallible;

    /// Stores the arguments as a [`DiffOp::Equal`].
    fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
        let op = DiffOp::Equal {
            old_index,
            new_index,
            len,
        };
        self.0.push(op);
        Ok(())
    }

    /// Stores the arguments as a [`DiffOp::Delete`].
    fn delete(
        &mut self,
        old_index: usize,
        old_len: usize,
        new_index: usize,
    ) -> Result<(), Self::Error> {
        let op = DiffOp::Delete {
            old_index,
            old_len,
            new_index,
        };
        self.0.push(op);
        Ok(())
    }

    /// Stores the arguments as a [`DiffOp::Insert`].
    fn insert(
        &mut self,
        old_index: usize,
        new_index: usize,
        new_len: usize,
    ) -> Result<(), Self::Error> {
        let op = DiffOp::Insert {
            old_index,
            new_index,
            new_len,
        };
        self.0.push(op);
        Ok(())
    }

    /// Stores the arguments as a [`DiffOp::Replace`].
    fn replace(
        &mut self,
        old_index: usize,
        old_len: usize,
        new_index: usize,
        new_len: usize,
    ) -> Result<(), Self::Error> {
        let op = DiffOp::Replace {
            old_index,
            old_len,
            new_index,
            new_len,
        };
        self.0.push(op);
        Ok(())
    }
}

View file

@ -11,12 +11,69 @@
//! the actual values but only the indexes. This is why the diff hook is not
//! used outside of the raw algorithm implementations as for most situations
//! access to the values is useful or required.
//!
//! Most of the crate operates on the [`Algorithm`] enum which abstracts over
//! the different algorithms.
// general traits and utilities
mod capture;
mod hook;
mod replace;
use std::hash::Hash;
use std::ops::{Index, Range};
pub use capture::*;
pub use hook::*;
pub use replace::*;
// actual diffing algorithms
pub mod myers;
pub mod patience;
/// An enum representing a diffing algorithm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Algorithm {
    /// Selects the implementation in the [`myers`] module.
    Myers,
    /// Selects the implementation in the [`patience`] module.
    Patience,
}

impl Default for Algorithm {
    /// Returns the default algorithm ([`Algorithm::Myers`]).
    fn default() -> Self {
        Self::Myers
    }
}
/// Creates a diff between `old` and `new` using the algorithm named by `alg`.
///
/// `old` is diffed between the indices in `old_range` and `new` between the
/// indices in `new_range`. All arguments other than `alg` are forwarded
/// unchanged to the `diff` function of the selected algorithm module, and
/// that function's result is returned as is.
pub fn diff<Old, New, D>(
    alg: Algorithm,
    d: &mut D,
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> Result<(), D::Error>
where
    D: DiffHook,
    Old: Index<usize> + ?Sized,
    Old::Output: Hash + Eq + Ord,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
{
    // Both algorithm modules are called with the same argument list.
    match alg {
        Algorithm::Patience => patience::diff(d, old, old_range, new, new_range),
        Algorithm::Myers => myers::diff(d, old, old_range, new, new_range),
    }
}
/// Shortcut for diffing slices with a specific algorithm.
///
/// Calls [`diff`] with the full index range (`0..len`) of both `old` and
/// `new` and returns its result.
pub fn diff_slices<D, T>(alg: Algorithm, d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
where
    T: Eq + Hash + Ord,
    D: DiffHook,
{
    let old_range = 0..old.len();
    let new_range = 0..new.len();
    diff(alg, d, old, old_range, new, new_range)
}

View file

@ -183,7 +183,7 @@ fn test_diff() {
let a: &[usize] = &[0, 1, 2, 3, 4];
let b: &[usize] = &[0, 1, 2, 9, 4];
let mut d = crate::algorithms::Replace::new(crate::algorithms::CaptureHook::new());
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
diff_slices(&mut d, a, b).unwrap();
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"
[
@ -212,7 +212,7 @@ fn test_contiguous() {
let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5];
let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];
let mut d = crate::algorithms::Replace::new(crate::algorithms::CaptureHook::new());
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
diff_slices(&mut d, a, b).unwrap();
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"
[
@ -247,7 +247,7 @@ fn test_pat() {
let a: &[usize] = &[0, 1, 3, 4, 5];
let b: &[usize] = &[0, 1, 4, 5, 8, 9];
let mut d = crate::algorithms::CaptureHook::new();
let mut d = crate::algorithms::Capture::new();
diff_slices(&mut d, a, b).unwrap();
insta::assert_debug_snapshot!(d.ops(), @r###"
[

View file

@ -180,7 +180,7 @@ fn test_patience() {
let a: &[usize] = &[11, 1, 2, 2, 3, 4, 4, 4, 5, 47, 19];
let b: &[usize] = &[10, 1, 2, 2, 8, 9, 4, 4, 7, 47, 18];
let mut d = Replace::new(crate::algorithms::CaptureHook::new());
let mut d = Replace::new(crate::algorithms::Capture::new());
diff_slices(&mut d, a, b).unwrap();
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"

View file

@ -2,6 +2,13 @@ use crate::algorithms::DiffHook;
/// A [`DiffHook`] that combines deletions and insertions to give blocks
/// of maximal length, and replacements when appropriate.
///
/// It will replace [`DiffHook::insert`] and [`DiffHook::delete`] events when
/// possible with [`DiffHook::replace`] events. Note that even though the
/// text processing in the crate does not use replace events and always resolves
/// them back to delete and insert, it's useful to always use the replacer to
/// ensure a consistent order of inserts and deletes. This is why for instance
/// the text diffing automatically uses this hook internally.
pub struct Replace<D: DiffHook> {
d: D,
del: Option<(usize, usize, usize)>,
@ -153,7 +160,7 @@ fn test_mayers_replace() {
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n",
];
let mut d = Replace::new(crate::algorithms::CaptureHook::new());
let mut d = Replace::new(crate::algorithms::Capture::new());
myers::diff_slices(&mut d, a, b).unwrap();
insta::assert_debug_snapshot!(&d.into_inner().ops(), @r###"
@ -194,7 +201,7 @@ fn test_replace() {
let a: &[usize] = &[0, 1, 2, 3, 4];
let b: &[usize] = &[0, 1, 2, 7, 8, 9];
let mut d = Replace::new(crate::algorithms::CaptureHook::new());
let mut d = Replace::new(crate::algorithms::Capture::new());
crate::algorithms::myers::diff_slices(&mut d, a, b).unwrap();
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"
[