Added text diff utilities
This commit is contained in:
parent
bc97614946
commit
34dd260dbb
9 changed files with 729 additions and 121 deletions
112
src/algorithms/capture.rs
Normal file
112
src/algorithms/capture.rs
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
use crate::algorithms::hook::DiffHook;
|
||||
use std::convert::Infallible;
|
||||
|
||||
/// Utility enum to capture a diff operation.
///
/// Each variant stores the arguments of the [`DiffHook`] callback of the
/// same name, unchanged.
///
/// This is used by [`Capture`](crate::algorithms::Capture).
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum DiffOp {
    /// A segment is equal (see [`DiffHook::equal`])
    Equal {
        /// Position in the old sequence, as passed to [`DiffHook::equal`].
        old_index: usize,
        /// Position in the new sequence, as passed to [`DiffHook::equal`].
        new_index: usize,
        /// Length of the segment, as passed to [`DiffHook::equal`].
        len: usize,
    },
    /// A segment was deleted (see [`DiffHook::delete`])
    Delete {
        /// Position in the old sequence, as passed to [`DiffHook::delete`].
        old_index: usize,
        /// Length of the old segment, as passed to [`DiffHook::delete`].
        old_len: usize,
        /// Position in the new sequence, as passed to [`DiffHook::delete`].
        new_index: usize,
    },
    /// A segment was inserted (see [`DiffHook::insert`])
    Insert {
        /// Position in the old sequence, as passed to [`DiffHook::insert`].
        old_index: usize,
        /// Position in the new sequence, as passed to [`DiffHook::insert`].
        new_index: usize,
        /// Length of the new segment, as passed to [`DiffHook::insert`].
        new_len: usize,
    },
    /// A segment was replaced (see [`DiffHook::replace`])
    Replace {
        /// Position in the old sequence, as passed to [`DiffHook::replace`].
        old_index: usize,
        /// Length of the old segment, as passed to [`DiffHook::replace`].
        old_len: usize,
        /// Position in the new sequence, as passed to [`DiffHook::replace`].
        new_index: usize,
        /// Length of the new segment, as passed to [`DiffHook::replace`].
        new_len: usize,
    },
}
|
||||
|
||||
/// A [`DiffHook`] that captures all diff operations.
|
||||
#[derive(Default, Clone)]
|
||||
pub struct Capture(Vec<DiffOp>);
|
||||
|
||||
impl Capture {
|
||||
/// Creates a new capture hook.
|
||||
pub fn new() -> Capture {
|
||||
Capture::default()
|
||||
}
|
||||
|
||||
/// Converts the capture hook into a vector.
|
||||
pub fn into_vec(self) -> Vec<DiffOp> {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Accesses the captured operations.
|
||||
pub fn ops(&self) -> &[DiffOp] {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl DiffHook for Capture {
|
||||
type Error = Infallible;
|
||||
|
||||
fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Equal {
|
||||
old_index,
|
||||
new_index,
|
||||
len,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn delete(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
new_index: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Delete {
|
||||
old_index,
|
||||
old_len,
|
||||
new_index,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Insert {
|
||||
old_index,
|
||||
new_index,
|
||||
new_len,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn replace(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Replace {
|
||||
old_index,
|
||||
old_len,
|
||||
new_index,
|
||||
new_len,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,3 @@
|
|||
use std::convert::Infallible;
|
||||
|
||||
/// A trait for reacting to an edit script from the "old" version to
|
||||
/// the "new" version.
|
||||
pub trait DiffHook: Sized {
|
||||
|
|
@ -108,113 +106,3 @@ impl<'a, D: DiffHook + 'a> DiffHook for &'a mut D {
|
|||
(*self).finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// Utility enum to capture a diff operation.
///
/// Each variant stores the arguments of the [`DiffHook`] callback of the
/// same name, unchanged.
///
/// This is used by [`CaptureHook`].
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum DiffOp {
    /// A segment is equal (see [`DiffHook::equal`])
    Equal {
        /// Position in the old sequence, as passed to [`DiffHook::equal`].
        old_index: usize,
        /// Position in the new sequence, as passed to [`DiffHook::equal`].
        new_index: usize,
        /// Length of the segment, as passed to [`DiffHook::equal`].
        len: usize,
    },
    /// A segment was deleted (see [`DiffHook::delete`])
    Delete {
        /// Position in the old sequence, as passed to [`DiffHook::delete`].
        old_index: usize,
        /// Length of the old segment, as passed to [`DiffHook::delete`].
        old_len: usize,
        /// Position in the new sequence, as passed to [`DiffHook::delete`].
        new_index: usize,
    },
    /// A segment was inserted (see [`DiffHook::insert`])
    Insert {
        /// Position in the old sequence, as passed to [`DiffHook::insert`].
        old_index: usize,
        /// Position in the new sequence, as passed to [`DiffHook::insert`].
        new_index: usize,
        /// Length of the new segment, as passed to [`DiffHook::insert`].
        new_len: usize,
    },
    /// A segment was replaced (see [`DiffHook::replace`])
    Replace {
        /// Position in the old sequence, as passed to [`DiffHook::replace`].
        old_index: usize,
        /// Length of the old segment, as passed to [`DiffHook::replace`].
        old_len: usize,
        /// Position in the new sequence, as passed to [`DiffHook::replace`].
        new_index: usize,
        /// Length of the new segment, as passed to [`DiffHook::replace`].
        new_len: usize,
    },
}
|
||||
|
||||
/// A [`DiffHook`] that captures all diff operations.
|
||||
#[derive(Default, Clone)]
|
||||
pub struct CaptureHook(Vec<DiffOp>);
|
||||
|
||||
impl CaptureHook {
|
||||
/// Creates a new capture hook.
|
||||
pub fn new() -> CaptureHook {
|
||||
CaptureHook::default()
|
||||
}
|
||||
|
||||
/// Converts the capture hook into a vector.
|
||||
pub fn into_vec(self) -> Vec<DiffOp> {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Accesses the captured operations.
|
||||
pub fn ops(&self) -> &[DiffOp] {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl DiffHook for CaptureHook {
|
||||
type Error = Infallible;
|
||||
|
||||
fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Equal {
|
||||
old_index,
|
||||
new_index,
|
||||
len,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn delete(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
new_index: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Delete {
|
||||
old_index,
|
||||
old_len,
|
||||
new_index,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Insert {
|
||||
old_index,
|
||||
new_index,
|
||||
new_len,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn replace(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Replace {
|
||||
old_index,
|
||||
old_len,
|
||||
new_index,
|
||||
new_len,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,12 +11,69 @@
|
|||
//! the actual values but only the indexes. This is why the diff hook is not
|
||||
//! used outside of the raw algorithm implementations as for most situations
|
||||
//! access to the values is useful or required.
|
||||
//!
|
||||
//! Most of the crate operates on the [`Algorithm`] enum which abstracts over
|
||||
//! the different algorithms.
|
||||
|
||||
// general traits and utilities
|
||||
mod capture;
|
||||
mod hook;
|
||||
mod replace;
|
||||
|
||||
use std::hash::Hash;
|
||||
use std::ops::{Index, Range};
|
||||
|
||||
pub use capture::*;
|
||||
pub use hook::*;
|
||||
pub use replace::*;
|
||||
|
||||
// actual diffing algorithms
|
||||
pub mod myers;
|
||||
pub mod patience;
|
||||
|
||||
/// An enum representing a diffing algorithm.
///
/// The variant chosen decides which implementation [`diff`] dispatches to.
#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub enum Algorithm {
    /// Selects the implementation in the [`myers`] module.
    Myers,
    /// Selects the implementation in the [`patience`] module.
    Patience,
}
|
||||
|
||||
impl Default for Algorithm {
|
||||
/// Returns the default algorithm ([`Algorithm::Myers`]).
|
||||
fn default() -> Algorithm {
|
||||
Algorithm::Myers
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a diff between old and new with the given algorithm.
|
||||
///
|
||||
/// Diffs `old`, between indices `old_range` and `new` between indices `new_range`.
|
||||
pub fn diff<Old, New, D>(
|
||||
alg: Algorithm,
|
||||
d: &mut D,
|
||||
old: &Old,
|
||||
old_range: Range<usize>,
|
||||
new: &New,
|
||||
new_range: Range<usize>,
|
||||
) -> Result<(), D::Error>
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
D: DiffHook,
|
||||
Old::Output: Hash + Eq + Ord,
|
||||
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
||||
{
|
||||
match alg {
|
||||
Algorithm::Myers => myers::diff(d, old, old_range, new, new_range),
|
||||
Algorithm::Patience => patience::diff(d, old, old_range, new, new_range),
|
||||
}
|
||||
}
|
||||
|
||||
/// Shortcut for diffing slices with a specific algorithm.
|
||||
pub fn diff_slices<D, T>(alg: Algorithm, d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
|
||||
where
|
||||
D: DiffHook,
|
||||
T: Eq + Hash + Ord,
|
||||
{
|
||||
diff(alg, d, old, 0..old.len(), new, 0..new.len())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ fn test_diff() {
|
|||
let a: &[usize] = &[0, 1, 2, 3, 4];
|
||||
let b: &[usize] = &[0, 1, 2, 9, 4];
|
||||
|
||||
let mut d = crate::algorithms::Replace::new(crate::algorithms::CaptureHook::new());
|
||||
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
||||
diff_slices(&mut d, a, b).unwrap();
|
||||
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"
|
||||
[
|
||||
|
|
@ -212,7 +212,7 @@ fn test_contiguous() {
|
|||
let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5];
|
||||
let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];
|
||||
|
||||
let mut d = crate::algorithms::Replace::new(crate::algorithms::CaptureHook::new());
|
||||
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
||||
diff_slices(&mut d, a, b).unwrap();
|
||||
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"
|
||||
[
|
||||
|
|
@ -247,7 +247,7 @@ fn test_pat() {
|
|||
let a: &[usize] = &[0, 1, 3, 4, 5];
|
||||
let b: &[usize] = &[0, 1, 4, 5, 8, 9];
|
||||
|
||||
let mut d = crate::algorithms::CaptureHook::new();
|
||||
let mut d = crate::algorithms::Capture::new();
|
||||
diff_slices(&mut d, a, b).unwrap();
|
||||
insta::assert_debug_snapshot!(d.ops(), @r###"
|
||||
[
|
||||
|
|
|
|||
|
|
@ -180,7 +180,7 @@ fn test_patience() {
|
|||
let a: &[usize] = &[11, 1, 2, 2, 3, 4, 4, 4, 5, 47, 19];
|
||||
let b: &[usize] = &[10, 1, 2, 2, 8, 9, 4, 4, 7, 47, 18];
|
||||
|
||||
let mut d = Replace::new(crate::algorithms::CaptureHook::new());
|
||||
let mut d = Replace::new(crate::algorithms::Capture::new());
|
||||
diff_slices(&mut d, a, b).unwrap();
|
||||
|
||||
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"
|
||||
|
|
|
|||
|
|
@ -2,6 +2,13 @@ use crate::algorithms::DiffHook;
|
|||
|
||||
/// A [`DiffHook`] that combines deletions and insertions to give blocks
|
||||
/// of maximal length, and replacements when appropriate.
|
||||
///
|
||||
/// It will replace [`DiffHook::insert`] and [`DiffHook::delete`] events when
|
||||
/// possible with [`DiffHook::replace`] events. Note that even though the
|
||||
/// text processing in the crate does not use replace events and always resolves
|
||||
/// them back to delete and insert, it's useful to always use the replacer to
|
||||
/// ensure a consistent order of inserts and deletes. This is why for instance
|
||||
/// the text diffing automatically uses this hook internally.
|
||||
pub struct Replace<D: DiffHook> {
|
||||
d: D,
|
||||
del: Option<(usize, usize, usize)>,
|
||||
|
|
@ -153,7 +160,7 @@ fn test_mayers_replace() {
|
|||
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n",
|
||||
];
|
||||
|
||||
let mut d = Replace::new(crate::algorithms::CaptureHook::new());
|
||||
let mut d = Replace::new(crate::algorithms::Capture::new());
|
||||
myers::diff_slices(&mut d, a, b).unwrap();
|
||||
|
||||
insta::assert_debug_snapshot!(&d.into_inner().ops(), @r###"
|
||||
|
|
@ -194,7 +201,7 @@ fn test_replace() {
|
|||
let a: &[usize] = &[0, 1, 2, 3, 4];
|
||||
let b: &[usize] = &[0, 1, 2, 7, 8, 9];
|
||||
|
||||
let mut d = Replace::new(crate::algorithms::CaptureHook::new());
|
||||
let mut d = Replace::new(crate::algorithms::Capture::new());
|
||||
crate::algorithms::myers::diff_slices(&mut d, a, b).unwrap();
|
||||
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"
|
||||
[
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue