Added text diff utilities
This commit is contained in:
parent
bc97614946
commit
34dd260dbb
9 changed files with 729 additions and 121 deletions
112
src/algorithms/capture.rs
Normal file
112
src/algorithms/capture.rs
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
use crate::algorithms::hook::DiffHook;
|
||||
use std::convert::Infallible;
|
||||
|
||||
/// Utility enum to capture a diff operation.
|
||||
///
|
||||
/// This is used by [`Capture`](crate::algorithms::Capture).
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
|
||||
pub enum DiffOp {
|
||||
/// A segment is equal (see [`DiffHook::equal`])
|
||||
Equal {
|
||||
old_index: usize,
|
||||
new_index: usize,
|
||||
len: usize,
|
||||
},
|
||||
/// A segment was deleted (see [`DiffHook::delete`])
|
||||
Delete {
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
new_index: usize,
|
||||
},
|
||||
/// A segment was inserted (see [`DiffHook::insert`])
|
||||
Insert {
|
||||
old_index: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
},
|
||||
/// A segment was replaced (see [`DiffHook::replace`])
|
||||
Replace {
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
},
|
||||
}
|
||||
|
||||
/// A [`DiffHook`] that captures all diff operations.
|
||||
#[derive(Default, Clone)]
|
||||
pub struct Capture(Vec<DiffOp>);
|
||||
|
||||
impl Capture {
|
||||
/// Creates a new capture hook.
|
||||
pub fn new() -> Capture {
|
||||
Capture::default()
|
||||
}
|
||||
|
||||
/// Converts the capture hook into a vector.
|
||||
pub fn into_vec(self) -> Vec<DiffOp> {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Accesses the captured operations.
|
||||
pub fn ops(&self) -> &[DiffOp] {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl DiffHook for Capture {
|
||||
type Error = Infallible;
|
||||
|
||||
fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Equal {
|
||||
old_index,
|
||||
new_index,
|
||||
len,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn delete(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
new_index: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Delete {
|
||||
old_index,
|
||||
old_len,
|
||||
new_index,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Insert {
|
||||
old_index,
|
||||
new_index,
|
||||
new_len,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn replace(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Replace {
|
||||
old_index,
|
||||
old_len,
|
||||
new_index,
|
||||
new_len,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,3 @@
|
|||
use std::convert::Infallible;
|
||||
|
||||
/// A trait for reacting to an edit script from the "old" version to
|
||||
/// the "new" version.
|
||||
pub trait DiffHook: Sized {
|
||||
|
|
@ -108,113 +106,3 @@ impl<'a, D: DiffHook + 'a> DiffHook for &'a mut D {
|
|||
(*self).finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// Utility enum to capture a diff operation.
|
||||
///
|
||||
/// This is used by [`CaptureHook`].
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
|
||||
pub enum DiffOp {
|
||||
/// A segment is equal (see [`DiffHook::equal`])
|
||||
Equal {
|
||||
old_index: usize,
|
||||
new_index: usize,
|
||||
len: usize,
|
||||
},
|
||||
/// A segment was deleted (see [`DiffHook::delete`])
|
||||
Delete {
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
new_index: usize,
|
||||
},
|
||||
/// A segment was inserted (see [`DiffHook::insert`])
|
||||
Insert {
|
||||
old_index: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
},
|
||||
/// A segment was replaced (see [`DiffHook::replace`])
|
||||
Replace {
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
},
|
||||
}
|
||||
|
||||
/// A [`DiffHook`] that captures all diff operations.
|
||||
#[derive(Default, Clone)]
|
||||
pub struct CaptureHook(Vec<DiffOp>);
|
||||
|
||||
impl CaptureHook {
|
||||
/// Creates a new capture hook.
|
||||
pub fn new() -> CaptureHook {
|
||||
CaptureHook::default()
|
||||
}
|
||||
|
||||
/// Converts the capture hook into a vector.
|
||||
pub fn into_vec(self) -> Vec<DiffOp> {
|
||||
self.0
|
||||
}
|
||||
|
||||
/// Accesses the captured operations.
|
||||
pub fn ops(&self) -> &[DiffOp] {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl DiffHook for CaptureHook {
|
||||
type Error = Infallible;
|
||||
|
||||
fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Equal {
|
||||
old_index,
|
||||
new_index,
|
||||
len,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn delete(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
new_index: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Delete {
|
||||
old_index,
|
||||
old_len,
|
||||
new_index,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Insert {
|
||||
old_index,
|
||||
new_index,
|
||||
new_len,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn replace(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
self.0.push(DiffOp::Replace {
|
||||
old_index,
|
||||
old_len,
|
||||
new_index,
|
||||
new_len,
|
||||
});
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,12 +11,69 @@
|
|||
//! the actual values but only the indexes. This is why the diff hook is not
|
||||
//! used outside of the raw algorithm implementations as for most situations
|
||||
//! access to the values is useful of required.
|
||||
//!
|
||||
//! Most of the crate operates on the [`Algorithm`] enum which abstracts over
|
||||
//! the different algorithms.
|
||||
|
||||
// general traits and utilities
|
||||
mod capture;
|
||||
mod hook;
|
||||
mod replace;
|
||||
|
||||
use std::hash::Hash;
|
||||
use std::ops::{Index, Range};
|
||||
|
||||
pub use capture::*;
|
||||
pub use hook::*;
|
||||
pub use replace::*;
|
||||
|
||||
// actual diffing algorithms
|
||||
pub mod myers;
|
||||
pub mod patience;
|
||||
|
||||
/// An enum representing a diffing algorithm.
|
||||
#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)]
|
||||
pub enum Algorithm {
|
||||
Myers,
|
||||
Patience,
|
||||
}
|
||||
|
||||
impl Default for Algorithm {
|
||||
/// Returns the default algorithm ([`Algorithm::Myers`]).
|
||||
fn default() -> Algorithm {
|
||||
Algorithm::Myers
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a diff between old and new with the given algorithm.
|
||||
///
|
||||
/// Diffs `old`, between indices `old_range` and `new` between indices `new_range`.
|
||||
pub fn diff<Old, New, D>(
|
||||
alg: Algorithm,
|
||||
d: &mut D,
|
||||
old: &Old,
|
||||
old_range: Range<usize>,
|
||||
new: &New,
|
||||
new_range: Range<usize>,
|
||||
) -> Result<(), D::Error>
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
D: DiffHook,
|
||||
Old::Output: Hash + Eq + Ord,
|
||||
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
||||
{
|
||||
match alg {
|
||||
Algorithm::Myers => myers::diff(d, old, old_range, new, new_range),
|
||||
Algorithm::Patience => patience::diff(d, old, old_range, new, new_range),
|
||||
}
|
||||
}
|
||||
|
||||
/// Shortcut for diffing slices with a specific algorithm.
|
||||
pub fn diff_slices<D, T>(alg: Algorithm, d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
|
||||
where
|
||||
D: DiffHook,
|
||||
T: Eq + Hash + Ord,
|
||||
{
|
||||
diff(alg, d, old, 0..old.len(), new, 0..new.len())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ fn test_diff() {
|
|||
let a: &[usize] = &[0, 1, 2, 3, 4];
|
||||
let b: &[usize] = &[0, 1, 2, 9, 4];
|
||||
|
||||
let mut d = crate::algorithms::Replace::new(crate::algorithms::CaptureHook::new());
|
||||
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
||||
diff_slices(&mut d, a, b).unwrap();
|
||||
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"
|
||||
[
|
||||
|
|
@ -212,7 +212,7 @@ fn test_contiguous() {
|
|||
let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5];
|
||||
let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];
|
||||
|
||||
let mut d = crate::algorithms::Replace::new(crate::algorithms::CaptureHook::new());
|
||||
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
||||
diff_slices(&mut d, a, b).unwrap();
|
||||
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"
|
||||
[
|
||||
|
|
@ -247,7 +247,7 @@ fn test_pat() {
|
|||
let a: &[usize] = &[0, 1, 3, 4, 5];
|
||||
let b: &[usize] = &[0, 1, 4, 5, 8, 9];
|
||||
|
||||
let mut d = crate::algorithms::CaptureHook::new();
|
||||
let mut d = crate::algorithms::Capture::new();
|
||||
diff_slices(&mut d, a, b).unwrap();
|
||||
insta::assert_debug_snapshot!(d.ops(), @r###"
|
||||
[
|
||||
|
|
|
|||
|
|
@ -180,7 +180,7 @@ fn test_patience() {
|
|||
let a: &[usize] = &[11, 1, 2, 2, 3, 4, 4, 4, 5, 47, 19];
|
||||
let b: &[usize] = &[10, 1, 2, 2, 8, 9, 4, 4, 7, 47, 18];
|
||||
|
||||
let mut d = Replace::new(crate::algorithms::CaptureHook::new());
|
||||
let mut d = Replace::new(crate::algorithms::Capture::new());
|
||||
diff_slices(&mut d, a, b).unwrap();
|
||||
|
||||
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"
|
||||
|
|
|
|||
|
|
@ -2,6 +2,13 @@ use crate::algorithms::DiffHook;
|
|||
|
||||
/// A [`DiffHook`] that combines deletions and insertions to give blocks
|
||||
/// of maximal length, and replacements when appropriate.
|
||||
///
|
||||
/// It will replace [`DiffHook::insert`] and [`DiffHook::delete`] events when
|
||||
/// possible with [`DiffHook::replace`] events. Note that even though the
|
||||
/// text processing in the crate does not use replace events and always resolves
|
||||
/// then back to delete and insert, it's useful to always use the replacer to
|
||||
/// ensure a consistent order of inserts and deletes. This is why for instance
|
||||
/// the text diffing automatically uses this hook internally.
|
||||
pub struct Replace<D: DiffHook> {
|
||||
d: D,
|
||||
del: Option<(usize, usize, usize)>,
|
||||
|
|
@ -153,7 +160,7 @@ fn test_mayers_replace() {
|
|||
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n",
|
||||
];
|
||||
|
||||
let mut d = Replace::new(crate::algorithms::CaptureHook::new());
|
||||
let mut d = Replace::new(crate::algorithms::Capture::new());
|
||||
myers::diff_slices(&mut d, a, b).unwrap();
|
||||
|
||||
insta::assert_debug_snapshot!(&d.into_inner().ops(), @r###"
|
||||
|
|
@ -194,7 +201,7 @@ fn test_replace() {
|
|||
let a: &[usize] = &[0, 1, 2, 3, 4];
|
||||
let b: &[usize] = &[0, 1, 2, 7, 8, 9];
|
||||
|
||||
let mut d = Replace::new(crate::algorithms::CaptureHook::new());
|
||||
let mut d = Replace::new(crate::algorithms::Capture::new());
|
||||
crate::algorithms::myers::diff_slices(&mut d, a, b).unwrap();
|
||||
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"
|
||||
[
|
||||
|
|
|
|||
24
src/lib.rs
24
src/lib.rs
|
|
@ -1 +1,25 @@
|
|||
//! This crate implements diffing utilities. It attempts to provide an abstraction
|
||||
//! interface over different types of diffing algorithms.
|
||||
//!
|
||||
//! # Example
|
||||
//!
|
||||
//! ```rust
|
||||
//! use similar::{text::diff_lines, algorithms::Algorithm};
|
||||
//! let old = "Hello World!\nMore stuff here.";
|
||||
//! let new = "Oh well World!\nMore stuff here.";
|
||||
//! for op in diff_lines(Algorithm::Myers, old, new) {
|
||||
//! println!("{}", op);
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! # Components
|
||||
//!
|
||||
//! The crate is split into two components:
|
||||
//!
|
||||
//! * [`algorithms`]: This implements the different types of diffing algorithms.
|
||||
//! It provides both low level access to the algorithms with the minimal
|
||||
//! trait bounds necessary, as well as a generic interface.
|
||||
//! * [`text`]: This extends the general diffing functionality to text (and more
|
||||
//! specifically line) based diff operations.
|
||||
pub mod algorithms;
|
||||
pub mod text;
|
||||
|
|
|
|||
523
src/text.rs
Normal file
523
src/text.rs
Normal file
|
|
@ -0,0 +1,523 @@
|
|||
//! Text diffing utilities.
|
||||
//!
|
||||
//! This provides helpful utilities for text (and more specifically line) diff
|
||||
//! operations.
|
||||
use std::borrow::Cow;
|
||||
use std::collections::VecDeque;
|
||||
use std::convert::Infallible;
|
||||
use std::fmt;
|
||||
|
||||
use crate::algorithms::{diff_slices, Algorithm, DiffHook, Replace};
|
||||
|
||||
/// A text diff operation.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum DiffOp<'old, 'new> {
|
||||
/// Old and new text are matching
|
||||
Equal {
|
||||
old_index: usize,
|
||||
new_index: usize,
|
||||
value: &'old str,
|
||||
},
|
||||
/// New text was inserted
|
||||
Insert { new_index: usize, value: &'new str },
|
||||
/// Old text was deleted
|
||||
Delete { old_index: usize, value: &'old str },
|
||||
}
|
||||
|
||||
impl<'old, 'new> fmt::Display for DiffOp<'old, 'new> {
|
||||
/// Stringifies a diff op.
|
||||
///
|
||||
/// This is mostly for debugging. It prepends a diff marker to the beginning
|
||||
/// of the value (`+`, `-` or a space) and trims of trailing spaces. If the
|
||||
/// space trimming is not wanted the alternative rendering mode disables that.
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}{}",
|
||||
match *self {
|
||||
DiffOp::Equal { .. } => " ",
|
||||
DiffOp::Insert { .. } => "+",
|
||||
DiffOp::Delete { .. } => "-",
|
||||
},
|
||||
if f.alternate() {
|
||||
self.as_str()
|
||||
} else {
|
||||
self.as_str().trim_end()
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'old, 'new> DiffOp<'old, 'new> {
|
||||
/// Returns the old and new index based on availability.
|
||||
pub fn indices(&self) -> (Option<usize>, Option<usize>) {
|
||||
match *self {
|
||||
DiffOp::Equal {
|
||||
old_index,
|
||||
new_index,
|
||||
..
|
||||
} => (Some(old_index), Some(new_index)),
|
||||
DiffOp::Insert { new_index, .. } => (None, Some(new_index)),
|
||||
DiffOp::Delete { old_index, .. } => (Some(old_index), None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts the embedded string value.
|
||||
pub fn as_str(&self) -> &str {
|
||||
match *self {
|
||||
DiffOp::Equal { value, .. } => value,
|
||||
DiffOp::Insert { value, .. } => value,
|
||||
DiffOp::Delete { value, .. } => value,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct ContextState<'old, 'new> {
|
||||
radius: usize,
|
||||
ops: VecDeque<DiffOp<'old, 'new>>,
|
||||
after_modification: bool,
|
||||
}
|
||||
|
||||
/// Captures diff op codes for textual diffs
|
||||
pub struct TextDiffer<'old, 'new, 'bufs> {
|
||||
old: Cow<'bufs, [&'old str]>,
|
||||
new: Cow<'bufs, [&'new str]>,
|
||||
ops: Vec<DiffOp<'old, 'new>>,
|
||||
algorithm: Algorithm,
|
||||
use_replace: bool,
|
||||
context_state: Option<ContextState<'old, 'new>>,
|
||||
}
|
||||
|
||||
impl<'old, 'new, 'bufs> TextDiffer<'old, 'new, 'bufs> {
|
||||
/// Creates a new line based text differ from two strings.
|
||||
pub fn new_from_lines(old: &'old str, new: &'new str) -> TextDiffer<'old, 'new, 'bufs> {
|
||||
TextDiffer {
|
||||
old: Cow::Owned(split_lines(old).collect()),
|
||||
new: Cow::Owned(split_lines(new).collect()),
|
||||
ops: Vec::new(),
|
||||
algorithm: Algorithm::default(),
|
||||
use_replace: true,
|
||||
context_state: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new word based text differ from two strings.
|
||||
pub fn new_from_words(old: &'old str, new: &'new str) -> TextDiffer<'old, 'new, 'bufs> {
|
||||
TextDiffer {
|
||||
old: Cow::Owned(split_words(old).collect()),
|
||||
new: Cow::Owned(split_words(new).collect()),
|
||||
ops: Vec::new(),
|
||||
algorithm: Algorithm::default(),
|
||||
use_replace: true,
|
||||
context_state: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new text differ from two slices.
|
||||
pub fn new_from_slices(
|
||||
old: &'bufs [&'old str],
|
||||
new: &'bufs [&'new str],
|
||||
) -> TextDiffer<'old, 'new, 'bufs> {
|
||||
TextDiffer {
|
||||
old: Cow::Borrowed(old),
|
||||
new: Cow::Borrowed(new),
|
||||
ops: Vec::new(),
|
||||
algorithm: Algorithm::default(),
|
||||
use_replace: true,
|
||||
context_state: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets a context radius.
|
||||
///
|
||||
/// By default no radius is set. If a radius is set then `n` number of
|
||||
/// equal chunks around modifications are retained, others are discarded.
|
||||
pub fn set_context_radius(&mut self, n: Option<usize>) {
|
||||
self.context_state = n.map(|radius| ContextState {
|
||||
radius,
|
||||
ops: VecDeque::new(),
|
||||
after_modification: false,
|
||||
});
|
||||
}
|
||||
|
||||
/// Sets a different diffing algorithm.
|
||||
///
|
||||
/// If not explicitly configured the default algorithm is
|
||||
/// [`Algorithm::default`].
|
||||
pub fn set_algorithm(&mut self, alg: Algorithm) {
|
||||
self.algorithm = alg;
|
||||
}
|
||||
|
||||
/// Controls if the [`Replace`] hook should be used automatically.
|
||||
///
|
||||
/// By default the replacer is always used.
|
||||
pub fn set_use_replace(&mut self, yes: bool) {
|
||||
self.use_replace = yes;
|
||||
}
|
||||
|
||||
/// Diffs the text with the given algorithm returning the ops.
|
||||
///
|
||||
/// This is a shortcut for running a diff operation with the text differ
|
||||
/// as hook and converting it into the captured operaitons.
|
||||
pub fn diff(mut self) -> Vec<DiffOp<'old, 'new>> {
|
||||
// this requires some explanation: because the text differ can
|
||||
// hold owned buffers (from `TextDiffer::from_lines`) we cannot directly
|
||||
// use it can't fulfill the lifetime requirements. For the way this
|
||||
// diff function works though we can get around this by making a new
|
||||
// differ that borrows the stack local buffers as nothing bound to
|
||||
// `bufs needs to outlive this stack frame.
|
||||
//
|
||||
// TODO: consider changing `TextDiffer` into a builder instead.
|
||||
let (old, new) = (&self.old[..], &self.new[..]);
|
||||
let mut d = TextDiffer {
|
||||
old: Cow::Borrowed(old),
|
||||
new: Cow::Borrowed(new),
|
||||
ops: self.ops,
|
||||
algorithm: self.algorithm,
|
||||
use_replace: self.use_replace,
|
||||
context_state: self.context_state.take(),
|
||||
};
|
||||
if d.use_replace {
|
||||
let mut d = Replace::new(d);
|
||||
diff_slices(self.algorithm, &mut d, old, new).unwrap();
|
||||
d.into_inner().into_ops()
|
||||
} else {
|
||||
diff_slices(self.algorithm, &mut d, old, new).unwrap();
|
||||
d.into_ops()
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the captured ops.
|
||||
pub fn into_ops(self) -> Vec<DiffOp<'old, 'new>> {
|
||||
self.ops
|
||||
}
|
||||
|
||||
fn push_op(&mut self, op: DiffOp<'old, 'new>) {
|
||||
match self.context_state {
|
||||
None => self.ops.push(op),
|
||||
Some(ref mut context_state) => {
|
||||
if let DiffOp::Equal { .. } = op {
|
||||
if context_state.ops.len() >= context_state.radius {
|
||||
if context_state.after_modification {
|
||||
context_state.after_modification = false;
|
||||
self.ops.extend(context_state.ops.drain(..));
|
||||
}
|
||||
context_state.ops.pop_front();
|
||||
}
|
||||
context_state.ops.push_back(op);
|
||||
} else {
|
||||
context_state.after_modification = true;
|
||||
self.ops.extend(context_state.ops.drain(..));
|
||||
self.ops.push(op);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'old, 'new, 'bufs> DiffHook for TextDiffer<'old, 'new, 'bufs> {
|
||||
type Error = Infallible;
|
||||
|
||||
fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
|
||||
for off in 0..len {
|
||||
self.push_op(DiffOp::Equal {
|
||||
old_index: old_index + off,
|
||||
new_index: new_index + off,
|
||||
value: self.old[old_index + off],
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn delete(
|
||||
&mut self,
|
||||
old_index: usize,
|
||||
old_len: usize,
|
||||
_new_index: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
for off in 0..old_len {
|
||||
self.push_op(DiffOp::Delete {
|
||||
old_index: old_index + off,
|
||||
value: self.old[old_index + off],
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn insert(
|
||||
&mut self,
|
||||
_old_index: usize,
|
||||
new_index: usize,
|
||||
new_len: usize,
|
||||
) -> Result<(), Self::Error> {
|
||||
for off in 0..new_len {
|
||||
self.push_op(DiffOp::Insert {
|
||||
new_index: new_index + off,
|
||||
value: self.new[new_index + off],
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn finish(&mut self) -> Result<(), Self::Error> {
|
||||
if let Some(context_state) = self.context_state.take() {
|
||||
self.ops.extend(context_state.ops);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Simple utility function to calculate differences between old and new.
|
||||
pub fn diff_strings<'old, 'new, 'bufs>(
|
||||
alg: Algorithm,
|
||||
old: &'bufs [&'old str],
|
||||
new: &'bufs [&'new str],
|
||||
) -> Vec<DiffOp<'old, 'new>> {
|
||||
let mut differ = TextDiffer::new_from_slices(old, new);
|
||||
differ.set_algorithm(alg);
|
||||
differ.diff()
|
||||
}
|
||||
|
||||
/// Simple utility function to calculate line differences between old and new.
|
||||
pub fn diff_lines<'old, 'new>(
|
||||
alg: Algorithm,
|
||||
old: &'old str,
|
||||
new: &'new str,
|
||||
) -> Vec<DiffOp<'old, 'new>> {
|
||||
let mut differ = TextDiffer::new_from_lines(old, new);
|
||||
differ.set_algorithm(alg);
|
||||
differ.diff()
|
||||
}
|
||||
|
||||
/// Simple utility function to calculate word differences between old and new.
|
||||
pub fn diff_words<'old, 'new>(
|
||||
alg: Algorithm,
|
||||
old: &'old str,
|
||||
new: &'new str,
|
||||
) -> Vec<DiffOp<'old, 'new>> {
|
||||
let mut differ = TextDiffer::new_from_words(old, new);
|
||||
differ.set_algorithm(alg);
|
||||
differ.diff()
|
||||
}
|
||||
|
||||
/// Given a string splits it into lines.
|
||||
///
|
||||
/// This operation will preserve the newline separation character at the end.
|
||||
/// It supports all common newline sequences (`\r\n`, `\n` as well as `\r`).
|
||||
fn split_lines(s: &str) -> impl Iterator<Item = &str> {
|
||||
let mut iter = s.char_indices().peekable();
|
||||
let mut last_pos = 0;
|
||||
|
||||
std::iter::from_fn(move || {
|
||||
if let Some((idx, c)) = iter.next() {
|
||||
let mut rv = None;
|
||||
if c == '\r' {
|
||||
if iter.peek().map_or(false, |x| x.1 == '\n') {
|
||||
rv = Some(&s[last_pos..=idx + 1]);
|
||||
iter.next();
|
||||
last_pos = idx + 2;
|
||||
} else {
|
||||
rv = Some(&s[last_pos..=idx]);
|
||||
last_pos = idx + 1;
|
||||
}
|
||||
} else if c == '\n' {
|
||||
rv = Some(&s[last_pos..=idx]);
|
||||
last_pos = idx + 1;
|
||||
}
|
||||
Some(rv)
|
||||
} else if last_pos < s.len() {
|
||||
let tmp = &s[last_pos..];
|
||||
last_pos = s.len();
|
||||
Some(Some(tmp))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.flatten()
|
||||
}
|
||||
|
||||
/// Splits text into words with whitespace attached.
|
||||
pub fn split_words(s: &str) -> impl Iterator<Item = &str> {
|
||||
let mut iter = s.char_indices().peekable();
|
||||
let mut last_pos = 0;
|
||||
|
||||
std::iter::from_fn(move || {
|
||||
if let Some((idx, c)) = iter.next() {
|
||||
let mut rv = None;
|
||||
if c.is_whitespace() {
|
||||
let mut last = (idx, c);
|
||||
while let Some(&(next_idx, next_char)) = iter.peek() {
|
||||
if !next_char.is_whitespace() {
|
||||
break;
|
||||
}
|
||||
iter.next();
|
||||
last = (next_idx, next_char);
|
||||
}
|
||||
let whitespace_end = last.0 + last.1.len_utf8();
|
||||
rv = Some(&s[last_pos..whitespace_end]);
|
||||
last_pos = whitespace_end;
|
||||
}
|
||||
Some(rv)
|
||||
} else if last_pos < s.len() {
|
||||
let tmp = &s[last_pos..];
|
||||
last_pos = s.len();
|
||||
Some(Some(tmp))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.flatten()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_split_lines() {
|
||||
assert_eq!(
|
||||
split_lines("first\nsecond\rthird\r\nfourth\nlast").collect::<Vec<_>>(),
|
||||
vec!["first\n", "second\r", "third\r\n", "fourth\n", "last"]
|
||||
);
|
||||
assert_eq!(split_lines("\n\n").collect::<Vec<_>>(), vec!["\n", "\n"]);
|
||||
assert_eq!(split_lines("\n").collect::<Vec<_>>(), vec!["\n"]);
|
||||
assert!(split_lines("").collect::<Vec<_>>().is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_split_words() {
|
||||
assert_eq!(
|
||||
split_words("foo bar baz\n\n aha").collect::<Vec<_>>(),
|
||||
["foo ", "bar ", "baz\n\n ", "aha"]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_line_diff() {
|
||||
insta::assert_debug_snapshot!(diff_lines(Algorithm::Myers, "foo\nbar\nbaz", "foo\nblah\nbaz"), @r###"
|
||||
[
|
||||
Equal {
|
||||
old_index: 0,
|
||||
new_index: 0,
|
||||
value: "foo\n",
|
||||
},
|
||||
Delete {
|
||||
old_index: 1,
|
||||
value: "bar\n",
|
||||
},
|
||||
Insert {
|
||||
new_index: 1,
|
||||
value: "blah\n",
|
||||
},
|
||||
Equal {
|
||||
old_index: 2,
|
||||
new_index: 2,
|
||||
value: "baz",
|
||||
},
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_context_diff() {
|
||||
let old_text = vec![
|
||||
"1\n", "2\n", "3\n", "4\n", "5\n", "6\n", "7\n", "8\n", "9\n", "0\n", "1\n", "2\n", "3\n",
|
||||
"4\n", "5\n", "6\n", "7\n", "8\n", "9\n", "0\n",
|
||||
];
|
||||
let mut new_text = old_text.clone();
|
||||
new_text[9] = "a\n";
|
||||
new_text[11] = "b\n";
|
||||
new_text[19] = "c\n";
|
||||
let mut differ = TextDiffer::new_from_slices(&old_text, &new_text);
|
||||
differ.set_context_radius(Some(2));
|
||||
insta::assert_debug_snapshot!(differ.diff(), @r###"
|
||||
[
|
||||
Equal {
|
||||
old_index: 7,
|
||||
new_index: 7,
|
||||
value: "8\n",
|
||||
},
|
||||
Equal {
|
||||
old_index: 8,
|
||||
new_index: 8,
|
||||
value: "9\n",
|
||||
},
|
||||
Delete {
|
||||
old_index: 9,
|
||||
value: "0\n",
|
||||
},
|
||||
Insert {
|
||||
new_index: 9,
|
||||
value: "a\n",
|
||||
},
|
||||
Equal {
|
||||
old_index: 10,
|
||||
new_index: 10,
|
||||
value: "1\n",
|
||||
},
|
||||
Delete {
|
||||
old_index: 11,
|
||||
value: "2\n",
|
||||
},
|
||||
Insert {
|
||||
new_index: 11,
|
||||
value: "b\n",
|
||||
},
|
||||
Equal {
|
||||
old_index: 12,
|
||||
new_index: 12,
|
||||
value: "3\n",
|
||||
},
|
||||
Equal {
|
||||
old_index: 13,
|
||||
new_index: 13,
|
||||
value: "4\n",
|
||||
},
|
||||
Equal {
|
||||
old_index: 17,
|
||||
new_index: 17,
|
||||
value: "8\n",
|
||||
},
|
||||
Equal {
|
||||
old_index: 18,
|
||||
new_index: 18,
|
||||
value: "9\n",
|
||||
},
|
||||
Delete {
|
||||
old_index: 19,
|
||||
value: "0\n",
|
||||
},
|
||||
Insert {
|
||||
new_index: 19,
|
||||
value: "c\n",
|
||||
},
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_display() {
|
||||
let old_text = vec![
|
||||
"1\n", "2\n", "3\n", "4\n", "5\n", "6\n", "7\n", "8\n", "9\n",
|
||||
];
|
||||
let mut new_text = old_text.clone();
|
||||
new_text[3] = "a\n";
|
||||
new_text[7] = "b\n";
|
||||
let mut differ = TextDiffer::new_from_slices(&old_text, &new_text);
|
||||
differ.set_context_radius(Some(2));
|
||||
let ops = differ
|
||||
.diff()
|
||||
.into_iter()
|
||||
.map(|x| x.to_string())
|
||||
.collect::<Vec<_>>();
|
||||
insta::assert_debug_snapshot!(ops, @r###"
|
||||
[
|
||||
" 2",
|
||||
" 3",
|
||||
"-4",
|
||||
"+a",
|
||||
" 5",
|
||||
" 6",
|
||||
" 7",
|
||||
"-8",
|
||||
"+b",
|
||||
" 9",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue