Initial work to refactor the text module
This commit is contained in:
parent
8692e89b0a
commit
c0b61f95b0
2 changed files with 68 additions and 370 deletions
|
|
@ -101,7 +101,7 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
||||||
pub fn capture_diff_slices<D, T>(alg: Algorithm, old: &[T], new: &[T]) -> Vec<DiffOp>
|
pub fn capture_diff_slices<T>(alg: Algorithm, old: &[T], new: &[T]) -> Vec<DiffOp>
|
||||||
where
|
where
|
||||||
T: Eq + Hash + Ord,
|
T: Eq + Hash + Ord,
|
||||||
{
|
{
|
||||||
|
|
|
||||||
436
src/text.rs
436
src/text.rs
|
|
@ -3,267 +3,100 @@
|
||||||
//! This provides helpful utilities for text (and more specifically line) diff
|
//! This provides helpful utilities for text (and more specifically line) diff
|
||||||
//! operations.
|
//! operations.
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::VecDeque;
|
|
||||||
use std::convert::Infallible;
|
|
||||||
use std::fmt;
|
|
||||||
|
|
||||||
use crate::algorithms::{diff_slices, Algorithm, DiffHook, Replace};
|
use crate::algorithms::{capture_diff_slices, group_diff_ops, Algorithm, DiffOp};
|
||||||
|
|
||||||
/// A text diff operation.
|
#[derive(Clone, Debug)]
|
||||||
#[derive(Debug, Clone)]
|
pub struct TextDiffBuilder {
|
||||||
pub enum DiffOp<'old, 'new> {
|
algorithm: Algorithm,
|
||||||
/// Old and new text are matching
|
|
||||||
Equal {
|
|
||||||
old_index: usize,
|
|
||||||
new_index: usize,
|
|
||||||
value: &'old str,
|
|
||||||
},
|
|
||||||
/// New text was inserted
|
|
||||||
Insert { new_index: usize, value: &'new str },
|
|
||||||
/// Old text was deleted
|
|
||||||
Delete { old_index: usize, value: &'old str },
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'old, 'new> fmt::Display for DiffOp<'old, 'new> {
|
impl Default for TextDiffBuilder {
|
||||||
/// Stringifies a diff op.
|
fn default() -> TextDiffBuilder {
|
||||||
///
|
TextDiffBuilder {
|
||||||
/// This is mostly for debugging. It prepends a diff marker to the beginning
|
algorithm: Algorithm::default(),
|
||||||
/// of the value (`+`, `-` or a space) and trims off trailing spaces. If the
|
}
|
||||||
/// space trimming is not wanted the alternative rendering mode disables that.
|
}
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
}
|
||||||
write!(
|
|
||||||
f,
|
impl TextDiffBuilder {
|
||||||
"{}{}",
|
pub fn diff_lines<'old, 'new>(
|
||||||
match *self {
|
&self,
|
||||||
DiffOp::Equal { .. } => " ",
|
old: &'old str,
|
||||||
DiffOp::Insert { .. } => "+",
|
new: &'new str,
|
||||||
DiffOp::Delete { .. } => "-",
|
) -> TextDiff<'old, 'new, 'static> {
|
||||||
},
|
self.diff(
|
||||||
if f.alternate() {
|
Cow::Owned(split_lines(old).collect()),
|
||||||
self.as_str()
|
Cow::Owned(split_lines(new).collect()),
|
||||||
} else {
|
|
||||||
self.as_str().trim_end()
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
impl<'old, 'new> DiffOp<'old, 'new> {
|
pub fn diff_words<'old, 'new>(
|
||||||
/// Returns the old and new index based on availability.
|
&self,
|
||||||
pub fn indices(&self) -> (Option<usize>, Option<usize>) {
|
old: &'old str,
|
||||||
match *self {
|
new: &'new str,
|
||||||
DiffOp::Equal {
|
) -> TextDiff<'old, 'new, 'static> {
|
||||||
old_index,
|
self.diff(
|
||||||
new_index,
|
Cow::Owned(split_words(old).collect()),
|
||||||
..
|
Cow::Owned(split_words(new).collect()),
|
||||||
} => (Some(old_index), Some(new_index)),
|
)
|
||||||
DiffOp::Insert { new_index, .. } => (None, Some(new_index)),
|
|
||||||
DiffOp::Delete { old_index, .. } => (Some(old_index), None),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extracts the embedded string value.
|
pub fn diff_slices<'old, 'new, 'bufs>(
|
||||||
pub fn as_str(&self) -> &str {
|
&self,
|
||||||
match *self {
|
old: &'bufs [&'old str],
|
||||||
DiffOp::Equal { value, .. } => value,
|
new: &'bufs [&'new str],
|
||||||
DiffOp::Insert { value, .. } => value,
|
) -> TextDiff<'old, 'new, 'bufs> {
|
||||||
DiffOp::Delete { value, .. } => value,
|
self.diff(Cow::Borrowed(old), Cow::Borrowed(new))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn diff<'old, 'new, 'bufs>(
|
||||||
|
&self,
|
||||||
|
old: Cow<'bufs, [&'old str]>,
|
||||||
|
new: Cow<'bufs, [&'new str]>,
|
||||||
|
) -> TextDiff<'old, 'new, 'bufs> {
|
||||||
|
let ops = capture_diff_slices(self.algorithm, &old, &new);
|
||||||
|
TextDiff {
|
||||||
|
old,
|
||||||
|
new,
|
||||||
|
ops,
|
||||||
|
algorithm: self.algorithm,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ContextState<'old, 'new> {
|
|
||||||
radius: usize,
|
|
||||||
ops: VecDeque<DiffOp<'old, 'new>>,
|
|
||||||
after_modification: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Captures diff op codes for textual diffs
|
/// Captures diff op codes for textual diffs
|
||||||
pub struct TextDiffer<'old, 'new, 'bufs> {
|
pub struct TextDiff<'old, 'new, 'bufs> {
|
||||||
old: Cow<'bufs, [&'old str]>,
|
old: Cow<'bufs, [&'old str]>,
|
||||||
new: Cow<'bufs, [&'new str]>,
|
new: Cow<'bufs, [&'new str]>,
|
||||||
ops: Vec<DiffOp<'old, 'new>>,
|
ops: Vec<DiffOp>,
|
||||||
algorithm: Algorithm,
|
algorithm: Algorithm,
|
||||||
use_replace: bool,
|
|
||||||
context_state: Option<ContextState<'old, 'new>>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'old, 'new, 'bufs> TextDiffer<'old, 'new, 'bufs> {
|
impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||||
/// Creates a new line based text differ from two strings.
|
/// The algorithm that created the diff.
|
||||||
pub fn new_from_lines(old: &'old str, new: &'new str) -> TextDiffer<'old, 'new, 'bufs> {
|
pub fn algorithm(&self) -> Algorithm {
|
||||||
TextDiffer {
|
self.algorithm
|
||||||
old: Cow::Owned(split_lines(old).collect()),
|
|
||||||
new: Cow::Owned(split_lines(new).collect()),
|
|
||||||
ops: Vec::new(),
|
|
||||||
algorithm: Algorithm::default(),
|
|
||||||
use_replace: true,
|
|
||||||
context_state: None,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a new word based text differ from two strings.
|
/// Returns the old slices.
|
||||||
pub fn new_from_words(old: &'old str, new: &'new str) -> TextDiffer<'old, 'new, 'bufs> {
|
pub fn old_slices(&self) -> &[&'old str] {
|
||||||
TextDiffer {
|
&self.old
|
||||||
old: Cow::Owned(split_words(old).collect()),
|
|
||||||
new: Cow::Owned(split_words(new).collect()),
|
|
||||||
ops: Vec::new(),
|
|
||||||
algorithm: Algorithm::default(),
|
|
||||||
use_replace: true,
|
|
||||||
context_state: None,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a new text differ from two slices.
|
/// Returns the new slices.
|
||||||
pub fn new_from_slices(
|
pub fn new_slices(&self) -> &[&'new str] {
|
||||||
old: &'bufs [&'old str],
|
&self.new
|
||||||
new: &'bufs [&'new str],
|
|
||||||
) -> TextDiffer<'old, 'new, 'bufs> {
|
|
||||||
TextDiffer {
|
|
||||||
old: Cow::Borrowed(old),
|
|
||||||
new: Cow::Borrowed(new),
|
|
||||||
ops: Vec::new(),
|
|
||||||
algorithm: Algorithm::default(),
|
|
||||||
use_replace: true,
|
|
||||||
context_state: None,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sets a context radius.
|
/// Returns the captured diff ops.
|
||||||
///
|
pub fn ops(&self) -> &[DiffOp] {
|
||||||
/// By default no radius is set. If a radius is set then `n` number of
|
&self.ops
|
||||||
/// equal chunks around modifications are retained, others are discarded.
|
|
||||||
pub fn set_context_radius(&mut self, n: Option<usize>) {
|
|
||||||
self.context_state = n.map(|radius| ContextState {
|
|
||||||
radius,
|
|
||||||
ops: VecDeque::new(),
|
|
||||||
after_modification: false,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sets a different diffing algorithm.
|
/// Returns the ops grouped.
|
||||||
///
|
pub fn grouped_ops(&self, n: usize) -> Vec<Vec<DiffOp>> {
|
||||||
/// If not explicitly configured the default algorithm is
|
group_diff_ops(self.ops().to_vec(), n)
|
||||||
/// [`Algorithm::default`].
|
|
||||||
pub fn set_algorithm(&mut self, alg: Algorithm) {
|
|
||||||
self.algorithm = alg;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Controls if the [`Replace`] hook should be used automatically.
|
|
||||||
///
|
|
||||||
/// By default the replacer is always used.
|
|
||||||
pub fn set_use_replace(&mut self, yes: bool) {
|
|
||||||
self.use_replace = yes;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Diffs the text with the given algorithm returning the ops.
|
|
||||||
///
|
|
||||||
/// This is a shortcut for running a diff operation with the text differ
|
|
||||||
/// as hook and converting it into the captured operations.
|
|
||||||
pub fn diff(mut self) -> Vec<DiffOp<'old, 'new>> {
|
|
||||||
// this requires some explanation: because the text differ can
|
|
||||||
// hold owned buffers (from `TextDiffer::new_from_lines`) we cannot directly
|
|
||||||
// use it, as it can't fulfill the lifetime requirements. For the way this
|
|
||||||
// diff function works though we can get around this by making a new
|
|
||||||
// differ that borrows the stack local buffers as nothing bound to
|
|
||||||
// `'bufs` needs to outlive this stack frame.
|
|
||||||
//
|
|
||||||
// TODO: consider changing `TextDiffer` into a builder instead.
|
|
||||||
let (old, new) = (&self.old[..], &self.new[..]);
|
|
||||||
let mut d = TextDiffer {
|
|
||||||
old: Cow::Borrowed(old),
|
|
||||||
new: Cow::Borrowed(new),
|
|
||||||
ops: self.ops,
|
|
||||||
algorithm: self.algorithm,
|
|
||||||
use_replace: self.use_replace,
|
|
||||||
context_state: self.context_state.take(),
|
|
||||||
};
|
|
||||||
if d.use_replace {
|
|
||||||
let mut d = Replace::new(d);
|
|
||||||
diff_slices(self.algorithm, &mut d, old, new).unwrap();
|
|
||||||
d.into_inner().into_ops()
|
|
||||||
} else {
|
|
||||||
diff_slices(self.algorithm, &mut d, old, new).unwrap();
|
|
||||||
d.into_ops()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the captured ops.
|
|
||||||
pub fn into_ops(self) -> Vec<DiffOp<'old, 'new>> {
|
|
||||||
self.ops
|
|
||||||
}
|
|
||||||
|
|
||||||
fn push_op(&mut self, op: DiffOp<'old, 'new>) {
|
|
||||||
match self.context_state {
|
|
||||||
None => self.ops.push(op),
|
|
||||||
Some(ref mut context_state) => {
|
|
||||||
if let DiffOp::Equal { .. } = op {
|
|
||||||
if context_state.ops.len() >= context_state.radius {
|
|
||||||
if context_state.after_modification {
|
|
||||||
context_state.after_modification = false;
|
|
||||||
self.ops.extend(context_state.ops.drain(..));
|
|
||||||
}
|
|
||||||
context_state.ops.pop_front();
|
|
||||||
}
|
|
||||||
context_state.ops.push_back(op);
|
|
||||||
} else {
|
|
||||||
context_state.after_modification = true;
|
|
||||||
self.ops.extend(context_state.ops.drain(..));
|
|
||||||
self.ops.push(op);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'old, 'new, 'bufs> DiffHook for TextDiffer<'old, 'new, 'bufs> {
|
|
||||||
type Error = Infallible;
|
|
||||||
|
|
||||||
fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
|
|
||||||
for off in 0..len {
|
|
||||||
self.push_op(DiffOp::Equal {
|
|
||||||
old_index: old_index + off,
|
|
||||||
new_index: new_index + off,
|
|
||||||
value: self.old[old_index + off],
|
|
||||||
});
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn delete(
|
|
||||||
&mut self,
|
|
||||||
old_index: usize,
|
|
||||||
old_len: usize,
|
|
||||||
_new_index: usize,
|
|
||||||
) -> Result<(), Self::Error> {
|
|
||||||
for off in 0..old_len {
|
|
||||||
self.push_op(DiffOp::Delete {
|
|
||||||
old_index: old_index + off,
|
|
||||||
value: self.old[old_index + off],
|
|
||||||
});
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn insert(
|
|
||||||
&mut self,
|
|
||||||
_old_index: usize,
|
|
||||||
new_index: usize,
|
|
||||||
new_len: usize,
|
|
||||||
) -> Result<(), Self::Error> {
|
|
||||||
for off in 0..new_len {
|
|
||||||
self.push_op(DiffOp::Insert {
|
|
||||||
new_index: new_index + off,
|
|
||||||
value: self.new[new_index + off],
|
|
||||||
});
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn finish(&mut self) -> Result<(), Self::Error> {
|
|
||||||
if let Some(context_state) = self.context_state.take() {
|
|
||||||
self.ops.extend(context_state.ops);
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -354,138 +187,3 @@ fn test_split_words() {
|
||||||
["foo ", "bar ", "baz\n\n ", "aha"]
|
["foo ", "bar ", "baz\n\n ", "aha"]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_line_diff() {
|
|
||||||
let differ = TextDiffer::new_from_lines("foo\nbar\nbaz", "foo\nblah\nbaz");
|
|
||||||
insta::assert_debug_snapshot!(differ.diff(), @r###"
|
|
||||||
[
|
|
||||||
Equal {
|
|
||||||
old_index: 0,
|
|
||||||
new_index: 0,
|
|
||||||
value: "foo\n",
|
|
||||||
},
|
|
||||||
Delete {
|
|
||||||
old_index: 1,
|
|
||||||
value: "bar\n",
|
|
||||||
},
|
|
||||||
Insert {
|
|
||||||
new_index: 1,
|
|
||||||
value: "blah\n",
|
|
||||||
},
|
|
||||||
Equal {
|
|
||||||
old_index: 2,
|
|
||||||
new_index: 2,
|
|
||||||
value: "baz",
|
|
||||||
},
|
|
||||||
]
|
|
||||||
"###);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_context_diff() {
|
|
||||||
let old_text = vec![
|
|
||||||
"1\n", "2\n", "3\n", "4\n", "5\n", "6\n", "7\n", "8\n", "9\n", "0\n", "1\n", "2\n", "3\n",
|
|
||||||
"4\n", "5\n", "6\n", "7\n", "8\n", "9\n", "0\n",
|
|
||||||
];
|
|
||||||
let mut new_text = old_text.clone();
|
|
||||||
new_text[9] = "a\n";
|
|
||||||
new_text[11] = "b\n";
|
|
||||||
new_text[19] = "c\n";
|
|
||||||
let mut differ = TextDiffer::new_from_slices(&old_text, &new_text);
|
|
||||||
differ.set_context_radius(Some(2));
|
|
||||||
insta::assert_debug_snapshot!(differ.diff(), @r###"
|
|
||||||
[
|
|
||||||
Equal {
|
|
||||||
old_index: 7,
|
|
||||||
new_index: 7,
|
|
||||||
value: "8\n",
|
|
||||||
},
|
|
||||||
Equal {
|
|
||||||
old_index: 8,
|
|
||||||
new_index: 8,
|
|
||||||
value: "9\n",
|
|
||||||
},
|
|
||||||
Delete {
|
|
||||||
old_index: 9,
|
|
||||||
value: "0\n",
|
|
||||||
},
|
|
||||||
Insert {
|
|
||||||
new_index: 9,
|
|
||||||
value: "a\n",
|
|
||||||
},
|
|
||||||
Equal {
|
|
||||||
old_index: 10,
|
|
||||||
new_index: 10,
|
|
||||||
value: "1\n",
|
|
||||||
},
|
|
||||||
Delete {
|
|
||||||
old_index: 11,
|
|
||||||
value: "2\n",
|
|
||||||
},
|
|
||||||
Insert {
|
|
||||||
new_index: 11,
|
|
||||||
value: "b\n",
|
|
||||||
},
|
|
||||||
Equal {
|
|
||||||
old_index: 12,
|
|
||||||
new_index: 12,
|
|
||||||
value: "3\n",
|
|
||||||
},
|
|
||||||
Equal {
|
|
||||||
old_index: 13,
|
|
||||||
new_index: 13,
|
|
||||||
value: "4\n",
|
|
||||||
},
|
|
||||||
Equal {
|
|
||||||
old_index: 17,
|
|
||||||
new_index: 17,
|
|
||||||
value: "8\n",
|
|
||||||
},
|
|
||||||
Equal {
|
|
||||||
old_index: 18,
|
|
||||||
new_index: 18,
|
|
||||||
value: "9\n",
|
|
||||||
},
|
|
||||||
Delete {
|
|
||||||
old_index: 19,
|
|
||||||
value: "0\n",
|
|
||||||
},
|
|
||||||
Insert {
|
|
||||||
new_index: 19,
|
|
||||||
value: "c\n",
|
|
||||||
},
|
|
||||||
]
|
|
||||||
"###);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_display() {
|
|
||||||
let old_text = vec![
|
|
||||||
"1\n", "2\n", "3\n", "4\n", "5\n", "6\n", "7\n", "8\n", "9\n",
|
|
||||||
];
|
|
||||||
let mut new_text = old_text.clone();
|
|
||||||
new_text[3] = "a\n";
|
|
||||||
new_text[7] = "b\n";
|
|
||||||
let mut differ = TextDiffer::new_from_slices(&old_text, &new_text);
|
|
||||||
differ.set_context_radius(Some(2));
|
|
||||||
let ops = differ
|
|
||||||
.diff()
|
|
||||||
.into_iter()
|
|
||||||
.map(|x| x.to_string())
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
insta::assert_debug_snapshot!(ops, @r###"
|
|
||||||
[
|
|
||||||
" 2",
|
|
||||||
" 3",
|
|
||||||
"-4",
|
|
||||||
"+a",
|
|
||||||
" 5",
|
|
||||||
" 6",
|
|
||||||
" 7",
|
|
||||||
"-8",
|
|
||||||
"+b",
|
|
||||||
" 9",
|
|
||||||
]
|
|
||||||
"###);
|
|
||||||
}
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue