Change word diffing to split into words and spaces

This commit is contained in:
Armin Ronacher 2021-01-31 21:40:20 +01:00
parent e63fe172c5
commit 459fdfdf9d
3 changed files with 250 additions and 21 deletions

View file

@ -11,6 +11,7 @@ All notable changes to similar are documented here.
* Fixed grouped diff operation to return an empty result if the diff
does not show any changes.
* Added inline diff highlighting support.
* Changed word splitting to emit words and runs of whitespace as separate tokens (previously whitespace stayed attached to the preceding word).
## 0.4.0

View file

@ -136,6 +136,8 @@ impl TextDiffConfig {
}
/// Creates a diff of words.
///
/// This splits the text into words and whitespace.
pub fn diff_words<'old, 'new, 'bufs>(
&self,
old: &'old str,
@ -543,37 +545,27 @@ fn split_lines(s: &str) -> impl Iterator<Item = &str> {
.flatten()
}
/// Splits text into alternating runs of non-whitespace and whitespace.
///
/// Every yielded slice is a maximal run of characters that are either all
/// whitespace or all non-whitespace (as decided by `char::is_whitespace`).
/// The slices are yielded in order and cover the input without gaps, so
/// concatenating them reproduces `s`.  An empty input yields nothing.
fn split_words(s: &str) -> impl Iterator<Item = &str> {
    let mut iter = s.char_indices().peekable();

    std::iter::from_fn(move || {
        // The first character of a token decides its class.
        let (start, first) = iter.next()?;
        let is_whitespace = first.is_whitespace();

        // Track the end as a byte offset so that slicing stays on a char
        // boundary even for multi-byte characters.
        let mut end = start + first.len_utf8();
        while let Some(&(_, next_char)) = iter.peek() {
            // Stop at the first character of the other class; it is left in
            // the iterator and starts the next token.
            if next_char.is_whitespace() != is_whitespace {
                break;
            }
            iter.next();
            end += next_char.len_utf8();
        }

        Some(&s[start..end])
    })
}
/// Splits text into characters.
@ -706,7 +698,7 @@ fn test_split_lines() {
fn test_split_words() {
assert_eq!(
split_words("foo bar baz\n\n aha").collect::<Vec<_>>(),
["foo ", "bar ", "baz\n\n ", "aha"]
["foo", " ", "bar", " ", "baz", "\n\n ", "aha"]
);
}
@ -736,6 +728,20 @@ fn test_captured_ops() {
insta::assert_debug_snapshot!(&diff.ops());
}
#[test]
fn test_captured_word_ops() {
    // The new text differs from the old one only by the extra word
    // "amazing" on the second line.
    let old = "Hello World\nsome stuff here\nsome more stuff here\n";
    let new = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
    let diff = TextDiff::from_words(old, new);

    // Flatten the per-op change iterators into one list for the snapshot.
    let mut changes = Vec::new();
    for op in diff.ops() {
        changes.extend(diff.iter_changes(op));
    }

    insta::assert_debug_snapshot!(&changes);
}
#[test]
fn test_unified_diff() {
let diff = TextDiff::from_lines(

View file

@ -0,0 +1,222 @@
---
source: src/text/mod.rs
expression: "&changes"
---
[
Change {
tag: Equal,
old_index: Some(
0,
),
new_index: Some(
0,
),
value: "Hello",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
1,
),
new_index: Some(
1,
),
value: " ",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
2,
),
new_index: Some(
2,
),
value: "World",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
3,
),
new_index: Some(
3,
),
value: "\n",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
4,
),
new_index: Some(
4,
),
value: "some",
missing_newline: false,
},
Change {
tag: Insert,
old_index: None,
new_index: Some(
5,
),
value: " ",
missing_newline: false,
},
Change {
tag: Insert,
old_index: None,
new_index: Some(
6,
),
value: "amazing",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
5,
),
new_index: Some(
7,
),
value: " ",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
6,
),
new_index: Some(
8,
),
value: "stuff",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
7,
),
new_index: Some(
9,
),
value: " ",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
8,
),
new_index: Some(
10,
),
value: "here",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
9,
),
new_index: Some(
11,
),
value: "\n",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
10,
),
new_index: Some(
12,
),
value: "some",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
11,
),
new_index: Some(
13,
),
value: " ",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
12,
),
new_index: Some(
14,
),
value: "more",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
13,
),
new_index: Some(
15,
),
value: " ",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
14,
),
new_index: Some(
16,
),
value: "stuff",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
15,
),
new_index: Some(
17,
),
value: " ",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
16,
),
new_index: Some(
18,
),
value: "here",
missing_newline: false,
},
Change {
tag: Equal,
old_index: Some(
17,
),
new_index: Some(
19,
),
value: "\n",
missing_newline: false,
},
]