diff --git a/CHANGELOG.md b/CHANGELOG.md index 90079f2..77a8ed8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ All notable changes to similar are documented here. * Fixed grouped diff operation to return an empty result if the diff does not show any changes. * Added inline diff highlighting support. +* Changed word splitting to split into words and whitespace. ## 0.4.0 diff --git a/src/text/mod.rs b/src/text/mod.rs index 5c204de..f7db170 100644 --- a/src/text/mod.rs +++ b/src/text/mod.rs @@ -136,6 +136,8 @@ impl TextDiffConfig { } /// Creates a diff of words. + /// + /// This splits the text into words and whitespace. pub fn diff_words<'old, 'new, 'bufs>( &self, old: &'old str, @@ -543,37 +545,27 @@ fn split_lines(s: &str) -> impl Iterator<Item = &str> { .flatten() } -/// Splits text into words with whitespace attached. +/// Partitions at whitespace. fn split_words(s: &str) -> impl Iterator<Item = &str> { let mut iter = s.char_indices().peekable(); - let mut last_pos = 0; std::iter::from_fn(move || { if let Some((idx, c)) = iter.next() { - let mut rv = None; - if c.is_whitespace() { - let mut last = (idx, c); - while let Some(&(next_idx, next_char)) = iter.peek() { - if !next_char.is_whitespace() { - break; - } - iter.next(); - last = (next_idx, next_char); + let is_whitespace = c.is_whitespace(); + let start = idx; + let mut end = idx + c.len_utf8(); + while let Some(&(_, next_char)) = iter.peek() { + if next_char.is_whitespace() != is_whitespace { + break; } - let whitespace_end = last.0 + last.1.len_utf8(); - rv = Some(&s[last_pos..whitespace_end]); - last_pos = whitespace_end; + iter.next(); + end += next_char.len_utf8(); } - Some(rv) - } else if last_pos < s.len() { - let tmp = &s[last_pos..]; - last_pos = s.len(); - Some(Some(tmp)) + Some(&s[start..end]) } else { None } }) - .flatten() } /// Splits text into characters. 
@@ -706,7 +698,7 @@ fn test_split_lines() { fn test_split_words() { assert_eq!( split_words("foo bar baz\n\n aha").collect::<Vec<_>>(), - ["foo ", "bar ", "baz\n\n ", "aha"] + ["foo", " ", "bar", " ", "baz", "\n\n ", "aha"] ); } @@ -736,6 +728,20 @@ fn test_captured_ops() { insta::assert_debug_snapshot!(&diff.ops()); } +#[test] +fn test_captured_word_ops() { + let diff = TextDiff::from_words( + "Hello World\nsome stuff here\nsome more stuff here\n", + "Hello World\nsome amazing stuff here\nsome more stuff here\n", + ); + let changes = diff + .ops() + .iter() + .flat_map(|op| diff.iter_changes(op)) + .collect::<Vec<_>>(); + insta::assert_debug_snapshot!(&changes); +} + #[test] fn test_unified_diff() { let diff = TextDiff::from_lines( diff --git a/src/text/snapshots/similar__text__captured_word_ops.snap b/src/text/snapshots/similar__text__captured_word_ops.snap new file mode 100644 index 0000000..df27ad2 --- /dev/null +++ b/src/text/snapshots/similar__text__captured_word_ops.snap @@ -0,0 +1,222 @@ +--- +source: src/text/mod.rs +expression: "&changes" +--- +[ + Change { + tag: Equal, + old_index: Some( + 0, + ), + new_index: Some( + 0, + ), + value: "Hello", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 1, + ), + new_index: Some( + 1, + ), + value: " ", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 2, + ), + new_index: Some( + 2, + ), + value: "World", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 3, + ), + new_index: Some( + 3, + ), + value: "\n", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 4, + ), + new_index: Some( + 4, + ), + value: "some", + missing_newline: false, + }, + Change { + tag: Insert, + old_index: None, + new_index: Some( + 5, + ), + value: " ", + missing_newline: false, + }, + Change { + tag: Insert, + old_index: None, + new_index: Some( + 6, + ), + value: "amazing", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( 
+ 5, + ), + new_index: Some( + 7, + ), + value: " ", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 6, + ), + new_index: Some( + 8, + ), + value: "stuff", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 7, + ), + new_index: Some( + 9, + ), + value: " ", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 8, + ), + new_index: Some( + 10, + ), + value: "here", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 9, + ), + new_index: Some( + 11, + ), + value: "\n", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 10, + ), + new_index: Some( + 12, + ), + value: "some", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 11, + ), + new_index: Some( + 13, + ), + value: " ", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 12, + ), + new_index: Some( + 14, + ), + value: "more", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 13, + ), + new_index: Some( + 15, + ), + value: " ", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 14, + ), + new_index: Some( + 16, + ), + value: "stuff", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 15, + ), + new_index: Some( + 17, + ), + value: " ", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 16, + ), + new_index: Some( + 18, + ), + value: "here", + missing_newline: false, + }, + Change { + tag: Equal, + old_index: Some( + 17, + ), + new_index: Some( + 19, + ), + value: "\n", + missing_newline: false, + }, +]