diff --git a/CHANGELOG.md b/CHANGELOG.md index 77a8ed8..fb728e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ All notable changes to similar are documented here. does not show any changes. * Added inline diff highlighting support. * Changed word splitting to split into words and whitespace. +* Added support for unicode based word splitting (`TextDiff::from_unicode_words`). ## 0.4.0 diff --git a/Cargo.toml b/Cargo.toml index d665014..31bf790 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ all-features = true [features] default = ["text"] text = [] +inline = ["unicode"] unicode = ["text", "unicode-segmentation"] [dev-dependencies] @@ -30,7 +31,7 @@ required-features = ["text"] [[example]] name = "terminal-inline" -required-features = ["text"] +required-features = ["text", "inline"] [[example]] name = "udiff" diff --git a/src/lib.rs b/src/lib.rs index 5ed2b69..fb0bd1f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,11 +38,14 @@ //! cases it's useful to pull in extra functionality. Likewise you can turn //! off some functionality. //! +//! * `text`: this feature is enabled by default and enables the [`text`] module. +//! If the crate is used without default features it's removed. //! * `unicode`: when this feature is enabled the text diffing functionality //! gains the ability to diff on a grapheme instead of character level. This //! is particularly useful when working with text containing emojis. -//! * `text`: this feature is enabled by default and enables the [`text`] module. -//! If the crate is used without default features it's removed. +//! * `inline`: this feature gives access to additional functionality of the +//! `text` module to provide inline information about which values changed +//! in a line diff. This currently also enables the `unicode` feature. #![warn(missing_docs)] pub mod algorithms; pub mod text; diff --git a/src/text/inline.rs b/src/text/inline.rs index 56ed2c0..0270e76 100644 --- a/src/text/inline.rs +++ b/src/text/inline.rs @@ -1,9 +1,10 @@ +#![cfg(feature = "inline")] use std::{fmt, iter}; use crate::algorithms::{Algorithm, DiffOp, DiffTag}; use crate::text::{Change, ChangeTag, TextDiff}; -use super::split_words; +use super::split_unicode_words; use std::ops::Range; @@ -118,8 +119,8 @@ pub(crate) fn iter_inline_changes<'diff>( (ChangeTag::Delete, Some(ChangeTag::Insert)) => { let old_value = change.value(); let new_value = next_change.unwrap().value(); - let old_chars = split_words(&old_value).collect::>(); - let new_chars = split_words(&new_value).collect::>(); + let old_chars = split_unicode_words(&old_value).collect::>(); + let new_chars = split_unicode_words(&new_value).collect::>(); let old_mindex = MultiIndex::new(&old_chars, old_value); let new_mindex = MultiIndex::new(&new_chars, new_value); let inline_diff = TextDiff::configure() @@ -185,3 +186,18 @@ pub(crate) fn iter_inline_changes<'diff>( }) .flatten() } + +#[test] +fn test_line_ops_inline() { + let diff = TextDiff::from_lines( + "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff", + "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n", + ); + assert_eq!(diff.newline_terminated(), true); + let changes = diff + .ops() + .iter() + .flat_map(|op| diff.iter_inline_changes(op)) + .collect::>(); + insta::assert_debug_snapshot!(&changes); +} diff --git a/src/text/mod.rs b/src/text/mod.rs index a89a942..d361a37 100644 --- a/src/text/mod.rs +++ b/src/text/mod.rs @@ -74,9 +74,11 @@ use std::cmp::Reverse; use std::collections::{BinaryHeap, HashMap}; use std::fmt; +#[cfg(feature = "inline")] mod inline; mod udiff; +#[cfg(feature = "inline")] pub use self::inline::*; pub use self::udiff::*; @@ -150,6 +152,25 @@ impl TextDiffConfig { ) } + /// Creates a diff of unicode words. + /// + /// This splits the text into words according to unicode rules. This is + /// generally recommended over [`diff_words`] but requires a dependency. + /// + /// This requires the `unicode` feature. + #[cfg(feature = "unicode")] + pub fn diff_unicode_words<'old, 'new, 'bufs>( + &self, + old: &'old str, + new: &'new str, + ) -> TextDiff<'old, 'new, 'bufs> { + self.diff( + Cow::Owned(split_unicode_words(old).collect()), + Cow::Owned(split_unicode_words(new).collect()), + false, + ) + } + /// Creates a diff of characters. pub fn diff_chars<'old, 'new, 'bufs>( &self, @@ -301,6 +322,16 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> { Self::configure().diff_words(old, new) } + /// Creates a diff of unicode words. + /// + /// Equivalent to `TextDiff::configure().diff_unicode_words(old, new)`. + /// + /// This requires the `unicode` feature. + #[cfg(feature = "unicode")] + pub fn from_unicode_words(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> { + Self::configure().diff_unicode_words(old, new) + } + /// Creates a diff of chars. /// /// Equivalent to `TextDiff::configure().diff_chars(old, new)`. @@ -486,6 +517,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> { /// level diff on adjacent line replacements. The exact behavior of /// this function with regards to how it detects those inline changes /// is currently not defined and will likely change over time. + #[cfg(feature = "inline")] pub fn iter_inline_changes(&self, op: &DiffOp) -> impl Iterator { iter_inline_changes(self, op) } @@ -567,6 +599,12 @@ fn split_words(s: &str) -> impl Iterator { }) } +/// Splits words according to unicode rules. +#[cfg(feature = "unicode")] +fn split_unicode_words(s: &str) -> impl Iterator { + unicode_segmentation::UnicodeSegmentation::split_word_bounds(s) +} + /// Splits text into characters. fn split_chars(s: &str) -> impl Iterator { s.char_indices().map(move |(i, c)| &s[i..i + c.len_utf8()]) @@ -782,21 +820,6 @@ fn test_virtual_newlines() { insta::assert_debug_snapshot!(&changes); } -#[test] -fn test_line_ops_inline() { - let diff = TextDiff::from_lines( - "Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff", - "Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n", - ); - assert_eq!(diff.newline_terminated(), true); - let changes = diff - .ops() - .iter() - .flat_map(|op| diff.iter_inline_changes(op)) - .collect::>(); - insta::assert_debug_snapshot!(&changes); -} - #[test] fn test_char_diff() { let diff = TextDiff::from_chars("Hello World", "Hallo Welt"); diff --git a/src/text/snapshots/similar__text__line_ops_inline.snap b/src/text/snapshots/similar__text__inline__line_ops_inline.snap similarity index 98% rename from src/text/snapshots/similar__text__line_ops_inline.snap rename to src/text/snapshots/similar__text__inline__line_ops_inline.snap index 8d6d51f..4870e2d 100644 --- a/src/text/snapshots/similar__text__line_ops_inline.snap +++ b/src/text/snapshots/similar__text__inline__line_ops_inline.snap @@ -1,5 +1,5 @@ --- -source: src/text/mod.rs +source: src/text/inline.rs expression: "&changes" --- [