diff --git a/CHANGELOG.md b/CHANGELOG.md index b9fe103..7576aff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to similar are documented here. +## 0.3.0 + +* Added grapheme and character level diffing utilities. + ## 0.2.0 * Fixed a bug in the patience algorithm causing it not to work. diff --git a/Cargo.toml b/Cargo.toml index 87aa377..d89c2b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,5 +9,15 @@ repository = "https://github.com/mitsuhiko/similar" keywords = ["diff", "difference", "patience", "compare", "changes"] readme = "README.md" +[package.metadata.docs.rs] +all-features = true + +[features] +default = [] +unicode = ["unicode-segmentation"] + [dev-dependencies] insta = "1.5.2" + +[dependencies] +unicode-segmentation = { version = "1.7.1", optional = true } diff --git a/src/text.rs b/src/text.rs index 651c787..975ec0c 100644 --- a/src/text.rs +++ b/src/text.rs @@ -54,7 +54,7 @@ use std::ops::Range; use crate::algorithms::{capture_diff_slices, group_diff_ops, Algorithm, DiffOp, DiffTag}; -/// A builder for more complex uses of [`TextDiff`]. +/// A builder type config for more complex uses of [`TextDiff`]. #[derive(Clone, Debug)] pub struct TextDiffConfig { algorithm: Algorithm, @@ -72,6 +72,8 @@ impl Default for TextDiffConfig { impl TextDiffConfig { /// Changes the algorithm. + /// + /// The default algorithm is [`Algorithm::Myers`]. pub fn algorithm(&mut self, alg: Algorithm) -> &mut Self { self.algorithm = alg; self @@ -79,13 +81,20 @@ impl TextDiffConfig { /// Changes the newline termination flag. /// - /// The default is automatic based on input. + /// The default is automatic based on input. This flag controls the + /// behavior of the [`TextDiff::write_unified_diff`] method with regard + /// to newlines. When the flag is set to `false` (which is the default) + /// then newlines are added. Otherwise the newlines from the source + /// sequences are reused. 
pub fn newline_terminated(&mut self, yes: bool) -> &mut Self { self.newline_terminated = Some(yes); self } /// Creates a diff of lines. + /// + /// This splits the text `old` and `new` into lines preserving newlines + /// in the input. pub fn diff_lines<'old, 'new, 'bufs>( &self, old: &'old str, @@ -111,6 +120,35 @@ impl TextDiffConfig { ) } + /// Creates a diff of characters. + pub fn diff_chars<'old, 'new, 'bufs>( + &self, + old: &'old str, + new: &'new str, + ) -> TextDiff<'old, 'new, 'bufs> { + self.diff( + Cow::Owned(split_chars(old).collect()), + Cow::Owned(split_chars(new).collect()), + false, + ) + } + + /// Creates a diff of graphemes. + /// + /// This requires the `unicode` feature. + #[cfg(feature = "unicode")] + pub fn diff_graphemes<'old, 'new, 'bufs>( + &self, + old: &'old str, + new: &'new str, + ) -> TextDiff<'old, 'new, 'bufs> { + self.diff( + Cow::Owned(split_graphemes(old).collect()), + Cow::Owned(split_graphemes(new).collect()), + false, + ) + } + /// Creates a diff of arbitrary slices. pub fn diff_slices<'old, 'new, 'bufs>( &self, @@ -222,6 +260,19 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> { Self::configure().diff_words(old, new) } + /// Creates a diff of chars. + pub fn from_chars(&self, old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> { + Self::configure().diff_chars(old, new) + } + + /// Creates a diff of graphemes. + /// + /// This requires the `unicode` feature. + #[cfg(feature = "unicode")] + pub fn from_graphemes(&self, old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> { + Self::configure().diff_graphemes(old, new) + } + /// Creates a diff of arbitrary slices. pub fn from_slices( &self, @@ -487,6 +538,17 @@ fn split_words(s: &str) -> impl Iterator<Item = &str> { .flatten() } +/// Splits text into characters. +fn split_chars(s: &str) -> impl Iterator<Item = &str> { + s.char_indices().map(move |(i, c)| &s[i..i + c.len_utf8()]) +} + +/// Splits text into graphemes. 
+#[cfg(feature = "unicode")] +fn split_graphemes(s: &str) -> impl Iterator<Item = &str> { + unicode_segmentation::UnicodeSegmentation::graphemes(s, true) +} + /// Quick way to get a unified diff as string. pub fn unified_diff<'old, 'new>( alg: Algorithm, @@ -520,6 +582,23 @@ fn test_split_words() { ); } +#[test] +fn test_split_chars() { + assert_eq!( + split_chars("abcfö❄️").collect::<Vec<_>>(), + vec!["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"] + ); +} + +#[test] +#[cfg(feature = "unicode")] +fn test_split_graphemes() { + assert_eq!( + split_graphemes("abcfö❄️").collect::<Vec<_>>(), + vec!["a", "b", "c", "f", "ö", "❄️"] + ); +} + #[test] fn test_captured_ops() { let diff = TextDiff::from_lines(