diff --git a/Cargo.toml b/Cargo.toml
index 74207aa..c302025 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -41,6 +41,10 @@ required-features = ["text"]
 name = "terminal-inline"
 required-features = ["text", "inline", "bytes"]
 
+[[example]]
+name = "original-slices"
+required-features = ["text"]
+
 [[example]]
 name = "udiff"
 required-features = ["text", "bytes"]
diff --git a/examples/original-slices.rs b/examples/original-slices.rs
new file mode 100644
index 0000000..bdca420
--- /dev/null
+++ b/examples/original-slices.rs
@@ -0,0 +1,11 @@
+use similar::utils::diff_chars;
+use similar::Algorithm;
+
+fn main() {
+    let old = "1234567890abcdef".to_string();
+    let new = "0123456789Oabzdef".to_string();
+
+    for (change_tag, value) in diff_chars(Algorithm::Myers, &old, &new) {
+        println!("{}{:?}", change_tag, value);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 317822d..19a4fec 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -10,6 +10,8 @@
 //!   It provides both low level access to the algorithms with the minimal
 //!   trait bounds necessary, as well as a generic interface.
 //! * [`udiff`]: Unified diff functionality.
+//! * [`utils`]: utilities for common diff related operations. This module
+//!   provides additional diffing functions for working with text diffs.
 //!
 //! # Sequence Diffing
 //!
@@ -41,15 +43,13 @@
 //!     "Hallo Welt\nThis is the second line.\nThis is life.\nMoar and more",
 //! );
 //!
-//! for op in diff.ops() {
-//!     for change in diff.iter_changes(op) {
-//!         let sign = match change.tag() {
-//!             ChangeTag::Delete => "-",
-//!             ChangeTag::Insert => "+",
-//!             ChangeTag::Equal => " ",
-//!         };
-//!         print!("{}{}", sign, change);
-//!     }
+//! for change in diff.iter_all_changes() {
+//!     let sign = match change.tag() {
+//!         ChangeTag::Delete => "-",
+//!         ChangeTag::Insert => "+",
+//!         ChangeTag::Equal => " ",
+//!     };
+//!     print!("{}{}", sign, change);
 //! }
 //! # }
 //! ```
@@ -129,6 +129,8 @@
 pub mod algorithms;
 #[cfg(feature = "text")]
 pub mod udiff;
+#[cfg(feature = "text")]
+pub mod utils;
 
 mod common;
 #[cfg(feature = "text")]
diff --git a/src/text/mod.rs b/src/text/mod.rs
index f1b393c..9b66c58 100644
--- a/src/text/mod.rs
+++ b/src/text/mod.rs
@@ -58,7 +58,27 @@ impl TextDiffConfig {
     /// Creates a diff of lines.
     ///
     /// This splits the text `old` and `new` into lines preserving newlines
-    /// in the input.
+    /// in the input. Line diffs are very common and because of that enjoy
+    /// special handling in similar. When a line diff is created with this
+    /// method the `newline_terminated` flag is flipped to `true` and will
+    /// influence the behavior of unified diff generation.
+    ///
+    /// ```rust
+    /// use similar::{TextDiff, ChangeTag};
+    ///
+    /// let diff = TextDiff::configure().diff_lines("a\nb\nc", "a\nb\nC");
+    /// let changes: Vec<_> = diff
+    ///     .iter_all_changes()
+    ///     .map(|x| (x.tag(), x.value()))
+    ///     .collect();
+    ///
+    /// assert_eq!(changes, vec![
+    ///    (ChangeTag::Equal, "a\n"),
+    ///    (ChangeTag::Equal, "b\n"),
+    ///    (ChangeTag::Delete, "c"),
+    ///    (ChangeTag::Insert, "C"),
+    /// ]);
+    /// ```
     pub fn diff_lines<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
         &self,
         old: &'old T,
@@ -74,6 +94,31 @@ impl TextDiffConfig {
     /// Creates a diff of words.
     ///
     /// This splits the text into words and whitespace.
+    ///
+    /// Note on word diffs: because the text differ will tokenize the strings
+    /// into small segments it can be inconvenient to work with the results
+    /// depending on the use case. You might also want to combine word level
+    /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
+    /// which lets you remap the diffs back to the original input strings.
+    ///
+    /// ```rust
+    /// use similar::{TextDiff, ChangeTag};
+    ///
+    /// let diff = TextDiff::configure().diff_words("foo bar baz", "foo BAR baz");
+    /// let changes: Vec<_> = diff
+    ///     .iter_all_changes()
+    ///     .map(|x| (x.tag(), x.value()))
+    ///     .collect();
+    ///
+    /// assert_eq!(changes, vec![
+    ///    (ChangeTag::Equal, "foo"),
+    ///    (ChangeTag::Equal, " "),
+    ///    (ChangeTag::Delete, "bar"),
+    ///    (ChangeTag::Insert, "BAR"),
+    ///    (ChangeTag::Equal, " "),
+    ///    (ChangeTag::Equal, "baz"),
+    /// ]);
+    /// ```
     pub fn diff_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
         &self,
         old: &'old T,
@@ -87,6 +132,33 @@ impl TextDiffConfig {
     }
 
     /// Creates a diff of characters.
+    ///
+    /// Note on character diffs: because the text differ will tokenize the strings
+    /// into small segments it can be inconvenient to work with the results
+    /// depending on the use case. You might also want to combine character level
+    /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
+    /// which lets you remap the diffs back to the original input strings.
+    ///
+    /// ```rust
+    /// use similar::{TextDiff, ChangeTag};
+    ///
+    /// let diff = TextDiff::configure().diff_chars("abcdef", "abcDDf");
+    /// let changes: Vec<_> = diff
+    ///     .iter_all_changes()
+    ///     .map(|x| (x.tag(), x.value()))
+    ///     .collect();
+    ///
+    /// assert_eq!(changes, vec![
+    ///    (ChangeTag::Equal, "a"),
+    ///    (ChangeTag::Equal, "b"),
+    ///    (ChangeTag::Equal, "c"),
+    ///    (ChangeTag::Delete, "d"),
+    ///    (ChangeTag::Delete, "e"),
+    ///    (ChangeTag::Insert, "D"),
+    ///    (ChangeTag::Insert, "D"),
+    ///    (ChangeTag::Equal, "f"),
+    /// ]);
+    /// ```
     pub fn diff_chars<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
         &self,
         old: &'old T,
@@ -106,6 +178,31 @@ impl TextDiffConfig {
     /// requires a dependency.
     ///
     /// This requires the `unicode` feature.
+    ///
+    /// Note on word diffs: because the text differ will tokenize the strings
+    /// into small segments it can be inconvenient to work with the results
+    /// depending on the use case. You might also want to combine word level
+    /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
+    /// which lets you remap the diffs back to the original input strings.
+    ///
+    /// ```rust
+    /// use similar::{TextDiff, ChangeTag};
+    ///
+    /// let diff = TextDiff::configure().diff_unicode_words("ah(be)ce", "ah(ah)ce");
+    /// let changes: Vec<_> = diff
+    ///     .iter_all_changes()
+    ///     .map(|x| (x.tag(), x.value()))
+    ///     .collect();
+    ///
+    /// assert_eq!(changes, vec![
+    ///    (ChangeTag::Equal, "ah"),
+    ///    (ChangeTag::Equal, "("),
+    ///    (ChangeTag::Delete, "be"),
+    ///    (ChangeTag::Insert, "ah"),
+    ///    (ChangeTag::Equal, ")"),
+    ///    (ChangeTag::Equal, "ce"),
+    /// ]);
+    /// ```
     #[cfg(feature = "unicode")]
     pub fn diff_unicode_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
         &self,
@@ -122,6 +219,30 @@ impl TextDiffConfig {
     /// Creates a diff of graphemes.
     ///
     /// This requires the `unicode` feature.
+    ///
+    /// Note on grapheme diffs: because the text differ will tokenize the strings
+    /// into small segments it can be inconvenient to work with the results
+    /// depending on the use case. You might also want to combine grapheme level
+    /// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
+    /// which lets you remap the diffs back to the original input strings.
+    ///
+    /// ```rust
+    /// use similar::{TextDiff, ChangeTag};
+    ///
+    /// let diff = TextDiff::configure().diff_graphemes("💩🇦🇹🦠", "💩🇦🇱❄️");
+    /// let changes: Vec<_> = diff
+    ///     .iter_all_changes()
+    ///     .map(|x| (x.tag(), x.value()))
+    ///     .collect();
+    ///
+    /// assert_eq!(changes, vec![
+    ///    (ChangeTag::Equal, "💩"),
+    ///    (ChangeTag::Delete, "🇦🇹"),
+    ///    (ChangeTag::Delete, "🦠"),
+    ///    (ChangeTag::Insert, "🇦🇱"),
+    ///    (ChangeTag::Insert, "❄️"),
+    /// ]);
+    /// ```
     #[cfg(feature = "unicode")]
     pub fn diff_graphemes<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
         &self,
@@ -136,6 +257,25 @@ impl TextDiffConfig {
     }
 
     /// Creates a diff of arbitrary slices.
+    ///
+    /// ```rust
+    /// use similar::{TextDiff, ChangeTag};
+    ///
+    /// let old = &["foo", "bar", "baz"];
+    /// let new = &["foo", "BAR", "baz"];
+    /// let diff = TextDiff::configure().diff_slices(old, new);
+    /// let changes: Vec<_> = diff
+    ///     .iter_all_changes()
+    ///     .map(|x| (x.tag(), x.value()))
+    ///     .collect();
+    ///
+    /// assert_eq!(changes, vec![
+    ///    (ChangeTag::Equal, "foo"),
+    ///    (ChangeTag::Delete, "bar"),
+    ///    (ChangeTag::Insert, "BAR"),
+    ///    (ChangeTag::Equal, "baz"),
+    /// ]);
+    /// ```
     pub fn diff_slices<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
         &self,
         old: &'bufs [&'old T],
@@ -185,7 +325,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
 
     /// Creates a diff of lines.
     ///
-    /// Equivalent to `TextDiff::configure().diff_lines(old, new)`.
+    /// For more information see [`TextDiffConfig::diff_lines`].
     pub fn from_lines<T: DiffableStrRef + ?Sized>(
         old: &'old T,
         new: &'new T,
@@ -195,7 +335,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
 
     /// Creates a diff of words.
     ///
-    /// Equivalent to `TextDiff::configure().diff_words(old, new)`.
+    /// For more information see [`TextDiffConfig::diff_words`].
     pub fn from_words<T: DiffableStrRef + ?Sized>(
         old: &'old T,
         new: &'new T,
@@ -205,7 +345,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
 
     /// Creates a diff of chars.
     ///
-    /// Equivalent to `TextDiff::configure().diff_chars(old, new)`.
+    /// For more information see [`TextDiffConfig::diff_chars`].
     pub fn from_chars<T: DiffableStrRef + ?Sized>(
         old: &'old T,
         new: &'new T,
@@ -215,7 +355,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
 
     /// Creates a diff of unicode words.
     ///
-    /// Equivalent to `TextDiff::configure().diff_unicode_words(old, new)`.
+    /// For more information see [`TextDiffConfig::diff_unicode_words`].
     ///
     /// This requires the `unicode` feature.
     #[cfg(feature = "unicode")]
@@ -228,7 +368,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
 
     /// Creates a diff of graphemes.
     ///
-    /// Equivalent to `TextDiff::configure().diff_graphemes(old, new)`.
+    /// For more information see [`TextDiffConfig::diff_graphemes`].
     ///
     /// This requires the `unicode` feature.
     #[cfg(feature = "unicode")]
@@ -243,7 +383,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
 impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'new, 'bufs, T> {
     /// Creates a diff of arbitrary slices.
     ///
-    /// Equivalent to `TextDiff::configure().diff_slices(old, new)`.
+    /// For more information see [`TextDiffConfig::diff_slices`].
     pub fn from_slices(
         old: &'bufs [&'old T],
         new: &'bufs [&'new T],
diff --git a/src/types.rs b/src/types.rs
index 23ede7e..9998840 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -247,6 +247,25 @@ impl DiffOp {
     ///
     /// `old` and `new` are two indexable objects like the types you pass to
     /// the diffing algorithm functions.
+    ///
+    /// ```rust
+    /// use similar::{ChangeTag, Algorithm};
+    /// use similar::capture_diff_slices;
+    /// let old = vec!["foo", "bar", "baz"];
+    /// let new = vec!["foo", "bar", "blah"];
+    /// let ops = capture_diff_slices(Algorithm::Myers, &old, &new);
+    /// let changes: Vec<_> = ops
+    ///     .iter()
+    ///     .flat_map(|x| x.iter_changes(&old, &new))
+    ///     .map(|x| (x.tag(), x.value()))
+    ///     .collect();
+    /// assert_eq!(changes, vec![
+    ///     (ChangeTag::Equal, "foo"),
+    ///     (ChangeTag::Equal, "bar"),
+    ///     (ChangeTag::Delete, "baz"),
+    ///     (ChangeTag::Insert, "blah"),
+    /// ]);
+    /// ```
     pub fn iter_changes<'x, 'lookup, Old, New, T>(
         &self,
         old: &'lookup Old,
@@ -338,6 +357,67 @@ impl DiffOp {
             }
         })
     }
+
+    /// Given a diffop yields the changes it encodes against the given slices.
+    ///
+    /// This is similar to [`DiffOp::iter_changes`] but instead of yielding the
+    /// individual changes it yields consecutive changed slices.
+    ///
+    /// This will only ever yield a single tuple or two tuples in case a
+    /// [`DiffOp::Replace`] operation is passed.
+    ///
+    /// ```rust
+    /// use similar::{ChangeTag, Algorithm};
+    /// use similar::capture_diff_slices;
+    /// let old = vec!["foo", "bar", "baz"];
+    /// let new = vec!["foo", "bar", "blah"];
+    /// let ops = capture_diff_slices(Algorithm::Myers, &old, &new);
+    /// let changes: Vec<_> = ops.iter().flat_map(|x| x.iter_slices(&old, &new)).collect();
+    /// assert_eq!(changes, vec![
+    ///     (ChangeTag::Equal, &["foo", "bar"][..]),
+    ///     (ChangeTag::Delete, &["baz"][..]),
+    ///     (ChangeTag::Insert, &["blah"][..]),
+    /// ]);
+    /// ```
+    ///
+    /// Due to lifetime restrictions it's currently impossible for the
+    /// returned slices to outlive the lookup.
+    pub fn iter_slices<'lookup, Old, New, T>(
+        &self,
+        old: &'lookup Old,
+        new: &'lookup New,
+    ) -> impl Iterator<Item = (ChangeTag, &'lookup T)>
+    where
+        T: 'lookup + ?Sized,
+        Old: Index<Range<usize>, Output = T> + ?Sized,
+        New: Index<Range<usize>, Output = T> + ?Sized,
+    {
+        match *self {
+            DiffOp::Equal { old_index, len, .. } => {
+                Some((ChangeTag::Equal, &old[old_index..old_index + len]))
+                    .into_iter()
+                    .chain(None.into_iter())
+            }
+            DiffOp::Insert {
+                new_index, new_len, ..
+            } => Some((ChangeTag::Insert, &new[new_index..new_index + new_len]))
+                .into_iter()
+                .chain(None.into_iter()),
+            DiffOp::Delete {
+                old_index, old_len, ..
+            } => Some((ChangeTag::Delete, &old[old_index..old_index + old_len]))
+                .into_iter()
+                .chain(None.into_iter()),
+            DiffOp::Replace {
+                old_index,
+                old_len,
+                new_index,
+                new_len,
+            } => Some((ChangeTag::Delete, &old[old_index..old_index + old_len]))
+                .into_iter()
+                .chain(Some((ChangeTag::Insert, &new[new_index..new_index + new_len])).into_iter()),
+        }
+    }
 }
 
 #[cfg(feature = "text")]
diff --git a/src/utils.rs b/src/utils.rs
new file mode 100644
index 0000000..1f8fdc9
--- /dev/null
+++ b/src/utils.rs
@@ -0,0 +1,424 @@
+//! Utilities for common diff related operations.
+//!
+//! This module provides specialized utilities and simplified diff operations
+//! for common operations. It's useful when you want to work with text diffs
+//! and you're interested in getting vectors of these changes directly.
+//!
+//! # Slice Remapping
+//!
+//! When working with [`TextDiff`] it's common that one takes advantage of the
+//! built-in tokenization of the differ. This for instance lets you do
+//! grapheme level diffs. This is implemented by the differ generating rather
+//! small slices of strings and running a diff algorithm over them.
+//!
+//! The downside of this is that all the [`DiffOp`] objects produced by the
+//! diffing algorithm encode operations on these rather small slices. For
+//! a lot of use cases this is not what one wants which can make this very
+//! inconvenient. This module provides a [`TextDiffRemapper`] which lets you
+//! map from the ranges that the [`TextDiff`] returns to the original input
+//! strings. For more information see [`TextDiffRemapper`].
+//!
+//! # Simple Diff Functions
+//!
+//! This module provides a range of common text diff functions that will
+//! produce vectors of `(change_tag, value)` tuples. They will automatically
+//! optimize towards returning the most useful slice that one would expect for
+//! the type of diff performed.
+
+use std::hash::Hash;
+use std::ops::{Index, Range};
+
+use crate::{
+    capture_diff_slices, Algorithm, ChangeTag, DiffOp, DiffableStr, DiffableStrRef, TextDiff,
+};
+
+/// Maps ranges over the tokenized slices back to ranges in the string the
+/// slices were cut from.
+struct SliceRemapper<'x, T: ?Sized> {
+    source: &'x T,
+    indexes: Vec<Range<usize>>,
+}
+
+impl<'x, T: DiffableStr + ?Sized> SliceRemapper<'x, T> {
+    /// Records for each slice the range it occupies in `source`, assuming the
+    /// slices are consecutive and cover `source` from its start.
+    fn new(source: &'x T, slices: &[&'x T]) -> SliceRemapper<'x, T> {
+        let indexes = slices
+            .iter()
+            .scan(0, |state, item| {
+                let start = *state;
+                let end = start + item.len();
+                *state = end;
+                Some(start..end)
+            })
+            .collect();
+        SliceRemapper { source, indexes }
+    }
+
+    /// Returns the part of the source covered by the slices in `range`, or
+    /// `None` if the range does not refer to existing slices.
+    fn slice(&self, range: Range<usize>) -> Option<&'x T> {
+        let start = self.indexes.get(range.start)?.start;
+        // `checked_sub` keeps the empty range `0..0` from underflowing; it is
+        // reported as out of bounds instead.
+        let end = self.indexes.get(range.end.checked_sub(1)?)?.end;
+        Some(self.source.slice(start..end))
+    }
+}
+
+impl<'x, T: DiffableStr + ?Sized> Index<Range<usize>> for SliceRemapper<'x, T> {
+    type Output = T;
+
+    fn index(&self, range: Range<usize>) -> &Self::Output {
+        self.slice(range).expect("out of bounds")
+    }
+}
+
+/// A remapper that can remap diff ops to the original slices.
+///
+/// The idea here is that when a [`TextDiff`](crate::TextDiff) is created from
+/// two strings and the internal tokenization is used, this remapper can take
+/// a range in the tokenized sequences and remap it to the original string.
+/// This is particularly useful when you want to do things like character or
+/// grapheme level diffs but you want to not have to iterate over small sequences
+/// but large consecutive ones from the source.
+///
+/// ```rust
+/// use similar::{ChangeTag, TextDiff};
+/// use similar::utils::TextDiffRemapper;
+///
+/// let old = "yo! foo bar baz";
+/// let new = "yo! foo bor baz";
+/// let diff = TextDiff::from_words(old, new);
+/// let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
+/// let changes: Vec<_> = diff.ops()
+///     .iter()
+///     .flat_map(move |x| remapper.iter_slices(x))
+///     .collect();
+///
+/// assert_eq!(changes, vec![
+///     (ChangeTag::Equal, "yo! foo "),
+///     (ChangeTag::Delete, "bar"),
+///     (ChangeTag::Insert, "bor"),
+///     (ChangeTag::Equal, " baz")
+/// ]);
+/// ```
+pub struct TextDiffRemapper<'x, T: ?Sized> {
+    old: SliceRemapper<'x, T>,
+    new: SliceRemapper<'x, T>,
+}
+
+impl<'x, T: DiffableStr + ?Sized> TextDiffRemapper<'x, T> {
+    /// Creates a new remapper from strings and slices.
+    pub fn new(
+        old_slices: &[&'x T],
+        new_slices: &[&'x T],
+        old: &'x T,
+        new: &'x T,
+    ) -> TextDiffRemapper<'x, T> {
+        TextDiffRemapper {
+            old: SliceRemapper::new(old, old_slices),
+            new: SliceRemapper::new(new, new_slices),
+        }
+    }
+
+    /// Creates a new remapper from a text diff and the original strings.
+    pub fn from_text_diff<'old, 'new, 'bufs>(
+        diff: &TextDiff<'old, 'new, 'bufs, T>,
+        old: &'x T,
+        new: &'x T,
+    ) -> TextDiffRemapper<'x, T>
+    where
+        'old: 'x,
+        'new: 'x,
+    {
+        TextDiffRemapper {
+            old: SliceRemapper::new(old, diff.old_slices()),
+            new: SliceRemapper::new(new, diff.new_slices()),
+        }
+    }
+
+    /// Slices into the old string.
+    pub fn slice_old(&self, range: Range<usize>) -> Option<&'x T> {
+        self.old.slice(range)
+    }
+
+    /// Slices into the new string.
+    pub fn slice_new(&self, range: Range<usize>) -> Option<&'x T> {
+        self.new.slice(range)
+    }
+
+    /// Given a diffop yields the changes it encodes against the original strings.
+    ///
+    /// This is the same as the [`DiffOp::iter_slices`] method.
+    ///
+    /// ## Panics
+    ///
+    /// This method can panic if the input strings passed to the constructor
+    /// are incompatible with the input strings passed to the diffing algorithm.
+    pub fn iter_slices(&self, op: &DiffOp) -> impl Iterator<Item = (ChangeTag, &'x T)> {
+        // note: this is equivalent to the code in `DiffOp::iter_slices`. It is
+        // a copy/paste because the slicing currently cannot be well abstracted
+        // because of lifetime issues caused by the `Index` trait.
+        match *op {
+            DiffOp::Equal { old_index, len, .. } => {
+                Some((ChangeTag::Equal, self.old.slice(old_index..old_index + len)))
+                    .into_iter()
+                    .chain(None.into_iter())
+            }
+            DiffOp::Insert {
+                new_index, new_len, ..
+            } => Some((
+                ChangeTag::Insert,
+                self.new.slice(new_index..new_index + new_len),
+            ))
+            .into_iter()
+            .chain(None.into_iter()),
+            DiffOp::Delete {
+                old_index, old_len, ..
+            } => Some((
+                ChangeTag::Delete,
+                self.old.slice(old_index..old_index + old_len),
+            ))
+            .into_iter()
+            .chain(None.into_iter()),
+            DiffOp::Replace {
+                old_index,
+                old_len,
+                new_index,
+                new_len,
+            } => Some((
+                ChangeTag::Delete,
+                self.old.slice(old_index..old_index + old_len),
+            ))
+            .into_iter()
+            .chain(
+                Some((
+                    ChangeTag::Insert,
+                    self.new.slice(new_index..new_index + new_len),
+                ))
+                .into_iter(),
+            ),
+        }
+        .map(|(tag, opt_val)| (tag, opt_val.expect("slice out of bounds")))
+    }
+}
+
+/// Shortcut for diffing two slices.
+///
+/// This function produces the diff of two slices and returns a vector
+/// with the changes.
+///
+/// ```rust
+/// use similar::{Algorithm, ChangeTag};
+/// use similar::utils::diff_slices;
+///
+/// let old = "foo\nbar\nbaz".lines().collect::<Vec<_>>();
+/// let new = "foo\nbar\nBAZ".lines().collect::<Vec<_>>();
+/// assert_eq!(diff_slices(Algorithm::Myers, &old, &new), vec![
+///     (ChangeTag::Equal, &["foo", "bar"][..]),
+///     (ChangeTag::Delete, &["baz"][..]),
+///     (ChangeTag::Insert, &["BAZ"][..]),
+/// ]);
+/// ```
+pub fn diff_slices<'x, T: PartialEq + Hash + Ord>(
+    alg: Algorithm,
+    old: &'x [T],
+    new: &'x [T],
+) -> Vec<(ChangeTag, &'x [T])> {
+    capture_diff_slices(alg, old, new)
+        .iter()
+        .flat_map(|op| op.iter_slices(old, new))
+        .collect()
+}
+
+/// Shortcut for making a character level diff.
+///
+/// This function produces the diff of two strings and returns a vector
+/// with the changes. It returns connected slices into the original string
+/// rather than character level slices.
+///
+/// ```rust
+/// use similar::{Algorithm, ChangeTag};
+/// use similar::utils::diff_chars;
+///
+/// assert_eq!(diff_chars(Algorithm::Myers, "foobarbaz", "fooBARbaz"), vec![
+///     (ChangeTag::Equal, "foo"),
+///     (ChangeTag::Delete, "bar"),
+///     (ChangeTag::Insert, "BAR"),
+///     (ChangeTag::Equal, "baz"),
+/// ]);
+/// ```
+pub fn diff_chars<'x, T: DiffableStrRef + ?Sized>(
+    alg: Algorithm,
+    old: &'x T,
+    new: &'x T,
+) -> Vec<(ChangeTag, &'x T::Output)> {
+    let old = old.as_diffable_str();
+    let new = new.as_diffable_str();
+    let diff = TextDiff::configure().algorithm(alg).diff_chars(old, new);
+    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
+    diff.ops()
+        .iter()
+        .flat_map(move |x| remapper.iter_slices(x))
+        .collect()
+}
+
+/// Shortcut for making a word level diff.
+///
+/// This function produces the diff of two strings and returns a vector
+/// with the changes. It returns connected slices into the original string
+/// rather than word level slices.
+///
+/// ```rust
+/// use similar::{Algorithm, ChangeTag};
+/// use similar::utils::diff_words;
+///
+/// assert_eq!(diff_words(Algorithm::Myers, "foo bar baz", "foo bor baz"), vec![
+///     (ChangeTag::Equal, "foo "),
+///     (ChangeTag::Delete, "bar"),
+///     (ChangeTag::Insert, "bor"),
+///     (ChangeTag::Equal, " baz"),
+/// ]);
+/// ```
+pub fn diff_words<'x, T: DiffableStrRef + ?Sized>(
+    alg: Algorithm,
+    old: &'x T,
+    new: &'x T,
+) -> Vec<(ChangeTag, &'x T::Output)> {
+    let old = old.as_diffable_str();
+    let new = new.as_diffable_str();
+    let diff = TextDiff::configure().algorithm(alg).diff_words(old, new);
+    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
+    diff.ops()
+        .iter()
+        .flat_map(move |x| remapper.iter_slices(x))
+        .collect()
+}
+
+/// Shortcut for making a unicode word level diff.
+///
+/// This function produces the diff of two strings and returns a vector
+/// with the changes. It returns connected slices into the original string
+/// rather than word level slices.
+///
+/// ```rust
+/// use similar::{Algorithm, ChangeTag};
+/// use similar::utils::diff_unicode_words;
+///
+/// let old = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
+/// let new = "The quick (\"brown\") fox can't jump 9.84 meters, right?";
+/// assert_eq!(diff_unicode_words(Algorithm::Myers, old, new), vec![
+///     (ChangeTag::Equal, "The quick (\"brown\") fox can\'t jump "),
+///     (ChangeTag::Delete, "32.3"),
+///     (ChangeTag::Insert, "9.84"),
+///     (ChangeTag::Equal, " "),
+///     (ChangeTag::Delete, "feet"),
+///     (ChangeTag::Insert, "meters"),
+///     (ChangeTag::Equal, ", right?")
+/// ]);
+/// ```
+///
+/// This requires the `unicode` feature.
+#[cfg(feature = "unicode")]
+pub fn diff_unicode_words<'x, T: DiffableStrRef + ?Sized>(
+    alg: Algorithm,
+    old: &'x T,
+    new: &'x T,
+) -> Vec<(ChangeTag, &'x T::Output)> {
+    let old = old.as_diffable_str();
+    let new = new.as_diffable_str();
+    let diff = TextDiff::configure()
+        .algorithm(alg)
+        .diff_unicode_words(old, new);
+    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
+    diff.ops()
+        .iter()
+        .flat_map(move |x| remapper.iter_slices(x))
+        .collect()
+}
+
+/// Shortcut for making a grapheme level diff.
+///
+/// This function produces the diff of two strings and returns a vector
+/// with the changes. It returns connected slices into the original string
+/// rather than grapheme level slices.
+///
+/// ```rust
+/// use similar::{Algorithm, ChangeTag};
+/// use similar::utils::diff_graphemes;
+///
+/// let old = "The flag of Austria is 🇦🇹";
+/// let new = "The flag of Albania is 🇦🇱";
+/// assert_eq!(diff_graphemes(Algorithm::Myers, old, new), vec![
+///     (ChangeTag::Equal, "The flag of A"),
+///     (ChangeTag::Delete, "ustr"),
+///     (ChangeTag::Insert, "lban"),
+///     (ChangeTag::Equal, "ia is "),
+///     (ChangeTag::Delete, "🇦🇹"),
+///     (ChangeTag::Insert, "🇦🇱"),
+/// ]);
+/// ```
+///
+/// This requires the `unicode` feature.
+#[cfg(feature = "unicode")]
+pub fn diff_graphemes<'x, T: DiffableStrRef + ?Sized>(
+    alg: Algorithm,
+    old: &'x T,
+    new: &'x T,
+) -> Vec<(ChangeTag, &'x T::Output)> {
+    let old = old.as_diffable_str();
+    let new = new.as_diffable_str();
+    let diff = TextDiff::configure()
+        .algorithm(alg)
+        .diff_graphemes(old, new);
+    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
+    diff.ops()
+        .iter()
+        .flat_map(move |x| remapper.iter_slices(x))
+        .collect()
+}
+
+/// Shortcut for making a line diff.
+///
+/// This function produces the diff of two strings and returns a vector
+/// with the changes. Unlike [`diff_chars`] or [`diff_slices`] it returns a
+/// change tag for each line.
+///
+/// ```rust
+/// use similar::{Algorithm, ChangeTag};
+/// use similar::utils::diff_lines;
+///
+/// assert_eq!(diff_lines(Algorithm::Myers, "foo\nbar\nbaz\nblah", "foo\nbar\nbaz\nblurgh"), vec![
+///     (ChangeTag::Equal, "foo\n"),
+///     (ChangeTag::Equal, "bar\n"),
+///     (ChangeTag::Equal, "baz\n"),
+///     (ChangeTag::Delete, "blah"),
+///     (ChangeTag::Insert, "blurgh"),
+/// ]);
+/// ```
+pub fn diff_lines<'x, T: DiffableStrRef + ?Sized>(
+    alg: Algorithm,
+    old: &'x T,
+    new: &'x T,
+) -> Vec<(ChangeTag, &'x T::Output)> {
+    TextDiff::configure()
+        .algorithm(alg)
+        .diff_lines(old, new)
+        .iter_all_changes()
+        .map(|change| (change.tag(), change.value()))
+        .collect()
+}
+
+#[test]
+fn test_remapper() {
+    let a = "foo bar baz";
+    let words = a.tokenize_words();
+    let remap = SliceRemapper::new(a, &words);
+    assert_eq!(remap.slice(0..3), Some("foo bar"));
+    assert_eq!(remap.slice(1..3), Some(" bar"));
+    assert_eq!(remap.slice(0..1), Some("foo"));
+    assert_eq!(remap.slice(0..5), Some("foo bar baz"));
+    assert_eq!(remap.slice(0..6), None);
+    assert_eq!(remap.slice(0..0), None);
+}