Added slice remapper and improved documentation (#8)
* Experimental slice remapper * Added iter_slices to DiffOp * Improvements to the utility diff functions * Documentation improvements * More documentation updates on utils * More documentation on main text diff
This commit is contained in:
parent
81ef0b1adc
commit
777105fbb0
6 changed files with 668 additions and 16 deletions
|
|
@ -41,6 +41,10 @@ required-features = ["text"]
|
||||||
name = "terminal-inline"
|
name = "terminal-inline"
|
||||||
required-features = ["text", "inline", "bytes"]
|
required-features = ["text", "inline", "bytes"]
|
||||||
|
|
||||||
|
[[example]]
|
||||||
|
name = "original-slices"
|
||||||
|
required-features = ["text"]
|
||||||
|
|
||||||
[[example]]
|
[[example]]
|
||||||
name = "udiff"
|
name = "udiff"
|
||||||
required-features = ["text", "bytes"]
|
required-features = ["text", "bytes"]
|
||||||
|
|
|
||||||
11
examples/original-slices.rs
Normal file
11
examples/original-slices.rs
Normal file
|
|
@ -0,0 +1,11 @@
|
||||||
|
use similar::utils::diff_chars;
|
||||||
|
use similar::Algorithm;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let old = "1234567890abcdef".to_string();
|
||||||
|
let new = "0123456789Oabzdef".to_string();
|
||||||
|
|
||||||
|
for (change_tag, value) in diff_chars(Algorithm::Myers, &old, &new) {
|
||||||
|
println!("{}{:?}", change_tag, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
20
src/lib.rs
20
src/lib.rs
|
|
@ -10,6 +10,8 @@
|
||||||
//! It provides both low level access to the algorithms with the minimal
|
//! It provides both low level access to the algorithms with the minimal
|
||||||
//! trait bounds necessary, as well as a generic interface.
|
//! trait bounds necessary, as well as a generic interface.
|
||||||
//! * [`udiff`]: Unified diff functionality.
|
//! * [`udiff`]: Unified diff functionality.
|
||||||
|
//! * [`utils`]: utilities for common diff related operations. This module
|
||||||
|
//! provides additional diffing functions for working with text diffs.
|
||||||
//!
|
//!
|
||||||
//! # Sequence Diffing
|
//! # Sequence Diffing
|
||||||
//!
|
//!
|
||||||
|
|
@ -41,15 +43,13 @@
|
||||||
//! "Hallo Welt\nThis is the second line.\nThis is life.\nMoar and more",
|
//! "Hallo Welt\nThis is the second line.\nThis is life.\nMoar and more",
|
||||||
//! );
|
//! );
|
||||||
//!
|
//!
|
||||||
//! for op in diff.ops() {
|
//! for change in diff.iter_all_changes() {
|
||||||
//! for change in diff.iter_changes(op) {
|
//! let sign = match change.tag() {
|
||||||
//! let sign = match change.tag() {
|
//! ChangeTag::Delete => "-",
|
||||||
//! ChangeTag::Delete => "-",
|
//! ChangeTag::Insert => "+",
|
||||||
//! ChangeTag::Insert => "+",
|
//! ChangeTag::Equal => " ",
|
||||||
//! ChangeTag::Equal => " ",
|
//! };
|
||||||
//! };
|
//! print!("{}{}", sign, change);
|
||||||
//! print!("{}{}", sign, change);
|
|
||||||
//! }
|
|
||||||
//! }
|
//! }
|
||||||
//! # }
|
//! # }
|
||||||
//! ```
|
//! ```
|
||||||
|
|
@ -129,6 +129,8 @@
|
||||||
pub mod algorithms;
|
pub mod algorithms;
|
||||||
#[cfg(feature = "text")]
|
#[cfg(feature = "text")]
|
||||||
pub mod udiff;
|
pub mod udiff;
|
||||||
|
#[cfg(feature = "text")]
|
||||||
|
pub mod utils;
|
||||||
|
|
||||||
mod common;
|
mod common;
|
||||||
#[cfg(feature = "text")]
|
#[cfg(feature = "text")]
|
||||||
|
|
|
||||||
154
src/text/mod.rs
154
src/text/mod.rs
|
|
@ -58,7 +58,27 @@ impl TextDiffConfig {
|
||||||
/// Creates a diff of lines.
|
/// Creates a diff of lines.
|
||||||
///
|
///
|
||||||
/// This splits the text `old` and `new` into lines preserving newlines
|
/// This splits the text `old` and `new` into lines preserving newlines
|
||||||
/// in the input.
|
/// in the input. Line diffs are very common and because of that enjoy
|
||||||
|
/// special handling in similar. When a line diff is created with this
|
||||||
|
/// method the `newline_terminated` flag is flipped to `true` and will
|
||||||
|
/// influence the behavior of unified diff generation.
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use similar::{TextDiff, ChangeTag};
|
||||||
|
///
|
||||||
|
/// let diff = TextDiff::configure().diff_lines("a\nb\nc", "a\nb\nC");
|
||||||
|
/// let changes: Vec<_> = diff
|
||||||
|
/// .iter_all_changes()
|
||||||
|
/// .map(|x| (x.tag(), x.value()))
|
||||||
|
/// .collect();
|
||||||
|
///
|
||||||
|
/// assert_eq!(changes, vec![
|
||||||
|
/// (ChangeTag::Equal, "a\n"),
|
||||||
|
/// (ChangeTag::Equal, "b\n"),
|
||||||
|
/// (ChangeTag::Delete, "c"),
|
||||||
|
/// (ChangeTag::Insert, "C"),
|
||||||
|
/// ]);
|
||||||
|
/// ```
|
||||||
pub fn diff_lines<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
pub fn diff_lines<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||||
&self,
|
&self,
|
||||||
old: &'old T,
|
old: &'old T,
|
||||||
|
|
@ -74,6 +94,31 @@ impl TextDiffConfig {
|
||||||
/// Creates a diff of words.
|
/// Creates a diff of words.
|
||||||
///
|
///
|
||||||
/// This splits the text into words and whitespace.
|
/// This splits the text into words and whitespace.
|
||||||
|
///
|
||||||
|
/// Note on word diffs: because the text differ will tokenize the strings
|
||||||
|
/// into small segments it can be inconvenient to work with the results
|
||||||
|
/// depending on the use case. You might also want to combine word level
|
||||||
|
/// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
|
||||||
|
/// which lets you remap the diffs back to the original input strings.
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use similar::{TextDiff, ChangeTag};
|
||||||
|
///
|
||||||
|
/// let diff = TextDiff::configure().diff_words("foo bar baz", "foo BAR baz");
|
||||||
|
/// let changes: Vec<_> = diff
|
||||||
|
/// .iter_all_changes()
|
||||||
|
/// .map(|x| (x.tag(), x.value()))
|
||||||
|
/// .collect();
|
||||||
|
///
|
||||||
|
/// assert_eq!(changes, vec![
|
||||||
|
/// (ChangeTag::Equal, "foo"),
|
||||||
|
/// (ChangeTag::Equal, " "),
|
||||||
|
/// (ChangeTag::Delete, "bar"),
|
||||||
|
/// (ChangeTag::Insert, "BAR"),
|
||||||
|
/// (ChangeTag::Equal, " "),
|
||||||
|
/// (ChangeTag::Equal, "baz"),
|
||||||
|
/// ]);
|
||||||
|
/// ```
|
||||||
pub fn diff_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
pub fn diff_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||||
&self,
|
&self,
|
||||||
old: &'old T,
|
old: &'old T,
|
||||||
|
|
@ -87,6 +132,33 @@ impl TextDiffConfig {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a diff of characters.
|
/// Creates a diff of characters.
|
||||||
|
///
|
||||||
|
/// Note on character diffs: because the text differ will tokenize the strings
|
||||||
|
/// into small segments it can be inconvenient to work with the results
|
||||||
|
/// depending on the use case. You might also want to combine character level
|
||||||
|
/// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
|
||||||
|
/// which lets you remap the diffs back to the original input strings.
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use similar::{TextDiff, ChangeTag};
|
||||||
|
///
|
||||||
|
/// let diff = TextDiff::configure().diff_chars("abcdef", "abcDDf");
|
||||||
|
/// let changes: Vec<_> = diff
|
||||||
|
/// .iter_all_changes()
|
||||||
|
/// .map(|x| (x.tag(), x.value()))
|
||||||
|
/// .collect();
|
||||||
|
///
|
||||||
|
/// assert_eq!(changes, vec![
|
||||||
|
/// (ChangeTag::Equal, "a"),
|
||||||
|
/// (ChangeTag::Equal, "b"),
|
||||||
|
/// (ChangeTag::Equal, "c"),
|
||||||
|
/// (ChangeTag::Delete, "d"),
|
||||||
|
/// (ChangeTag::Delete, "e"),
|
||||||
|
/// (ChangeTag::Insert, "D"),
|
||||||
|
/// (ChangeTag::Insert, "D"),
|
||||||
|
/// (ChangeTag::Equal, "f"),
|
||||||
|
/// ]);
|
||||||
|
/// ```
|
||||||
pub fn diff_chars<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
pub fn diff_chars<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||||
&self,
|
&self,
|
||||||
old: &'old T,
|
old: &'old T,
|
||||||
|
|
@ -106,6 +178,31 @@ impl TextDiffConfig {
|
||||||
/// requires a dependency.
|
/// requires a dependency.
|
||||||
///
|
///
|
||||||
/// This requires the `unicode` feature.
|
/// This requires the `unicode` feature.
|
||||||
|
///
|
||||||
|
/// Note on word diffs: because the text differ will tokenize the strings
|
||||||
|
/// into small segments it can be inconvenient to work with the results
|
||||||
|
/// depending on the use case. You might also want to combine word level
|
||||||
|
/// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
|
||||||
|
/// which lets you remap the diffs back to the original input strings.
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use similar::{TextDiff, ChangeTag};
|
||||||
|
///
|
||||||
|
/// let diff = TextDiff::configure().diff_unicode_words("ah(be)ce", "ah(ah)ce");
|
||||||
|
/// let changes: Vec<_> = diff
|
||||||
|
/// .iter_all_changes()
|
||||||
|
/// .map(|x| (x.tag(), x.value()))
|
||||||
|
/// .collect();
|
||||||
|
///
|
||||||
|
/// assert_eq!(changes, vec![
|
||||||
|
/// (ChangeTag::Equal, "ah"),
|
||||||
|
/// (ChangeTag::Equal, "("),
|
||||||
|
/// (ChangeTag::Delete, "be"),
|
||||||
|
/// (ChangeTag::Insert, "ah"),
|
||||||
|
/// (ChangeTag::Equal, ")"),
|
||||||
|
/// (ChangeTag::Equal, "ce"),
|
||||||
|
/// ]);
|
||||||
|
/// ```
|
||||||
#[cfg(feature = "unicode")]
|
#[cfg(feature = "unicode")]
|
||||||
pub fn diff_unicode_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
pub fn diff_unicode_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||||
&self,
|
&self,
|
||||||
|
|
@ -122,6 +219,30 @@ impl TextDiffConfig {
|
||||||
/// Creates a diff of graphemes.
|
/// Creates a diff of graphemes.
|
||||||
///
|
///
|
||||||
/// This requires the `unicode` feature.
|
/// This requires the `unicode` feature.
|
||||||
|
///
|
||||||
|
/// Note on grapheme diffs: because the text differ will tokenize the strings
|
||||||
|
/// into small segments it can be inconvenient to work with the results
|
||||||
|
/// depending on the use case. You might also want to combine grapheme level
|
||||||
|
/// diffs with the [`TextDiffRemapper`](crate::utils::TextDiffRemapper)
|
||||||
|
/// which lets you remap the diffs back to the original input strings.
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use similar::{TextDiff, ChangeTag};
|
||||||
|
///
|
||||||
|
/// let diff = TextDiff::configure().diff_graphemes("💩🇦🇹🦠", "💩🇦🇱❄️");
|
||||||
|
/// let changes: Vec<_> = diff
|
||||||
|
/// .iter_all_changes()
|
||||||
|
/// .map(|x| (x.tag(), x.value()))
|
||||||
|
/// .collect();
|
||||||
|
///
|
||||||
|
/// assert_eq!(changes, vec![
|
||||||
|
/// (ChangeTag::Equal, "💩"),
|
||||||
|
/// (ChangeTag::Delete, "🇦🇹"),
|
||||||
|
/// (ChangeTag::Delete, "🦠"),
|
||||||
|
/// (ChangeTag::Insert, "🇦🇱"),
|
||||||
|
/// (ChangeTag::Insert, "❄️"),
|
||||||
|
/// ]);
|
||||||
|
/// ```
|
||||||
#[cfg(feature = "unicode")]
|
#[cfg(feature = "unicode")]
|
||||||
pub fn diff_graphemes<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
pub fn diff_graphemes<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||||
&self,
|
&self,
|
||||||
|
|
@ -136,6 +257,25 @@ impl TextDiffConfig {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a diff of arbitrary slices.
|
/// Creates a diff of arbitrary slices.
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use similar::{TextDiff, ChangeTag};
|
||||||
|
///
|
||||||
|
/// let old = &["foo", "bar", "baz"];
|
||||||
|
/// let new = &["foo", "BAR", "baz"];
|
||||||
|
/// let diff = TextDiff::configure().diff_slices(old, new);
|
||||||
|
/// let changes: Vec<_> = diff
|
||||||
|
/// .iter_all_changes()
|
||||||
|
/// .map(|x| (x.tag(), x.value()))
|
||||||
|
/// .collect();
|
||||||
|
///
|
||||||
|
/// assert_eq!(changes, vec![
|
||||||
|
/// (ChangeTag::Equal, "foo"),
|
||||||
|
/// (ChangeTag::Delete, "bar"),
|
||||||
|
/// (ChangeTag::Insert, "BAR"),
|
||||||
|
/// (ChangeTag::Equal, "baz"),
|
||||||
|
/// ]);
|
||||||
|
/// ```
|
||||||
pub fn diff_slices<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
|
pub fn diff_slices<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
|
||||||
&self,
|
&self,
|
||||||
old: &'bufs [&'old T],
|
old: &'bufs [&'old T],
|
||||||
|
|
@ -185,7 +325,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
|
||||||
|
|
||||||
/// Creates a diff of lines.
|
/// Creates a diff of lines.
|
||||||
///
|
///
|
||||||
/// Equivalent to `TextDiff::configure().diff_lines(old, new)`.
|
/// For more information see [`TextDiffConfig::diff_lines`].
|
||||||
pub fn from_lines<T: DiffableStrRef + ?Sized>(
|
pub fn from_lines<T: DiffableStrRef + ?Sized>(
|
||||||
old: &'old T,
|
old: &'old T,
|
||||||
new: &'new T,
|
new: &'new T,
|
||||||
|
|
@ -195,7 +335,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
|
||||||
|
|
||||||
/// Creates a diff of words.
|
/// Creates a diff of words.
|
||||||
///
|
///
|
||||||
/// Equivalent to `TextDiff::configure().diff_words(old, new)`.
|
/// For more information see [`TextDiffConfig::diff_words`].
|
||||||
pub fn from_words<T: DiffableStrRef + ?Sized>(
|
pub fn from_words<T: DiffableStrRef + ?Sized>(
|
||||||
old: &'old T,
|
old: &'old T,
|
||||||
new: &'new T,
|
new: &'new T,
|
||||||
|
|
@ -205,7 +345,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
|
||||||
|
|
||||||
/// Creates a diff of chars.
|
/// Creates a diff of chars.
|
||||||
///
|
///
|
||||||
/// Equivalent to `TextDiff::configure().diff_chars(old, new)`.
|
/// For more information see [`TextDiffConfig::diff_chars`].
|
||||||
pub fn from_chars<T: DiffableStrRef + ?Sized>(
|
pub fn from_chars<T: DiffableStrRef + ?Sized>(
|
||||||
old: &'old T,
|
old: &'old T,
|
||||||
new: &'new T,
|
new: &'new T,
|
||||||
|
|
@ -215,7 +355,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
|
||||||
|
|
||||||
/// Creates a diff of unicode words.
|
/// Creates a diff of unicode words.
|
||||||
///
|
///
|
||||||
/// Equivalent to `TextDiff::configure().diff_unicode_words(old, new)`.
|
/// For more information see [`TextDiffConfig::diff_unicode_words`].
|
||||||
///
|
///
|
||||||
/// This requires the `unicode` feature.
|
/// This requires the `unicode` feature.
|
||||||
#[cfg(feature = "unicode")]
|
#[cfg(feature = "unicode")]
|
||||||
|
|
@ -228,7 +368,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
|
||||||
|
|
||||||
/// Creates a diff of graphemes.
|
/// Creates a diff of graphemes.
|
||||||
///
|
///
|
||||||
/// Equivalent to `TextDiff::configure().diff_graphemes(old, new)`.
|
/// For more information see [`TextDiffConfig::diff_graphemes`].
|
||||||
///
|
///
|
||||||
/// This requires the `unicode` feature.
|
/// This requires the `unicode` feature.
|
||||||
#[cfg(feature = "unicode")]
|
#[cfg(feature = "unicode")]
|
||||||
|
|
@ -243,7 +383,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
|
||||||
impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'new, 'bufs, T> {
|
impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'new, 'bufs, T> {
|
||||||
/// Creates a diff of arbitrary slices.
|
/// Creates a diff of arbitrary slices.
|
||||||
///
|
///
|
||||||
/// Equivalent to `TextDiff::configure().diff_slices(old, new)`.
|
/// For more information see [`TextDiffConfig::diff_slices`].
|
||||||
pub fn from_slices(
|
pub fn from_slices(
|
||||||
old: &'bufs [&'old T],
|
old: &'bufs [&'old T],
|
||||||
new: &'bufs [&'new T],
|
new: &'bufs [&'new T],
|
||||||
|
|
|
||||||
80
src/types.rs
80
src/types.rs
|
|
@ -247,6 +247,25 @@ impl DiffOp {
|
||||||
///
|
///
|
||||||
/// `old` and `new` are two indexable objects like the types you pass to
|
/// `old` and `new` are two indexable objects like the types you pass to
|
||||||
/// the diffing algorithm functions.
|
/// the diffing algorithm functions.
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use similar::{ChangeTag, Algorithm};
|
||||||
|
/// use similar::capture_diff_slices;
|
||||||
|
/// let old = vec!["foo", "bar", "baz"];
|
||||||
|
/// let new = vec!["foo", "bar", "blah"];
|
||||||
|
/// let ops = capture_diff_slices(Algorithm::Myers, &old, &new);
|
||||||
|
/// let changes: Vec<_> = ops
|
||||||
|
/// .iter()
|
||||||
|
/// .flat_map(|x| x.iter_changes(&old, &new))
|
||||||
|
/// .map(|x| (x.tag(), x.value()))
|
||||||
|
/// .collect();
|
||||||
|
/// assert_eq!(changes, vec![
|
||||||
|
/// (ChangeTag::Equal, "foo"),
|
||||||
|
/// (ChangeTag::Equal, "bar"),
|
||||||
|
/// (ChangeTag::Delete, "baz"),
|
||||||
|
/// (ChangeTag::Insert, "blah"),
|
||||||
|
/// ]);
|
||||||
|
/// ```
|
||||||
pub fn iter_changes<'x, 'lookup, Old, New, T>(
|
pub fn iter_changes<'x, 'lookup, Old, New, T>(
|
||||||
&self,
|
&self,
|
||||||
old: &'lookup Old,
|
old: &'lookup Old,
|
||||||
|
|
@ -338,6 +357,67 @@ impl DiffOp {
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Given a diffop yields the changes it encodes against the given slices.
|
||||||
|
///
|
||||||
|
/// This is similar to [`DiffOp::iter_changes`] but instead of yielding the
|
||||||
|
/// individual changes it yields consequitive changed slices.
|
||||||
|
///
|
||||||
|
/// This will only ever yield a single tuple or two tuples in case a
|
||||||
|
/// [`DiffOp::Replace`] operation is passed.
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use similar::{ChangeTag, Algorithm};
|
||||||
|
/// use similar::capture_diff_slices;
|
||||||
|
/// let old = vec!["foo", "bar", "baz"];
|
||||||
|
/// let new = vec!["foo", "bar", "blah"];
|
||||||
|
/// let ops = capture_diff_slices(Algorithm::Myers, &old, &new);
|
||||||
|
/// let changes: Vec<_> = ops.iter().flat_map(|x| x.iter_slices(&old, &new)).collect();
|
||||||
|
/// assert_eq!(changes, vec![
|
||||||
|
/// (ChangeTag::Equal, &["foo", "bar"][..]),
|
||||||
|
/// (ChangeTag::Delete, &["baz"][..]),
|
||||||
|
/// (ChangeTag::Insert, &["blah"][..]),
|
||||||
|
/// ]);
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Due to lifetime restrictions it's currently impossible for the
|
||||||
|
/// returned slices to outlive the lookup.
|
||||||
|
pub fn iter_slices<'lookup, Old, New, T>(
|
||||||
|
&self,
|
||||||
|
old: &'lookup Old,
|
||||||
|
new: &'lookup New,
|
||||||
|
) -> impl Iterator<Item = (ChangeTag, &'lookup T)>
|
||||||
|
where
|
||||||
|
T: 'lookup + ?Sized,
|
||||||
|
Old: Index<Range<usize>, Output = T> + ?Sized,
|
||||||
|
New: Index<Range<usize>, Output = T> + ?Sized,
|
||||||
|
{
|
||||||
|
match *self {
|
||||||
|
DiffOp::Equal { old_index, len, .. } => {
|
||||||
|
Some((ChangeTag::Equal, &old[old_index..old_index + len]))
|
||||||
|
.into_iter()
|
||||||
|
.chain(None.into_iter())
|
||||||
|
}
|
||||||
|
DiffOp::Insert {
|
||||||
|
new_index, new_len, ..
|
||||||
|
} => Some((ChangeTag::Insert, &new[new_index..new_index + new_len]))
|
||||||
|
.into_iter()
|
||||||
|
.chain(None.into_iter()),
|
||||||
|
DiffOp::Delete {
|
||||||
|
old_index, old_len, ..
|
||||||
|
} => Some((ChangeTag::Delete, &old[old_index..old_index + old_len]))
|
||||||
|
.into_iter()
|
||||||
|
.chain(None.into_iter()),
|
||||||
|
DiffOp::Replace {
|
||||||
|
old_index,
|
||||||
|
old_len,
|
||||||
|
new_index,
|
||||||
|
new_len,
|
||||||
|
} => Some((ChangeTag::Delete, &old[old_index..old_index + old_len]))
|
||||||
|
.into_iter()
|
||||||
|
.chain(Some((ChangeTag::Insert, &new[new_index..new_index + new_len])).into_iter()),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "text")]
|
#[cfg(feature = "text")]
|
||||||
|
|
|
||||||
415
src/utils.rs
Normal file
415
src/utils.rs
Normal file
|
|
@ -0,0 +1,415 @@
|
||||||
|
//! Utilities for common diff related operations.
|
||||||
|
//!
|
||||||
|
//! This module provides specialized utilities and simplified diff operations
|
||||||
|
//! for common operations. It's useful when you want to work with text diffs
|
||||||
|
//! and you're interested in getting vectors of these changes directly.
|
||||||
|
//!
|
||||||
|
//! # Slice Remapping
|
||||||
|
//!
|
||||||
|
//! When working with [`TextDiff`] it's common that one takes advantage of the
|
||||||
|
//! built-in tokenization of the differ. This for instance lets you do
|
||||||
|
//! grapheme level diffs. This is implemented by the differ generating rather
|
||||||
|
//! small slices of strings and running a diff algorithm over them.
|
||||||
|
//!
|
||||||
|
//! The downside of this is that all the [`DiffOp`] objects produced by the
|
||||||
|
//! diffing algorithm encode operations on these rather small slices. For
|
||||||
|
//! a lot of use cases this is not what one wants which can make this very
|
||||||
|
//! inconvenient. This module provides a [`TextDiffRemapper`] which lets you
|
||||||
|
//! map from the ranges that the [`TextDiff`] returns to the original input
|
||||||
|
//! strings. For more information see [`TextDiffRemapper`].
|
||||||
|
//!
|
||||||
|
//! # Simple Diff Functions
|
||||||
|
//!
|
||||||
|
//! This module provides a range of common text diff functions that will
|
||||||
|
//! produce vectors of `(change_tag, value)` tuples. They will automatically
|
||||||
|
//! optimize towards returning the most useful slice that one would expect for
|
||||||
|
//! the type of diff performed.
|
||||||
|
|
||||||
|
use std::hash::Hash;
|
||||||
|
use std::ops::{Index, Range};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
capture_diff_slices, Algorithm, ChangeTag, DiffOp, DiffableStr, DiffableStrRef, TextDiff,
|
||||||
|
};
|
||||||
|
|
||||||
|
struct SliceRemapper<'x, T: ?Sized> {
|
||||||
|
source: &'x T,
|
||||||
|
indexes: Vec<Range<usize>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'x, 'slices, T: DiffableStr + ?Sized> SliceRemapper<'x, T> {
|
||||||
|
fn new(source: &'x T, slices: &[&'x T]) -> SliceRemapper<'x, T> {
|
||||||
|
let indexes = slices
|
||||||
|
.iter()
|
||||||
|
.scan(0, |state, item| {
|
||||||
|
let start = *state;
|
||||||
|
let end = start + item.len();
|
||||||
|
*state = end;
|
||||||
|
Some(start..end)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
SliceRemapper { source, indexes }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn slice(&self, range: Range<usize>) -> Option<&'x T> {
|
||||||
|
let start = self.indexes.get(range.start)?.start;
|
||||||
|
let end = self.indexes.get(range.end - 1)?.end;
|
||||||
|
Some(self.source.slice(start..end))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'x, T: DiffableStr + ?Sized> Index<Range<usize>> for SliceRemapper<'x, T> {
|
||||||
|
type Output = T;
|
||||||
|
|
||||||
|
fn index(&self, range: Range<usize>) -> &Self::Output {
|
||||||
|
self.slice(range).expect("out of bounds")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A remapper that can remap diff ops to the original slices.
|
||||||
|
///
|
||||||
|
/// The idea here is that when a [`TextDiff`](crate::TextDiff) is created from
|
||||||
|
/// two strings and the internal tokenization is used, this remapper can take
|
||||||
|
/// a range in the tokenized sequences and remap it to the original string.
|
||||||
|
/// This is particularly useful when you want to do things like character or
|
||||||
|
/// grapheme level diffs but you want to not have to iterate over small sequences
|
||||||
|
/// but large consequitive ones from the source.
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use similar::{ChangeTag, TextDiff};
|
||||||
|
/// use similar::utils::TextDiffRemapper;
|
||||||
|
///
|
||||||
|
/// let old = "yo! foo bar baz";
|
||||||
|
/// let new = "yo! foo bor baz";
|
||||||
|
/// let diff = TextDiff::from_words(old, new);
|
||||||
|
/// let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
|
||||||
|
/// let changes: Vec<_> = diff.ops()
|
||||||
|
/// .iter()
|
||||||
|
/// .flat_map(move |x| remapper.iter_slices(x))
|
||||||
|
/// .collect();
|
||||||
|
///
|
||||||
|
/// assert_eq!(changes, vec![
|
||||||
|
/// (ChangeTag::Equal, "yo! foo "),
|
||||||
|
/// (ChangeTag::Delete, "bar"),
|
||||||
|
/// (ChangeTag::Insert, "bor"),
|
||||||
|
/// (ChangeTag::Equal, " baz")
|
||||||
|
/// ]);
|
||||||
|
pub struct TextDiffRemapper<'x, T: ?Sized> {
|
||||||
|
old: SliceRemapper<'x, T>,
|
||||||
|
new: SliceRemapper<'x, T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'x, 'slices, T: DiffableStr + ?Sized> TextDiffRemapper<'x, T> {
|
||||||
|
/// Creates a new remapper from strings and slices.
|
||||||
|
pub fn new(
|
||||||
|
old_slices: &[&'x T],
|
||||||
|
new_slices: &[&'x T],
|
||||||
|
old: &'x T,
|
||||||
|
new: &'x T,
|
||||||
|
) -> TextDiffRemapper<'x, T> {
|
||||||
|
TextDiffRemapper {
|
||||||
|
old: SliceRemapper::new(old, old_slices),
|
||||||
|
new: SliceRemapper::new(new, new_slices),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a new remapper from a text diff and the original strings.
|
||||||
|
pub fn from_text_diff<'old, 'new, 'bufs>(
|
||||||
|
diff: &TextDiff<'old, 'new, 'bufs, T>,
|
||||||
|
old: &'x T,
|
||||||
|
new: &'x T,
|
||||||
|
) -> TextDiffRemapper<'x, T>
|
||||||
|
where
|
||||||
|
'old: 'x,
|
||||||
|
'new: 'x,
|
||||||
|
{
|
||||||
|
TextDiffRemapper {
|
||||||
|
old: SliceRemapper::new(old, diff.old_slices()),
|
||||||
|
new: SliceRemapper::new(new, diff.new_slices()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Slices into the old string.
|
||||||
|
pub fn slice_old(&self, range: Range<usize>) -> Option<&'x T> {
|
||||||
|
self.old.slice(range)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Slices into the new string.
|
||||||
|
pub fn slice_new(&self, range: Range<usize>) -> Option<&'x T> {
|
||||||
|
self.new.slice(range)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Given a diffop yields the changes it encodes against the original strings.
|
||||||
|
///
|
||||||
|
/// This is the same as the [`DiffOp::iter_slices`] method.
|
||||||
|
///
|
||||||
|
/// ## Panics
|
||||||
|
///
|
||||||
|
/// This method can panic if the input strings passed to the constructor
|
||||||
|
/// are incompatible with the input strings passed to the diffing algorithm.
|
||||||
|
pub fn iter_slices(&self, op: &DiffOp) -> impl Iterator<Item = (ChangeTag, &'x T)> {
|
||||||
|
// note: this is equivalent to the code in `DiffOp::iter_slices`. It is
|
||||||
|
// a copy/paste because the slicing currently cannot be well abstracted
|
||||||
|
// because of lifetime issues caused by the `Index` trait.
|
||||||
|
match *op {
|
||||||
|
DiffOp::Equal { old_index, len, .. } => {
|
||||||
|
Some((ChangeTag::Equal, self.old.slice(old_index..old_index + len)))
|
||||||
|
.into_iter()
|
||||||
|
.chain(None.into_iter())
|
||||||
|
}
|
||||||
|
DiffOp::Insert {
|
||||||
|
new_index, new_len, ..
|
||||||
|
} => Some((
|
||||||
|
ChangeTag::Insert,
|
||||||
|
self.new.slice(new_index..new_index + new_len),
|
||||||
|
))
|
||||||
|
.into_iter()
|
||||||
|
.chain(None.into_iter()),
|
||||||
|
DiffOp::Delete {
|
||||||
|
old_index, old_len, ..
|
||||||
|
} => Some((
|
||||||
|
ChangeTag::Delete,
|
||||||
|
self.old.slice(old_index..old_index + old_len),
|
||||||
|
))
|
||||||
|
.into_iter()
|
||||||
|
.chain(None.into_iter()),
|
||||||
|
DiffOp::Replace {
|
||||||
|
old_index,
|
||||||
|
old_len,
|
||||||
|
new_index,
|
||||||
|
new_len,
|
||||||
|
} => Some((
|
||||||
|
ChangeTag::Delete,
|
||||||
|
self.old.slice(old_index..old_index + old_len),
|
||||||
|
))
|
||||||
|
.into_iter()
|
||||||
|
.chain(
|
||||||
|
Some((
|
||||||
|
ChangeTag::Insert,
|
||||||
|
self.new.slice(new_index..new_index + new_len),
|
||||||
|
))
|
||||||
|
.into_iter(),
|
||||||
|
),
|
||||||
|
}
|
||||||
|
.map(|(tag, opt_val)| (tag, opt_val.expect("slice out of bounds")))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shortcut for diffing two slices.
|
||||||
|
///
|
||||||
|
/// This function produces the diff of two slices and returns a vector
|
||||||
|
/// with the changes.
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use similar::{Algorithm, ChangeTag};
|
||||||
|
/// use similar::utils::diff_slices;
|
||||||
|
///
|
||||||
|
/// let old = "foo\nbar\nbaz".lines().collect::<Vec<_>>();
|
||||||
|
/// let new = "foo\nbar\nBAZ".lines().collect::<Vec<_>>();
|
||||||
|
/// assert_eq!(diff_slices(Algorithm::Myers, &old, &new), vec![
|
||||||
|
/// (ChangeTag::Equal, &["foo", "bar"][..]),
|
||||||
|
/// (ChangeTag::Delete, &["baz"][..]),
|
||||||
|
/// (ChangeTag::Insert, &["BAZ"][..]),
|
||||||
|
/// ]);
|
||||||
|
/// ```
|
||||||
|
pub fn diff_slices<'x, T: PartialEq + Hash + Ord>(
|
||||||
|
alg: Algorithm,
|
||||||
|
old: &'x [T],
|
||||||
|
new: &'x [T],
|
||||||
|
) -> Vec<(ChangeTag, &'x [T])> {
|
||||||
|
capture_diff_slices(alg, old, new)
|
||||||
|
.iter()
|
||||||
|
.flat_map(|op| op.iter_slices(old, new))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shortcut for making a character level diff.
|
||||||
|
///
|
||||||
|
/// This function produces the diff of two strings and returns a vector
|
||||||
|
/// with the changes. It returns connected slices into the original string
|
||||||
|
/// rather than character level slices.
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use similar::{Algorithm, ChangeTag};
|
||||||
|
/// use similar::utils::diff_chars;
|
||||||
|
///
|
||||||
|
/// assert_eq!(diff_chars(Algorithm::Myers, "foobarbaz", "fooBARbaz"), vec![
|
||||||
|
/// (ChangeTag::Equal, "foo"),
|
||||||
|
/// (ChangeTag::Delete, "bar"),
|
||||||
|
/// (ChangeTag::Insert, "BAR"),
|
||||||
|
/// (ChangeTag::Equal, "baz"),
|
||||||
|
/// ]);
|
||||||
|
/// ```
|
||||||
|
pub fn diff_chars<'x, T: DiffableStrRef + ?Sized>(
|
||||||
|
alg: Algorithm,
|
||||||
|
old: &'x T,
|
||||||
|
new: &'x T,
|
||||||
|
) -> Vec<(ChangeTag, &'x T::Output)> {
|
||||||
|
let old = old.as_diffable_str();
|
||||||
|
let new = new.as_diffable_str();
|
||||||
|
let diff = TextDiff::configure().algorithm(alg).diff_chars(old, new);
|
||||||
|
let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
|
||||||
|
diff.ops()
|
||||||
|
.iter()
|
||||||
|
.flat_map(move |x| remapper.iter_slices(x))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shortcut for making a word level diff.
|
||||||
|
///
|
||||||
|
/// This function produces the diff of two strings and returns a vector
|
||||||
|
/// with the changes. It returns connected slices into the original string
|
||||||
|
/// rather than word level slices.
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use similar::{Algorithm, ChangeTag};
|
||||||
|
/// use similar::utils::diff_words;
|
||||||
|
///
|
||||||
|
/// assert_eq!(diff_words(Algorithm::Myers, "foo bar baz", "foo bor baz"), vec![
|
||||||
|
/// (ChangeTag::Equal, "foo "),
|
||||||
|
/// (ChangeTag::Delete, "bar"),
|
||||||
|
/// (ChangeTag::Insert, "bor"),
|
||||||
|
/// (ChangeTag::Equal, " baz"),
|
||||||
|
/// ]);
|
||||||
|
/// ```
|
||||||
|
pub fn diff_words<'x, T: DiffableStrRef + ?Sized>(
|
||||||
|
alg: Algorithm,
|
||||||
|
old: &'x T,
|
||||||
|
new: &'x T,
|
||||||
|
) -> Vec<(ChangeTag, &'x T::Output)> {
|
||||||
|
let old = old.as_diffable_str();
|
||||||
|
let new = new.as_diffable_str();
|
||||||
|
let diff = TextDiff::configure().algorithm(alg).diff_words(old, new);
|
||||||
|
let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
|
||||||
|
diff.ops()
|
||||||
|
.iter()
|
||||||
|
.flat_map(move |x| remapper.iter_slices(x))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shortcut for making a unicode word level diff.
///
/// Diffs the two strings by unicode words and returns the changes as a
/// vector.  The values are connected slices into the original strings rather
/// than word level slices.
///
/// ```rust
/// use similar::{Algorithm, ChangeTag};
/// use similar::utils::diff_unicode_words;
///
/// let old = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
/// let new = "The quick (\"brown\") fox can't jump 9.84 meters, right?";
/// assert_eq!(diff_unicode_words(Algorithm::Myers, old, new), vec![
///     (ChangeTag::Equal, "The quick (\"brown\") fox can\'t jump "),
///     (ChangeTag::Delete, "32.3"),
///     (ChangeTag::Insert, "9.84"),
///     (ChangeTag::Equal, " "),
///     (ChangeTag::Delete, "feet"),
///     (ChangeTag::Insert, "meters"),
///     (ChangeTag::Equal, ", right?")
/// ]);
/// ```
///
/// This requires the `unicode` feature.
#[cfg(feature = "unicode")]
pub fn diff_unicode_words<'x, T: DiffableStrRef + ?Sized>(
    alg: Algorithm,
    old: &'x T,
    new: &'x T,
) -> Vec<(ChangeTag, &'x T::Output)> {
    let (old, new) = (old.as_diffable_str(), new.as_diffable_str());
    let diff = TextDiff::configure()
        .algorithm(alg)
        .diff_unicode_words(old, new);
    // Map every op back onto the untokenized inputs.
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
    let mut changes = Vec::new();
    for op in diff.ops() {
        changes.extend(remapper.iter_slices(op));
    }
    changes
}
|
||||||
|
|
||||||
|
/// Computes a grapheme level diff of two strings in a single call.
///
/// The inputs are diffed grapheme by grapheme with the requested algorithm
/// and the result is returned as a vector of `(tag, slice)` pairs.  The
/// slices are connected slices into the original strings rather than
/// individual grapheme level slices.
///
/// ```rust
/// use similar::{Algorithm, ChangeTag};
/// use similar::utils::diff_graphemes;
///
/// let old = "The flag of Austria is 🇦🇹";
/// let new = "The flag of Albania is 🇦🇱";
/// assert_eq!(diff_graphemes(Algorithm::Myers, old, new), vec![
///     (ChangeTag::Equal, "The flag of A"),
///     (ChangeTag::Delete, "ustr"),
///     (ChangeTag::Insert, "lban"),
///     (ChangeTag::Equal, "ia is "),
///     (ChangeTag::Delete, "🇦🇹"),
///     (ChangeTag::Insert, "🇦🇱"),
/// ]);
/// ```
///
/// This requires the `unicode` feature.
#[cfg(feature = "unicode")]
pub fn diff_graphemes<'x, T: DiffableStrRef + ?Sized>(
    alg: Algorithm,
    old: &'x T,
    new: &'x T,
) -> Vec<(ChangeTag, &'x T::Output)> {
    let old_text = old.as_diffable_str();
    let new_text = new.as_diffable_str();
    let grapheme_diff = TextDiff::configure()
        .algorithm(alg)
        .diff_graphemes(old_text, new_text);
    // The remapper turns each diff operation back into slices of the
    // original inputs.
    let remapper = TextDiffRemapper::from_text_diff(&grapheme_diff, old_text, new_text);

    let mut changes = Vec::new();
    for op in grapheme_diff.ops() {
        changes.extend(remapper.iter_slices(op));
    }
    changes
}
|
||||||
|
|
||||||
|
/// Shortcut for making a line diff.
|
||||||
|
///
|
||||||
|
/// This function produces the diff of two slices and returns a vector
|
||||||
|
/// with the changes. Unlike [`diff_chars`] or [`diff_slices`] it returns a
|
||||||
|
/// change tag for each line.
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use similar::{Algorithm, ChangeTag};
|
||||||
|
/// use similar::utils::diff_lines;
|
||||||
|
///
|
||||||
|
/// assert_eq!(diff_lines(Algorithm::Myers, "foo\nbar\nbaz\nblah", "foo\nbar\nbaz\nblurgh"), vec![
|
||||||
|
/// (ChangeTag::Equal, "foo\n"),
|
||||||
|
/// (ChangeTag::Equal, "bar\n"),
|
||||||
|
/// (ChangeTag::Equal, "baz\n"),
|
||||||
|
/// (ChangeTag::Delete, "blah"),
|
||||||
|
/// (ChangeTag::Insert, "blurgh"),
|
||||||
|
/// ]);
|
||||||
|
/// ```
|
||||||
|
pub fn diff_lines<'x, T: DiffableStrRef + ?Sized>(
|
||||||
|
alg: Algorithm,
|
||||||
|
old: &'x T,
|
||||||
|
new: &'x T,
|
||||||
|
) -> Vec<(ChangeTag, &'x T::Output)> {
|
||||||
|
TextDiff::configure()
|
||||||
|
.algorithm(alg)
|
||||||
|
.diff_lines(old, new)
|
||||||
|
.iter_all_changes()
|
||||||
|
.map(|change| (change.tag(), change.value()))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
fn test_remapper() {
    // The assertions below index into the word tokens of `a`; they imply the
    // tokenizer yields five tokens here (words and the whitespace between
    // them), so ranges address tokens, not bytes.
    let a = "foo bar baz";
    let words = a.tokenize_words();
    let remap = SliceRemapper::new(a, &words);

    // Token ranges map back onto connected slices of the original string.
    assert_eq!(remap.slice(0..3), Some("foo bar"));
    assert_eq!(remap.slice(1..3), Some(" bar"));
    assert_eq!(remap.slice(0..1), Some("foo"));
    assert_eq!(remap.slice(0..5), Some("foo bar baz"));

    // A range that reaches past the last token cannot be mapped.
    assert_eq!(remap.slice(0..6), None);
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue