Add support for byte diffing (#7)
Restructured text diffing to also support bstr
This commit is contained in:
parent
e53427b56f
commit
4b85e70f91
11 changed files with 775 additions and 307 deletions
400
src/text/mod.rs
400
src/text/mod.rs
|
|
@ -11,7 +11,7 @@
|
|||
//! Text diffing is available by default but can be disabled by turning off the
|
||||
//! default features. The feature to enable to get it back is `text`.
|
||||
//!
|
||||
//! ## Examples
|
||||
//! # Examples
|
||||
//!
|
||||
//! A super simple example for how to generate a unified diff with three lines
|
||||
//! off context around the changes:
|
||||
|
|
@ -38,7 +38,7 @@
|
|||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! ## Ops vs Changes
|
||||
//! # Ops vs Changes
|
||||
//!
|
||||
//! Because very commonly two compared sequences will largely match this module
|
||||
//! splits it's functionality into two layers. The first is inherited from the
|
||||
|
|
@ -51,7 +51,7 @@
|
|||
//! Because the [`TextDiff::grouped_ops`] method can isolate clusters of changes
|
||||
//! this even works for very long files if paired with this method.
|
||||
//!
|
||||
//! ## Trailing Newlines
|
||||
//! # Trailing Newlines
|
||||
//!
|
||||
//! When working with line diffs (and unified diffs in general) there are two
|
||||
//! "philosophies" to look at lines. One is to diff lines without their newline
|
||||
|
|
@ -68,11 +68,30 @@
|
|||
//! either rendering a virtual newline at that position or to indicate it in
|
||||
//! different ways. For instance the unified diff code will render the special
|
||||
//! `\ No newline at end of file` marker.
|
||||
//!
|
||||
//! # Bytes vs Unicode
|
||||
//!
|
||||
//! This module concerns itself with a loser definition of "text" than you would
|
||||
//! normally see in Rust. While by default it can only operate on [`str`] types
|
||||
//! by enabling the `bytes` feature it gains support for byte slices with some
|
||||
//! caveats.
|
||||
//!
|
||||
//! A lot of text diff functionality assumes that what is being diffed constiutes
|
||||
//! text, but in the real world it can often be challenging to ensure that this is
|
||||
//! all valid utf-8. Because of this the crate is built so that most functinality
|
||||
//! also still works with bytes for as long as they are roughtly ASCII compatible.
|
||||
//!
|
||||
//! This means you will be successful in creating a unified diff from latin1
|
||||
//! encoded bytes but if you try to do the same with EBCDIC encoded bytes you
|
||||
//! will only get garbage.
|
||||
#![cfg(feature = "text")]
|
||||
use std::borrow::Cow;
|
||||
use std::cmp::Reverse;
|
||||
use std::collections::{BinaryHeap, HashMap};
|
||||
use std::fmt;
|
||||
use std::hash::Hash;
|
||||
|
||||
mod abstraction;
|
||||
|
||||
#[cfg(feature = "inline")]
|
||||
mod inline;
|
||||
|
|
@ -82,6 +101,8 @@ mod udiff;
|
|||
pub use self::inline::*;
|
||||
pub use self::udiff::*;
|
||||
|
||||
pub use crate::text::abstraction::*;
|
||||
|
||||
use crate::algorithms::{
|
||||
capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, DiffOp, DiffTag,
|
||||
};
|
||||
|
|
@ -127,14 +148,14 @@ impl TextDiffConfig {
|
|||
///
|
||||
/// This splits the text `old` and `new` into lines preserving newlines
|
||||
/// in the input.
|
||||
pub fn diff_lines<'old, 'new, 'bufs>(
|
||||
pub fn diff_lines<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||
&self,
|
||||
old: &'old str,
|
||||
new: &'new str,
|
||||
) -> TextDiff<'old, 'new, 'bufs> {
|
||||
old: &'old T,
|
||||
new: &'new T,
|
||||
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||
self.diff(
|
||||
Cow::Owned(split_lines(old).collect()),
|
||||
Cow::Owned(split_lines(new).collect()),
|
||||
Cow::Owned(old.as_diffable_str().split_lines()),
|
||||
Cow::Owned(new.as_diffable_str().split_lines()),
|
||||
true,
|
||||
)
|
||||
}
|
||||
|
|
@ -142,14 +163,27 @@ impl TextDiffConfig {
|
|||
/// Creates a diff of words.
|
||||
///
|
||||
/// This splits the text into words and whitespace.
|
||||
pub fn diff_words<'old, 'new, 'bufs>(
|
||||
pub fn diff_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||
&self,
|
||||
old: &'old str,
|
||||
new: &'new str,
|
||||
) -> TextDiff<'old, 'new, 'bufs> {
|
||||
old: &'old T,
|
||||
new: &'new T,
|
||||
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||
self.diff(
|
||||
Cow::Owned(split_words(old).collect()),
|
||||
Cow::Owned(split_words(new).collect()),
|
||||
Cow::Owned(old.as_diffable_str().split_words()),
|
||||
Cow::Owned(new.as_diffable_str().split_words()),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
/// Creates a diff of characters.
|
||||
pub fn diff_chars<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||
&self,
|
||||
old: &'old T,
|
||||
new: &'new T,
|
||||
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||
self.diff(
|
||||
Cow::Owned(old.as_diffable_str().split_chars()),
|
||||
Cow::Owned(new.as_diffable_str().split_chars()),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
|
@ -162,27 +196,14 @@ impl TextDiffConfig {
|
|||
///
|
||||
/// This requires the `unicode` feature.
|
||||
#[cfg(feature = "unicode")]
|
||||
pub fn diff_unicode_words<'old, 'new, 'bufs>(
|
||||
pub fn diff_unicode_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||
&self,
|
||||
old: &'old str,
|
||||
new: &'new str,
|
||||
) -> TextDiff<'old, 'new, 'bufs> {
|
||||
old: &'old T,
|
||||
new: &'new T,
|
||||
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||
self.diff(
|
||||
Cow::Owned(split_unicode_words(old).collect()),
|
||||
Cow::Owned(split_unicode_words(new).collect()),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
/// Creates a diff of characters.
|
||||
pub fn diff_chars<'old, 'new, 'bufs>(
|
||||
&self,
|
||||
old: &'old str,
|
||||
new: &'new str,
|
||||
) -> TextDiff<'old, 'new, 'bufs> {
|
||||
self.diff(
|
||||
Cow::Owned(split_chars(old).collect()),
|
||||
Cow::Owned(split_chars(new).collect()),
|
||||
Cow::Owned(old.as_diffable_str().split_unicode_words()),
|
||||
Cow::Owned(new.as_diffable_str().split_unicode_words()),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
|
@ -191,33 +212,33 @@ impl TextDiffConfig {
|
|||
///
|
||||
/// This requires the `unicode` feature.
|
||||
#[cfg(feature = "unicode")]
|
||||
pub fn diff_graphemes<'old, 'new, 'bufs>(
|
||||
pub fn diff_graphemes<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||
&self,
|
||||
old: &'old str,
|
||||
new: &'new str,
|
||||
) -> TextDiff<'old, 'new, 'bufs> {
|
||||
old: &'old T,
|
||||
new: &'new T,
|
||||
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||
self.diff(
|
||||
Cow::Owned(split_graphemes(old).collect()),
|
||||
Cow::Owned(split_graphemes(new).collect()),
|
||||
Cow::Owned(old.as_diffable_str().split_graphemes()),
|
||||
Cow::Owned(new.as_diffable_str().split_graphemes()),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
/// Creates a diff of arbitrary slices.
|
||||
pub fn diff_slices<'old, 'new, 'bufs>(
|
||||
pub fn diff_slices<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
|
||||
&self,
|
||||
old: &'bufs [&'old str],
|
||||
new: &'bufs [&'new str],
|
||||
) -> TextDiff<'old, 'new, 'bufs> {
|
||||
old: &'bufs [&'old T],
|
||||
new: &'bufs [&'new T],
|
||||
) -> TextDiff<'old, 'new, 'bufs, T> {
|
||||
self.diff(Cow::Borrowed(old), Cow::Borrowed(new), false)
|
||||
}
|
||||
|
||||
fn diff<'old, 'new, 'bufs>(
|
||||
fn diff<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
|
||||
&self,
|
||||
old: Cow<'bufs, [&'old str]>,
|
||||
new: Cow<'bufs, [&'new str]>,
|
||||
old: Cow<'bufs, [&'old T]>,
|
||||
new: Cow<'bufs, [&'new T]>,
|
||||
newline_terminated: bool,
|
||||
) -> TextDiff<'old, 'new, 'bufs> {
|
||||
) -> TextDiff<'old, 'new, 'bufs, T> {
|
||||
let ops = capture_diff_slices(self.algorithm, &old, &new);
|
||||
TextDiff {
|
||||
old,
|
||||
|
|
@ -230,9 +251,9 @@ impl TextDiffConfig {
|
|||
}
|
||||
|
||||
/// Captures diff op codes for textual diffs
|
||||
pub struct TextDiff<'old, 'new, 'bufs> {
|
||||
old: Cow<'bufs, [&'old str]>,
|
||||
new: Cow<'bufs, [&'new str]>,
|
||||
pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
|
||||
old: Cow<'bufs, [&'old T]>,
|
||||
new: Cow<'bufs, [&'new T]>,
|
||||
ops: Vec<DiffOp>,
|
||||
newline_terminated: bool,
|
||||
algorithm: Algorithm,
|
||||
|
|
@ -255,26 +276,26 @@ pub enum ChangeTag {
|
|||
/// exists so that it's more convenient to work with textual differences as
|
||||
/// the underlying [`DiffOp`] does not know anything about strings.
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)]
|
||||
pub struct Change<'s> {
|
||||
pub struct Change<'s, T: DiffableStr + ?Sized> {
|
||||
tag: ChangeTag,
|
||||
old_index: Option<usize>,
|
||||
new_index: Option<usize>,
|
||||
value: &'s str,
|
||||
value: &'s T,
|
||||
missing_newline: bool,
|
||||
}
|
||||
|
||||
impl<'s> fmt::Display for Change<'s> {
|
||||
impl<'s, T: DiffableStr + ?Sized> fmt::Display for Change<'s, T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"{}{}",
|
||||
self.value(),
|
||||
self.as_str_lossy(),
|
||||
if self.missing_newline { "\n" } else { "" }
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Change<'s> {
|
||||
impl<'s, T: DiffableStr + ?Sized> Change<'s, T> {
|
||||
/// Returns the change tag.
|
||||
pub fn tag(&self) -> ChangeTag {
|
||||
self.tag
|
||||
|
|
@ -290,11 +311,21 @@ impl<'s> Change<'s> {
|
|||
self.new_index
|
||||
}
|
||||
|
||||
/// Returns the changed value.
|
||||
pub fn value(&self) -> &'s str {
|
||||
/// Returns the underlying changed value.
|
||||
pub fn value(&self) -> &'s T {
|
||||
self.value
|
||||
}
|
||||
|
||||
/// Returns the value as string if it is utf-8.
|
||||
pub fn as_str(&self) -> Option<&'s str> {
|
||||
T::as_str(self.value)
|
||||
}
|
||||
|
||||
/// Returns the value (lossy) decoded as utf-8 string.
|
||||
pub fn as_str_lossy(&self) -> Cow<'s, str> {
|
||||
T::as_str_lossy(self.value)
|
||||
}
|
||||
|
||||
/// Returns `true` if this change needs to be followed up by a
|
||||
/// missing newline.
|
||||
///
|
||||
|
|
@ -305,7 +336,7 @@ impl<'s> Change<'s> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||
impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
|
||||
/// Configures a text differ before diffing.
|
||||
pub fn configure() -> TextDiffConfig {
|
||||
TextDiffConfig::default()
|
||||
|
|
@ -314,15 +345,31 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
/// Creates a diff of lines.
|
||||
///
|
||||
/// Equivalent to `TextDiff::configure().diff_lines(old, new)`.
|
||||
pub fn from_lines(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
||||
Self::configure().diff_lines(old, new)
|
||||
pub fn from_lines<T: DiffableStrRef + ?Sized>(
|
||||
old: &'old T,
|
||||
new: &'new T,
|
||||
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||
TextDiff::configure().diff_lines(old, new)
|
||||
}
|
||||
|
||||
/// Creates a diff of words.
|
||||
///
|
||||
/// Equivalent to `TextDiff::configure().diff_words(old, new)`.
|
||||
pub fn from_words(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
||||
Self::configure().diff_words(old, new)
|
||||
pub fn from_words<T: DiffableStrRef + ?Sized>(
|
||||
old: &'old T,
|
||||
new: &'new T,
|
||||
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||
TextDiff::configure().diff_words(old, new)
|
||||
}
|
||||
|
||||
/// Creates a diff of chars.
|
||||
///
|
||||
/// Equivalent to `TextDiff::configure().diff_chars(old, new)`.
|
||||
pub fn from_chars<T: DiffableStrRef + ?Sized>(
|
||||
old: &'old T,
|
||||
new: &'new T,
|
||||
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||
TextDiff::configure().diff_chars(old, new)
|
||||
}
|
||||
|
||||
/// Creates a diff of unicode words.
|
||||
|
|
@ -331,15 +378,11 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
///
|
||||
/// This requires the `unicode` feature.
|
||||
#[cfg(feature = "unicode")]
|
||||
pub fn from_unicode_words(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
||||
Self::configure().diff_unicode_words(old, new)
|
||||
}
|
||||
|
||||
/// Creates a diff of chars.
|
||||
///
|
||||
/// Equivalent to `TextDiff::configure().diff_chars(old, new)`.
|
||||
pub fn from_chars(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
||||
Self::configure().diff_chars(old, new)
|
||||
pub fn from_unicode_words<T: DiffableStrRef + ?Sized>(
|
||||
old: &'old T,
|
||||
new: &'new T,
|
||||
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||
TextDiff::configure().diff_unicode_words(old, new)
|
||||
}
|
||||
|
||||
/// Creates a diff of graphemes.
|
||||
|
|
@ -348,18 +391,23 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
///
|
||||
/// This requires the `unicode` feature.
|
||||
#[cfg(feature = "unicode")]
|
||||
pub fn from_graphemes(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
||||
Self::configure().diff_graphemes(old, new)
|
||||
pub fn from_graphemes<T: DiffableStrRef + ?Sized>(
|
||||
old: &'old T,
|
||||
new: &'new T,
|
||||
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||
TextDiff::configure().diff_graphemes(old, new)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'new, 'bufs, T> {
|
||||
/// Creates a diff of arbitrary slices.
|
||||
///
|
||||
/// Equivalent to `TextDiff::configure().diff_slices(old, new)`.
|
||||
pub fn from_slices(
|
||||
old: &'bufs [&'old str],
|
||||
new: &'bufs [&'new str],
|
||||
) -> TextDiff<'old, 'new, 'bufs> {
|
||||
Self::configure().diff_slices(old, new)
|
||||
old: &'bufs [&'old T],
|
||||
new: &'bufs [&'new T],
|
||||
) -> TextDiff<'old, 'new, 'bufs, T> {
|
||||
TextDiff::configure().diff_slices(old, new)
|
||||
}
|
||||
|
||||
/// The name of the algorithm that created the diff.
|
||||
|
|
@ -376,12 +424,12 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
}
|
||||
|
||||
/// Returns all old slices.
|
||||
pub fn old_slices(&self) -> &[&'old str] {
|
||||
pub fn old_slices(&self) -> &[&'old T] {
|
||||
&self.old
|
||||
}
|
||||
|
||||
/// Returns all new slices.
|
||||
pub fn new_slices(&self) -> &[&'new str] {
|
||||
pub fn new_slices(&self) -> &[&'new T] {
|
||||
&self.new
|
||||
}
|
||||
|
||||
|
|
@ -405,7 +453,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
/// ways in which a change could be encoded (insert/delete vs replace), look
|
||||
/// up the value from the appropriate slice and also handle correct index
|
||||
/// handling.
|
||||
pub fn iter_changes(&self, op: &DiffOp) -> impl Iterator<Item = Change> {
|
||||
pub fn iter_changes(&self, op: &DiffOp) -> impl Iterator<Item = Change<'_, T>> {
|
||||
let newline_terminated = self.newline_terminated;
|
||||
let (tag, old_range, new_range) = op.as_tag_tuple();
|
||||
let mut old_index = old_range.start;
|
||||
|
|
@ -426,7 +474,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
value: first,
|
||||
missing_newline: newline_terminated
|
||||
&& rest.is_empty()
|
||||
&& !first.ends_with(&['\r', '\n'][..]),
|
||||
&& !first.ends_with_newline(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
|
|
@ -443,7 +491,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
value: first,
|
||||
missing_newline: newline_terminated
|
||||
&& rest.is_empty()
|
||||
&& !first.ends_with(&['\r', '\n'][..]),
|
||||
&& !first.ends_with_newline(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
|
|
@ -460,7 +508,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
value: first,
|
||||
missing_newline: newline_terminated
|
||||
&& rest.is_empty()
|
||||
&& !first.ends_with(&['\r', '\n'][..]),
|
||||
&& !first.ends_with_newline(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
|
|
@ -477,7 +525,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
value: first,
|
||||
missing_newline: newline_terminated
|
||||
&& rest.is_empty()
|
||||
&& !first.ends_with(&['\r', '\n'][..]),
|
||||
&& !first.ends_with_newline(),
|
||||
})
|
||||
} else if let Some((&first, rest)) = new_slices.split_first() {
|
||||
new_slices = rest;
|
||||
|
|
@ -489,7 +537,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
value: first,
|
||||
missing_newline: newline_terminated
|
||||
&& rest.is_empty()
|
||||
&& !first.ends_with(&['\r', '\n'][..]),
|
||||
&& !first.ends_with_newline(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
|
|
@ -498,17 +546,6 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
})
|
||||
}
|
||||
|
||||
/// Iterates over the changes the op expands to with inline emphasis.
|
||||
///
|
||||
/// This is very similar to [`TextDiff::iter_changes`] but it performs a second
|
||||
/// level diff on adjacent line replacements. The exact behavior of
|
||||
/// this function with regards to how it detects those inline changes
|
||||
/// is currently not defined and will likely change over time.
|
||||
#[cfg(feature = "inline")]
|
||||
pub fn iter_inline_changes(&self, op: &DiffOp) -> impl Iterator<Item = InlineChange> {
|
||||
iter_inline_changes(self, op)
|
||||
}
|
||||
|
||||
/// Returns the captured diff ops.
|
||||
pub fn ops(&self) -> &[DiffOp] {
|
||||
&self.ops
|
||||
|
|
@ -522,85 +559,20 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
}
|
||||
|
||||
/// Utility to return a unified diff formatter.
|
||||
pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
|
||||
pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> {
|
||||
UnifiedDiff::from_text_diff(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// Given a string splits it into lines.
|
||||
///
|
||||
/// This operation will preserve the newline separation character at the end.
|
||||
/// It supports all common newline sequences (`\r\n`, `\n` as well as `\r`).
|
||||
fn split_lines(s: &str) -> impl Iterator<Item = &str> {
|
||||
let mut iter = s.char_indices().peekable();
|
||||
let mut last_pos = 0;
|
||||
|
||||
std::iter::from_fn(move || {
|
||||
if let Some((idx, c)) = iter.next() {
|
||||
let mut rv = None;
|
||||
if c == '\r' {
|
||||
if iter.peek().map_or(false, |x| x.1 == '\n') {
|
||||
rv = Some(&s[last_pos..=idx + 1]);
|
||||
iter.next();
|
||||
last_pos = idx + 2;
|
||||
} else {
|
||||
rv = Some(&s[last_pos..=idx]);
|
||||
last_pos = idx + 1;
|
||||
}
|
||||
} else if c == '\n' {
|
||||
rv = Some(&s[last_pos..=idx]);
|
||||
last_pos = idx + 1;
|
||||
}
|
||||
Some(rv)
|
||||
} else if last_pos < s.len() {
|
||||
let tmp = &s[last_pos..];
|
||||
last_pos = s.len();
|
||||
Some(Some(tmp))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.flatten()
|
||||
}
|
||||
|
||||
/// Partitions at whitespace.
|
||||
fn split_words(s: &str) -> impl Iterator<Item = &str> {
|
||||
let mut iter = s.char_indices().peekable();
|
||||
|
||||
std::iter::from_fn(move || {
|
||||
if let Some((idx, c)) = iter.next() {
|
||||
let is_whitespace = c.is_whitespace();
|
||||
let start = idx;
|
||||
let mut end = idx + c.len_utf8();
|
||||
while let Some(&(_, next_char)) = iter.peek() {
|
||||
if next_char.is_whitespace() != is_whitespace {
|
||||
break;
|
||||
}
|
||||
iter.next();
|
||||
end += next_char.len_utf8();
|
||||
}
|
||||
Some(&s[start..end])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Splits words according to unicode rules.
|
||||
#[cfg(feature = "unicode")]
|
||||
fn split_unicode_words(s: &str) -> impl Iterator<Item = &str> {
|
||||
unicode_segmentation::UnicodeSegmentation::split_word_bounds(s)
|
||||
}
|
||||
|
||||
/// Splits text into characters.
|
||||
fn split_chars(s: &str) -> impl Iterator<Item = &str> {
|
||||
s.char_indices().map(move |(i, c)| &s[i..i + c.len_utf8()])
|
||||
}
|
||||
|
||||
/// Splits text into graphemes.
|
||||
#[cfg(feature = "unicode")]
|
||||
fn split_graphemes(s: &str) -> impl Iterator<Item = &str> {
|
||||
unicode_segmentation::UnicodeSegmentation::graphemes(s, true)
|
||||
/// Iterates over the changes the op expands to with inline emphasis.
|
||||
///
|
||||
/// This is very similar to [`TextDiff::iter_changes`] but it performs a second
|
||||
/// level diff on adjacent line replacements. The exact behavior of
|
||||
/// this function with regards to how it detects those inline changes
|
||||
/// is currently not defined and will likely change over time.
|
||||
#[cfg(feature = "inline")]
|
||||
pub fn iter_inline_changes(&self, op: &DiffOp) -> impl Iterator<Item = InlineChange<'_, T>> {
|
||||
iter_inline_changes(self, op)
|
||||
}
|
||||
}
|
||||
|
||||
// quick and dirty way to get an upper sequence ratio.
|
||||
|
|
@ -619,10 +591,10 @@ fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
|
|||
///
|
||||
/// It counts the number of matches without regard to order, which is an
|
||||
/// obvious upper bound.
|
||||
struct QuickSeqRatio<'a>(HashMap<&'a str, i32>);
|
||||
struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>);
|
||||
|
||||
impl<'a> QuickSeqRatio<'a> {
|
||||
pub fn new(seq: &[&'a str]) -> QuickSeqRatio<'a> {
|
||||
impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> {
|
||||
pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> {
|
||||
let mut counts = HashMap::new();
|
||||
for &word in seq {
|
||||
*counts.entry(word).or_insert(0) += 1;
|
||||
|
|
@ -630,7 +602,7 @@ impl<'a> QuickSeqRatio<'a> {
|
|||
QuickSeqRatio(counts)
|
||||
}
|
||||
|
||||
pub fn calc(&self, seq: &[&str]) -> f32 {
|
||||
pub fn calc(&self, seq: &[&T]) -> f32 {
|
||||
let n = self.0.len() + seq.len();
|
||||
if n == 0 {
|
||||
return 1.0;
|
||||
|
|
@ -669,18 +641,18 @@ impl<'a> QuickSeqRatio<'a> {
|
|||
/// );
|
||||
/// assert_eq!(matches, vec!["apple", "ape"]);
|
||||
/// ```
|
||||
pub fn get_close_matches<'a>(
|
||||
word: &str,
|
||||
possibilities: &[&'a str],
|
||||
pub fn get_close_matches<'a, T: DiffableStr + ?Sized>(
|
||||
word: &T,
|
||||
possibilities: &[&'a T],
|
||||
n: usize,
|
||||
cutoff: f32,
|
||||
) -> Vec<&'a str> {
|
||||
) -> Vec<&'a T> {
|
||||
let mut matches = BinaryHeap::new();
|
||||
let seq1 = split_chars(word).collect::<Vec<_>>();
|
||||
let seq1 = word.split_chars();
|
||||
let quick_ratio = QuickSeqRatio::new(&seq1);
|
||||
|
||||
for &possibility in possibilities {
|
||||
let seq2 = split_chars(possibility).collect::<Vec<_>>();
|
||||
let seq2 = possibility.split_chars();
|
||||
|
||||
if upper_seq_ratio(&seq1, &seq2) < cutoff || quick_ratio.calc(&seq2) < cutoff {
|
||||
continue;
|
||||
|
|
@ -707,42 +679,6 @@ pub fn get_close_matches<'a>(
|
|||
rv
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_split_lines() {
|
||||
assert_eq!(
|
||||
split_lines("first\nsecond\rthird\r\nfourth\nlast").collect::<Vec<_>>(),
|
||||
vec!["first\n", "second\r", "third\r\n", "fourth\n", "last"]
|
||||
);
|
||||
assert_eq!(split_lines("\n\n").collect::<Vec<_>>(), vec!["\n", "\n"]);
|
||||
assert_eq!(split_lines("\n").collect::<Vec<_>>(), vec!["\n"]);
|
||||
assert!(split_lines("").collect::<Vec<_>>().is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_split_words() {
|
||||
assert_eq!(
|
||||
split_words("foo bar baz\n\n aha").collect::<Vec<_>>(),
|
||||
["foo", " ", "bar", " ", "baz", "\n\n ", "aha"]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_split_chars() {
|
||||
assert_eq!(
|
||||
split_chars("abcfö❄️").collect::<Vec<_>>(),
|
||||
vec!["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "unicode")]
|
||||
fn test_split_graphemes() {
|
||||
assert_eq!(
|
||||
split_graphemes("abcfö❄️").collect::<Vec<_>>(),
|
||||
vec!["a", "b", "c", "f", "ö", "❄️"]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_captured_ops() {
|
||||
let diff = TextDiff::from_lines(
|
||||
|
|
@ -782,10 +718,9 @@ fn test_unified_diff() {
|
|||
|
||||
#[test]
|
||||
fn test_line_ops() {
|
||||
let diff = TextDiff::from_lines(
|
||||
"Hello World\nsome stuff here\nsome more stuff here\n",
|
||||
"Hello World\nsome amazing stuff here\nsome more stuff here\n",
|
||||
);
|
||||
let a = "Hello World\nsome stuff here\nsome more stuff here\n";
|
||||
let b = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
|
||||
let diff = TextDiff::from_lines(a, b);
|
||||
assert_eq!(diff.newline_terminated(), true);
|
||||
let changes = diff
|
||||
.ops()
|
||||
|
|
@ -793,6 +728,19 @@ fn test_line_ops() {
|
|||
.flat_map(|op| diff.iter_changes(op))
|
||||
.collect::<Vec<_>>();
|
||||
insta::assert_debug_snapshot!(&changes);
|
||||
|
||||
#[cfg(feature = "bytes")]
|
||||
{
|
||||
let byte_diff = TextDiff::from_lines(a.as_bytes(), b.as_bytes());
|
||||
let byte_changes = byte_diff
|
||||
.ops()
|
||||
.iter()
|
||||
.flat_map(|op| byte_diff.iter_changes(op))
|
||||
.collect::<Vec<_>>();
|
||||
for (change, byte_change) in changes.iter().zip(byte_changes.iter()) {
|
||||
assert_eq!(change.as_str_lossy(), byte_change.as_str_lossy());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -811,6 +759,12 @@ fn test_virtual_newlines() {
|
|||
fn test_char_diff() {
|
||||
let diff = TextDiff::from_chars("Hello World", "Hallo Welt");
|
||||
insta::assert_debug_snapshot!(diff.ops());
|
||||
|
||||
#[cfg(feature = "bytes")]
|
||||
{
|
||||
let byte_diff = TextDiff::from_chars("Hello World".as_bytes(), "Hallo Welt".as_bytes());
|
||||
assert_eq!(diff.ops(), byte_diff.ops());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue