Add support for byte diffing (#7)
Restructured text diffing to also support bstr
This commit is contained in:
parent
e53427b56f
commit
4b85e70f91
11 changed files with 775 additions and 307 deletions
|
|
@ -5,6 +5,8 @@ All notable changes to similar are documented here.
|
||||||
## 0.6.0
|
## 0.6.0
|
||||||
|
|
||||||
* Add `get_diff_ratio`.
|
* Add `get_diff_ratio`.
|
||||||
|
* Add support for byte diffing and change the text interface to abstract
|
||||||
|
over `DiffableStr`.
|
||||||
|
|
||||||
## 0.5.0
|
## 0.5.0
|
||||||
|
|
||||||
|
|
|
||||||
11
Cargo.toml
11
Cargo.toml
|
|
@ -19,7 +19,11 @@ all-features = true
|
||||||
default = ["text"]
|
default = ["text"]
|
||||||
text = []
|
text = []
|
||||||
inline = ["unicode"]
|
inline = ["unicode"]
|
||||||
unicode = ["text", "unicode-segmentation"]
|
|
||||||
|
# this annoyingly currently also turns on bstr and not just bstr/unicode
|
||||||
|
# unclear if this is fixable
|
||||||
|
unicode = ["text", "unicode-segmentation", "bstr/unicode"]
|
||||||
|
bytes = ["bstr", "text"]
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
insta = "1.5.2"
|
insta = "1.5.2"
|
||||||
|
|
@ -27,6 +31,7 @@ console = "0.14.0"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
unicode-segmentation = { version = "1.7.1", optional = true }
|
unicode-segmentation = { version = "1.7.1", optional = true }
|
||||||
|
bstr = { version = "0.2.14", optional = true, default-features = false }
|
||||||
|
|
||||||
[[example]]
|
[[example]]
|
||||||
name = "terminal"
|
name = "terminal"
|
||||||
|
|
@ -34,11 +39,11 @@ required-features = ["text"]
|
||||||
|
|
||||||
[[example]]
|
[[example]]
|
||||||
name = "terminal-inline"
|
name = "terminal-inline"
|
||||||
required-features = ["text", "inline"]
|
required-features = ["text", "inline", "bytes"]
|
||||||
|
|
||||||
[[example]]
|
[[example]]
|
||||||
name = "udiff"
|
name = "udiff"
|
||||||
required-features = ["text"]
|
required-features = ["text", "bytes"]
|
||||||
|
|
||||||
[[example]]
|
[[example]]
|
||||||
name = "close-matches"
|
name = "close-matches"
|
||||||
|
|
|
||||||
1
Makefile
1
Makefile
|
|
@ -10,6 +10,7 @@ test:
|
||||||
@cargo test
|
@cargo test
|
||||||
@cargo test --all-features
|
@cargo test --all-features
|
||||||
@cargo test --no-default-features
|
@cargo test --no-default-features
|
||||||
|
@cargo test --no-default-features --features bytes
|
||||||
|
|
||||||
format:
|
format:
|
||||||
@rustup component add rustfmt 2> /dev/null
|
@rustup component add rustfmt 2> /dev/null
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::fs::read_to_string;
|
use std::fs::read;
|
||||||
use std::process::exit;
|
use std::process::exit;
|
||||||
|
|
||||||
use console::{style, Style};
|
use console::{style, Style};
|
||||||
|
|
@ -23,8 +23,8 @@ fn main() {
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
let old = read_to_string(&args[1]).unwrap();
|
let old = read(&args[1]).unwrap();
|
||||||
let new = read_to_string(&args[2]).unwrap();
|
let new = read(&args[2]).unwrap();
|
||||||
let diff = TextDiff::from_lines(&old, &new);
|
let diff = TextDiff::from_lines(&old, &new);
|
||||||
|
|
||||||
for (idx, group) in diff.grouped_ops(3).iter().enumerate() {
|
for (idx, group) in diff.grouped_ops(3).iter().enumerate() {
|
||||||
|
|
@ -44,7 +44,7 @@ fn main() {
|
||||||
style(Line(change.new_index())).dim(),
|
style(Line(change.new_index())).dim(),
|
||||||
s.apply_to(sign).bold(),
|
s.apply_to(sign).bold(),
|
||||||
);
|
);
|
||||||
for &(emphasized, value) in change.values() {
|
for (emphasized, value) in change.iter_strings() {
|
||||||
if emphasized {
|
if emphasized {
|
||||||
print!("{}", s.apply_to(value).underlined().on_black());
|
print!("{}", s.apply_to(value).underlined().on_black());
|
||||||
} else {
|
} else {
|
||||||
|
|
|
||||||
|
|
@ -14,7 +14,7 @@ fn main() {
|
||||||
ChangeTag::Insert => ("+", Style::new().green()),
|
ChangeTag::Insert => ("+", Style::new().green()),
|
||||||
ChangeTag::Equal => (" ", Style::new()),
|
ChangeTag::Equal => (" ", Style::new()),
|
||||||
};
|
};
|
||||||
print!("{}{}", style.apply_to(sign).bold(), style.apply_to(change),);
|
print!("{}{}", style.apply_to(sign).bold(), style.apply_to(change));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
use std::fs::read_to_string;
|
use std::fs::read;
|
||||||
|
use std::io;
|
||||||
use std::process::exit;
|
use std::process::exit;
|
||||||
|
|
||||||
use similar::text::TextDiff;
|
use similar::text::TextDiff;
|
||||||
|
|
@ -10,13 +11,14 @@ fn main() {
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
let old = read_to_string(&args[1]).unwrap();
|
let old = read(&args[1]).unwrap();
|
||||||
let new = read_to_string(&args[2]).unwrap();
|
let new = read(&args[2]).unwrap();
|
||||||
print!(
|
TextDiff::from_lines(&old, &new)
|
||||||
"{}",
|
.unified_diff()
|
||||||
TextDiff::from_lines(&old, &new).unified_diff().header(
|
.header(
|
||||||
&args[1].as_os_str().to_string_lossy(),
|
&args[1].as_os_str().to_string_lossy(),
|
||||||
&args[2].as_os_str().to_string_lossy()
|
&args[2].as_os_str().to_string_lossy(),
|
||||||
)
|
)
|
||||||
);
|
.to_writer(io::stdout())
|
||||||
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,8 @@
|
||||||
//! * `unicode`: when this feature is enabled the text diffing functionality
|
//! * `unicode`: when this feature is enabled the text diffing functionality
|
||||||
//! gains the ability to diff on a grapheme instead of character level. This
|
//! gains the ability to diff on a grapheme instead of character level. This
|
||||||
//! is particularly useful when working with text containing emojis.
|
//! is particularly useful when working with text containing emojis.
|
||||||
|
//! * `bytes`: when this feature is enabled the text module gains support for
|
||||||
|
//! working with byte slices.
|
||||||
//! * `inline`: this feature gives access to additional functionality of the
|
//! * `inline`: this feature gives access to additional functionality of the
|
||||||
//! `text` module to provide inline information about which values changed
|
//! `text` module to provide inline information about which values changed
|
||||||
//! in a line diff. This currently also enables the `unicode` feature.
|
//! in a line diff. This currently also enables the `unicode` feature.
|
||||||
|
|
|
||||||
425
src/text/abstraction.rs
Normal file
425
src/text/abstraction.rs
Normal file
|
|
@ -0,0 +1,425 @@
|
||||||
|
#[cfg(feature = "bytes")]
|
||||||
|
use bstr::ByteSlice;
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::hash::Hash;
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
|
/// Reference to a [`DiffableStr`].
|
||||||
|
pub trait DiffableStrRef {
|
||||||
|
/// The type of the resolved [`DiffableStr`].
|
||||||
|
type Output: DiffableStr + ?Sized;
|
||||||
|
|
||||||
|
/// Resolves the reference.
|
||||||
|
fn as_diffable_str(&self) -> &Self::Output;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Every [`DiffableStr`] is trivially a reference to itself.
impl<T> DiffableStrRef for T
where
    T: DiffableStr + ?Sized,
{
    type Output = T;

    fn as_diffable_str(&self) -> &Self::Output {
        self
    }
}
|
||||||
|
|
||||||
|
impl DiffableStrRef for String {
|
||||||
|
type Output = str;
|
||||||
|
|
||||||
|
fn as_diffable_str(&self) -> &str {
|
||||||
|
self.as_str()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A `Cow` resolves to the borrowed form of the value it wraps.
impl<'a, T> DiffableStrRef for Cow<'a, T>
where
    T: DiffableStr + ?Sized,
{
    type Output = T;

    fn as_diffable_str(&self) -> &Self::Output {
        // `&Cow<T>` -> `Cow<T>` -> `T` (via `Deref`), then re-borrow.
        &**self
    }
}
|
||||||
|
|
||||||
|
/// With the `bytes` feature an owned byte vector resolves to its slice.
#[cfg(feature = "bytes")]
impl DiffableStrRef for Vec<u8> {
    type Output = [u8];

    fn as_diffable_str(&self) -> &Self::Output {
        &self[..]
    }
}
|
||||||
|
|
||||||
|
/// All supported diffable strings.
///
/// The text module can work with different types of strings depending
/// on how the crate is compiled.  Out of the box `&str` is always supported,
/// but with the `bytes` feature one can also work with `[u8]` slices as long
/// as they are ASCII compatible.
pub trait DiffableStr: Hash + PartialEq + PartialOrd + Ord + Eq + ToOwned {
    /// Splits the value into lines with the newlines attached.
    fn split_lines(&self) -> Vec<&Self>;

    /// Splits the value into lines with the newlines separated out into
    /// their own items.
    fn split_lines_and_newlines(&self) -> Vec<&Self>;

    /// Tokenizes into words.
    fn split_words(&self) -> Vec<&Self>;

    /// Splits the input into characters.
    fn split_chars(&self) -> Vec<&Self>;

    /// Splits into unicode words.
    #[cfg(feature = "unicode")]
    fn split_unicode_words(&self) -> Vec<&Self>;

    /// Splits into unicode graphemes.
    #[cfg(feature = "unicode")]
    fn split_graphemes(&self) -> Vec<&Self>;

    /// Returns the value as a `&str` without any lossy conversion.
    ///
    /// Returns `None` when the value cannot be represented as a string
    /// slice (for instance bytes that are not valid utf-8).
    fn as_str(&self) -> Option<&str>;

    /// Decodes the string (potentially) lossy.
    fn as_str_lossy(&self) -> Cow<'_, str>;

    /// Checks if the string ends in a newline.
    fn ends_with_newline(&self) -> bool;

    /// The length of the string.
    fn len(&self) -> usize;

    /// Checks if the string is empty, i.e. if [`len`](Self::len) is zero.
    ///
    /// Provided so that implementors do not need to define it themselves.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Slices the string.
    fn slice(&self, rng: Range<usize>) -> &Self;

    /// Returns the string as a slice of raw bytes.
    fn as_bytes(&self) -> &[u8];
}
|
||||||
|
|
||||||
|
impl DiffableStr for str {
|
||||||
|
fn split_lines(&self) -> Vec<&Self> {
|
||||||
|
let mut iter = self.char_indices().peekable();
|
||||||
|
let mut last_pos = 0;
|
||||||
|
let mut lines = vec![];
|
||||||
|
|
||||||
|
while let Some((idx, c)) = iter.next() {
|
||||||
|
if c == '\r' {
|
||||||
|
if iter.peek().map_or(false, |x| x.1 == '\n') {
|
||||||
|
lines.push(&self[last_pos..=idx + 1]);
|
||||||
|
iter.next();
|
||||||
|
last_pos = idx + 2;
|
||||||
|
} else {
|
||||||
|
lines.push(&self[last_pos..=idx]);
|
||||||
|
last_pos = idx + 1;
|
||||||
|
}
|
||||||
|
} else if c == '\n' {
|
||||||
|
lines.push(&self[last_pos..=idx]);
|
||||||
|
last_pos = idx + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if last_pos < self.len() {
|
||||||
|
lines.push(&self[last_pos..]);
|
||||||
|
}
|
||||||
|
|
||||||
|
lines
|
||||||
|
}
|
||||||
|
|
||||||
|
fn split_lines_and_newlines(&self) -> Vec<&Self> {
|
||||||
|
let mut rv = vec![];
|
||||||
|
let mut iter = self.char_indices().peekable();
|
||||||
|
|
||||||
|
while let Some((idx, c)) = iter.next() {
|
||||||
|
let is_newline = c == '\r' || c == '\n';
|
||||||
|
let start = idx;
|
||||||
|
let mut end = idx + c.len_utf8();
|
||||||
|
while let Some(&(_, next_char)) = iter.peek() {
|
||||||
|
if (next_char == '\r' || next_char == '\n') != is_newline {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
iter.next();
|
||||||
|
end += next_char.len_utf8();
|
||||||
|
}
|
||||||
|
rv.push(&self[start..end]);
|
||||||
|
}
|
||||||
|
|
||||||
|
rv
|
||||||
|
}
|
||||||
|
|
||||||
|
fn split_words(&self) -> Vec<&Self> {
|
||||||
|
let mut iter = self.char_indices().peekable();
|
||||||
|
let mut rv = vec![];
|
||||||
|
|
||||||
|
while let Some((idx, c)) = iter.next() {
|
||||||
|
let is_whitespace = c.is_whitespace();
|
||||||
|
let start = idx;
|
||||||
|
let mut end = idx + c.len_utf8();
|
||||||
|
while let Some(&(_, next_char)) = iter.peek() {
|
||||||
|
if next_char.is_whitespace() != is_whitespace {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
iter.next();
|
||||||
|
end += next_char.len_utf8();
|
||||||
|
}
|
||||||
|
rv.push(&self[start..end]);
|
||||||
|
}
|
||||||
|
|
||||||
|
rv
|
||||||
|
}
|
||||||
|
|
||||||
|
fn split_chars(&self) -> Vec<&Self> {
|
||||||
|
self.char_indices()
|
||||||
|
.map(move |(i, c)| &self[i..i + c.len_utf8()])
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "unicode")]
|
||||||
|
fn split_unicode_words(&self) -> Vec<&Self> {
|
||||||
|
unicode_segmentation::UnicodeSegmentation::split_word_bounds(self).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "unicode")]
|
||||||
|
fn split_graphemes(&self) -> Vec<&Self> {
|
||||||
|
unicode_segmentation::UnicodeSegmentation::graphemes(self, true).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_str(&self) -> Option<&str> {
|
||||||
|
Some(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_str_lossy(&self) -> Cow<'_, str> {
|
||||||
|
Cow::Borrowed(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ends_with_newline(&self) -> bool {
|
||||||
|
self.ends_with(&['\r', '\n'][..])
|
||||||
|
}
|
||||||
|
|
||||||
|
fn len(&self) -> usize {
|
||||||
|
str::len(self)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn slice(&self, rng: Range<usize>) -> &Self {
|
||||||
|
&self[rng]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn as_bytes(&self) -> &[u8] {
|
||||||
|
str::as_bytes(self)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Byte slices are walked with `bstr`'s `char_indices`, which yields
/// `(start, end, char)` tuples.
#[cfg(feature = "bytes")]
impl DiffableStr for [u8] {
    /// Returns every line including its terminator (`\n`, `\r` or `\r\n`);
    /// a trailing piece without terminator is returned as the last item.
    fn split_lines(&self) -> Vec<&Self> {
        let mut out = Vec::new();
        let mut line_start = 0;
        let mut chars = self.char_indices().peekable();

        while let Some((_, end, ch)) = chars.next() {
            // Offset just past the terminator that ends at `end`.
            let line_end = match ch {
                '\r' => {
                    if chars.peek().map_or(false, |next| next.2 == '\n') {
                        // `\r\n` counts as a single terminator.
                        chars.next();
                        end + 1
                    } else {
                        end
                    }
                }
                '\n' => end,
                _ => continue,
            };
            out.push(&self[line_start..line_end]);
            line_start = line_end;
        }

        if line_start < self.len() {
            out.push(&self[line_start..]);
        }

        out
    }

    /// Returns alternating runs of newline characters (`\r` / `\n`) and
    /// non-newline characters.
    fn split_lines_and_newlines(&self) -> Vec<&Self> {
        let is_break = |ch: char| ch == '\r' || ch == '\n';
        let mut chars = self.char_indices().peekable();
        let mut out = Vec::new();

        while let Some((start, first_end, first)) = chars.next() {
            let kind = is_break(first);
            let mut end = first_end;
            // Extend the run for as long as the following chars are of the same kind.
            loop {
                match chars.peek() {
                    Some(&(_, next_end, ch)) if is_break(ch) == kind => {
                        end = next_end;
                        chars.next();
                    }
                    _ => break,
                }
            }
            out.push(&self[start..end]);
        }

        out
    }

    /// Returns alternating runs of whitespace and non-whitespace characters.
    fn split_words(&self) -> Vec<&Self> {
        let mut chars = self.char_indices().peekable();
        let mut out = Vec::new();

        while let Some((start, first_end, first)) = chars.next() {
            let kind = first.is_whitespace();
            let mut end = first_end;
            // Extend the run for as long as the following chars are of the same kind.
            loop {
                match chars.peek() {
                    Some(&(_, next_end, ch)) if ch.is_whitespace() == kind => {
                        end = next_end;
                        chars.next();
                    }
                    _ => break,
                }
            }
            out.push(&self[start..end]);
        }

        out
    }

    /// Splits on word boundaries via `bstr`.
    #[cfg(feature = "unicode")]
    fn split_unicode_words(&self) -> Vec<&Self> {
        let mut out = Vec::new();
        for word in self.words_with_breaks() {
            out.push(word.as_bytes());
        }
        out
    }

    /// Splits into graphemes via `bstr`.
    #[cfg(feature = "unicode")]
    fn split_graphemes(&self) -> Vec<&Self> {
        let mut out = Vec::new();
        for grapheme in self.graphemes() {
            out.push(grapheme.as_bytes());
        }
        out
    }

    /// Returns one sub slice per item yielded by `char_indices`.
    fn split_chars(&self) -> Vec<&Self> {
        let mut out = Vec::new();
        for (start, end, _) in self.char_indices() {
            out.push(&self[start..end]);
        }
        out
    }

    /// The bytes as `&str`, or `None` if they are not valid utf-8.
    fn as_str(&self) -> Option<&str> {
        match std::str::from_utf8(self) {
            Ok(text) => Some(text),
            Err(_) => None,
        }
    }

    /// Lossy utf-8 decoding of the bytes.
    fn as_str_lossy(&self) -> Cow<'_, str> {
        String::from_utf8_lossy(self)
    }

    /// True if the last byte is `\r` or `\n`.
    fn ends_with_newline(&self) -> bool {
        match self.last_byte() {
            Some(b'\r') | Some(b'\n') => true,
            _ => false,
        }
    }

    /// Number of bytes.
    fn len(&self) -> usize {
        <[u8]>::len(self)
    }

    /// Sub slice by byte range.
    fn slice(&self, rng: Range<usize>) -> &Self {
        &self[rng]
    }

    /// The slice itself.
    fn as_bytes(&self) -> &[u8] {
        self
    }
}
|
||||||
|
|
||||||
|
/// Lines keep their terminator; `\r\n` stays together.
#[test]
fn test_split_lines() {
    let mixed = "first\nsecond\rthird\r\nfourth\nlast";
    let expected = vec!["first\n", "second\r", "third\r\n", "fourth\n", "last"];
    assert_eq!(DiffableStr::split_lines(mixed), expected);

    let two_newlines = DiffableStr::split_lines("\n\n");
    assert_eq!(two_newlines, vec!["\n", "\n"]);

    let one_newline = DiffableStr::split_lines("\n");
    assert_eq!(one_newline, vec!["\n"]);

    let nothing = DiffableStr::split_lines("");
    assert!(nothing.is_empty());
}
|
||||||
|
|
||||||
|
/// Words and whitespace runs alternate.
#[test]
fn test_split_words() {
    let words = DiffableStr::split_words("foo bar baz\n\n aha");
    let expected = ["foo", " ", "bar", " ", "baz", "\n\n ", "aha"];
    assert_eq!(words, expected);
}
|
||||||
|
|
||||||
|
/// Char splitting separates the snowflake from its variation selector.
#[test]
fn test_split_chars() {
    let chars = DiffableStr::split_chars("abcfö❄️");
    let expected = vec!["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"];
    assert_eq!(chars, expected);
}
|
||||||
|
|
||||||
|
/// Grapheme splitting keeps the snowflake and its variation selector together.
#[test]
#[cfg(feature = "unicode")]
fn test_split_graphemes() {
    let graphemes = DiffableStr::split_graphemes("abcfö❄️");
    let expected = vec!["a", "b", "c", "f", "ö", "❄️"];
    assert_eq!(graphemes, expected);
}
|
||||||
|
|
||||||
|
/// Same expectations as `test_split_lines`, on byte slices.
#[test]
#[cfg(feature = "bytes")]
fn test_split_lines_bytes() {
    let mixed = "first\nsecond\rthird\r\nfourth\nlast".as_bytes();
    let expected: Vec<&[u8]> = ["first\n", "second\r", "third\r\n", "fourth\n", "last"]
        .iter()
        .map(|line| line.as_bytes())
        .collect();
    assert_eq!(DiffableStr::split_lines(mixed), expected);

    let two_newlines = DiffableStr::split_lines("\n\n".as_bytes());
    assert_eq!(two_newlines, vec!["\n".as_bytes(), "\n".as_bytes()]);

    let one_newline = DiffableStr::split_lines("\n".as_bytes());
    assert_eq!(one_newline, vec!["\n".as_bytes()]);

    let nothing = DiffableStr::split_lines("".as_bytes());
    assert!(nothing.is_empty());
}
|
||||||
|
|
||||||
|
/// Same expectations as `test_split_words`, on byte slices.
#[test]
#[cfg(feature = "bytes")]
fn test_split_words_bytes() {
    let words = DiffableStr::split_words("foo bar baz\n\n aha".as_bytes());
    let expected: Vec<&[u8]> = ["foo", " ", "bar", " ", "baz", "\n\n ", "aha"]
        .iter()
        .map(|word| word.as_bytes())
        .collect();
    assert_eq!(words, expected);
}
|
||||||
|
|
||||||
|
/// Same expectations as `test_split_chars`, on byte slices.
#[test]
#[cfg(feature = "bytes")]
fn test_split_chars_bytes() {
    let chars = DiffableStr::split_chars("abcfö❄️".as_bytes());
    let expected: Vec<&[u8]> = ["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"]
        .iter()
        .map(|ch| ch.as_bytes())
        .collect();
    assert_eq!(chars, expected);
}
|
||||||
|
|
||||||
|
/// Same expectations as `test_split_graphemes`, on byte slices.
#[test]
#[cfg(all(feature = "bytes", feature = "unicode"))]
fn test_split_graphemes_bytes() {
    let graphemes = DiffableStr::split_graphemes("abcfö❄️".as_bytes());
    let expected: Vec<&[u8]> = ["a", "b", "c", "f", "ö", "❄️"]
        .iter()
        .map(|grapheme| grapheme.as_bytes())
        .collect();
    assert_eq!(graphemes, expected);
}
|
||||||
|
|
@ -1,24 +1,23 @@
|
||||||
#![cfg(feature = "inline")]
|
#![cfg(feature = "inline")]
|
||||||
|
use std::borrow::Cow;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
use crate::algorithms::{capture_diff, get_diff_ratio, Algorithm, DiffOp, DiffTag};
|
use crate::algorithms::{capture_diff, get_diff_ratio, Algorithm, DiffOp, DiffTag};
|
||||||
use crate::text::{Change, ChangeTag, TextDiff};
|
use crate::text::{Change, ChangeTag, DiffableStr, TextDiff};
|
||||||
|
|
||||||
use super::split_unicode_words;
|
|
||||||
|
|
||||||
use std::ops::Index;
|
use std::ops::Index;
|
||||||
|
|
||||||
struct MultiLookup<'bufs, 's> {
|
struct MultiLookup<'bufs, 's, T: DiffableStr + ?Sized> {
|
||||||
strings: &'bufs [&'s str],
|
strings: &'bufs [&'s T],
|
||||||
seqs: Vec<(&'s str, usize, usize)>,
|
seqs: Vec<(&'s T, usize, usize)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'bufs, 's> MultiLookup<'bufs, 's> {
|
impl<'bufs, 's, T: DiffableStr + ?Sized> MultiLookup<'bufs, 's, T> {
|
||||||
fn new(strings: &'bufs [&'s str]) -> MultiLookup<'bufs, 's> {
|
fn new(strings: &'bufs [&'s T]) -> MultiLookup<'bufs, 's, T> {
|
||||||
let mut seqs = Vec::new();
|
let mut seqs = Vec::new();
|
||||||
for (string_idx, string) in strings.iter().enumerate() {
|
for (string_idx, string) in strings.iter().enumerate() {
|
||||||
let mut offset = 0;
|
let mut offset = 0;
|
||||||
for word in split_unicode_words(string) {
|
for word in string.split_unicode_words() {
|
||||||
seqs.push((word, string_idx, offset));
|
seqs.push((word, string_idx, offset));
|
||||||
offset += word.len();
|
offset += word.len();
|
||||||
}
|
}
|
||||||
|
|
@ -30,7 +29,7 @@ impl<'bufs, 's> MultiLookup<'bufs, 's> {
|
||||||
self.seqs.len()
|
self.seqs.len()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_original_slices(&self, idx: usize, len: usize) -> Vec<(usize, &'s str)> {
|
fn get_original_slices(&self, idx: usize, len: usize) -> Vec<(usize, &'s T)> {
|
||||||
let mut last = None;
|
let mut last = None;
|
||||||
let mut rv = Vec::new();
|
let mut rv = Vec::new();
|
||||||
|
|
||||||
|
|
@ -44,7 +43,8 @@ impl<'bufs, 's> MultiLookup<'bufs, 's> {
|
||||||
} else {
|
} else {
|
||||||
rv.push((
|
rv.push((
|
||||||
last_str_idx,
|
last_str_idx,
|
||||||
&self.strings[last_str_idx][start_char_idx..start_char_idx + last_len],
|
self.strings[last_str_idx]
|
||||||
|
.slice(start_char_idx..start_char_idx + last_len),
|
||||||
));
|
));
|
||||||
Some((str_idx, char_idx, s.len()))
|
Some((str_idx, char_idx, s.len()))
|
||||||
}
|
}
|
||||||
|
|
@ -55,7 +55,7 @@ impl<'bufs, 's> MultiLookup<'bufs, 's> {
|
||||||
if let Some((str_idx, start_char_idx, len)) = last {
|
if let Some((str_idx, start_char_idx, len)) = last {
|
||||||
rv.push((
|
rv.push((
|
||||||
str_idx,
|
str_idx,
|
||||||
&self.strings[str_idx][start_char_idx..start_char_idx + len],
|
self.strings[str_idx].slice(start_char_idx..start_char_idx + len),
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -63,43 +63,26 @@ impl<'bufs, 's> MultiLookup<'bufs, 's> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'bufs, 's> Index<usize> for MultiLookup<'bufs, 's> {
|
impl<'bufs, 's, T: DiffableStr + ?Sized> Index<usize> for MultiLookup<'bufs, 's, T> {
|
||||||
type Output = str;
|
type Output = T;
|
||||||
|
|
||||||
fn index(&self, index: usize) -> &Self::Output {
|
fn index(&self, index: usize) -> &Self::Output {
|
||||||
&self.seqs[index].0
|
&self.seqs[index].0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn partition_newlines(s: &str) -> impl Iterator<Item = (&str, bool)> {
|
fn push_values<'s, T: DiffableStr + ?Sized>(
|
||||||
let mut iter = s.char_indices().peekable();
|
v: &mut Vec<Vec<(bool, &'s T)>>,
|
||||||
|
idx: usize,
|
||||||
std::iter::from_fn(move || {
|
emphasized: bool,
|
||||||
if let Some((idx, c)) = iter.next() {
|
s: &'s T,
|
||||||
let is_newline = c == '\r' || c == '\n';
|
) {
|
||||||
let start = idx;
|
|
||||||
let mut end = idx + c.len_utf8();
|
|
||||||
while let Some(&(_, next_char)) = iter.peek() {
|
|
||||||
if (next_char == '\r' || next_char == '\n') != is_newline {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
iter.next();
|
|
||||||
end += next_char.len_utf8();
|
|
||||||
}
|
|
||||||
Some((&s[start..end], is_newline))
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn push_values<'s>(v: &mut Vec<Vec<(bool, &'s str)>>, idx: usize, emphasized: bool, s: &'s str) {
|
|
||||||
v.resize_with(v.len().max(idx + 1), Vec::new);
|
v.resize_with(v.len().max(idx + 1), Vec::new);
|
||||||
// newlines cause all kinds of wacky stuff if they end up highlighted.
|
// newlines cause all kinds of wacky stuff if they end up highlighted.
|
||||||
// because of this we want to unemphasize all newlines we encounter.
|
// because of this we want to unemphasize all newlines we encounter.
|
||||||
if emphasized {
|
if emphasized {
|
||||||
for (seg, is_nl) in partition_newlines(s) {
|
for seg in s.split_lines_and_newlines() {
|
||||||
v[idx].push((!is_nl, seg));
|
v[idx].push((!seg.ends_with_newline(), seg));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
v[idx].push((false, s));
|
v[idx].push((false, s));
|
||||||
|
|
@ -110,15 +93,15 @@ fn push_values<'s>(v: &mut Vec<Vec<(bool, &'s str)>>, idx: usize, emphasized: bo
|
||||||
///
|
///
|
||||||
/// This is like [`Change`] but with inline highlight info.
|
/// This is like [`Change`] but with inline highlight info.
|
||||||
#[derive(Debug, PartialEq, Eq, Hash, Clone, Ord, PartialOrd)]
|
#[derive(Debug, PartialEq, Eq, Hash, Clone, Ord, PartialOrd)]
|
||||||
pub struct InlineChange<'s> {
|
pub struct InlineChange<'s, T: DiffableStr + ?Sized> {
|
||||||
tag: ChangeTag,
|
tag: ChangeTag,
|
||||||
old_index: Option<usize>,
|
old_index: Option<usize>,
|
||||||
new_index: Option<usize>,
|
new_index: Option<usize>,
|
||||||
values: Vec<(bool, &'s str)>,
|
values: Vec<(bool, &'s T)>,
|
||||||
missing_newline: bool,
|
missing_newline: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> InlineChange<'s> {
|
impl<'s, T: DiffableStr + ?Sized> InlineChange<'s, T> {
|
||||||
/// Returns the change tag.
|
/// Returns the change tag.
|
||||||
pub fn tag(&self) -> ChangeTag {
|
pub fn tag(&self) -> ChangeTag {
|
||||||
self.tag
|
self.tag
|
||||||
|
|
@ -135,10 +118,23 @@ impl<'s> InlineChange<'s> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the changed values.
|
/// Returns the changed values.
|
||||||
pub fn values(&self) -> &[(bool, &'s str)] {
|
///
|
||||||
|
/// Each item is a tuple in the form `(emphasized, value)` where `emphasized`
|
||||||
|
/// is true if it should be highlighted as an inline diff.
|
||||||
|
pub fn values(&self) -> &[(bool, &'s T)] {
|
||||||
&self.values
|
&self.values
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Iterates over all (potentially lossy) utf-8 decoded values.
|
||||||
|
///
|
||||||
|
/// Each item is a tuple in the form `(emphasized, value)` where `emphasized`
|
||||||
|
/// is true if it should be highlighted as an inline diff.
|
||||||
|
pub fn iter_strings(&self) -> impl Iterator<Item = (bool, Cow<'_, str>)> {
|
||||||
|
self.values()
|
||||||
|
.iter()
|
||||||
|
.map(|(emphasized, raw_value)| (*emphasized, raw_value.as_str_lossy()))
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns `true` if this change needs to be followed up by a
|
/// Returns `true` if this change needs to be followed up by a
|
||||||
/// missing newline.
|
/// missing newline.
|
||||||
pub fn missing_newline(&self) -> bool {
|
pub fn missing_newline(&self) -> bool {
|
||||||
|
|
@ -146,8 +142,8 @@ impl<'s> InlineChange<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> From<Change<'s>> for InlineChange<'s> {
|
impl<'s, T: DiffableStr + ?Sized> From<Change<'s, T>> for InlineChange<'s, T> {
|
||||||
fn from(change: Change<'s>) -> InlineChange<'s> {
|
fn from(change: Change<'s, T>) -> InlineChange<'s, T> {
|
||||||
InlineChange {
|
InlineChange {
|
||||||
tag: change.tag(),
|
tag: change.tag(),
|
||||||
old_index: change.old_index(),
|
old_index: change.old_index(),
|
||||||
|
|
@ -158,9 +154,9 @@ impl<'s> From<Change<'s>> for InlineChange<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> fmt::Display for InlineChange<'s> {
|
impl<'s, T: DiffableStr + ?Sized> fmt::Display for InlineChange<'s, T> {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
for &(emphasized, value) in &self.values {
|
for (emphasized, value) in self.iter_strings() {
|
||||||
let marker = match (emphasized, self.tag) {
|
let marker = match (emphasized, self.tag) {
|
||||||
(false, _) | (true, ChangeTag::Equal) => "",
|
(false, _) | (true, ChangeTag::Equal) => "",
|
||||||
(true, ChangeTag::Delete) => "-",
|
(true, ChangeTag::Delete) => "-",
|
||||||
|
|
@ -175,10 +171,13 @@ impl<'s> fmt::Display for InlineChange<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn iter_inline_changes<'diff>(
|
pub(crate) fn iter_inline_changes<'diff, T>(
|
||||||
diff: &'diff TextDiff,
|
diff: &'diff TextDiff<'_, '_, '_, T>,
|
||||||
op: &DiffOp,
|
op: &DiffOp,
|
||||||
) -> impl Iterator<Item = InlineChange<'diff>> {
|
) -> impl Iterator<Item = InlineChange<'diff, T>>
|
||||||
|
where
|
||||||
|
T: DiffableStr + ?Sized,
|
||||||
|
{
|
||||||
let newline_terminated = diff.newline_terminated;
|
let newline_terminated = diff.newline_terminated;
|
||||||
let (tag, old_range, new_range) = op.as_tag_tuple();
|
let (tag, old_range, new_range) = op.as_tag_tuple();
|
||||||
|
|
||||||
|
|
@ -267,7 +266,7 @@ pub(crate) fn iter_inline_changes<'diff>(
|
||||||
|
|
||||||
if newline_terminated
|
if newline_terminated
|
||||||
&& !old_slices.is_empty()
|
&& !old_slices.is_empty()
|
||||||
&& !old_slices[old_slices.len() - 1].ends_with(&['\r', '\n'][..])
|
&& !old_slices[old_slices.len() - 1].ends_with_newline()
|
||||||
{
|
{
|
||||||
if let Some(last) = rv.last_mut() {
|
if let Some(last) = rv.last_mut() {
|
||||||
last.missing_newline = true;
|
last.missing_newline = true;
|
||||||
|
|
@ -287,7 +286,7 @@ pub(crate) fn iter_inline_changes<'diff>(
|
||||||
|
|
||||||
if newline_terminated
|
if newline_terminated
|
||||||
&& !new_slices.is_empty()
|
&& !new_slices.is_empty()
|
||||||
&& !new_slices[new_slices.len() - 1].ends_with(&['\r', '\n'][..])
|
&& !new_slices[new_slices.len() - 1].ends_with_newline()
|
||||||
{
|
{
|
||||||
if let Some(last) = rv.last_mut() {
|
if let Some(last) = rv.last_mut() {
|
||||||
last.missing_newline = true;
|
last.missing_newline = true;
|
||||||
|
|
|
||||||
400
src/text/mod.rs
400
src/text/mod.rs
|
|
@ -11,7 +11,7 @@
|
||||||
//! Text diffing is available by default but can be disabled by turning off the
|
//! Text diffing is available by default but can be disabled by turning off the
|
||||||
//! default features. The feature to enable to get it back is `text`.
|
//! default features. The feature to enable to get it back is `text`.
|
||||||
//!
|
//!
|
||||||
//! ## Examples
|
//! # Examples
|
||||||
//!
|
//!
|
||||||
//! A super simple example for how to generate a unified diff with three lines
|
//! A super simple example for how to generate a unified diff with three lines
|
||||||
//! of context around the changes:
|
//! of context around the changes:
|
||||||
|
|
@ -38,7 +38,7 @@
|
||||||
//! }
|
//! }
|
||||||
//! ```
|
//! ```
|
||||||
//!
|
//!
|
||||||
//! ## Ops vs Changes
|
//! # Ops vs Changes
|
||||||
//!
|
//!
|
||||||
//! Because very commonly two compared sequences will largely match this module
|
//! Because very commonly two compared sequences will largely match this module
|
||||||
//! splits its functionality into two layers. The first is inherited from the
|
//! splits its functionality into two layers. The first is inherited from the
|
||||||
|
|
@ -51,7 +51,7 @@
|
||||||
//! Because the [`TextDiff::grouped_ops`] method can isolate clusters of changes
|
//! Because the [`TextDiff::grouped_ops`] method can isolate clusters of changes
|
||||||
//! this even works for very long files if paired with this method.
|
//! this even works for very long files if paired with this method.
|
||||||
//!
|
//!
|
||||||
//! ## Trailing Newlines
|
//! # Trailing Newlines
|
||||||
//!
|
//!
|
||||||
//! When working with line diffs (and unified diffs in general) there are two
|
//! When working with line diffs (and unified diffs in general) there are two
|
||||||
//! "philosophies" to look at lines. One is to diff lines without their newline
|
//! "philosophies" to look at lines. One is to diff lines without their newline
|
||||||
|
|
@ -68,11 +68,30 @@
|
||||||
//! either rendering a virtual newline at that position or to indicate it in
|
//! either rendering a virtual newline at that position or to indicate it in
|
||||||
//! different ways. For instance the unified diff code will render the special
|
//! different ways. For instance the unified diff code will render the special
|
||||||
//! `\ No newline at end of file` marker.
|
//! `\ No newline at end of file` marker.
|
||||||
|
//!
|
||||||
|
//! # Bytes vs Unicode
|
||||||
|
//!
|
||||||
|
//! This module concerns itself with a looser definition of "text" than you would
|
||||||
|
//! normally see in Rust. While by default it can only operate on [`str`] types
|
||||||
|
//! by enabling the `bytes` feature it gains support for byte slices with some
|
||||||
|
//! caveats.
|
||||||
|
//!
|
||||||
|
//! A lot of text diff functionality assumes that what is being diffed constitutes
|
||||||
|
//! text, but in the real world it can often be challenging to ensure that this is
|
||||||
|
//! all valid utf-8. Because of this the crate is built so that most functionality
|
||||||
|
//! also still works with bytes for as long as they are roughly ASCII compatible.
|
||||||
|
//!
|
||||||
|
//! This means you will be successful in creating a unified diff from latin1
|
||||||
|
//! encoded bytes but if you try to do the same with EBCDIC encoded bytes you
|
||||||
|
//! will only get garbage.
|
||||||
#![cfg(feature = "text")]
|
#![cfg(feature = "text")]
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::cmp::Reverse;
|
use std::cmp::Reverse;
|
||||||
use std::collections::{BinaryHeap, HashMap};
|
use std::collections::{BinaryHeap, HashMap};
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
use std::hash::Hash;
|
||||||
|
|
||||||
|
mod abstraction;
|
||||||
|
|
||||||
#[cfg(feature = "inline")]
|
#[cfg(feature = "inline")]
|
||||||
mod inline;
|
mod inline;
|
||||||
|
|
@ -82,6 +101,8 @@ mod udiff;
|
||||||
pub use self::inline::*;
|
pub use self::inline::*;
|
||||||
pub use self::udiff::*;
|
pub use self::udiff::*;
|
||||||
|
|
||||||
|
pub use crate::text::abstraction::*;
|
||||||
|
|
||||||
use crate::algorithms::{
|
use crate::algorithms::{
|
||||||
capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, DiffOp, DiffTag,
|
capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, DiffOp, DiffTag,
|
||||||
};
|
};
|
||||||
|
|
@ -127,14 +148,14 @@ impl TextDiffConfig {
|
||||||
///
|
///
|
||||||
/// This splits the text `old` and `new` into lines preserving newlines
|
/// This splits the text `old` and `new` into lines preserving newlines
|
||||||
/// in the input.
|
/// in the input.
|
||||||
pub fn diff_lines<'old, 'new, 'bufs>(
|
pub fn diff_lines<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||||
&self,
|
&self,
|
||||||
old: &'old str,
|
old: &'old T,
|
||||||
new: &'new str,
|
new: &'new T,
|
||||||
) -> TextDiff<'old, 'new, 'bufs> {
|
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||||
self.diff(
|
self.diff(
|
||||||
Cow::Owned(split_lines(old).collect()),
|
Cow::Owned(old.as_diffable_str().split_lines()),
|
||||||
Cow::Owned(split_lines(new).collect()),
|
Cow::Owned(new.as_diffable_str().split_lines()),
|
||||||
true,
|
true,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
@ -142,14 +163,27 @@ impl TextDiffConfig {
|
||||||
/// Creates a diff of words.
|
/// Creates a diff of words.
|
||||||
///
|
///
|
||||||
/// This splits the text into words and whitespace.
|
/// This splits the text into words and whitespace.
|
||||||
pub fn diff_words<'old, 'new, 'bufs>(
|
pub fn diff_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||||
&self,
|
&self,
|
||||||
old: &'old str,
|
old: &'old T,
|
||||||
new: &'new str,
|
new: &'new T,
|
||||||
) -> TextDiff<'old, 'new, 'bufs> {
|
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||||
self.diff(
|
self.diff(
|
||||||
Cow::Owned(split_words(old).collect()),
|
Cow::Owned(old.as_diffable_str().split_words()),
|
||||||
Cow::Owned(split_words(new).collect()),
|
Cow::Owned(new.as_diffable_str().split_words()),
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a diff of characters.
|
||||||
|
pub fn diff_chars<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||||
|
&self,
|
||||||
|
old: &'old T,
|
||||||
|
new: &'new T,
|
||||||
|
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||||
|
self.diff(
|
||||||
|
Cow::Owned(old.as_diffable_str().split_chars()),
|
||||||
|
Cow::Owned(new.as_diffable_str().split_chars()),
|
||||||
false,
|
false,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
@ -162,27 +196,14 @@ impl TextDiffConfig {
|
||||||
///
|
///
|
||||||
/// This requires the `unicode` feature.
|
/// This requires the `unicode` feature.
|
||||||
#[cfg(feature = "unicode")]
|
#[cfg(feature = "unicode")]
|
||||||
pub fn diff_unicode_words<'old, 'new, 'bufs>(
|
pub fn diff_unicode_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||||
&self,
|
&self,
|
||||||
old: &'old str,
|
old: &'old T,
|
||||||
new: &'new str,
|
new: &'new T,
|
||||||
) -> TextDiff<'old, 'new, 'bufs> {
|
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||||
self.diff(
|
self.diff(
|
||||||
Cow::Owned(split_unicode_words(old).collect()),
|
Cow::Owned(old.as_diffable_str().split_unicode_words()),
|
||||||
Cow::Owned(split_unicode_words(new).collect()),
|
Cow::Owned(new.as_diffable_str().split_unicode_words()),
|
||||||
false,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Creates a diff of characters.
|
|
||||||
pub fn diff_chars<'old, 'new, 'bufs>(
|
|
||||||
&self,
|
|
||||||
old: &'old str,
|
|
||||||
new: &'new str,
|
|
||||||
) -> TextDiff<'old, 'new, 'bufs> {
|
|
||||||
self.diff(
|
|
||||||
Cow::Owned(split_chars(old).collect()),
|
|
||||||
Cow::Owned(split_chars(new).collect()),
|
|
||||||
false,
|
false,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
@ -191,33 +212,33 @@ impl TextDiffConfig {
|
||||||
///
|
///
|
||||||
/// This requires the `unicode` feature.
|
/// This requires the `unicode` feature.
|
||||||
#[cfg(feature = "unicode")]
|
#[cfg(feature = "unicode")]
|
||||||
pub fn diff_graphemes<'old, 'new, 'bufs>(
|
pub fn diff_graphemes<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
|
||||||
&self,
|
&self,
|
||||||
old: &'old str,
|
old: &'old T,
|
||||||
new: &'new str,
|
new: &'new T,
|
||||||
) -> TextDiff<'old, 'new, 'bufs> {
|
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||||
self.diff(
|
self.diff(
|
||||||
Cow::Owned(split_graphemes(old).collect()),
|
Cow::Owned(old.as_diffable_str().split_graphemes()),
|
||||||
Cow::Owned(split_graphemes(new).collect()),
|
Cow::Owned(new.as_diffable_str().split_graphemes()),
|
||||||
false,
|
false,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a diff of arbitrary slices.
|
/// Creates a diff of arbitrary slices.
|
||||||
pub fn diff_slices<'old, 'new, 'bufs>(
|
pub fn diff_slices<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
|
||||||
&self,
|
&self,
|
||||||
old: &'bufs [&'old str],
|
old: &'bufs [&'old T],
|
||||||
new: &'bufs [&'new str],
|
new: &'bufs [&'new T],
|
||||||
) -> TextDiff<'old, 'new, 'bufs> {
|
) -> TextDiff<'old, 'new, 'bufs, T> {
|
||||||
self.diff(Cow::Borrowed(old), Cow::Borrowed(new), false)
|
self.diff(Cow::Borrowed(old), Cow::Borrowed(new), false)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn diff<'old, 'new, 'bufs>(
|
fn diff<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
|
||||||
&self,
|
&self,
|
||||||
old: Cow<'bufs, [&'old str]>,
|
old: Cow<'bufs, [&'old T]>,
|
||||||
new: Cow<'bufs, [&'new str]>,
|
new: Cow<'bufs, [&'new T]>,
|
||||||
newline_terminated: bool,
|
newline_terminated: bool,
|
||||||
) -> TextDiff<'old, 'new, 'bufs> {
|
) -> TextDiff<'old, 'new, 'bufs, T> {
|
||||||
let ops = capture_diff_slices(self.algorithm, &old, &new);
|
let ops = capture_diff_slices(self.algorithm, &old, &new);
|
||||||
TextDiff {
|
TextDiff {
|
||||||
old,
|
old,
|
||||||
|
|
@ -230,9 +251,9 @@ impl TextDiffConfig {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Captures diff op codes for textual diffs
|
/// Captures diff op codes for textual diffs
|
||||||
pub struct TextDiff<'old, 'new, 'bufs> {
|
pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
|
||||||
old: Cow<'bufs, [&'old str]>,
|
old: Cow<'bufs, [&'old T]>,
|
||||||
new: Cow<'bufs, [&'new str]>,
|
new: Cow<'bufs, [&'new T]>,
|
||||||
ops: Vec<DiffOp>,
|
ops: Vec<DiffOp>,
|
||||||
newline_terminated: bool,
|
newline_terminated: bool,
|
||||||
algorithm: Algorithm,
|
algorithm: Algorithm,
|
||||||
|
|
@ -255,26 +276,26 @@ pub enum ChangeTag {
|
||||||
/// exists so that it's more convenient to work with textual differences as
|
/// exists so that it's more convenient to work with textual differences as
|
||||||
/// the underlying [`DiffOp`] does not know anything about strings.
|
/// the underlying [`DiffOp`] does not know anything about strings.
|
||||||
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)]
|
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)]
|
||||||
pub struct Change<'s> {
|
pub struct Change<'s, T: DiffableStr + ?Sized> {
|
||||||
tag: ChangeTag,
|
tag: ChangeTag,
|
||||||
old_index: Option<usize>,
|
old_index: Option<usize>,
|
||||||
new_index: Option<usize>,
|
new_index: Option<usize>,
|
||||||
value: &'s str,
|
value: &'s T,
|
||||||
missing_newline: bool,
|
missing_newline: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> fmt::Display for Change<'s> {
|
impl<'s, T: DiffableStr + ?Sized> fmt::Display for Change<'s, T> {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
write!(
|
write!(
|
||||||
f,
|
f,
|
||||||
"{}{}",
|
"{}{}",
|
||||||
self.value(),
|
self.as_str_lossy(),
|
||||||
if self.missing_newline { "\n" } else { "" }
|
if self.missing_newline { "\n" } else { "" }
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Change<'s> {
|
impl<'s, T: DiffableStr + ?Sized> Change<'s, T> {
|
||||||
/// Returns the change tag.
|
/// Returns the change tag.
|
||||||
pub fn tag(&self) -> ChangeTag {
|
pub fn tag(&self) -> ChangeTag {
|
||||||
self.tag
|
self.tag
|
||||||
|
|
@ -290,11 +311,21 @@ impl<'s> Change<'s> {
|
||||||
self.new_index
|
self.new_index
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the changed value.
|
/// Returns the underlying changed value.
|
||||||
pub fn value(&self) -> &'s str {
|
pub fn value(&self) -> &'s T {
|
||||||
self.value
|
self.value
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the value as string if it is utf-8.
|
||||||
|
pub fn as_str(&self) -> Option<&'s str> {
|
||||||
|
T::as_str(self.value)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the value (lossy) decoded as utf-8 string.
|
||||||
|
pub fn as_str_lossy(&self) -> Cow<'s, str> {
|
||||||
|
T::as_str_lossy(self.value)
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns `true` if this change needs to be followed up by a
|
/// Returns `true` if this change needs to be followed up by a
|
||||||
/// missing newline.
|
/// missing newline.
|
||||||
///
|
///
|
||||||
|
|
@ -305,7 +336,7 @@ impl<'s> Change<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
|
||||||
/// Configures a text differ before diffing.
|
/// Configures a text differ before diffing.
|
||||||
pub fn configure() -> TextDiffConfig {
|
pub fn configure() -> TextDiffConfig {
|
||||||
TextDiffConfig::default()
|
TextDiffConfig::default()
|
||||||
|
|
@ -314,15 +345,31 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||||
/// Creates a diff of lines.
|
/// Creates a diff of lines.
|
||||||
///
|
///
|
||||||
/// Equivalent to `TextDiff::configure().diff_lines(old, new)`.
|
/// Equivalent to `TextDiff::configure().diff_lines(old, new)`.
|
||||||
pub fn from_lines(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
pub fn from_lines<T: DiffableStrRef + ?Sized>(
|
||||||
Self::configure().diff_lines(old, new)
|
old: &'old T,
|
||||||
|
new: &'new T,
|
||||||
|
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||||
|
TextDiff::configure().diff_lines(old, new)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a diff of words.
|
/// Creates a diff of words.
|
||||||
///
|
///
|
||||||
/// Equivalent to `TextDiff::configure().diff_words(old, new)`.
|
/// Equivalent to `TextDiff::configure().diff_words(old, new)`.
|
||||||
pub fn from_words(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
pub fn from_words<T: DiffableStrRef + ?Sized>(
|
||||||
Self::configure().diff_words(old, new)
|
old: &'old T,
|
||||||
|
new: &'new T,
|
||||||
|
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||||
|
TextDiff::configure().diff_words(old, new)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a diff of chars.
|
||||||
|
///
|
||||||
|
/// Equivalent to `TextDiff::configure().diff_chars(old, new)`.
|
||||||
|
pub fn from_chars<T: DiffableStrRef + ?Sized>(
|
||||||
|
old: &'old T,
|
||||||
|
new: &'new T,
|
||||||
|
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||||
|
TextDiff::configure().diff_chars(old, new)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a diff of unicode words.
|
/// Creates a diff of unicode words.
|
||||||
|
|
@ -331,15 +378,11 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||||
///
|
///
|
||||||
/// This requires the `unicode` feature.
|
/// This requires the `unicode` feature.
|
||||||
#[cfg(feature = "unicode")]
|
#[cfg(feature = "unicode")]
|
||||||
pub fn from_unicode_words(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
pub fn from_unicode_words<T: DiffableStrRef + ?Sized>(
|
||||||
Self::configure().diff_unicode_words(old, new)
|
old: &'old T,
|
||||||
}
|
new: &'new T,
|
||||||
|
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||||
/// Creates a diff of chars.
|
TextDiff::configure().diff_unicode_words(old, new)
|
||||||
///
|
|
||||||
/// Equivalent to `TextDiff::configure().diff_chars(old, new)`.
|
|
||||||
pub fn from_chars(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
|
||||||
Self::configure().diff_chars(old, new)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a diff of graphemes.
|
/// Creates a diff of graphemes.
|
||||||
|
|
@ -348,18 +391,23 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||||
///
|
///
|
||||||
/// This requires the `unicode` feature.
|
/// This requires the `unicode` feature.
|
||||||
#[cfg(feature = "unicode")]
|
#[cfg(feature = "unicode")]
|
||||||
pub fn from_graphemes(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
pub fn from_graphemes<T: DiffableStrRef + ?Sized>(
|
||||||
Self::configure().diff_graphemes(old, new)
|
old: &'old T,
|
||||||
|
new: &'new T,
|
||||||
|
) -> TextDiff<'old, 'new, 'bufs, T::Output> {
|
||||||
|
TextDiff::configure().diff_graphemes(old, new)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'new, 'bufs, T> {
|
||||||
/// Creates a diff of arbitrary slices.
|
/// Creates a diff of arbitrary slices.
|
||||||
///
|
///
|
||||||
/// Equivalent to `TextDiff::configure().diff_slices(old, new)`.
|
/// Equivalent to `TextDiff::configure().diff_slices(old, new)`.
|
||||||
pub fn from_slices(
|
pub fn from_slices(
|
||||||
old: &'bufs [&'old str],
|
old: &'bufs [&'old T],
|
||||||
new: &'bufs [&'new str],
|
new: &'bufs [&'new T],
|
||||||
) -> TextDiff<'old, 'new, 'bufs> {
|
) -> TextDiff<'old, 'new, 'bufs, T> {
|
||||||
Self::configure().diff_slices(old, new)
|
TextDiff::configure().diff_slices(old, new)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The name of the algorithm that created the diff.
|
/// The name of the algorithm that created the diff.
|
||||||
|
|
@ -376,12 +424,12 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns all old slices.
|
/// Returns all old slices.
|
||||||
pub fn old_slices(&self) -> &[&'old str] {
|
pub fn old_slices(&self) -> &[&'old T] {
|
||||||
&self.old
|
&self.old
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns all new slices.
|
/// Returns all new slices.
|
||||||
pub fn new_slices(&self) -> &[&'new str] {
|
pub fn new_slices(&self) -> &[&'new T] {
|
||||||
&self.new
|
&self.new
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -405,7 +453,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||||
/// ways in which a change could be encoded (insert/delete vs replace), look
|
/// ways in which a change could be encoded (insert/delete vs replace), look
|
||||||
/// up the value from the appropriate slice and also handle correct index
|
/// up the value from the appropriate slice and also handle correct index
|
||||||
/// handling.
|
/// handling.
|
||||||
pub fn iter_changes(&self, op: &DiffOp) -> impl Iterator<Item = Change> {
|
pub fn iter_changes(&self, op: &DiffOp) -> impl Iterator<Item = Change<'_, T>> {
|
||||||
let newline_terminated = self.newline_terminated;
|
let newline_terminated = self.newline_terminated;
|
||||||
let (tag, old_range, new_range) = op.as_tag_tuple();
|
let (tag, old_range, new_range) = op.as_tag_tuple();
|
||||||
let mut old_index = old_range.start;
|
let mut old_index = old_range.start;
|
||||||
|
|
@ -426,7 +474,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||||
value: first,
|
value: first,
|
||||||
missing_newline: newline_terminated
|
missing_newline: newline_terminated
|
||||||
&& rest.is_empty()
|
&& rest.is_empty()
|
||||||
&& !first.ends_with(&['\r', '\n'][..]),
|
&& !first.ends_with_newline(),
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
|
|
@ -443,7 +491,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||||
value: first,
|
value: first,
|
||||||
missing_newline: newline_terminated
|
missing_newline: newline_terminated
|
||||||
&& rest.is_empty()
|
&& rest.is_empty()
|
||||||
&& !first.ends_with(&['\r', '\n'][..]),
|
&& !first.ends_with_newline(),
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
|
|
@ -460,7 +508,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||||
value: first,
|
value: first,
|
||||||
missing_newline: newline_terminated
|
missing_newline: newline_terminated
|
||||||
&& rest.is_empty()
|
&& rest.is_empty()
|
||||||
&& !first.ends_with(&['\r', '\n'][..]),
|
&& !first.ends_with_newline(),
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
|
|
@ -477,7 +525,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||||
value: first,
|
value: first,
|
||||||
missing_newline: newline_terminated
|
missing_newline: newline_terminated
|
||||||
&& rest.is_empty()
|
&& rest.is_empty()
|
||||||
&& !first.ends_with(&['\r', '\n'][..]),
|
&& !first.ends_with_newline(),
|
||||||
})
|
})
|
||||||
} else if let Some((&first, rest)) = new_slices.split_first() {
|
} else if let Some((&first, rest)) = new_slices.split_first() {
|
||||||
new_slices = rest;
|
new_slices = rest;
|
||||||
|
|
@ -489,7 +537,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||||
value: first,
|
value: first,
|
||||||
missing_newline: newline_terminated
|
missing_newline: newline_terminated
|
||||||
&& rest.is_empty()
|
&& rest.is_empty()
|
||||||
&& !first.ends_with(&['\r', '\n'][..]),
|
&& !first.ends_with_newline(),
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
|
|
@ -498,17 +546,6 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iterates over the changes the op expands to with inline emphasis.
|
|
||||||
///
|
|
||||||
/// This is very similar to [`TextDiff::iter_changes`] but it performs a second
|
|
||||||
/// level diff on adjacent line replacements. The exact behavior of
|
|
||||||
/// this function with regards to how it detects those inline changes
|
|
||||||
/// is currently not defined and will likely change over time.
|
|
||||||
#[cfg(feature = "inline")]
|
|
||||||
pub fn iter_inline_changes(&self, op: &DiffOp) -> impl Iterator<Item = InlineChange> {
|
|
||||||
iter_inline_changes(self, op)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the captured diff ops.
|
/// Returns the captured diff ops.
|
||||||
pub fn ops(&self) -> &[DiffOp] {
|
pub fn ops(&self) -> &[DiffOp] {
|
||||||
&self.ops
|
&self.ops
|
||||||
|
|
@ -522,85 +559,20 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Utility to return a unified diff formatter.
|
/// Utility to return a unified diff formatter.
|
||||||
pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
|
pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> {
|
||||||
UnifiedDiff::from_text_diff(self)
|
UnifiedDiff::from_text_diff(self)
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
/// Given a string splits it into lines.
|
/// Iterates over the changes the op expands to with inline emphasis.
|
||||||
///
|
///
|
||||||
/// This operation will preserve the newline separation character at the end.
|
/// This is very similar to [`TextDiff::iter_changes`] but it performs a second
|
||||||
/// It supports all common newline sequences (`\r\n`, `\n` as well as `\r`).
|
/// level diff on adjacent line replacements. The exact behavior of
|
||||||
fn split_lines(s: &str) -> impl Iterator<Item = &str> {
|
/// this function with regards to how it detects those inline changes
|
||||||
let mut iter = s.char_indices().peekable();
|
/// is currently not defined and will likely change over time.
|
||||||
let mut last_pos = 0;
|
#[cfg(feature = "inline")]
|
||||||
|
pub fn iter_inline_changes(&self, op: &DiffOp) -> impl Iterator<Item = InlineChange<'_, T>> {
|
||||||
std::iter::from_fn(move || {
|
iter_inline_changes(self, op)
|
||||||
if let Some((idx, c)) = iter.next() {
|
}
|
||||||
let mut rv = None;
|
|
||||||
if c == '\r' {
|
|
||||||
if iter.peek().map_or(false, |x| x.1 == '\n') {
|
|
||||||
rv = Some(&s[last_pos..=idx + 1]);
|
|
||||||
iter.next();
|
|
||||||
last_pos = idx + 2;
|
|
||||||
} else {
|
|
||||||
rv = Some(&s[last_pos..=idx]);
|
|
||||||
last_pos = idx + 1;
|
|
||||||
}
|
|
||||||
} else if c == '\n' {
|
|
||||||
rv = Some(&s[last_pos..=idx]);
|
|
||||||
last_pos = idx + 1;
|
|
||||||
}
|
|
||||||
Some(rv)
|
|
||||||
} else if last_pos < s.len() {
|
|
||||||
let tmp = &s[last_pos..];
|
|
||||||
last_pos = s.len();
|
|
||||||
Some(Some(tmp))
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.flatten()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Partitions at whitespace.
|
|
||||||
fn split_words(s: &str) -> impl Iterator<Item = &str> {
|
|
||||||
let mut iter = s.char_indices().peekable();
|
|
||||||
|
|
||||||
std::iter::from_fn(move || {
|
|
||||||
if let Some((idx, c)) = iter.next() {
|
|
||||||
let is_whitespace = c.is_whitespace();
|
|
||||||
let start = idx;
|
|
||||||
let mut end = idx + c.len_utf8();
|
|
||||||
while let Some(&(_, next_char)) = iter.peek() {
|
|
||||||
if next_char.is_whitespace() != is_whitespace {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
iter.next();
|
|
||||||
end += next_char.len_utf8();
|
|
||||||
}
|
|
||||||
Some(&s[start..end])
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Splits words according to unicode rules.
|
|
||||||
#[cfg(feature = "unicode")]
|
|
||||||
fn split_unicode_words(s: &str) -> impl Iterator<Item = &str> {
|
|
||||||
unicode_segmentation::UnicodeSegmentation::split_word_bounds(s)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Splits text into characters.
|
|
||||||
fn split_chars(s: &str) -> impl Iterator<Item = &str> {
|
|
||||||
s.char_indices().map(move |(i, c)| &s[i..i + c.len_utf8()])
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Splits text into graphemes.
|
|
||||||
#[cfg(feature = "unicode")]
|
|
||||||
fn split_graphemes(s: &str) -> impl Iterator<Item = &str> {
|
|
||||||
unicode_segmentation::UnicodeSegmentation::graphemes(s, true)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// quick and dirty way to get an upper sequence ratio.
|
// quick and dirty way to get an upper sequence ratio.
|
||||||
|
|
@ -619,10 +591,10 @@ fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
|
||||||
///
|
///
|
||||||
/// It counts the number of matches without regard to order, which is an
|
/// It counts the number of matches without regard to order, which is an
|
||||||
/// obvious upper bound.
|
/// obvious upper bound.
|
||||||
struct QuickSeqRatio<'a>(HashMap<&'a str, i32>);
|
struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>);
|
||||||
|
|
||||||
impl<'a> QuickSeqRatio<'a> {
|
impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> {
|
||||||
pub fn new(seq: &[&'a str]) -> QuickSeqRatio<'a> {
|
pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> {
|
||||||
let mut counts = HashMap::new();
|
let mut counts = HashMap::new();
|
||||||
for &word in seq {
|
for &word in seq {
|
||||||
*counts.entry(word).or_insert(0) += 1;
|
*counts.entry(word).or_insert(0) += 1;
|
||||||
|
|
@ -630,7 +602,7 @@ impl<'a> QuickSeqRatio<'a> {
|
||||||
QuickSeqRatio(counts)
|
QuickSeqRatio(counts)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn calc(&self, seq: &[&str]) -> f32 {
|
pub fn calc(&self, seq: &[&T]) -> f32 {
|
||||||
let n = self.0.len() + seq.len();
|
let n = self.0.len() + seq.len();
|
||||||
if n == 0 {
|
if n == 0 {
|
||||||
return 1.0;
|
return 1.0;
|
||||||
|
|
@ -669,18 +641,18 @@ impl<'a> QuickSeqRatio<'a> {
|
||||||
/// );
|
/// );
|
||||||
/// assert_eq!(matches, vec!["apple", "ape"]);
|
/// assert_eq!(matches, vec!["apple", "ape"]);
|
||||||
/// ```
|
/// ```
|
||||||
pub fn get_close_matches<'a>(
|
pub fn get_close_matches<'a, T: DiffableStr + ?Sized>(
|
||||||
word: &str,
|
word: &T,
|
||||||
possibilities: &[&'a str],
|
possibilities: &[&'a T],
|
||||||
n: usize,
|
n: usize,
|
||||||
cutoff: f32,
|
cutoff: f32,
|
||||||
) -> Vec<&'a str> {
|
) -> Vec<&'a T> {
|
||||||
let mut matches = BinaryHeap::new();
|
let mut matches = BinaryHeap::new();
|
||||||
let seq1 = split_chars(word).collect::<Vec<_>>();
|
let seq1 = word.split_chars();
|
||||||
let quick_ratio = QuickSeqRatio::new(&seq1);
|
let quick_ratio = QuickSeqRatio::new(&seq1);
|
||||||
|
|
||||||
for &possibility in possibilities {
|
for &possibility in possibilities {
|
||||||
let seq2 = split_chars(possibility).collect::<Vec<_>>();
|
let seq2 = possibility.split_chars();
|
||||||
|
|
||||||
if upper_seq_ratio(&seq1, &seq2) < cutoff || quick_ratio.calc(&seq2) < cutoff {
|
if upper_seq_ratio(&seq1, &seq2) < cutoff || quick_ratio.calc(&seq2) < cutoff {
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -707,42 +679,6 @@ pub fn get_close_matches<'a>(
|
||||||
rv
|
rv
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_split_lines() {
|
|
||||||
assert_eq!(
|
|
||||||
split_lines("first\nsecond\rthird\r\nfourth\nlast").collect::<Vec<_>>(),
|
|
||||||
vec!["first\n", "second\r", "third\r\n", "fourth\n", "last"]
|
|
||||||
);
|
|
||||||
assert_eq!(split_lines("\n\n").collect::<Vec<_>>(), vec!["\n", "\n"]);
|
|
||||||
assert_eq!(split_lines("\n").collect::<Vec<_>>(), vec!["\n"]);
|
|
||||||
assert!(split_lines("").collect::<Vec<_>>().is_empty());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_split_words() {
|
|
||||||
assert_eq!(
|
|
||||||
split_words("foo bar baz\n\n aha").collect::<Vec<_>>(),
|
|
||||||
["foo", " ", "bar", " ", "baz", "\n\n ", "aha"]
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_split_chars() {
|
|
||||||
assert_eq!(
|
|
||||||
split_chars("abcfö❄️").collect::<Vec<_>>(),
|
|
||||||
vec!["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"]
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
#[cfg(feature = "unicode")]
|
|
||||||
fn test_split_graphemes() {
|
|
||||||
assert_eq!(
|
|
||||||
split_graphemes("abcfö❄️").collect::<Vec<_>>(),
|
|
||||||
vec!["a", "b", "c", "f", "ö", "❄️"]
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_captured_ops() {
|
fn test_captured_ops() {
|
||||||
let diff = TextDiff::from_lines(
|
let diff = TextDiff::from_lines(
|
||||||
|
|
@ -782,10 +718,9 @@ fn test_unified_diff() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_line_ops() {
|
fn test_line_ops() {
|
||||||
let diff = TextDiff::from_lines(
|
let a = "Hello World\nsome stuff here\nsome more stuff here\n";
|
||||||
"Hello World\nsome stuff here\nsome more stuff here\n",
|
let b = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
|
||||||
"Hello World\nsome amazing stuff here\nsome more stuff here\n",
|
let diff = TextDiff::from_lines(a, b);
|
||||||
);
|
|
||||||
assert_eq!(diff.newline_terminated(), true);
|
assert_eq!(diff.newline_terminated(), true);
|
||||||
let changes = diff
|
let changes = diff
|
||||||
.ops()
|
.ops()
|
||||||
|
|
@ -793,6 +728,19 @@ fn test_line_ops() {
|
||||||
.flat_map(|op| diff.iter_changes(op))
|
.flat_map(|op| diff.iter_changes(op))
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
insta::assert_debug_snapshot!(&changes);
|
insta::assert_debug_snapshot!(&changes);
|
||||||
|
|
||||||
|
#[cfg(feature = "bytes")]
|
||||||
|
{
|
||||||
|
let byte_diff = TextDiff::from_lines(a.as_bytes(), b.as_bytes());
|
||||||
|
let byte_changes = byte_diff
|
||||||
|
.ops()
|
||||||
|
.iter()
|
||||||
|
.flat_map(|op| byte_diff.iter_changes(op))
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
for (change, byte_change) in changes.iter().zip(byte_changes.iter()) {
|
||||||
|
assert_eq!(change.as_str_lossy(), byte_change.as_str_lossy());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
@ -811,6 +759,12 @@ fn test_virtual_newlines() {
|
||||||
fn test_char_diff() {
|
fn test_char_diff() {
|
||||||
let diff = TextDiff::from_chars("Hello World", "Hallo Welt");
|
let diff = TextDiff::from_chars("Hello World", "Hallo Welt");
|
||||||
insta::assert_debug_snapshot!(diff.ops());
|
insta::assert_debug_snapshot!(diff.ops());
|
||||||
|
|
||||||
|
#[cfg(feature = "bytes")]
|
||||||
|
{
|
||||||
|
let byte_diff = TextDiff::from_chars("Hello World".as_bytes(), "Hallo Welt".as_bytes());
|
||||||
|
assert_eq!(diff.ops(), byte_diff.ops());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
|
||||||
|
|
@ -13,13 +13,23 @@
|
||||||
//! .context_radius(10)
|
//! .context_radius(10)
|
||||||
//! .header("old_file", "new_file"));
|
//! .header("old_file", "new_file"));
|
||||||
//! ```
|
//! ```
|
||||||
|
//!
|
||||||
|
//! # Unicode vs Bytes
|
||||||
|
//!
|
||||||
|
//! The [`UnifiedDiff`] type supports both unicode and byte diffs for all
|
||||||
|
//! types compatible with [`DiffableStr`]. You can pick between the two
|
||||||
|
//! versions by using [`UnifiedDiff.to_string`] or [`UnifiedDiff.to_writer`].
|
||||||
|
//! The former uses [`DiffableStr::as_str_lossy`], the latter uses
|
||||||
|
//! [`DiffableStr::as_bytes`] for each line.
|
||||||
|
|
||||||
use std::fmt;
|
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
|
use std::{fmt, io};
|
||||||
|
|
||||||
use crate::algorithms::{Algorithm, DiffOp};
|
use crate::algorithms::{Algorithm, DiffOp};
|
||||||
use crate::text::{Change, ChangeTag, TextDiff};
|
use crate::text::{Change, ChangeTag, TextDiff};
|
||||||
|
|
||||||
|
use super::DiffableStr;
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug)]
|
#[derive(Copy, Clone, Debug)]
|
||||||
struct UnifiedDiffHunkRange(usize, usize);
|
struct UnifiedDiffHunkRange(usize, usize);
|
||||||
|
|
||||||
|
|
@ -77,17 +87,34 @@ impl fmt::Display for UnifiedHunkHeader {
|
||||||
|
|
||||||
/// Unified diff formatter.
|
/// Unified diff formatter.
|
||||||
///
|
///
|
||||||
/// The `Display` implementation renders a unified diff.
|
/// ```rust
|
||||||
pub struct UnifiedDiff<'diff, 'old, 'new, 'bufs> {
|
/// use similar::text::TextDiff;
|
||||||
diff: &'diff TextDiff<'old, 'new, 'bufs>,
|
/// # let old_text = "";
|
||||||
|
/// # let new_text = "";
|
||||||
|
/// let text_diff = TextDiff::from_lines(old_text, new_text);
|
||||||
|
/// print!("{}", text_diff
|
||||||
|
/// .unified_diff()
|
||||||
|
/// .context_radius(10)
|
||||||
|
/// .header("old_file", "new_file"));
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// ## Unicode vs Bytes
|
||||||
|
///
|
||||||
|
/// The [`UnifiedDiff`] type supports both unicode and byte diffs for all
|
||||||
|
/// types compatible with [`DiffableStr`]. You can pick between the two
|
||||||
|
/// versions by using `to_string` (from the `Display` implementation) or [`UnifiedDiff::to_writer`].
|
||||||
|
/// The former uses [`DiffableStr::as_str_lossy`], the latter uses
|
||||||
|
/// [`DiffableStr::as_bytes`] for each line.
|
||||||
|
pub struct UnifiedDiff<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
|
||||||
|
diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
|
||||||
context_radius: usize,
|
context_radius: usize,
|
||||||
missing_newline_hint: bool,
|
missing_newline_hint: bool,
|
||||||
header: Option<(String, String)>,
|
header: Option<(String, String)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'diff, 'old, 'new, 'bufs> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
|
impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> {
|
||||||
/// Creates a formatter from a text diff object.
|
/// Creates a formatter from a text diff object.
|
||||||
pub fn from_text_diff(diff: &'diff TextDiff<'old, 'new, 'bufs>) -> Self {
|
pub fn from_text_diff(diff: &'diff TextDiff<'old, 'new, 'bufs, T>) -> Self {
|
||||||
UnifiedDiff {
|
UnifiedDiff {
|
||||||
diff,
|
diff,
|
||||||
context_radius: 3,
|
context_radius: 3,
|
||||||
|
|
@ -127,7 +154,7 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iterates over all hunks as configured.
|
/// Iterates over all hunks as configured.
|
||||||
pub fn iter_hunks(&self) -> impl Iterator<Item = UnifiedDiffHunk<'diff, 'old, 'new, 'bufs>> {
|
pub fn iter_hunks(&self) -> impl Iterator<Item = UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T>> {
|
||||||
let diff = self.diff;
|
let diff = self.diff;
|
||||||
let missing_newline_hint = self.missing_newline_hint;
|
let missing_newline_hint = self.missing_newline_hint;
|
||||||
self.diff
|
self.diff
|
||||||
|
|
@ -137,6 +164,19 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
|
||||||
.map(move |ops| UnifiedDiffHunk::new(ops, diff, missing_newline_hint))
|
.map(move |ops| UnifiedDiffHunk::new(ops, diff, missing_newline_hint))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Write the unified diff as bytes to the output stream.
|
||||||
|
pub fn to_writer<W: io::Write>(&self, mut w: W) -> Result<(), io::Error> {
|
||||||
|
let mut header = self.header.as_ref();
|
||||||
|
for hunk in self.iter_hunks() {
|
||||||
|
if let Some((old_file, new_file)) = header.take() {
|
||||||
|
writeln!(w, "--- {}", old_file)?;
|
||||||
|
writeln!(w, "+++ {}", new_file)?;
|
||||||
|
}
|
||||||
|
write!(w, "{}", hunk)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn header_opt(&mut self, header: Option<(&str, &str)>) -> &mut Self {
|
fn header_opt(&mut self, header: Option<(&str, &str)>) -> &mut Self {
|
||||||
if let Some((a, b)) = header {
|
if let Some((a, b)) = header {
|
||||||
self.header(a, b);
|
self.header(a, b);
|
||||||
|
|
@ -148,19 +188,21 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
|
||||||
/// Unified diff hunk formatter.
|
/// Unified diff hunk formatter.
|
||||||
///
|
///
|
||||||
/// The `Display` implementation renders out a single hunk of a unified diff.
|
/// The `Display` implementation renders out a single hunk of a unified diff.
|
||||||
pub struct UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
|
pub struct UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
|
||||||
diff: &'diff TextDiff<'old, 'new, 'bufs>,
|
diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
|
||||||
ops: Vec<DiffOp>,
|
ops: Vec<DiffOp>,
|
||||||
missing_newline_hint: bool,
|
missing_newline_hint: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'diff, 'old, 'new, 'bufs> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
|
impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized>
|
||||||
|
UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T>
|
||||||
|
{
|
||||||
/// Creates a new hunk for some operations.
|
/// Creates a new hunk for some operations.
|
||||||
pub fn new(
|
pub fn new(
|
||||||
ops: Vec<DiffOp>,
|
ops: Vec<DiffOp>,
|
||||||
diff: &'diff TextDiff<'old, 'new, 'bufs>,
|
diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
|
||||||
missing_newline_hint: bool,
|
missing_newline_hint: bool,
|
||||||
) -> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
|
) -> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T> {
|
||||||
UnifiedDiffHunk {
|
UnifiedDiffHunk {
|
||||||
diff,
|
diff,
|
||||||
ops,
|
ops,
|
||||||
|
|
@ -184,7 +226,7 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iterates over all changes in a hunk.
|
/// Iterates over all changes in a hunk.
|
||||||
pub fn iter_changes(&self) -> impl Iterator<Item = Change<'_>> + '_ {
|
pub fn iter_changes(&self) -> impl Iterator<Item = Change<'_, T>> + '_ {
|
||||||
// unclear why this needs Box::new here. It seems to infer some really
|
// unclear why this needs Box::new here. It seems to infer some really
|
||||||
// odd lifetimes I can't figure out how to work with.
|
// odd lifetimes I can't figure out how to work with.
|
||||||
(Box::new(
|
(Box::new(
|
||||||
|
|
@ -193,9 +235,43 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
|
||||||
.flat_map(move |op| self.diff.iter_changes(op)),
|
.flat_map(move |op| self.diff.iter_changes(op)),
|
||||||
)) as Box<dyn Iterator<Item = _>>
|
)) as Box<dyn Iterator<Item = _>>
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Write the hunk as bytes to the output stream.
|
||||||
|
pub fn to_writer<W: io::Write>(&self, mut w: W) -> Result<(), io::Error> {
|
||||||
|
let mut wrote_header = false;
|
||||||
|
for change in self.iter_changes() {
|
||||||
|
if !wrote_header {
|
||||||
|
writeln!(w, "{}", self.header())?;
|
||||||
|
wrote_header = true;
|
||||||
|
}
|
||||||
|
write!(
|
||||||
|
w,
|
||||||
|
"{}",
|
||||||
|
match change.tag() {
|
||||||
|
ChangeTag::Equal => ' ',
|
||||||
|
ChangeTag::Delete => '-',
|
||||||
|
ChangeTag::Insert => '+',
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
w.write_all(change.value().as_bytes())?;
|
||||||
|
if self.diff.newline_terminated() {
|
||||||
|
write!(w, "\n")?;
|
||||||
|
}
|
||||||
|
if change.missing_newline() {
|
||||||
|
if self.missing_newline_hint {
|
||||||
|
writeln!(w, "\n\\ No newline at end of file")?;
|
||||||
|
} else {
|
||||||
|
writeln!(w)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'diff, 'old, 'new, 'bufs> fmt::Display for UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
|
impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> fmt::Display
|
||||||
|
for UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T>
|
||||||
|
{
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
let nl = if self.diff.newline_terminated() {
|
let nl = if self.diff.newline_terminated() {
|
||||||
""
|
""
|
||||||
|
|
@ -216,7 +292,7 @@ impl<'diff, 'old, 'new, 'bufs> fmt::Display for UnifiedDiffHunk<'diff, 'old, 'ne
|
||||||
ChangeTag::Delete => '-',
|
ChangeTag::Delete => '-',
|
||||||
ChangeTag::Insert => '+',
|
ChangeTag::Insert => '+',
|
||||||
},
|
},
|
||||||
change.value(),
|
change.as_str_lossy(),
|
||||||
nl
|
nl
|
||||||
)?;
|
)?;
|
||||||
if change.missing_newline() {
|
if change.missing_newline() {
|
||||||
|
|
@ -231,7 +307,9 @@ impl<'diff, 'old, 'new, 'bufs> fmt::Display for UnifiedDiffHunk<'diff, 'old, 'ne
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'diff, 'old, 'new, 'bufs> fmt::Display for UnifiedDiff<'diff, 'old, 'new, 'bufs> {
|
impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> fmt::Display
|
||||||
|
for UnifiedDiff<'diff, 'old, 'new, 'bufs, T>
|
||||||
|
{
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
let mut header = self.header.as_ref();
|
let mut header = self.header.as_ref();
|
||||||
for hunk in self.iter_hunks() {
|
for hunk in self.iter_hunks() {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue