Added support for unicode word splitting and changed inline to use it
This commit is contained in:
parent
d0dd42e4af
commit
0a4dd224f5
6 changed files with 66 additions and 22 deletions
|
|
@ -12,6 +12,7 @@ All notable changes to similar are documented here.
|
|||
does not show any changes.
|
||||
* Added inline diff highlighting support.
|
||||
* Changed word splitting to split into words and whitespace.
|
||||
* Added support for unicode based word splitting (`TextDiff::from_unicode_words`).
|
||||
|
||||
## 0.4.0
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ all-features = true
|
|||
[features]
|
||||
default = ["text"]
|
||||
text = []
|
||||
inline = ["unicode"]
|
||||
unicode = ["text", "unicode-segmentation"]
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
@ -30,7 +31,7 @@ required-features = ["text"]
|
|||
|
||||
[[example]]
|
||||
name = "terminal-inline"
|
||||
required-features = ["text"]
|
||||
required-features = ["text", "inline"]
|
||||
|
||||
[[example]]
|
||||
name = "udiff"
|
||||
|
|
|
|||
|
|
@ -38,11 +38,14 @@
|
|||
//! cases it's useful to pull in extra functionality. Likewise you can turn
|
||||
//! off some functionality.
|
||||
//!
|
||||
//! * `text`: this feature is enabled by default and enables the [`text`] module.
|
||||
//! If the crate is used without default features it's removed.
|
||||
//! * `unicode`: when this feature is enabled the text diffing functionality
|
||||
//! gains the ability to diff on a grapheme instead of character level. This
|
||||
//! is particularly useful when working with text containing emojis.
|
||||
//! * `text`: this feature is enabled by default and enables the [`text`] module.
|
||||
//! If the crate is used without default features it's removed.
|
||||
//! * `inline`: this feature gives access to additional functionality of the
|
||||
//! `text` module to provide inline information about which values changed
|
||||
//! in a line diff. This currently also enables the `unicode` feature.
|
||||
#![warn(missing_docs)]
|
||||
pub mod algorithms;
|
||||
pub mod text;
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
#![cfg(feature = "inline")]
|
||||
use std::{fmt, iter};
|
||||
|
||||
use crate::algorithms::{Algorithm, DiffOp, DiffTag};
|
||||
use crate::text::{Change, ChangeTag, TextDiff};
|
||||
|
||||
use super::split_words;
|
||||
use super::split_unicode_words;
|
||||
|
||||
use std::ops::Range;
|
||||
|
||||
|
|
@ -118,8 +119,8 @@ pub(crate) fn iter_inline_changes<'diff>(
|
|||
(ChangeTag::Delete, Some(ChangeTag::Insert)) => {
|
||||
let old_value = change.value();
|
||||
let new_value = next_change.unwrap().value();
|
||||
let old_chars = split_words(&old_value).collect::<Vec<_>>();
|
||||
let new_chars = split_words(&new_value).collect::<Vec<_>>();
|
||||
let old_chars = split_unicode_words(&old_value).collect::<Vec<_>>();
|
||||
let new_chars = split_unicode_words(&new_value).collect::<Vec<_>>();
|
||||
let old_mindex = MultiIndex::new(&old_chars, old_value);
|
||||
let new_mindex = MultiIndex::new(&new_chars, new_value);
|
||||
let inline_diff = TextDiff::configure()
|
||||
|
|
@ -185,3 +186,18 @@ pub(crate) fn iter_inline_changes<'diff>(
|
|||
})
|
||||
.flatten()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_line_ops_inline() {
|
||||
let diff = TextDiff::from_lines(
|
||||
"Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff",
|
||||
"Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n",
|
||||
);
|
||||
assert_eq!(diff.newline_terminated(), true);
|
||||
let changes = diff
|
||||
.ops()
|
||||
.iter()
|
||||
.flat_map(|op| diff.iter_inline_changes(op))
|
||||
.collect::<Vec<_>>();
|
||||
insta::assert_debug_snapshot!(&changes);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -74,9 +74,11 @@ use std::cmp::Reverse;
|
|||
use std::collections::{BinaryHeap, HashMap};
|
||||
use std::fmt;
|
||||
|
||||
#[cfg(feature = "inline")]
|
||||
mod inline;
|
||||
mod udiff;
|
||||
|
||||
#[cfg(feature = "inline")]
|
||||
pub use self::inline::*;
|
||||
pub use self::udiff::*;
|
||||
|
||||
|
|
@ -150,6 +152,25 @@ impl TextDiffConfig {
|
|||
)
|
||||
}
|
||||
|
||||
/// Creates a diff of unicode words.
|
||||
///
|
||||
/// This splits the text into words according to unicode rules. This is
|
||||
/// generally recommended over [`TextDiffConfig::diff_words`] but requires a dependency.
|
||||
///
|
||||
/// This requires the `unicode` feature.
|
||||
#[cfg(feature = "unicode")]
|
||||
pub fn diff_unicode_words<'old, 'new, 'bufs>(
|
||||
&self,
|
||||
old: &'old str,
|
||||
new: &'new str,
|
||||
) -> TextDiff<'old, 'new, 'bufs> {
|
||||
self.diff(
|
||||
Cow::Owned(split_unicode_words(old).collect()),
|
||||
Cow::Owned(split_unicode_words(new).collect()),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
/// Creates a diff of characters.
|
||||
pub fn diff_chars<'old, 'new, 'bufs>(
|
||||
&self,
|
||||
|
|
@ -301,6 +322,16 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
Self::configure().diff_words(old, new)
|
||||
}
|
||||
|
||||
/// Creates a diff of unicode words.
|
||||
///
|
||||
/// Equivalent to `TextDiff::configure().diff_unicode_words(old, new)`.
|
||||
///
|
||||
/// This requires the `unicode` feature.
|
||||
#[cfg(feature = "unicode")]
|
||||
pub fn from_unicode_words(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
||||
Self::configure().diff_unicode_words(old, new)
|
||||
}
|
||||
|
||||
/// Creates a diff of chars.
|
||||
///
|
||||
/// Equivalent to `TextDiff::configure().diff_chars(old, new)`.
|
||||
|
|
@ -486,6 +517,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
/// level diff on adjacent line replacements. The exact behavior of
|
||||
/// this function with regard to how it detects those inline changes
|
||||
/// is currently not defined and will likely change over time.
|
||||
#[cfg(feature = "inline")]
|
||||
pub fn iter_inline_changes(&self, op: &DiffOp) -> impl Iterator<Item = InlineChange> {
|
||||
iter_inline_changes(self, op)
|
||||
}
|
||||
|
|
@ -567,6 +599,12 @@ fn split_words(s: &str) -> impl Iterator<Item = &str> {
|
|||
})
|
||||
}
|
||||
|
||||
/// Splits words according to unicode rules.
|
||||
#[cfg(feature = "unicode")]
|
||||
fn split_unicode_words(s: &str) -> impl Iterator<Item = &str> {
|
||||
unicode_segmentation::UnicodeSegmentation::split_word_bounds(s)
|
||||
}
|
||||
|
||||
/// Splits text into characters.
|
||||
fn split_chars(s: &str) -> impl Iterator<Item = &str> {
|
||||
s.char_indices().map(move |(i, c)| &s[i..i + c.len_utf8()])
|
||||
|
|
@ -782,21 +820,6 @@ fn test_virtual_newlines() {
|
|||
insta::assert_debug_snapshot!(&changes);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_line_ops_inline() {
|
||||
let diff = TextDiff::from_lines(
|
||||
"Hello World\nsome stuff here\nsome more stuff here\n\nAha stuff here\nand more stuff",
|
||||
"Stuff\nHello World\nsome amazing stuff here\nsome more stuff here\n",
|
||||
);
|
||||
assert_eq!(diff.newline_terminated(), true);
|
||||
let changes = diff
|
||||
.ops()
|
||||
.iter()
|
||||
.flat_map(|op| diff.iter_inline_changes(op))
|
||||
.collect::<Vec<_>>();
|
||||
insta::assert_debug_snapshot!(&changes);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_char_diff() {
|
||||
let diff = TextDiff::from_chars("Hello World", "Hallo Welt");
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
---
|
||||
source: src/text/mod.rs
|
||||
source: src/text/inline.rs
|
||||
expression: "&changes"
|
||||
---
|
||||
[
|
||||
Loading…
Add table
Add a link
Reference in a new issue