Added support for grapheme and char diffing
This commit is contained in:
parent
fdaf81371c
commit
1b96739cc3
3 changed files with 95 additions and 2 deletions
|
|
@ -2,6 +2,10 @@
|
||||||
|
|
||||||
All notable changes to similar are documented here.
|
All notable changes to similar are documented here.
|
||||||
|
|
||||||
|
## 0.3.0
|
||||||
|
|
||||||
|
* Added grapheme and character level diffing utilities.
|
||||||
|
|
||||||
## 0.2.0
|
## 0.2.0
|
||||||
|
|
||||||
* Fixed a bug in the patience algorithm causing it to not work.
|
* Fixed a bug in the patience algorithm causing it to not work.
|
||||||
|
|
|
||||||
10
Cargo.toml
10
Cargo.toml
|
|
@ -9,5 +9,15 @@ repository = "https://github.com/mitsuhiko/similar"
|
||||||
keywords = ["diff", "difference", "patience", "compare", "changes"]
|
keywords = ["diff", "difference", "patience", "compare", "changes"]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
|
|
||||||
|
[package.metadata.docs.rs]
|
||||||
|
all-features = true
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = []
|
||||||
|
unicode = ["unicode-segmentation"]
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
insta = "1.5.2"
|
insta = "1.5.2"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
unicode-segmentation = { version = "1.7.1", optional = true }
|
||||||
|
|
|
||||||
83
src/text.rs
83
src/text.rs
|
|
@ -54,7 +54,7 @@ use std::ops::Range;
|
||||||
|
|
||||||
use crate::algorithms::{capture_diff_slices, group_diff_ops, Algorithm, DiffOp, DiffTag};
|
use crate::algorithms::{capture_diff_slices, group_diff_ops, Algorithm, DiffOp, DiffTag};
|
||||||
|
|
||||||
/// A builder for more complex uses of [`TextDiff`].
|
/// A builder-style configuration for more complex uses of [`TextDiff`].
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct TextDiffConfig {
|
pub struct TextDiffConfig {
|
||||||
algorithm: Algorithm,
|
algorithm: Algorithm,
|
||||||
|
|
@ -72,6 +72,8 @@ impl Default for TextDiffConfig {
|
||||||
|
|
||||||
impl TextDiffConfig {
|
impl TextDiffConfig {
|
||||||
/// Changes the algorithm.
|
/// Changes the algorithm.
|
||||||
|
///
|
||||||
|
/// The default algorithm is [`Algorithm::Myers`].
|
||||||
pub fn algorithm(&mut self, alg: Algorithm) -> &mut Self {
|
pub fn algorithm(&mut self, alg: Algorithm) -> &mut Self {
|
||||||
self.algorithm = alg;
|
self.algorithm = alg;
|
||||||
self
|
self
|
||||||
|
|
@ -79,13 +81,20 @@ impl TextDiffConfig {
|
||||||
|
|
||||||
/// Changes the newline termination flag.
|
/// Changes the newline termination flag.
|
||||||
///
|
///
|
||||||
/// The default is automatic based on input.
|
/// The default is automatic based on input. This flag controls the
|
||||||
|
/// behavior of the [`TextDiff::write_unified_diff`] method with regards
|
||||||
|
/// to newlines. When the flag is set to `false` (which is the default)
|
||||||
|
/// then newlines are added. Otherwise the newlines from the source
|
||||||
|
/// sequences are reused.
|
||||||
pub fn newline_terminated(&mut self, yes: bool) -> &mut Self {
|
pub fn newline_terminated(&mut self, yes: bool) -> &mut Self {
|
||||||
self.newline_terminated = Some(yes);
|
self.newline_terminated = Some(yes);
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a diff of lines.
|
/// Creates a diff of lines.
|
||||||
|
///
|
||||||
|
/// This splits the text `old` and `new` into lines preserving newlines
|
||||||
|
/// in the input.
|
||||||
pub fn diff_lines<'old, 'new, 'bufs>(
|
pub fn diff_lines<'old, 'new, 'bufs>(
|
||||||
&self,
|
&self,
|
||||||
old: &'old str,
|
old: &'old str,
|
||||||
|
|
@ -111,6 +120,35 @@ impl TextDiffConfig {
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a diff of characters.
|
||||||
|
pub fn diff_chars<'old, 'new, 'bufs>(
|
||||||
|
&self,
|
||||||
|
old: &'old str,
|
||||||
|
new: &'new str,
|
||||||
|
) -> TextDiff<'old, 'new, 'bufs> {
|
||||||
|
self.diff(
|
||||||
|
Cow::Owned(split_chars(old).collect()),
|
||||||
|
Cow::Owned(split_chars(new).collect()),
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a diff of graphemes.
|
||||||
|
///
|
||||||
|
/// This requires the `unicode` feature.
|
||||||
|
#[cfg(feature = "unicode")]
|
||||||
|
pub fn diff_graphemes<'old, 'new, 'bufs>(
|
||||||
|
&self,
|
||||||
|
old: &'old str,
|
||||||
|
new: &'new str,
|
||||||
|
) -> TextDiff<'old, 'new, 'bufs> {
|
||||||
|
self.diff(
|
||||||
|
Cow::Owned(split_graphemes(old).collect()),
|
||||||
|
Cow::Owned(split_graphemes(new).collect()),
|
||||||
|
false,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
/// Creates a diff of arbitrary slices.
|
/// Creates a diff of arbitrary slices.
|
||||||
pub fn diff_slices<'old, 'new, 'bufs>(
|
pub fn diff_slices<'old, 'new, 'bufs>(
|
||||||
&self,
|
&self,
|
||||||
|
|
@ -222,6 +260,19 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
||||||
Self::configure().diff_words(old, new)
|
Self::configure().diff_words(old, new)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a diff of chars.
|
||||||
|
pub fn from_chars(&self, old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
||||||
|
Self::configure().diff_chars(old, new)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a diff of graphemes.
|
||||||
|
///
|
||||||
|
/// This requires the `unicode` feature.
|
||||||
|
#[cfg(feature = "unicode")]
|
||||||
|
pub fn from_graphemes(&self, old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
||||||
|
Self::configure().diff_graphemes(old, new)
|
||||||
|
}
|
||||||
|
|
||||||
/// Creates a diff of arbitrary slices.
|
/// Creates a diff of arbitrary slices.
|
||||||
pub fn from_slices(
|
pub fn from_slices(
|
||||||
&self,
|
&self,
|
||||||
|
|
@ -487,6 +538,17 @@ fn split_words(s: &str) -> impl Iterator<Item = &str> {
|
||||||
.flatten()
|
.flatten()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Splits text into characters.
|
||||||
|
fn split_chars(s: &str) -> impl Iterator<Item = &str> {
|
||||||
|
s.char_indices().map(move |(i, c)| &s[i..i + c.len_utf8()])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Splits text into graphemes.
|
||||||
|
#[cfg(feature = "unicode")]
|
||||||
|
fn split_graphemes(s: &str) -> impl Iterator<Item = &str> {
|
||||||
|
unicode_segmentation::UnicodeSegmentation::graphemes(s, true)
|
||||||
|
}
|
||||||
|
|
||||||
/// Quick way to get a unified diff as string.
|
/// Quick way to get a unified diff as string.
|
||||||
pub fn unified_diff<'old, 'new>(
|
pub fn unified_diff<'old, 'new>(
|
||||||
alg: Algorithm,
|
alg: Algorithm,
|
||||||
|
|
@ -520,6 +582,23 @@ fn test_split_words() {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_split_chars() {
|
||||||
|
assert_eq!(
|
||||||
|
split_chars("abcfö❄️").collect::<Vec<_>>(),
|
||||||
|
vec!["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[cfg(feature = "unicode")]
|
||||||
|
fn test_split_graphemes() {
|
||||||
|
assert_eq!(
|
||||||
|
split_graphemes("abcfö❄️").collect::<Vec<_>>(),
|
||||||
|
vec!["a", "b", "c", "f", "ö", "❄️"]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_captured_ops() {
|
fn test_captured_ops() {
|
||||||
let diff = TextDiff::from_lines(
|
let diff = TextDiff::from_lines(
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue