Added support for grapheme and char diffing
This commit is contained in:
parent
fdaf81371c
commit
1b96739cc3
3 changed files with 95 additions and 2 deletions
|
|
@ -2,6 +2,10 @@
|
|||
|
||||
All notable changes to similar are documented here.
|
||||
|
||||
## 0.3.0
|
||||
|
||||
* Added grapheme and character level diffing utilities.
|
||||
|
||||
## 0.2.0
|
||||
|
||||
* Fixed a bug in the patience algorithm causing it not to work.
|
||||
|
|
|
|||
10
Cargo.toml
10
Cargo.toml
|
|
@ -9,5 +9,15 @@ repository = "https://github.com/mitsuhiko/similar"
|
|||
keywords = ["diff", "difference", "patience", "compare", "changes"]
|
||||
readme = "README.md"
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
all-features = true
|
||||
|
||||
[features]
|
||||
default = []
|
||||
unicode = ["unicode-segmentation"]
|
||||
|
||||
[dev-dependencies]
|
||||
insta = "1.5.2"
|
||||
|
||||
[dependencies]
|
||||
unicode-segmentation = { version = "1.7.1", optional = true }
|
||||
|
|
|
|||
83
src/text.rs
83
src/text.rs
|
|
@ -54,7 +54,7 @@ use std::ops::Range;
|
|||
|
||||
use crate::algorithms::{capture_diff_slices, group_diff_ops, Algorithm, DiffOp, DiffTag};
|
||||
|
||||
/// A builder for more complex uses of [`TextDiff`].
|
||||
/// A builder type config for more complex uses of [`TextDiff`].
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct TextDiffConfig {
|
||||
algorithm: Algorithm,
|
||||
|
|
@ -72,6 +72,8 @@ impl Default for TextDiffConfig {
|
|||
|
||||
impl TextDiffConfig {
|
||||
/// Changes the algorithm.
|
||||
///
|
||||
/// The default algorithm is [`Algorithm::Myers`].
|
||||
pub fn algorithm(&mut self, alg: Algorithm) -> &mut Self {
|
||||
self.algorithm = alg;
|
||||
self
|
||||
|
|
@ -79,13 +81,20 @@ impl TextDiffConfig {
|
|||
|
||||
/// Changes the newline termination flag.
|
||||
///
|
||||
/// The default is automatic based on input.
|
||||
/// The default is automatic based on input. This flag controls the
|
||||
/// behavior of the [`TextDiff::write_unified_diff`] method with regards
|
||||
/// to newlines. When the flag is set to `false` (which is the default)
|
||||
/// then newlines are added. Otherwise the newlines from the source
|
||||
/// sequences are reused.
|
||||
pub fn newline_terminated(&mut self, yes: bool) -> &mut Self {
|
||||
self.newline_terminated = Some(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Creates a diff of lines.
|
||||
///
|
||||
/// This splits the text `old` and `new` into lines preserving newlines
|
||||
/// in the input.
|
||||
pub fn diff_lines<'old, 'new, 'bufs>(
|
||||
&self,
|
||||
old: &'old str,
|
||||
|
|
@ -111,6 +120,35 @@ impl TextDiffConfig {
|
|||
)
|
||||
}
|
||||
|
||||
/// Creates a diff of characters.
///
/// Both `old` and `new` are split with `split_chars`, so every item of the
/// compared sequences is a `&str` slice covering exactly one `char`. The
/// pieces are collected into owned sequences and handed to `self.diff`.
///
/// NOTE(review): the trailing `false` passed to `diff` is presumably the
/// newline termination default for this kind of diff; confirm against the
/// definition of `diff`.
|
||||
pub fn diff_chars<'old, 'new, 'bufs>(
|
||||
&self,
|
||||
old: &'old str,
|
||||
|
||||
new: &'new str,
|
||||
) -> TextDiff<'old, 'new, 'bufs> {
|
||||
self.diff(
|
||||
Cow::Owned(split_chars(old).collect()),
|
||||
Cow::Owned(split_chars(new).collect()),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
/// Creates a diff of graphemes.
|
||||
///
|
||||
/// This requires the `unicode` feature.
///
/// Both `old` and `new` are split with `split_graphemes`, collected into
/// owned sequences and handed to `self.diff`.
///
/// NOTE(review): the trailing `false` passed to `diff` is presumably the
/// newline termination default for this kind of diff; confirm against the
/// definition of `diff`.
|
||||
#[cfg(feature = "unicode")]
|
||||
pub fn diff_graphemes<'old, 'new, 'bufs>(
|
||||
&self,
|
||||
old: &'old str,
|
||||
new: &'new str,
|
||||
) -> TextDiff<'old, 'new, 'bufs> {
|
||||
self.diff(
|
||||
Cow::Owned(split_graphemes(old).collect()),
|
||||
Cow::Owned(split_graphemes(new).collect()),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
/// Creates a diff of arbitrary slices.
|
||||
pub fn diff_slices<'old, 'new, 'bufs>(
|
||||
&self,
|
||||
|
|
@ -222,6 +260,19 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
Self::configure().diff_words(old, new)
|
||||
}
|
||||
|
||||
/// Creates a diff of chars.
///
/// Shortcut for `Self::configure().diff_chars(old, new)`.
///
/// NOTE(review): the `&self` receiver is never read in the body, so this
/// constructor-style method can only be called through an existing
/// `TextDiff` value. Confirm whether the receiver is intentional.
|
||||
pub fn from_chars(&self, old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
||||
Self::configure().diff_chars(old, new)
|
||||
}
|
||||
|
||||
/// Creates a diff of graphemes.
|
||||
///
|
||||
/// This requires the `unicode` feature.
///
/// Shortcut for `Self::configure().diff_graphemes(old, new)`.
///
/// NOTE(review): the `&self` receiver is never read in the body, so this
/// constructor-style method can only be called through an existing
/// `TextDiff` value. Confirm whether the receiver is intentional.
|
||||
#[cfg(feature = "unicode")]
|
||||
pub fn from_graphemes(&self, old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
|
||||
Self::configure().diff_graphemes(old, new)
|
||||
}
|
||||
|
||||
/// Creates a diff of arbitrary slices.
|
||||
pub fn from_slices(
|
||||
&self,
|
||||
|
|
@ -487,6 +538,17 @@ fn split_words(s: &str) -> impl Iterator<Item = &str> {
|
|||
.flatten()
|
||||
}
|
||||
|
||||
/// Splits text into characters.
///
/// Yields one `&str` sub-slice of `s` per `char`, in order, without
/// allocating. `char_indices` supplies the byte offset `i` of each char and
/// `len_utf8` its encoded length, so the range `i..i + c.len_utf8()` always
/// starts and ends on a char boundary.
|
||||
fn split_chars(s: &str) -> impl Iterator<Item = &str> {
|
||||
s.char_indices().map(move |(i, c)| &s[i..i + c.len_utf8()])
|
||||
}
|
||||
|
||||
/// Splits text into graphemes.
///
/// Thin wrapper around `UnicodeSegmentation::graphemes` from the optional
/// `unicode-segmentation` dependency. The `true` argument is that API's
/// `is_extended` flag, i.e. extended grapheme clusters are requested.
/// Only compiled when the `unicode` feature is enabled.
|
||||
#[cfg(feature = "unicode")]
|
||||
fn split_graphemes(s: &str) -> impl Iterator<Item = &str> {
|
||||
unicode_segmentation::UnicodeSegmentation::graphemes(s, true)
|
||||
}
|
||||
|
||||
/// Quick way to get a unified diff as string.
|
||||
pub fn unified_diff<'old, 'new>(
|
||||
alg: Algorithm,
|
||||
|
|
@ -520,6 +582,23 @@ fn test_split_words() {
|
|||
);
|
||||
}
|
||||
|
||||
// Checks that `split_chars` yields one slice per `char`. The input ends with
// a snowflake followed by the variation selector U+FE0F, which are two
// separate chars and therefore must come out as two separate items.
#[test]
|
||||
fn test_split_chars() {
|
||||
assert_eq!(
|
||||
split_chars("abcfö❄️").collect::<Vec<_>>(),
|
||||
vec!["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"]
|
||||
);
|
||||
}
|
||||
|
||||
// Checks that `split_graphemes` keeps the snowflake and its trailing
// variation selector together as a single item, unlike the per-char split
// exercised in `test_split_chars`. Only built with the `unicode` feature.
#[test]
|
||||
#[cfg(feature = "unicode")]
|
||||
fn test_split_graphemes() {
|
||||
assert_eq!(
|
||||
split_graphemes("abcfö❄️").collect::<Vec<_>>(),
|
||||
vec!["a", "b", "c", "f", "ö", "❄️"]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_captured_ops() {
|
||||
let diff = TextDiff::from_lines(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue