Merge pull request #4 from mitsuhiko/feature/inline-highlighting

Added initial support for inline diff highlighting
This commit is contained in:
Armin Ronacher 2021-01-31 19:37:50 +01:00 committed by GitHub
commit 301e19a658
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 392 additions and 65 deletions

View file

@ -5,11 +5,12 @@ All notable changes to similar are documented here.
## 0.5.0
* Add `DiffOp::apply_to_hook` to apply a captured op to a diff hook.
* Added virtual newline handling to `iter_changes`.
* Added missing newline handling to the `Changes` type.
* Made unified diff support more flexible through the introduction of
the `UnifiedDiff` type.
* Fixed grouped diff operation to return an empty result if the diff
does not show any changes.
* Added inline diff highlighting support.
## 0.4.0

View file

@ -28,6 +28,10 @@ unicode-segmentation = { version = "1.7.1", optional = true }
name = "terminal"
required-features = ["text"]
[[example]]
name = "terminal-inline"
required-features = ["text"]
[[example]]
name = "udiff"
required-features = ["text"]

View file

@ -29,7 +29,7 @@ fn main() {
ChangeTag::Insert => "+",
ChangeTag::Equal => " ",
};
print!("{}{}", sign, change.value());
print!("{}{}", sign, change);
}
}
}

View file

@ -0,0 +1,30 @@
use console::Style;
use similar::text::{ChangeTag, TextDiff};
/// Renders a small line diff to the terminal with inline emphasis.
///
/// Deleted lines are printed in red, inserted lines in green and unchanged
/// lines without colour. Segments flagged as emphasized by the inline diff
/// are additionally underlined.
fn main() {
    let old_text = "Hello World\nThis is the second line.\nThis is the third.";
    let new_text = "Hallo Welt\nThis is the second line.\nThis is life.\nMoar and more";
    let diff = TextDiff::from_lines(old_text, new_text);

    for op in diff.ops() {
        for change in diff.iter_inline_changes(op) {
            let (sign, style) = match change.tag() {
                ChangeTag::Delete => ("-", Style::new().red()),
                ChangeTag::Insert => ("+", Style::new().green()),
                ChangeTag::Equal => (" ", Style::new()),
            };

            // The leading sign column is always bold.
            print!("{}", style.apply_to(sign).bold());

            for &(emphasized, value) in change.values() {
                let styled = style.apply_to(value);
                let styled = if emphasized {
                    styled.underlined()
                } else {
                    styled
                };
                print!("{}", styled);
            }

            // The value did not carry its own line terminator, so finish
            // the terminal line manually.
            if change.is_missing_newline() {
                println!();
            }
        }
    }
}

View file

@ -14,11 +14,7 @@ fn main() {
ChangeTag::Insert => ("+", Style::new().green()),
ChangeTag::Equal => (" ", Style::new()),
};
print!(
"{}{}",
style.apply_to(sign).bold(),
style.apply_to(change.value())
);
print!("{}{}", style.apply_to(sign).bold(), style.apply_to(change),);
}
}
}

View file

@ -9,27 +9,40 @@ use std::ops::Range;
pub enum DiffOp {
/// A segment is equal (see [`DiffHook::equal`])
Equal {
/// The starting index in the old sequence.
old_index: usize,
/// The starting index in the new sequence.
new_index: usize,
/// The length of the segment.
len: usize,
},
/// A segment was deleted (see [`DiffHook::delete`])
Delete {
/// The starting index in the old sequence.
old_index: usize,
/// The length of the old segment.
old_len: usize,
/// The starting index in the new sequence.
new_index: usize,
},
/// A segment was inserted (see [`DiffHook::insert`])
Insert {
/// The starting index in the old sequence.
old_index: usize,
/// The starting index in the new sequence.
new_index: usize,
/// The length of the new segment.
new_len: usize,
},
/// A segment was replaced (see [`DiffHook::replace`])
Replace {
/// The starting index in the old sequence.
old_index: usize,
/// The length of the old segment.
old_len: usize,
/// The starting index in the new sequence.
new_index: usize,
/// The length of the new segment.
new_len: usize,
},
}
@ -37,9 +50,13 @@ pub enum DiffOp {
/// The tag of a diff operation.
///
/// Each variant names one kind of segment a diff operation can describe,
/// without carrying the index or length information of the segment.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)]
pub enum DiffTag {
/// The diff op encodes an equal segment.
Equal,
/// The diff op encodes a deleted segment.
Delete,
/// The diff op encodes an inserted segment.
Insert,
/// The diff op encodes a replaced segment.
Replace,
}

View file

@ -1,6 +1,7 @@
/// A trait for reacting to an edit script from the "old" version to
/// the "new" version.
pub trait DiffHook: Sized {
/// The error produced from the hook methods.
type Error;
/// Called when lines with indices `old_index` (in the old version) and

View file

@ -34,7 +34,9 @@ pub mod patience;
/// An enum representing a diffing algorithm.
///
/// Each variant selects the implementation found in the module of the
/// same name.
#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub enum Algorithm {
/// Picks the Myers algorithm from [`myers`].
Myers,
/// Picks the patience algorithm from [`patience`].
Patience,
}

View file

@ -67,6 +67,7 @@ where
New: Index<usize> + ?Sized,
New::Output: PartialEq<Old::Output>,
{
#![allow(clippy::many_single_char_names)]
if old_end > old_current && new_end > new_current {
let old_span = old_end - old_current;
let new_span = new_end - new_current;

View file

@ -18,7 +18,7 @@
//! ChangeTag::Insert => "+",
//! ChangeTag::Equal => " ",
//! };
//! print!("{}{}", sign, change.value());
//! print!("{}{}", sign, change);
//! }
//! }
//! # }
@ -43,5 +43,6 @@
//! is particularly useful when working with text containing emojis.
//! * `text`: this feature is enabled by default and enables the [`text`] module.
//! If the crate is used without default features it's removed.
#![warn(missing_docs)]
pub mod algorithms;
pub mod text;

187
src/text/inline.rs Normal file
View file

@ -0,0 +1,187 @@
use std::{fmt, iter};
use crate::algorithms::{Algorithm, DiffOp, DiffTag};
use crate::text::{Change, ChangeTag, TextDiff};
use super::split_chars;
use std::ops::Range;
/// Maps ranges of segment indices back to slices of the original string.
///
/// `seq` holds consecutive pieces of `value`; their byte lengths are used
/// to translate a range of piece indices into a byte range of `value`.
struct MultiIndex<'a, 's> {
    seq: &'a [&'s str],
    value: &'s str,
}

impl<'a, 's> MultiIndex<'a, 's> {
    /// Creates an index over `value` that is split into the pieces in `seq`.
    pub fn new(seq: &'a [&'s str], value: &'s str) -> MultiIndex<'a, 's> {
        MultiIndex { seq, value }
    }

    /// Returns the part of the value covered by the pieces in `rng`.
    ///
    /// Panics if `rng` is not a valid range into the piece sequence.
    pub fn get_slice(&self, rng: Range<usize>) -> &'s str {
        let byte_len = |pieces: &[&str]| pieces.iter().map(|piece| piece.len()).sum::<usize>();

        // Everything before the range determines the starting byte offset,
        // the pieces inside the range determine how far the slice extends.
        let start = byte_len(&self.seq[..rng.start]);
        let end = start + byte_len(&self.seq[rng.start..rng.end]);
        &self.value[start..end]
    }
}
/// A textual change that additionally carries inline highlight information.
///
/// This mirrors [`Change`], but instead of a single value it holds a list
/// of segments, each paired with a flag telling whether the segment should
/// be emphasized.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Ord, PartialOrd)]
pub struct InlineChange<'s> {
    tag: ChangeTag,
    old_index: Option<usize>,
    new_index: Option<usize>,
    values: Vec<(bool, &'s str)>,
    missing_newline: bool,
}

impl<'s> InlineChange<'s> {
    /// Returns the tag of this change.
    pub fn tag(&self) -> ChangeTag {
        self.tag
    }

    /// Returns the index in the old sequence, if there is one.
    pub fn old_index(&self) -> Option<usize> {
        self.old_index
    }

    /// Returns the index in the new sequence, if there is one.
    pub fn new_index(&self) -> Option<usize> {
        self.new_index
    }

    /// Returns the segments of this change.
    ///
    /// Every item is an `(emphasized, text)` pair.
    pub fn values(&self) -> &[(bool, &'s str)] {
        self.values.as_slice()
    }

    /// Returns `true` if this change needs to be followed up by a
    /// missing newline.
    pub fn is_missing_newline(&self) -> bool {
        self.missing_newline
    }
}
/// Converts a plain [`Change`] into an [`InlineChange`] without emphasis.
///
/// The whole value of the change becomes a single non-emphasized segment;
/// tag, indices and the missing newline flag are carried over unchanged.
impl<'s> From<Change<'s>> for InlineChange<'s> {
    fn from(change: Change<'s>) -> InlineChange<'s> {
        InlineChange {
            tag: change.tag(),
            old_index: change.old_index(),
            // Take the new-side index from the change's own new index rather
            // than duplicating the old one.
            new_index: change.new_index(),
            values: vec![(false, change.value())],
            missing_newline: change.missing_newline(),
        }
    }
}
/// Formats the change as plain text.
///
/// Emphasized segments of deletions are wrapped in `-`, emphasized segments
/// of insertions in `+`; all other segments are written as they are. A
/// newline is appended when the change is flagged as missing one.
impl<'s> fmt::Display for InlineChange<'s> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for &(emphasized, value) in &self.values {
            let marker = if emphasized {
                match self.tag {
                    ChangeTag::Equal => "",
                    ChangeTag::Delete => "-",
                    ChangeTag::Insert => "+",
                }
            } else {
                ""
            };
            write!(f, "{}{}{}", marker, value, marker)?;
        }

        if !self.missing_newline {
            return Ok(());
        }
        writeln!(f)
    }
}
pub(crate) fn iter_inline_changes<'diff>(
diff: &'diff TextDiff,
op: &DiffOp,
) -> impl Iterator<Item = InlineChange<'diff>> {
let mut change_iter = diff.iter_changes(op).peekable();
let mut skip_next = false;
let newline_terminated = diff.newline_terminated;
iter::from_fn(move || {
if skip_next {
change_iter.next();
skip_next = false;
}
if let Some(change) = change_iter.next() {
let next_change = change_iter.peek();
match (change.tag, next_change.map(|x| x.tag())) {
(ChangeTag::Delete, Some(ChangeTag::Insert)) => {
let old_value = change.value();
let new_value = next_change.unwrap().value();
let old_chars = split_chars(&old_value).collect::<Vec<_>>();
let new_chars = split_chars(&new_value).collect::<Vec<_>>();
let old_mindex = MultiIndex::new(&old_chars, old_value);
let new_mindex = MultiIndex::new(&new_chars, new_value);
let inline_diff = TextDiff::configure()
.algorithm(Algorithm::Patience)
.diff_slices(&old_chars, &new_chars);
if inline_diff.ratio() < 0.5 {
return Some(None.into_iter().chain(Some(change.into()).into_iter()));
}
// skip the next element as we handle it here
skip_next = true;
let mut old_values = vec![];
let mut new_values = vec![];
for op in inline_diff.ops() {
match op.tag() {
DiffTag::Equal => {
old_values.push((false, old_mindex.get_slice(op.old_range())));
new_values.push((false, old_mindex.get_slice(op.old_range())));
}
DiffTag::Delete => {
old_values.push((true, old_mindex.get_slice(op.old_range())));
}
DiffTag::Insert => {
new_values.push((true, new_mindex.get_slice(op.new_range())));
}
DiffTag::Replace => {
old_values.push((true, old_mindex.get_slice(op.old_range())));
new_values.push((true, new_mindex.get_slice(op.new_range())));
}
}
}
Some(
Some(InlineChange {
tag: ChangeTag::Delete,
old_index: change.old_index(),
new_index: change.new_index(),
values: old_values,
missing_newline: newline_terminated
&& !old_value.ends_with(&['\r', '\n'][..]),
})
.into_iter()
.chain(
Some(InlineChange {
tag: ChangeTag::Insert,
old_index: change.old_index(),
new_index: change.new_index(),
values: new_values,
missing_newline: newline_terminated
&& !new_value.ends_with(&['\r', '\n'][..]),
})
.into_iter(),
),
)
}
_ => Some(None.into_iter().chain(Some(change.into()).into_iter())),
}
} else {
None
}
})
.flatten()
}

View file

@ -50,12 +50,34 @@
//!
//! Because the [`TextDiff::grouped_ops`] method can isolate clusters of changes
//! this even works for very long files if paired with this method.
//!
//! ## Trailing Newlines
//!
//! When working with line diffs (and unified diffs in general) there are two
//! "philosophies" for how to treat lines. One is to diff lines without their
//! newline character, the other is to diff them with the newline character
//! included. Typically the latter is done because text files do not _have_ to
//! end in a newline character. As a result there is a difference between
//! `foo\n` and `foo` as far as diffs are concerned.
//!
//! In similar this is handled on the [`Change`] or [`InlineChange`] level. If
//! a diff was created via [`TextDiff::from_lines`] the text diffing system is
//! instructed to check if there are missing newlines encountered. If that is
//! the case the [`Change`] object will return true from the
//! [`Change::missing_newline`] method so the caller knows to handle this by
//! either rendering a virtual newline at that position or to indicate it in
//! different ways. For instance the unified diff code will render the special
//! `\ No newline at end of file` marker.
#![cfg(feature = "text")]
use std::borrow::Cow;
use std::cmp::Reverse;
use std::collections::{BinaryHeap, HashMap};
use std::fmt;
mod inline;
mod udiff;
pub use self::inline::*;
pub use self::udiff::*;
use crate::algorithms::{capture_diff_slices, group_diff_ops, Algorithm, DiffOp, DiffTag};
@ -193,8 +215,11 @@ pub struct TextDiff<'old, 'new, 'bufs> {
/// The tag of a change.
///
/// Tells whether the text of a change is unchanged, was deleted or was
/// inserted.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)]
pub enum ChangeTag {
/// The change indicates equality (not a change).
Equal,
/// The change indicates deleted text.
Delete,
/// The change indicates inserted text.
Insert,
}
@ -209,6 +234,18 @@ pub struct Change<'s> {
old_index: Option<usize>,
new_index: Option<usize>,
value: &'s str,
missing_newline: bool,
}
/// Formats the value of the change, followed by a newline when the change
/// is flagged as missing one.
impl<'s> fmt::Display for Change<'s> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let terminator = if self.missing_newline { "\n" } else { "" };
        write!(f, "{}{}", self.value(), terminator)
    }
}
impl<'s> Change<'s> {
@ -232,29 +269,16 @@ impl<'s> Change<'s> {
self.value
}
/// Returns `true` for virtual changes.
/// Returns `true` if this change needs to be followed up by a
/// missing newline.
///
/// Virtual changes are changes that do not exist in either diff but are
/// necessary for a consistent user experience. This currently only
/// applies to changes related to newline handling. If lines are passed
/// to the [`TextDiff`] the [`TextDiff::newline_terminated`] flag is set
/// in which case newlines of the input are included in the changes. However
/// if the trailing newline is missing it would mess up processing greatly.
/// Because of this a trailing virtual newline is automatically added for a
/// more consistent user experience. This virtual newline can be detected
/// by explicitly checking for this flag.
pub fn is_virtual(&self) -> bool {
self.old_index.is_none() && self.new_index.is_none()
/// The [`std::fmt::Display`] implementation of [`Change`] will automatically
/// insert a newline after the value if this is true.
pub fn missing_newline(&self) -> bool {
self.missing_newline
}
}
const VIRTUAL_NEWLINE_CHANGE: Change<'static> = Change {
tag: ChangeTag::Equal,
old_index: None,
new_index: None,
value: "\n",
};
impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
/// Configures a text differ before diffing.
pub fn configure() -> TextDiffConfig {
@ -361,11 +385,6 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
/// ways in which a change could be encoded (insert/delete vs replace), look
/// up the value from the appropriate slice and also handle correct index
/// handling.
///
/// In addition it has some custom handling to insert "virtual" newlines
/// for diffs where [`TextDiff::newline_terminated`] is `true` but the
/// diff does not end in newlines in the right places. For more information
/// see [`Change::is_virtual`].
pub fn iter_changes(&self, op: &DiffOp) -> impl Iterator<Item = Change> {
let newline_terminated = self.newline_terminated;
let (tag, old_range, new_range) = op.as_tag_tuple();
@ -374,21 +393,6 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
let mut old_slices = &self.old_slices()[op.old_range()];
let mut new_slices = &self.new_slices()[op.new_range()];
// figure out if a virtual newline has to be inserted
let mut virtual_newline = if newline_terminated {
let last_element = match tag {
DiffTag::Equal | DiffTag::Delete | DiffTag::Replace => old_slices.last(),
DiffTag::Insert => new_slices.last(),
};
if !last_element.map_or(false, |x| x.ends_with(&['\r', '\n'][..])) {
Some(VIRTUAL_NEWLINE_CHANGE)
} else {
None
}
} else {
None
};
std::iter::from_fn(move || match tag {
DiffTag::Equal => {
if let Some((&first, rest)) = old_slices.split_first() {
@ -400,9 +404,12 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
old_index: Some(old_index - 1),
new_index: Some(new_index - 1),
value: first,
missing_newline: newline_terminated
&& rest.is_empty()
&& !first.ends_with(&['\r', '\n'][..]),
})
} else {
virtual_newline.take()
None
}
}
DiffTag::Delete => {
@ -414,9 +421,12 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
old_index: Some(old_index - 1),
new_index: None,
value: first,
missing_newline: newline_terminated
&& rest.is_empty()
&& !first.ends_with(&['\r', '\n'][..]),
})
} else {
virtual_newline.take()
None
}
}
DiffTag::Insert => {
@ -428,9 +438,12 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
old_index: None,
new_index: Some(new_index - 1),
value: first,
missing_newline: newline_terminated
&& rest.is_empty()
&& !first.ends_with(&['\r', '\n'][..]),
})
} else {
virtual_newline.take()
None
}
}
DiffTag::Replace => {
@ -442,22 +455,21 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
old_index: Some(old_index - 1),
new_index: None,
value: first,
missing_newline: newline_terminated
&& rest.is_empty()
&& !first.ends_with(&['\r', '\n'][..]),
})
} else if let Some(virtual_newline) = virtual_newline.take() {
Some(virtual_newline)
} else if let Some((&first, rest)) = new_slices.split_first() {
new_slices = rest;
new_index += 1;
// check for another virtual newline
if newline_terminated && rest.is_empty() && !first.ends_with(&['\r', '\n'][..])
{
virtual_newline = Some(VIRTUAL_NEWLINE_CHANGE);
}
Some(Change {
tag: ChangeTag::Insert,
old_index: None,
new_index: Some(new_index - 1),
value: first,
missing_newline: newline_terminated
&& rest.is_empty()
&& !first.ends_with(&['\r', '\n'][..]),
})
} else {
None
@ -466,6 +478,17 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
})
}
/// Iterates over the changes the op expands to with inline emphasis.
///
/// This is very similar to [`TextDiff::iter_changes`] but it performs a
/// second level per-character diff on adjacent line replacements. The exact
/// behavior of this function with regards to how it detects those
/// inline changes is currently not defined and will likely change
/// over time.
pub fn iter_inline_changes(&self, op: &DiffOp) -> impl Iterator<Item = InlineChange> {
iter_inline_changes(self, op)
}
/// Returns the captured diff ops.
pub fn ops(&self) -> &[DiffOp] {
&self.ops

View file

@ -12,6 +12,7 @@ expression: "&changes"
0,
),
value: "Hello World\n",
missing_newline: false,
},
Change {
tag: Delete,
@ -20,6 +21,7 @@ expression: "&changes"
),
new_index: None,
value: "some stuff here\n",
missing_newline: false,
},
Change {
tag: Insert,
@ -28,6 +30,7 @@ expression: "&changes"
1,
),
value: "some amazing stuff here\n",
missing_newline: false,
},
Change {
tag: Equal,
@ -38,5 +41,6 @@ expression: "&changes"
2,
),
value: "some more stuff here\n",
missing_newline: false,
},
]

View file

@ -0,0 +1,10 @@
---
source: src/text/udiff.rs
expression: "&diff.unified_diff().missing_newline_hint(false).header(\"a.txt\",\n \"b.txt\").to_string()"
---
--- a.txt
+++ b.txt
@@ -0 +0 @@
-a
+b

View file

@ -0,0 +1,11 @@
---
source: src/text/udiff.rs
expression: "&diff.unified_diff().header(\"a.txt\", \"b.txt\").to_string()"
---
--- a.txt
+++ b.txt
@@ -0 +0 @@
-a
+b
\ No newline at end of file

View file

@ -12,6 +12,7 @@ expression: "&changes"
0,
),
value: "a\n",
missing_newline: false,
},
Change {
tag: Delete,
@ -20,12 +21,7 @@ expression: "&changes"
),
new_index: None,
value: "b",
},
Change {
tag: Equal,
old_index: None,
new_index: None,
value: "\n",
missing_newline: true,
},
Change {
tag: Insert,
@ -34,5 +30,6 @@ expression: "&changes"
1,
),
value: "c\n",
missing_newline: false,
},
]

View file

@ -81,6 +81,7 @@ impl fmt::Display for UnifiedHunkHeader {
pub struct UnifiedDiff<'diff, 'old, 'new, 'bufs> {
diff: &'diff TextDiff<'old, 'new, 'bufs>,
context_radius: usize,
missing_newline_hint: bool,
header: Option<(String, String)>,
}
@ -90,6 +91,7 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
UnifiedDiff {
diff,
context_radius: 3,
missing_newline_hint: true,
header: None,
}
}
@ -114,14 +116,25 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
self
}
/// Controls the missing newline hint.
///
/// By default a special `\ No newline at end of file` marker is added to
/// the output when a file is not terminated with a final newline. This can
/// be disabled with this flag.
pub fn missing_newline_hint(&mut self, yes: bool) -> &mut Self {
self.missing_newline_hint = yes;
self
}
/// Iterates over all hunks as configured.
pub fn iter_hunks(&self) -> impl Iterator<Item = UnifiedDiffHunk<'diff, 'old, 'new, 'bufs>> {
let diff = self.diff;
let missing_newline_hint = self.missing_newline_hint;
self.diff
.grouped_ops(self.context_radius)
.into_iter()
.filter(|ops| !ops.is_empty())
.map(move |ops| UnifiedDiffHunk::new(ops, diff))
.map(move |ops| UnifiedDiffHunk::new(ops, diff, missing_newline_hint))
}
fn header_opt(&mut self, header: Option<(&str, &str)>) -> &mut Self {
@ -138,6 +151,7 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
pub struct UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
diff: &'diff TextDiff<'old, 'new, 'bufs>,
ops: Vec<DiffOp>,
missing_newline_hint: bool,
}
impl<'diff, 'old, 'new, 'bufs> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
@ -145,8 +159,13 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
pub fn new(
ops: Vec<DiffOp>,
diff: &'diff TextDiff<'old, 'new, 'bufs>,
missing_newline_hint: bool,
) -> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
UnifiedDiffHunk { diff, ops }
UnifiedDiffHunk {
diff,
ops,
missing_newline_hint,
}
}
/// Returns the header for the hunk.
@ -159,6 +178,11 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
&self.ops
}
/// Returns the value of the `missing_newline_hint` flag.
pub fn missing_newline_hint(&self) -> bool {
self.missing_newline_hint
}
/// Iterates over all changes in a hunk.
pub fn iter_changes(&self) -> impl Iterator<Item = Change<'_>> + '_ {
// unclear why this needs Box::new here. It seems to infer some really
@ -195,6 +219,13 @@ impl<'diff, 'old, 'new, 'bufs> fmt::Display for UnifiedDiffHunk<'diff, 'old, 'ne
change.value(),
nl
)?;
if change.missing_newline() {
if self.missing_newline_hint {
writeln!(f, "\n\\ No newline at end of file")?;
} else {
writeln!(f)?;
}
}
}
Ok(())
}
@ -247,3 +278,14 @@ fn test_empty_unified_diff() {
let diff = TextDiff::from_lines("abc", "abc");
assert_eq!(diff.unified_diff().header("a.txt", "b.txt").to_string(), "");
}
#[test]
fn test_unified_diff_newline_hint() {
let diff = TextDiff::from_lines("a\n", "b");
insta::assert_snapshot!(&diff.unified_diff().header("a.txt", "b.txt").to_string());
insta::assert_snapshot!(&diff
.unified_diff()
.missing_newline_hint(false)
.header("a.txt", "b.txt")
.to_string());
}