Customize unified diff formatting

This commit is contained in:
Armin Ronacher 2021-01-30 20:45:31 +01:00
parent 96bbaf1fdf
commit 94890c52a5
4 changed files with 232 additions and 110 deletions

View file

@ -6,6 +6,7 @@ All notable changes to similar are documented here.
* Add `DiffOp::apply_to_hook` to apply a captured op to a diff hook.
* Added virtual newline handling to `iter_changes`.
* Moved unified diff support into separate module `udiff`.
## 0.4.0

View file

@ -18,6 +18,7 @@
//! trait bounds necessary, as well as a generic interface.
//! * [`text`]: This extends the general diffing functionality to text (and more
//! specifically line) based diff operations.
//! * [`udiff`]: Unified diff functionality.
//!
//! ## Features
//!
@ -32,3 +33,4 @@
//! If the crate is used without default features it's removed.
pub mod algorithms;
pub mod text;
pub mod udiff;

View file

@ -21,7 +21,7 @@
//! # let old_text = "";
//! # let new_text = "";
//! let diff = TextDiff::from_lines(old_text, new_text);
//! let unified_diff = diff.unified_diff(3, Some(("old_file", "new_file")));
//! let unified_diff = diff.unified_diff().header("old_file", "new_file").to_string();
//! ```
//!
//! This is another example that iterates over the actual changes:
@ -54,11 +54,9 @@
use std::borrow::Cow;
use std::cmp::Reverse;
use std::collections::{BinaryHeap, HashMap};
use std::fmt;
use std::io;
use std::ops::Range;
use crate::algorithms::{capture_diff_slices, group_diff_ops, Algorithm, DiffOp, DiffTag};
use crate::udiff::UnifiedDiff;
/// A builder type config for more complex uses of [`TextDiff`].
#[derive(Clone, Debug)]
@ -85,13 +83,13 @@ impl TextDiffConfig {
self
}
/// Changes the newlnine termination flag.
/// Changes the newline termination flag.
///
/// The default is automatic based on input. This flag controls the
/// behavior of the [`TextDiff::write_unified_diff`] method with regards
/// to newlines. When the flag is set to `false` (which is the default)
/// then newlines are added. Otherwise the newlines from the source
/// sequences are reused.
/// behavior of [`TextDiff::iter_changes`] and unified diff generation
/// with regards to newlines. When the flag is set to `false` (which
/// is the default) then newlines are added. Otherwise the newlines
/// from the source sequences are reused.
pub fn newline_terminated(&mut self, yes: bool) -> &mut Self {
self.newline_terminated = Some(yes);
self
@ -255,23 +253,6 @@ const VIRTUAL_NEWLINE_CHANGE: Change<'static> = Change {
value: "\n",
};
impl ChangeTag {
    /// Maps this tag to the marker character that prefixes a line in a
    /// unified diff.
    ///
    /// * `Insert` yields a plus sign (`+`)
    /// * `Delete` yields a minus sign (`-`)
    /// * `Equal` yields a single space (` `)
    pub fn unified_sign(self) -> char {
        match self {
            ChangeTag::Insert => '+',
            ChangeTag::Delete => '-',
            ChangeTag::Equal => ' ',
        }
    }
}
impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
/// Configures a text differ before diffing.
pub fn configure() -> TextDiffConfig {
@ -495,75 +476,9 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
group_diff_ops(self.ops().to_vec(), n)
}
/// Format a unified diff as string.
///
/// This is more or less equivalent to using [`TextDiff::write_unified_diff`] just
/// that a string is produced. Additionally if line diffs are printed
/// a single trailing newline is removed automatically.
///
/// `n` is the number of context lines shown around changes and `header`
/// optionally carries the old and new file names.
pub fn unified_diff(&self, n: usize, header: Option<(&str, &str)>) -> String {
    let mut rv = Vec::<u8>::new();
    // Writing into an in-memory `Vec<u8>` never yields an I/O error.
    self.write_unified_diff(&mut rv, n, header).unwrap();
    if self.newline_terminated && rv.last() == Some(&b'\n') {
        rv.pop();
    }
    // Everything in the buffer was produced by `write!`, which only ever
    // emits UTF-8, so the checked conversion cannot fail. Using it avoids
    // an `unsafe` block for a negligible validation cost.
    String::from_utf8(rv).expect("unified diff output is always valid UTF-8")
}
/// Write a unified diff.
///
/// This takes a writer `w` and the number of context lines `n` which should
/// be shown around changes. Optionally a `header` can be provided which
/// will be written. The header should be two file names.
pub fn write_unified_diff<W: io::Write>(
&self,
mut w: W,
n: usize,
mut header: Option<(&str, &str)>,
) -> Result<(), io::Error> {
struct UnifiedRange(Range<usize>);
impl fmt::Display for UnifiedRange {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut beginning = self.0.start;
let len = self.0.end - self.0.start;
if len == 1 {
write!(f, "{}", beginning)
} else {
if len == 0 {
// empty ranges begin at line just before the range
beginning -= 1;
}
write!(f, "{},{}", beginning, len)
}
}
}
let nl = if self.newline_terminated { "" } else { "\n" };
for group in self.grouped_ops(n) {
if let Some((old_file, new_file)) = header.take() {
writeln!(&mut w, "--- {}", old_file)?;
writeln!(&mut w, "+++ {}", new_file)?;
}
writeln!(
&mut w,
"@@ -{} +{} @@",
UnifiedRange(group[0].old_range()),
UnifiedRange(group[group.len() - 1].new_range()),
)?;
for op in group {
for change in self.iter_changes(&op) {
write!(
&mut w,
"{}{}{}",
change.tag().unified_sign(),
change.value(),
nl
)?;
}
}
}
Ok(())
/// Utility to return a unified diff formatter.
///
/// The returned [`UnifiedDiff`] borrows this diff for `'diff` and is
/// constructed through [`UnifiedDiff::from_text_diff`].
pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
UnifiedDiff::from_text_diff(self)
}
}
@ -698,20 +613,6 @@ impl<'a> QuickSeqRatio<'a> {
}
}
/// Convenience helper that diffs two strings line by line and renders the
/// result as a unified diff string.
///
/// `alg` selects the diffing algorithm, `n` is the number of context lines
/// and `header` optionally names the old and the new file.
pub fn unified_diff<'old, 'new>(
    alg: Algorithm,
    old: &'old str,
    new: &'new str,
    n: usize,
    header: Option<(&str, &str)>,
) -> String {
    let line_diff = TextDiff::configure().algorithm(alg).diff_lines(old, new);
    line_diff.unified_diff(n, header)
}
/// Use the text differ to find `n` close matches.
///
/// `cutoff` defines the threshold which needs to be reached for a word
@ -817,7 +718,11 @@ fn test_unified_diff() {
"Hello World\nsome amazing stuff here\nsome more stuff here\n",
);
assert_eq!(diff.newline_terminated(), true);
insta::assert_snapshot!(&diff.unified_diff(3, Some(("old", "new"))));
insta::assert_snapshot!(&diff
.unified_diff()
.context_radius(3)
.header("old", "new")
.to_string());
}
#[test]

214
src/udiff.rs Normal file
View file

@ -0,0 +1,214 @@
//! This module provides unified diff functionality.
//!
//! This module is only available when the `text` feature is enabled, which
//! is the case by default.
//!
//! ```rust
//! use similar::text::TextDiff;
//! # let old_text = "";
//! # let new_text = "";
//! let text_diff = TextDiff::from_lines(old_text, new_text);
//! print!("{}", text_diff
//! .unified_diff()
//! .context_radius(10)
//! .header("old_file", "new_file"));
//! ```
#![cfg(feature = "text")]
use std::fmt;
use std::ops::Range;
use crate::algorithms::{Algorithm, DiffOp};
use crate::text::{ChangeTag, TextDiff};
/// Represents a range of a unified diff hunk.
///
/// The range is stored as half-open `start..end` indices exactly as passed
/// to [`HunkRange::new`]; the indices are treated as zero-based. The
/// `Display` implementation converts them to the one-based `start,len`
/// notation used in `@@` hunk headers.
#[derive(Copy, Clone, Debug)]
struct HunkRange(usize, usize);

impl HunkRange {
    /// Creates a hunk range from a half-open index range.
    fn new(range: Range<usize>) -> HunkRange {
        HunkRange(range.start, range.end)
    }

    /// Returns the (inclusive) start index of the range.
    fn start(&self) -> usize {
        self.0
    }

    /// Returns the (exclusive) end index of the range.
    fn end(&self) -> usize {
        self.1
    }
}

impl fmt::Display for HunkRange {
    /// Writes the range as `start` when it covers exactly one line and as
    /// `start,len` otherwise.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // line numbers in a unified diff are one-based
        let mut beginning = self.start() + 1;
        // saturate so that an inverted range can never underflow
        let len = self.end().saturating_sub(self.start());
        if len == 1 {
            write!(f, "{}", beginning)
        } else {
            if len == 0 {
                // empty ranges begin at line just before the range
                beginning -= 1;
            }
            write!(f, "{},{}", beginning, len)
        }
    }
}
/// Formats a unified diff hunk header for a group of diff operations.
///
/// The `Display` implementation renders the `@@ -old +new @@` line.
pub struct HunkHeader {
    old_range: HunkRange,
    new_range: HunkRange,
}

impl HunkHeader {
    /// Creates a hunk header from a (non empty) slice of diff ops.
    ///
    /// Both ranges span the whole group: they start where the first
    /// operation starts and end where the last operation ends.
    ///
    /// # Panics
    ///
    /// Panics if `ops` is empty.
    pub fn new(ops: &[DiffOp]) -> HunkHeader {
        let first = &ops[0];
        let last = &ops[ops.len() - 1];
        HunkHeader {
            old_range: HunkRange::new(first.old_range().start..last.old_range().end),
            new_range: HunkRange::new(first.new_range().start..last.new_range().end),
        }
    }
}

impl fmt::Display for HunkHeader {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "@@ -{} +{} @@", &self.old_range, &self.new_range)
    }
}
/// Configurable renderer for unified diffs.
///
/// It borrows a [`TextDiff`] and remembers the context radius and the
/// optional file header to use. Formatting the value with `Display`
/// produces the unified diff.
pub struct UnifiedDiff<'diff, 'old, 'new, 'bufs> {
    diff: &'diff TextDiff<'old, 'new, 'bufs>,
    context_radius: usize,
    header: Option<(String, String)>,
}

impl<'diff, 'old, 'new, 'bufs> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
    /// Builds a formatter for `diff` with a context radius of `5` and no
    /// header.
    pub fn from_text_diff(diff: &'diff TextDiff<'old, 'new, 'bufs>) -> Self {
        Self {
            header: None,
            context_radius: 5,
            diff,
        }
    }

    /// Overrides the context radius (the default is `5`).
    pub fn context_radius(&mut self, n: usize) -> &mut Self {
        self.context_radius = n;
        self
    }

    /// Sets the header of the diff; `a` and `b` are stored as owned copies.
    pub fn header(&mut self, a: &str, b: &str) -> &mut Self {
        let names = (String::from(a), String::from(b));
        self.header = Some(names);
        self
    }

    /// Yields one [`Hunk`] per non-empty group of operations, grouped with
    /// the configured context radius.
    pub fn iter_hunks(&self) -> impl Iterator<Item = Hunk<'diff, 'old, 'new, 'bufs>> {
        let source = self.diff;
        let groups = source.grouped_ops(self.context_radius);
        groups.into_iter().filter_map(move |group| {
            if group.is_empty() {
                None
            } else {
                Some(Hunk::new(group, source))
            }
        })
    }

    /// Like [`UnifiedDiff::header`] but leaves the current header untouched
    /// when `None` is passed.
    fn header_opt(&mut self, header: Option<(&str, &str)>) -> &mut Self {
        match header {
            Some((a, b)) => self.header(a, b),
            None => self,
        }
    }
}
/// One hunk of a unified diff.
///
/// A hunk owns the diff operations it covers and borrows the [`TextDiff`]
/// they belong to. Formatting it with `Display` renders that single hunk.
pub struct Hunk<'diff, 'old, 'new, 'bufs> {
    diff: &'diff TextDiff<'old, 'new, 'bufs>,
    ops: Vec<DiffOp>,
}

impl<'diff, 'old, 'new, 'bufs> Hunk<'diff, 'old, 'new, 'bufs> {
    /// Builds a hunk from the operations `ops` of the text diff `diff`.
    pub fn new(
        ops: Vec<DiffOp>,
        diff: &'diff TextDiff<'old, 'new, 'bufs>,
    ) -> Hunk<'diff, 'old, 'new, 'bufs> {
        Self { ops, diff }
    }

    /// Computes the [`HunkHeader`] for the operations of this hunk.
    pub fn header(&self) -> HunkHeader {
        HunkHeader::new(self.ops())
    }

    /// Borrows the operations that make up this hunk.
    pub fn ops(&self) -> &[DiffOp] {
        self.ops.as_slice()
    }
}
impl<'diff, 'old, 'new, 'bufs> fmt::Display for Hunk<'diff, 'old, 'new, 'bufs> {
    /// Writes the hunk header on its own line, followed by every change of
    /// every operation, each prefixed with its unified diff sign.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "{}", self.header())?;

        // A line break is only appended when the diff is not flagged as
        // newline terminated.
        let terminator = if self.diff.newline_terminated() {
            ""
        } else {
            "\n"
        };

        for op in self.ops() {
            for change in self.diff.iter_changes(op) {
                let sign = match change.tag() {
                    ChangeTag::Insert => '+',
                    ChangeTag::Delete => '-',
                    ChangeTag::Equal => ' ',
                };
                write!(f, "{}{}{}", sign, change.value(), terminator)?;
            }
        }
        Ok(())
    }
}
impl<'diff, 'old, 'new, 'bufs> fmt::Display for UnifiedDiff<'diff, 'old, 'new, 'bufs> {
    /// Renders all hunks in order. The `---`/`+++` header, if one was
    /// configured, is emitted once right before the first hunk, so a diff
    /// without hunks renders as an empty string.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for (index, hunk) in self.iter_hunks().enumerate() {
            if index == 0 {
                if let Some((old_file, new_file)) = &self.header {
                    writeln!(f, "--- {}", old_file)?;
                    writeln!(f, "+++ {}", new_file)?;
                }
            }
            write!(f, "{}", hunk)?;
        }
        Ok(())
    }
}
/// Shortcut that diffs two strings line by line and returns the rendered
/// unified diff.
///
/// `alg` selects the diffing algorithm, `n` is used as the context radius
/// and `header`, when given, supplies the two names written into the
/// `---`/`+++` header.
pub fn unified_diff<'old, 'new>(
    alg: Algorithm,
    old: &'old str,
    new: &'new str,
    n: usize,
    header: Option<(&str, &str)>,
) -> String {
    let text_diff = TextDiff::configure().algorithm(alg).diff_lines(old, new);
    let mut formatter = text_diff.unified_diff();
    formatter.context_radius(n).header_opt(header);
    formatter.to_string()
}