From 94890c52a5885f38c18df173ba9b07d9077bd7b5 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Sat, 30 Jan 2021 20:45:31 +0100 Subject: [PATCH] Customize unified diff formatting --- CHANGELOG.md | 1 + src/lib.rs | 2 + src/text.rs | 125 ++++-------------------------- src/udiff.rs | 214 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 232 insertions(+), 110 deletions(-) create mode 100644 src/udiff.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index fad332b..6ef5ff0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ All notable changes to similar are documented here. * Add `DiffOp::apply_to_hook` to apply a captured op to a diff hook. * Added virtual newline handling to `iter_changes`. +* Moved unified diff support into separate module `udiff`. ## 0.4.0 diff --git a/src/lib.rs b/src/lib.rs index 1c33e6c..9669d69 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,6 +18,7 @@ //! trait bounds necessary, as well as a generic interface. //! * [`text`]: This extends the general diffing functionality to text (and more //! specifically line) based diff operations. +//! * [`udiff`]: Unified diff functionality. //! //! ## Features //! @@ -32,3 +33,4 @@ //! If the crate is used without default features it's removed. pub mod algorithms; pub mod text; +pub mod udiff; diff --git a/src/text.rs b/src/text.rs index c949c61..94e8772 100644 --- a/src/text.rs +++ b/src/text.rs @@ -21,7 +21,7 @@ //! # let old_text = ""; //! # let new_text = ""; //! let diff = TextDiff::from_lines(old_text, new_text); -//! let unified_diff = diff.unified_diff(3, Some(("old_file", "new_file"))); +//! let unified_diff = diff.unified_diff().header("old_file", "new_file").to_string(); //! ``` //! //! This is another example that iterates over the actual changes: @@ -54,11 +54,9 @@ use std::borrow::Cow; use std::cmp::Reverse; use std::collections::{BinaryHeap, HashMap}; -use std::fmt; -use std::io; -use std::ops::Range; use crate::algorithms::{capture_diff_slices, group_diff_ops, Algorithm, DiffOp, DiffTag}; +use crate::udiff::UnifiedDiff; /// A builder type config for more complex uses of [`TextDiff`]. #[derive(Clone, Debug)] @@ -85,13 +83,13 @@ impl TextDiffConfig { self } - /// Changes the newlnine termination flag. + /// Changes the newline termination flag. /// /// The default is automatic based on input. This flag controls the - /// behavior of the [`TextDiff::write_unified_diff`] method with regards - /// to newlines. When the flag is set to `false` (which is the default) - /// then newlines are added. Otherwise the newlines from the source - /// sequences are reused. + /// behavior of [`TextDiff::iter_changes`] and unified diff generation + /// with regards to newlines. When the flag is set to `false` (which + /// is the default) then newlines are added. Otherwise the newlines + /// from the source sequences are reused. pub fn newline_terminated(&mut self, yes: bool) -> &mut Self { self.newline_terminated = Some(yes); self @@ -255,23 +253,6 @@ const VIRTUAL_NEWLINE_CHANGE: Change<'static> = Change { value: "\n", }; -impl ChangeTag { - /// Returns the unified sign of this change. - /// - /// This is the prefix rendered into a unified diff: - /// - /// * `Equal`: an empty space (` `) - /// * `Delete: a minus sign (`-`) - /// * `Insert: a plus sign (`+`) - pub fn unified_sign(self) -> char { - match self { - ChangeTag::Equal => ' ', - ChangeTag::Delete => '-', - ChangeTag::Insert => '+', - } - } -} - impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> { /// Configures a text differ before diffing. pub fn configure() -> TextDiffConfig { @@ -495,75 +476,9 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> { group_diff_ops(self.ops().to_vec(), n) } - /// Format a unified diff as string. - /// - /// This is more or less equivalent to using [`TextDiff::write_unified_diff`] just - /// that a string is produced. Additionally if line diffs are printed - /// a single trailing newline is removed automatically. - pub fn unified_diff(&self, n: usize, header: Option<(&str, &str)>) -> String { - let mut rv = Vec::::new(); - self.write_unified_diff(&mut rv, n, header).unwrap(); - if self.newline_terminated && rv.last() == Some(&b'\n') { - rv.truncate(rv.len() - 1); - } - unsafe { String::from_utf8_unchecked(rv) } - } - - /// Write a unified diff. - /// - /// This takes a writer `w` and the number of context lines `n` which should - /// be shown around changes. Optionally a `header` can be provided which - /// will be written. The header should be two file names. - pub fn write_unified_diff( - &self, - mut w: W, - n: usize, - mut header: Option<(&str, &str)>, - ) -> Result<(), io::Error> { - struct UnifiedRange(Range); - - impl fmt::Display for UnifiedRange { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let mut beginning = self.0.start; - let len = self.0.end - self.0.start; - if len == 1 { - write!(f, "{}", beginning) - } else { - if len == 0 { - // empty ranges begin at line just before the range - beginning -= 1; - } - write!(f, "{},{}", beginning, len) - } - } - } - - let nl = if self.newline_terminated { "" } else { "\n" }; - - for group in self.grouped_ops(n) { - if let Some((old_file, new_file)) = header.take() { - writeln!(&mut w, "--- {}", old_file)?; - writeln!(&mut w, "+++ {}", new_file)?; - } - writeln!( - &mut w, - "@@ -{} +{} @@", - UnifiedRange(group[0].old_range()), - UnifiedRange(group[group.len() - 1].new_range()), - )?; - for op in group { - for change in self.iter_changes(&op) { - write!( - &mut w, - "{}{}{}", - change.tag().unified_sign(), - change.value(), - nl - )?; - } - } - } - Ok(()) + /// Utility to return a unified diff formatter. + pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs> { + UnifiedDiff::from_text_diff(self) } } @@ -698,20 +613,6 @@ impl<'a> QuickSeqRatio<'a> { } } -/// Quick way to get a unified diff as string. -pub fn unified_diff<'old, 'new>( - alg: Algorithm, - old: &'old str, - new: &'new str, - n: usize, - header: Option<(&str, &str)>, -) -> String { - TextDiff::configure() - .algorithm(alg) - .diff_lines(old, new) - .unified_diff(n, header) -} - /// Use the text differ to find `n` close matches. /// /// `cutoff` defines the threshold which needs to be reached for a word @@ -817,7 +718,11 @@ fn test_unified_diff() { "Hello World\nsome amazing stuff here\nsome more stuff here\n", ); assert_eq!(diff.newline_terminated(), true); - insta::assert_snapshot!(&diff.unified_diff(3, Some(("old", "new")))); + insta::assert_snapshot!(&diff + .unified_diff() + .context_radius(3) + .header("old", "new") + .to_string()); } #[test] diff --git a/src/udiff.rs b/src/udiff.rs new file mode 100644 index 0000000..26847d7 --- /dev/null +++ b/src/udiff.rs @@ -0,0 +1,214 @@ +//! This module provides unified diff functionality. +//! +//! This module is available for as long as the `text` feature is enabled which +//! is enabled by default. +//! +//! ```rust +//! use similar::text::TextDiff; +//! # let old_text = ""; +//! # let new_text = ""; +//! let text_diff = TextDiff::from_lines(old_text, new_text); +//! print!("{}", text_diff +//! .unified_diff() +//! .context_radius(10) +//! .header("old_file", "new_file")); +//! ``` +#![cfg(feature = "text")] + +use std::fmt; +use std::ops::Range; + +use crate::algorithms::{Algorithm, DiffOp}; +use crate::text::{ChangeTag, TextDiff}; + +/// Represents a range of a unified diff hunk. +#[derive(Copy, Clone, Debug)] +struct HunkRange(usize, usize); + +impl HunkRange { + fn new(range: Range) -> HunkRange { + HunkRange(range.start, range.end) + } + + fn start(&self) -> usize { + self.0 + } + + fn end(&self) -> usize { + self.1 + } +} + +impl fmt::Display for HunkRange { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut beginning = self.start(); + let len = self.end() - self.start(); + if len == 1 { + write!(f, "{}", beginning) + } else { + if len == 0 { + // empty ranges begin at line just before the range + beginning -= 1; + } + write!(f, "{},{}", beginning, len) + } + } +} + +/// Formats a unified diff hunk header for a group of diff operations. +pub struct HunkHeader { + old_range: HunkRange, + new_range: HunkRange, +} + +impl HunkHeader { + /// Creates a hunk header from a (non empty) slice of diff ops. + pub fn new(ops: &[DiffOp]) -> HunkHeader { + HunkHeader { + old_range: HunkRange::new(ops[0].old_range()), + new_range: HunkRange::new(ops[ops.len() - 1].new_range()), + } + } +} + +impl fmt::Display for HunkHeader { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "@@ -{} +{} @@", &self.old_range, &self.new_range) + } +} + +/// A unified diff formatter. +/// +/// The `Display` implementation renders a unified diff. +pub struct UnifiedDiff<'diff, 'old, 'new, 'bufs> { + diff: &'diff TextDiff<'old, 'new, 'bufs>, + context_radius: usize, + header: Option<(String, String)>, +} + +impl<'diff, 'old, 'new, 'bufs> UnifiedDiff<'diff, 'old, 'new, 'bufs> { + /// Creates a formatter from a text diff object. + pub fn from_text_diff(diff: &'diff TextDiff<'old, 'new, 'bufs>) -> Self { + UnifiedDiff { + diff, + context_radius: 5, + header: None, + } + } + + /// Changes the context radius. Defaults to `5`. + pub fn context_radius(&mut self, n: usize) -> &mut Self { + self.context_radius = n; + self + } + + /// Sets a header to the diff. + pub fn header(&mut self, a: &str, b: &str) -> &mut Self { + self.header = Some((a.to_string(), b.to_string())); + self + } + + /// Iterates over all hunks as configured. + pub fn iter_hunks(&self) -> impl Iterator> { + let diff = self.diff; + self.diff + .grouped_ops(self.context_radius) + .into_iter() + .filter(|ops| !ops.is_empty()) + .map(move |ops| Hunk::new(ops, diff)) + } + + fn header_opt(&mut self, header: Option<(&str, &str)>) -> &mut Self { + if let Some((a, b)) = header { + self.header(a, b); + } + self + } +} + +/// Represents a single hunk in a unified diff. +/// +/// When formatted with `Display` this renders out a single unified diff's +/// hunk. +pub struct Hunk<'diff, 'old, 'new, 'bufs> { + diff: &'diff TextDiff<'old, 'new, 'bufs>, + ops: Vec, +} + +impl<'diff, 'old, 'new, 'bufs> Hunk<'diff, 'old, 'new, 'bufs> { + /// Creates a new hunk for some operations. + pub fn new( + ops: Vec, + diff: &'diff TextDiff<'old, 'new, 'bufs>, + ) -> Hunk<'diff, 'old, 'new, 'bufs> { + Hunk { diff, ops } + } + + /// Returns the header for the hunk. + pub fn header(&self) -> HunkHeader { + HunkHeader::new(&self.ops) + } + + /// Returns all operations in the hunk. + pub fn ops(&self) -> &[DiffOp] { + &self.ops + } +} + +impl<'diff, 'old, 'new, 'bufs> fmt::Display for Hunk<'diff, 'old, 'new, 'bufs> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let nl = if self.diff.newline_terminated() { + "" + } else { + "\n" + }; + writeln!(f, "{}", self.header())?; + for op in self.ops() { + for change in self.diff.iter_changes(&op) { + write!( + f, + "{}{}{}", + match change.tag() { + ChangeTag::Equal => ' ', + ChangeTag::Delete => '-', + ChangeTag::Insert => '+', + }, + change.value(), + nl + )?; + } + } + Ok(()) + } +} + +impl<'diff, 'old, 'new, 'bufs> fmt::Display for UnifiedDiff<'diff, 'old, 'new, 'bufs> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut header = self.header.as_ref(); + for hunk in self.iter_hunks() { + if let Some((old_file, new_file)) = header.take() { + writeln!(f, "--- {}", old_file)?; + writeln!(f, "+++ {}", new_file)?; + } + write!(f, "{}", hunk)?; + } + Ok(()) + } +} + +/// Quick way to get a unified diff as string. +pub fn unified_diff<'old, 'new>( + alg: Algorithm, + old: &'old str, + new: &'new str, + n: usize, + header: Option<(&str, &str)>, +) -> String { + TextDiff::configure() + .algorithm(alg) + .diff_lines(old, new) + .unified_diff() + .context_radius(n) + .header_opt(header) + .to_string() +}