Move all text functionality toplevel
This commit is contained in:
parent
de8d82ab63
commit
844769ae19
10 changed files with 96 additions and 118 deletions
|
|
@ -1,90 +1,4 @@
|
|||
//! Text diffing utilities.
|
||||
//!
|
||||
//! This provides helpful utilities for text (and more specifically line) diff
|
||||
//! operations. The main type you want to work with is [`TextDiff`] which
|
||||
//! uses the underlying diff algorithms to expose a convenient API to work with
|
||||
//! texts.
|
||||
//!
|
||||
//! It can produce a unified diff and also lets you iterate over the changeset
|
||||
//! directly if you want.
|
||||
//!
|
||||
//! Text diffing is available by default but can be disabled by turning off the
|
||||
//! default features. The feature to enable to get it back is `text`.
|
||||
//!
|
||||
//! # Examples
|
||||
//!
|
||||
//! A super simple example for how to generate a unified diff with three lines
|
||||
//! of context around the changes:
|
||||
//!
|
||||
//! ```rust
|
||||
//! # use similar::text::TextDiff;
|
||||
//! # let old_text = "";
|
||||
//! # let new_text = "";
|
||||
//! let diff = TextDiff::from_lines(old_text, new_text);
|
||||
//! let unified_diff = diff.unified_diff().header("old_file", "new_file").to_string();
|
||||
//! ```
|
||||
//!
|
||||
//! This is another example that iterates over the actual changes:
|
||||
//!
|
||||
//! ```rust
|
||||
//! # use similar::text::TextDiff;
|
||||
//! # let old_text = "";
|
||||
//! # let new_text = "";
|
||||
//! let diff = TextDiff::from_lines(old_text, new_text);
|
||||
//! for op in diff.ops() {
|
||||
//! for change in diff.iter_changes(op) {
|
||||
//! println!("{:?}", change);
|
||||
//! }
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! # Ops vs Changes
|
||||
//!
|
||||
//! Because very commonly two compared sequences will largely match this module
|
||||
//! splits its functionality into two layers. The first is inherited from the
|
||||
//! general [`algorithms`](crate::algorithms) module: changes are encoded as
|
||||
//! [diff operations](crate::DiffOp). These are ranges of the
|
||||
//! differences by index in the source sequence. Because this can be cumbersome
|
||||
//! to work with a separate method [`TextDiff::iter_changes`] is provided which
|
||||
//! expands all the changes on an item by item level encoded in an operation.
|
||||
//!
|
||||
//! Because the [`TextDiff::grouped_ops`] method can isolate clusters of changes
|
||||
//! this even works for very long files if paired with this method.
|
||||
//!
|
||||
//! # Trailing Newlines
|
||||
//!
|
||||
//! When working with line diffs (and unified diffs in general) there are two
|
||||
//! "philosophies" to look at lines. One is to diff lines without their newline
|
||||
//! character, the other is to diff with the newline character. Typically the
|
||||
//! latter is done because text files do not _have_ to end in a newline character.
|
||||
//! As a result there is a difference between `foo\n` and `foo` as far as diffs
|
||||
//! are concerned.
|
||||
//!
|
||||
//! In similar this is handled on the [`Change`] or [`InlineChange`] level. If
|
||||
//! a diff was created via [`TextDiff::from_lines`] the text diffing system is
|
||||
//! instructed to check if there are missing newlines encountered. If that is
|
||||
//! the case the [`Change`] object will return true from the
|
||||
//! [`Change::missing_newline`] method so the caller knows to handle this by
|
||||
//! either rendering a virtual newline at that position or to indicate it in
|
||||
//! different ways. For instance the unified diff code will render the special
|
||||
//! `\ No newline at end of file` marker.
|
||||
//!
|
||||
//! # Bytes vs Unicode
|
||||
//!
|
||||
//! This module concerns itself with a looser definition of "text" than you would
|
||||
//! normally see in Rust. While by default it can only operate on [`str`] types
|
||||
//! by enabling the `bytes` feature it gains support for byte slices with some
|
||||
//! caveats.
|
||||
//!
|
||||
//! A lot of text diff functionality assumes that what is being diffed constitutes
|
||||
//! text, but in the real world it can often be challenging to ensure that this is
|
||||
//! all valid utf-8. Because of this the crate is built so that most functionality
|
||||
//! also still works with bytes for as long as they are roughly ASCII compatible.
|
||||
//!
|
||||
//! This means you will be successful in creating a unified diff from latin1
|
||||
//! encoded bytes but if you try to do the same with EBCDIC encoded bytes you
|
||||
//! will only get garbage.
|
||||
#![cfg(feature = "text")]
|
||||
use std::borrow::Cow;
|
||||
use std::cmp::Reverse;
|
||||
use std::collections::BinaryHeap;
|
||||
|
|
@ -92,15 +6,14 @@ use std::collections::BinaryHeap;
|
|||
mod abstraction;
|
||||
#[cfg(feature = "inline")]
|
||||
mod inline;
|
||||
mod udiff;
|
||||
mod utils;
|
||||
|
||||
pub use self::abstraction::{DiffableStr, DiffableStrRef};
|
||||
#[cfg(feature = "inline")]
|
||||
pub use self::inline::InlineChange;
|
||||
pub use self::udiff::{unified_diff, UnifiedDiff, UnifiedDiffHunk, UnifiedHunkHeader};
|
||||
|
||||
use self::utils::{upper_seq_ratio, QuickSeqRatio};
|
||||
use crate::udiff::UnifiedDiff;
|
||||
use crate::{capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, Change, DiffOp};
|
||||
|
||||
/// A builder type config for more complex uses of [`TextDiff`].
|
||||
|
|
@ -358,7 +271,7 @@ impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'n
|
|||
/// ratio of `0.0` would indicate completely distinct sequences.
|
||||
///
|
||||
/// ```rust
|
||||
/// # use similar::text::TextDiff;
|
||||
/// # use similar::TextDiff;
|
||||
/// let diff = TextDiff::from_chars("abcd", "bcde");
|
||||
/// assert_eq!(diff.ratio(), 0.75);
|
||||
/// ```
|
||||
|
|
@ -411,7 +324,7 @@ impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'n
|
|||
/// to be considered similar. See [`TextDiff::ratio`] for more information.
|
||||
///
|
||||
/// ```
|
||||
/// # use similar::text::get_close_matches;
|
||||
/// # use similar::get_close_matches;
|
||||
/// let matches = get_close_matches(
|
||||
/// "appel",
|
||||
/// &["ape", "apple", "peach", "puppy"][..],
|
||||
|
|
|
|||
|
|
@ -1,25 +0,0 @@
|
|||
---
|
||||
source: src/text/udiff.rs
|
||||
expression: "&diff.unified_diff().header(\"a.txt\", \"b.txt\").to_string()"
|
||||
---
|
||||
--- a.txt
|
||||
+++ b.txt
|
||||
@@ -15,3 +19,3 @@
|
||||
p
|
||||
q
|
||||
r
|
||||
-s
|
||||
+S
|
||||
t
|
||||
u
|
||||
v
|
||||
@@ -37,3 +41,3 @@
|
||||
L
|
||||
M
|
||||
N
|
||||
-O
|
||||
+o
|
||||
P
|
||||
Q
|
||||
R
|
||||
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
---
|
||||
source: src/text/udiff.rs
|
||||
expression: "&diff.unified_diff().missing_newline_hint(false).header(\"a.txt\",\n \"b.txt\").to_string()"
|
||||
---
|
||||
--- a.txt
|
||||
+++ b.txt
|
||||
@@ -0 +0 @@
|
||||
-a
|
||||
+b
|
||||
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
---
|
||||
source: src/text/udiff.rs
|
||||
expression: "&diff.unified_diff().header(\"a.txt\", \"b.txt\").to_string()"
|
||||
---
|
||||
--- a.txt
|
||||
+++ b.txt
|
||||
@@ -0 +0 @@
|
||||
-a
|
||||
+b
|
||||
\ No newline at end of file
|
||||
|
||||
|
|
@ -1,348 +0,0 @@
|
|||
//! This module provides unified diff functionality.
|
||||
//!
|
||||
//! This module is available for as long as the `text` feature is enabled which
|
||||
//! is enabled by default.
|
||||
//!
|
||||
//! ```rust
|
||||
//! use similar::text::TextDiff;
|
||||
//! # let old_text = "";
|
||||
//! # let new_text = "";
|
||||
//! let text_diff = TextDiff::from_lines(old_text, new_text);
|
||||
//! print!("{}", text_diff
|
||||
//! .unified_diff()
|
||||
//! .context_radius(10)
|
||||
//! .header("old_file", "new_file"));
|
||||
//! ```
|
||||
//!
|
||||
//! # Unicode vs Bytes
|
||||
//!
|
||||
//! The [`UnifiedDiff`] type supports both unicode and byte diffs for all
|
||||
//! types compatible with [`DiffableStr`]. You can pick between the two
|
||||
//! versions by using [`UnifiedDiff.to_string`] or [`UnifiedDiff.to_writer`].
|
||||
//! The former uses [`DiffableStr::to_string_lossy`], the latter uses
|
||||
//! [`DiffableStr::as_bytes`] for each line.
|
||||
|
||||
use std::ops::Range;
|
||||
use std::{fmt, io};
|
||||
|
||||
use crate::text::TextDiff;
|
||||
use crate::types::{Algorithm, Change, DiffOp};
|
||||
|
||||
use super::DiffableStr;
|
||||
|
||||
/// Display adapter for the "missing trailing newline" marker.
///
/// The wrapped flag decides whether the marker is emitted at all; when it is
/// `false` formatting writes nothing.
struct MissingNewlineHint(bool);

impl fmt::Display for MissingNewlineHint {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let MissingNewlineHint(enabled) = *self;
        if !enabled {
            return Ok(());
        }
        // The marker is prefixed with a newline so it starts on its own line.
        f.write_str("\n\\ No newline at end of file")
    }
}
|
||||
|
||||
/// Half-open, zero-based index range (`start..end`) covered by one side of a
/// hunk.
///
/// The `Display` implementation renders the range the way it appears inside
/// a unified diff `@@ ... @@` header: one-based, as `start,len`, with the
/// length omitted when it is exactly one.
#[derive(Copy, Clone, Debug)]
struct UnifiedDiffHunkRange(usize, usize);

impl UnifiedDiffHunkRange {
    /// Creates a hunk range from a zero-based, half-open index range.
    fn new(range: Range<usize>) -> UnifiedDiffHunkRange {
        UnifiedDiffHunkRange(range.start, range.end)
    }

    /// Zero-based index of the first item in the range.
    fn start(&self) -> usize {
        self.0
    }

    /// Zero-based index one past the last item in the range.
    fn end(&self) -> usize {
        self.1
    }
}

impl fmt::Display for UnifiedDiffHunkRange {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Unified diff headers number lines starting at one, while the stored
        // range is zero-based. Starting from `start + 1` also keeps the
        // decrement below from underflowing for an empty range at index 0.
        let mut beginning = self.start() + 1;
        // Saturate so an inverted range renders as empty instead of panicking.
        let len = self.end().saturating_sub(self.start());
        if len == 1 {
            write!(f, "{}", beginning)
        } else {
            if len == 0 {
                // empty ranges begin at line just before the range
                beginning -= 1;
            }
            write!(f, "{},{}", beginning, len)
        }
    }
}
|
||||
|
||||
/// Unified diff hunk header formatter.
|
||||
pub struct UnifiedHunkHeader {
|
||||
old_range: UnifiedDiffHunkRange,
|
||||
new_range: UnifiedDiffHunkRange,
|
||||
}
|
||||
|
||||
impl UnifiedHunkHeader {
|
||||
/// Creates a hunk header from a (non empty) slice of diff ops.
|
||||
pub fn new(ops: &[DiffOp]) -> UnifiedHunkHeader {
|
||||
UnifiedHunkHeader {
|
||||
old_range: UnifiedDiffHunkRange::new(ops[0].old_range()),
|
||||
new_range: UnifiedDiffHunkRange::new(ops[ops.len() - 1].new_range()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for UnifiedHunkHeader {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "@@ -{} +{} @@", &self.old_range, &self.new_range)
|
||||
}
|
||||
}
|
||||
|
||||
/// Unified diff formatter.
|
||||
///
|
||||
/// ```rust
|
||||
/// use similar::text::TextDiff;
|
||||
/// # let old_text = "";
|
||||
/// # let new_text = "";
|
||||
/// let text_diff = TextDiff::from_lines(old_text, new_text);
|
||||
/// print!("{}", text_diff
|
||||
/// .unified_diff()
|
||||
/// .context_radius(10)
|
||||
/// .header("old_file", "new_file"));
|
||||
/// ```
|
||||
///
|
||||
/// ## Unicode vs Bytes
|
||||
///
|
||||
/// The [`UnifiedDiff`] type supports both unicode and byte diffs for all
|
||||
/// types compatible with [`DiffableStr`]. You can pick between the two
|
||||
/// versions by using [`UnifiedDiff.to_string`] or [`UnifiedDiff.to_writer`].
|
||||
/// The former uses [`DiffableStr::to_string_lossy`], the latter uses
|
||||
/// [`DiffableStr::as_bytes`] for each line.
|
||||
pub struct UnifiedDiff<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
|
||||
diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
|
||||
context_radius: usize,
|
||||
missing_newline_hint: bool,
|
||||
header: Option<(String, String)>,
|
||||
}
|
||||
|
||||
impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> {
|
||||
/// Creates a formatter from a text diff object.
|
||||
pub fn from_text_diff(diff: &'diff TextDiff<'old, 'new, 'bufs, T>) -> Self {
|
||||
UnifiedDiff {
|
||||
diff,
|
||||
context_radius: 3,
|
||||
missing_newline_hint: true,
|
||||
header: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Changes the context radius.
|
||||
///
|
||||
/// The context radius is the number of lines between changes that should
|
||||
/// be emitted. This defaults to `3`.
|
||||
pub fn context_radius(&mut self, n: usize) -> &mut Self {
|
||||
self.context_radius = n;
|
||||
self
|
||||
}
|
||||
|
||||
/// Sets a header to the diff.
|
||||
///
|
||||
/// `a` and `b` are the file names that are added to the top of the unified
|
||||
/// file format. The names are accepted verbaitim which lets you encode
|
||||
/// a timestamp into it when separated by a tab (`\t`). For more information
|
||||
/// see [the unified diff format specification](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/diff.html#tag_20_34_10_07)
|
||||
pub fn header(&mut self, a: &str, b: &str) -> &mut Self {
|
||||
self.header = Some((a.to_string(), b.to_string()));
|
||||
self
|
||||
}
|
||||
|
||||
/// Controls the missing newline hint.
|
||||
///
|
||||
/// By default a special `\ No newline at end of file` marker is added to
|
||||
/// the output when a file is not terminated with a final newline. This can
|
||||
/// be disabled with this flag.
|
||||
pub fn missing_newline_hint(&mut self, yes: bool) -> &mut Self {
|
||||
self.missing_newline_hint = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Iterates over all hunks as configured.
|
||||
pub fn iter_hunks(&self) -> impl Iterator<Item = UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T>> {
|
||||
let diff = self.diff;
|
||||
let missing_newline_hint = self.missing_newline_hint;
|
||||
self.diff
|
||||
.grouped_ops(self.context_radius)
|
||||
.into_iter()
|
||||
.filter(|ops| !ops.is_empty())
|
||||
.map(move |ops| UnifiedDiffHunk::new(ops, diff, missing_newline_hint))
|
||||
}
|
||||
|
||||
/// Write the unified diff as bytes to the output stream.
|
||||
pub fn to_writer<W: io::Write>(&self, mut w: W) -> Result<(), io::Error> {
|
||||
let mut header = self.header.as_ref();
|
||||
for hunk in self.iter_hunks() {
|
||||
if let Some((old_file, new_file)) = header.take() {
|
||||
writeln!(w, "--- {}", old_file)?;
|
||||
writeln!(w, "+++ {}", new_file)?;
|
||||
}
|
||||
write!(w, "{}", hunk)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn header_opt(&mut self, header: Option<(&str, &str)>) -> &mut Self {
|
||||
if let Some((a, b)) = header {
|
||||
self.header(a, b);
|
||||
}
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Unified diff hunk formatter.
|
||||
///
|
||||
/// The `Display` this renders out a single unified diff's hunk.
|
||||
pub struct UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
|
||||
diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
|
||||
ops: Vec<DiffOp>,
|
||||
missing_newline_hint: bool,
|
||||
}
|
||||
|
||||
impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized>
|
||||
UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T>
|
||||
{
|
||||
/// Creates a new hunk for some operations.
|
||||
pub fn new(
|
||||
ops: Vec<DiffOp>,
|
||||
diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
|
||||
missing_newline_hint: bool,
|
||||
) -> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T> {
|
||||
UnifiedDiffHunk {
|
||||
diff,
|
||||
ops,
|
||||
missing_newline_hint,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the header for the hunk.
|
||||
pub fn header(&self) -> UnifiedHunkHeader {
|
||||
UnifiedHunkHeader::new(&self.ops)
|
||||
}
|
||||
|
||||
/// Returns all operations in the hunk.
|
||||
pub fn ops(&self) -> &[DiffOp] {
|
||||
&self.ops
|
||||
}
|
||||
|
||||
/// Returns the value of the `missing_newline_hint` flag.
|
||||
pub fn missing_newline_hint(&self) -> bool {
|
||||
self.missing_newline_hint
|
||||
}
|
||||
|
||||
/// Iterates over all changes in a hunk.
|
||||
pub fn iter_changes(&self) -> impl Iterator<Item = Change<'_, T>> + '_ {
|
||||
// unclear why this needs Box::new here. It seems to infer some really
|
||||
// odd lifetimes I can't figure out how to work with.
|
||||
(Box::new(
|
||||
self.ops()
|
||||
.iter()
|
||||
.flat_map(move |op| self.diff.iter_changes(op)),
|
||||
)) as Box<dyn Iterator<Item = _>>
|
||||
}
|
||||
|
||||
/// Write the hunk as bytes to the output stream.
|
||||
pub fn to_writer<W: io::Write>(&self, mut w: W) -> Result<(), io::Error> {
|
||||
for (idx, change) in self.iter_changes().enumerate() {
|
||||
if idx == 0 {
|
||||
writeln!(w, "{}", self.header())?;
|
||||
}
|
||||
write!(w, "{}", change.tag())?;
|
||||
w.write_all(change.value().as_bytes())?;
|
||||
if !self.diff.newline_terminated() {
|
||||
writeln!(w)?;
|
||||
}
|
||||
if self.diff.newline_terminated() && change.missing_newline() {
|
||||
writeln!(w, "{}", MissingNewlineHint(self.missing_newline_hint))?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> fmt::Display
|
||||
for UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T>
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
for (idx, change) in self.iter_changes().enumerate() {
|
||||
if idx == 0 {
|
||||
writeln!(f, "{}", self.header())?;
|
||||
}
|
||||
write!(f, "{}{}", change.tag(), change.to_string_lossy())?;
|
||||
if !self.diff.newline_terminated() {
|
||||
writeln!(f)?;
|
||||
}
|
||||
if self.diff.newline_terminated() && change.missing_newline() {
|
||||
writeln!(f, "{}", MissingNewlineHint(self.missing_newline_hint))?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> fmt::Display
|
||||
for UnifiedDiff<'diff, 'old, 'new, 'bufs, T>
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let mut header = self.header.as_ref();
|
||||
for hunk in self.iter_hunks() {
|
||||
if let Some((old_file, new_file)) = header.take() {
|
||||
writeln!(f, "--- {}", old_file)?;
|
||||
writeln!(f, "+++ {}", new_file)?;
|
||||
}
|
||||
write!(f, "{}", hunk)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Quick way to get a unified diff as string.
|
||||
///
|
||||
/// `n` configures [`UnifiedDiff::context_radius`] and
|
||||
/// `header` configures [`UnifiedDiff::header`] when not `None`.
|
||||
pub fn unified_diff<'old, 'new>(
|
||||
alg: Algorithm,
|
||||
old: &'old str,
|
||||
new: &'new str,
|
||||
n: usize,
|
||||
header: Option<(&str, &str)>,
|
||||
) -> String {
|
||||
TextDiff::configure()
|
||||
.algorithm(alg)
|
||||
.diff_lines(old, new)
|
||||
.unified_diff()
|
||||
.context_radius(n)
|
||||
.header_opt(header)
|
||||
.to_string()
|
||||
}
|
||||
|
||||
// Snapshot test: the two inputs differ in exactly two lines (`s` vs `S` and
// `O` vs `o`), far enough apart to end up in separate hunks.
#[test]
fn test_unified_diff() {
    let old = "a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\nz\nA\nB\nC\nD\nE\nF\nG\nH\nI\nJ\nK\nL\nM\nN\nO\nP\nQ\nR\nS\nT\nU\nV\nW\nX\nY\nZ";
    let new = "a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\nS\nt\nu\nv\nw\nx\ny\nz\nA\nB\nC\nD\nE\nF\nG\nH\nI\nJ\nK\nL\nM\nN\no\nP\nQ\nR\nS\nT\nU\nV\nW\nX\nY\nZ";
    let diff = TextDiff::from_lines(old, new);
    insta::assert_snapshot!(&diff.unified_diff().header("a.txt", "b.txt").to_string());
}
|
||||
// Identical inputs must render as an empty string, header included.
#[test]
fn test_empty_unified_diff() {
    let diff = TextDiff::from_lines("abc", "abc");
    let rendered = diff.unified_diff().header("a.txt", "b.txt").to_string();
    assert_eq!(rendered, "");
}
|
||||
|
||||
// Snapshot test for the missing-newline marker: the new text lacks a trailing
// newline, and the output is captured with the hint enabled (the default) and
// then with the hint disabled.
#[test]
fn test_unified_diff_newline_hint() {
    let old = "a\n";
    let new = "b";
    let diff = TextDiff::from_lines(old, new);
    insta::assert_snapshot!(&diff.unified_diff().header("a.txt", "b.txt").to_string());
    insta::assert_snapshot!(&diff
        .unified_diff()
        .missing_newline_hint(false)
        .header("a.txt", "b.txt")
        .to_string());
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue