Move all text functionality toplevel

2021-02-03 00:56:17 +01:00 · 2021-02-03 00:56:17 +01:00 · 844769ae19
commit 844769ae19
parent de8d82ab63
10 changed files with 96 additions and 118 deletions
--- a/src/text/mod.rs
+++ b/src/text/mod.rs
@ -1,90 +1,4 @@
 //! Text diffing utilities.
-//!
-//! This provides helpful utilities for text (and more specifically line) diff
-//! operations.  The main type you want to work with is [`TextDiff`] which
-//! uses the underlying diff algorithms to expose a convenient API to work with
-//! texts.
-//!
-//! It can produce a unified diff and also let you iterate over the changeset
-//! directly if you want.
-//!
-//! Text diffing is available by default but can be disabled by turning off the
-//! default features.  The feature to enable to get it back is `text`.
-//!
-//! # Examples
-//!
-//! A super simple example for how to generate a unified diff with three lines
-//! off context around the changes:
-//!
-//! ```rust
-//! # use similar::text::TextDiff;
-//! # let old_text = "";
-//! # let new_text = "";
-//! let diff = TextDiff::from_lines(old_text, new_text);
-//! let unified_diff = diff.unified_diff().header("old_file", "new_file").to_string();
-//! ```
-//!
-//! This is another example that iterates over the actual changes:
-//!
-//! ```rust
-//! # use similar::text::TextDiff;
-//! # let old_text = "";
-//! # let new_text = "";
-//! let diff = TextDiff::from_lines(old_text, new_text);
-//! for op in diff.ops() {
-//!     for change in diff.iter_changes(op) {
-//!         println!("{:?}", change);
-//!     }
-//! }
-//! ```
-//!
-//! # Ops vs Changes
-//!
-//! Because very commonly two compared sequences will largely match this module
-//! splits it's functionality into two layers.  The first is inherited from the
-//! general [`algorithms`](crate::algorithms) module: changes are encoded as
-//! [diff operations](crate::DiffOp).  These are ranges of the
-//! differences by index in the source sequence.  Because this can be cumbersome
-//! to work with a separate method [`TextDiff::iter_changes`] is provided which
-//! expands all the changes on an item by item level encoded in an operation.
-//!
-//! Because the [`TextDiff::grouped_ops`] method can isolate clusters of changes
-//! this even works for very long files if paired with this method.
-//!
-//! # Trailing Newlines
-//!
-//! When working with line diffs (and unified diffs in general) there are two
-//! "philosophies" to look at lines.  One is to diff lines without their newline
-//! character, the other is to diff with the newline character.  Typically the
-//! latter is done because text files do not _have_ to end in a newline character.
-//! As a result there is a difference between `foo\n` and `foo` as far as diffs
-//! are concerned.
-//!
-//! In similar this is handled on the [`Change`] or [`InlineChange`] level.  If
-//! a diff was created via [`TextDiff::from_lines`] the text diffing system is
-//! instructed to check if there are missing newlines encountered.  If that is
-//! the case the [`Change`] object will return true from the
-//! [`Change::missing_newline`] method so the caller knows to handle this by
-//! either rendering a virtual newline at that position or to indicate it in
-//! different ways.  For instance the unified diff code will render the special
-//! `\ No newline at end of file` marker.
-//!
-//! # Bytes vs Unicode
-//!
-//! This module concerns itself with a loser definition of "text" than you would
-//! normally see in Rust.  While by default it can only operate on [`str`] types
-//! by enabling the `bytes` feature it gains support for byte slices with some
-//! caveats.
-//!
-//! A lot of text diff functionality assumes that what is being diffed constiutes
-//! text, but in the real world it can often be challenging to ensure that this is
-//! all valid utf-8.  Because of this the crate is built so that most functinality
-//! also still works with bytes for as long as they are roughtly ASCII compatible.
-//!
-//! This means you will be successful in creating a unified diff from latin1
-//! encoded bytes but if you try to do the same with EBCDIC encoded bytes you
-//! will only get garbage.
-#![cfg(feature = "text")]
 use std::borrow::Cow;
 use std::cmp::Reverse;
 use std::collections::BinaryHeap;
@ -92,15 +6,14 @@ use std::collections::BinaryHeap;
 mod abstraction;
 #[cfg(feature = "inline")]
 mod inline;
-mod udiff;
 mod utils;

 pub use self::abstraction::{DiffableStr, DiffableStrRef};
 #[cfg(feature = "inline")]
 pub use self::inline::InlineChange;
-pub use self::udiff::{unified_diff, UnifiedDiff, UnifiedDiffHunk, UnifiedHunkHeader};

 use self::utils::{upper_seq_ratio, QuickSeqRatio};
+use crate::udiff::UnifiedDiff;
 use crate::{capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, Change, DiffOp};

 /// A builder type config for more complex uses of [`TextDiff`].
@ -358,7 +271,7 @@ impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'n
    /// ratio of `0.0` would indicate completely distinct sequences.
    ///
    /// ```rust
-    /// # use similar::text::TextDiff;
+    /// # use similar::TextDiff;
    /// let diff = TextDiff::from_chars("abcd", "bcde");
    /// assert_eq!(diff.ratio(), 0.75);
    /// ```
@ -411,7 +324,7 @@ impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'n
 /// to be considered similar.  See [`TextDiff::ratio`] for more information.
 ///
 /// ```
-/// # use similar::text::get_close_matches;
+/// # use similar::get_close_matches;
 /// let matches = get_close_matches(
 ///     "appel",
 ///     &["ape", "apple", "peach", "puppy"][..],
--- a/src/text/snapshots/similartextudiff__unified_diff.snap
+++ b/src/text/snapshots/similartextudiff__unified_diff.snap
@ -1,25 +0,0 @@
---
-source: src/text/udiff.rs
-expression: "&diff.unified_diff().header(\"a.txt\", \"b.txt\").to_string()"
---
--- a.txt
-+++ b.txt
-@@ -15,3 +19,3 @@
- p
- q
- r
-s
-+S
- t
- u
- v
-@@ -37,3 +41,3 @@
- L
- M
- N
-O
-+o
- P
- Q
- R
-
--- a/src/text/snapshots/similartextudiff__unified_diff_newline_hint-2.snap
+++ b/src/text/snapshots/similartextudiff__unified_diff_newline_hint-2.snap
@ -1,10 +0,0 @@
---
-source: src/text/udiff.rs
-expression: "&diff.unified_diff().missing_newline_hint(false).header(\"a.txt\",\n                                                        \"b.txt\").to_string()"
---
--- a.txt
-+++ b.txt
-@@ -0 +0 @@
-a
-+b
-
--- a/src/text/snapshots/similartextudiff__unified_diff_newline_hint.snap
+++ b/src/text/snapshots/similartextudiff__unified_diff_newline_hint.snap
@ -1,11 +0,0 @@
---
-source: src/text/udiff.rs
-expression: "&diff.unified_diff().header(\"a.txt\", \"b.txt\").to_string()"
---
--- a.txt
-+++ b.txt
-@@ -0 +0 @@
-a
-+b
-\ No newline at end of file
-
--- a/src/text/udiff.rs
+++ b/src/text/udiff.rs
@ -1,348 +0,0 @@
-//! This module provides unified diff functionality.
-//!
-//! This module is available for as long as the `text` feature is enabled which
-//! is enabled by default.
-//!
-//! ```rust
-//! use similar::text::TextDiff;
-//! # let old_text = "";
-//! # let new_text = "";
-//! let text_diff = TextDiff::from_lines(old_text, new_text);
-//! print!("{}", text_diff
-//!     .unified_diff()
-//!     .context_radius(10)
-//!     .header("old_file", "new_file"));
-//! ```
-//!
-//! # Unicode vs Bytes
-//!
-//! The [`UnifiedDiff`] type supports both unicode and byte diffs for all
-//! types compatible with [`DiffableStr`].  You can pick between the two
-//! versions by using [`UnifiedDiff.to_string`] or [`UnifiedDiff.to_writer`].
-//! The former uses [`DiffableStr::to_string_lossy`], the latter uses
-//! [`DiffableStr::as_bytes`] for each line.
-
-use std::ops::Range;
-use std::{fmt, io};
-
-use crate::text::TextDiff;
-use crate::types::{Algorithm, Change, DiffOp};
-
-use super::DiffableStr;
-
-struct MissingNewlineHint(bool);
-
-impl fmt::Display for MissingNewlineHint {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        if self.0 {
-            write!(f, "\n\\ No newline at end of file")?;
-        }
-        Ok(())
-    }
-}
-
-#[derive(Copy, Clone, Debug)]
-struct UnifiedDiffHunkRange(usize, usize);
-
-impl UnifiedDiffHunkRange {
-    fn new(range: Range<usize>) -> UnifiedDiffHunkRange {
-        UnifiedDiffHunkRange(range.start, range.end)
-    }
-
-    fn start(&self) -> usize {
-        self.0
-    }
-
-    fn end(&self) -> usize {
-        self.1
-    }
-}
-
-impl fmt::Display for UnifiedDiffHunkRange {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let mut beginning = self.start();
-        let len = self.end() - self.start();
-        if len == 1 {
-            write!(f, "{}", beginning)
-        } else {
-            if len == 0 {
-                // empty ranges begin at line just before the range
-                beginning -= 1;
-            }
-            write!(f, "{},{}", beginning, len)
-        }
-    }
-}
-
-/// Unified diff hunk header formatter.
-pub struct UnifiedHunkHeader {
-    old_range: UnifiedDiffHunkRange,
-    new_range: UnifiedDiffHunkRange,
-}
-
-impl UnifiedHunkHeader {
-    /// Creates a hunk header from a (non empty) slice of diff ops.
-    pub fn new(ops: &[DiffOp]) -> UnifiedHunkHeader {
-        UnifiedHunkHeader {
-            old_range: UnifiedDiffHunkRange::new(ops[0].old_range()),
-            new_range: UnifiedDiffHunkRange::new(ops[ops.len() - 1].new_range()),
-        }
-    }
-}
-
-impl fmt::Display for UnifiedHunkHeader {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "@@ -{} +{} @@", &self.old_range, &self.new_range)
-    }
-}
-
-/// Unified diff formatter.
-///
-/// ```rust
-/// use similar::text::TextDiff;
-/// # let old_text = "";
-/// # let new_text = "";
-/// let text_diff = TextDiff::from_lines(old_text, new_text);
-/// print!("{}", text_diff
-///     .unified_diff()
-///     .context_radius(10)
-///     .header("old_file", "new_file"));
-/// ```
-///
-/// ## Unicode vs Bytes
-///
-/// The [`UnifiedDiff`] type supports both unicode and byte diffs for all
-/// types compatible with [`DiffableStr`].  You can pick between the two
-/// versions by using [`UnifiedDiff.to_string`] or [`UnifiedDiff.to_writer`].
-/// The former uses [`DiffableStr::to_string_lossy`], the latter uses
-/// [`DiffableStr::as_bytes`] for each line.
-pub struct UnifiedDiff<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
-    diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
-    context_radius: usize,
-    missing_newline_hint: bool,
-    header: Option<(String, String)>,
-}
-
-impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> {
-    /// Creates a formatter from a text diff object.
-    pub fn from_text_diff(diff: &'diff TextDiff<'old, 'new, 'bufs, T>) -> Self {
-        UnifiedDiff {
-            diff,
-            context_radius: 3,
-            missing_newline_hint: true,
-            header: None,
-        }
-    }
-
-    /// Changes the context radius.
-    ///
-    /// The context radius is the number of lines between changes that should
-    /// be emitted.  This defaults to `3`.
-    pub fn context_radius(&mut self, n: usize) -> &mut Self {
-        self.context_radius = n;
-        self
-    }
-
-    /// Sets a header to the diff.
-    ///
-    /// `a` and `b` are the file names that are added to the top of the unified
-    /// file format.  The names are accepted verbaitim which lets you encode
-    /// a timestamp into it when separated by a tab (`\t`).  For more information
-    /// see [the unified diff format specification](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/diff.html#tag_20_34_10_07)
-    pub fn header(&mut self, a: &str, b: &str) -> &mut Self {
-        self.header = Some((a.to_string(), b.to_string()));
-        self
-    }
-
-    /// Controls the missing newline hint.
-    ///
-    /// By default a special `\ No newline at end of file` marker is added to
-    /// the output when a file is not terminated with a final newline.  This can
-    /// be disabled with this flag.
-    pub fn missing_newline_hint(&mut self, yes: bool) -> &mut Self {
-        self.missing_newline_hint = yes;
-        self
-    }
-
-    /// Iterates over all hunks as configured.
-    pub fn iter_hunks(&self) -> impl Iterator<Item = UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T>> {
-        let diff = self.diff;
-        let missing_newline_hint = self.missing_newline_hint;
-        self.diff
-            .grouped_ops(self.context_radius)
-            .into_iter()
-            .filter(|ops| !ops.is_empty())
-            .map(move |ops| UnifiedDiffHunk::new(ops, diff, missing_newline_hint))
-    }
-
-    /// Write the unified diff as bytes to the output stream.
-    pub fn to_writer<W: io::Write>(&self, mut w: W) -> Result<(), io::Error> {
-        let mut header = self.header.as_ref();
-        for hunk in self.iter_hunks() {
-            if let Some((old_file, new_file)) = header.take() {
-                writeln!(w, "--- {}", old_file)?;
-                writeln!(w, "+++ {}", new_file)?;
-            }
-            write!(w, "{}", hunk)?;
-        }
-        Ok(())
-    }
-
-    fn header_opt(&mut self, header: Option<(&str, &str)>) -> &mut Self {
-        if let Some((a, b)) = header {
-            self.header(a, b);
-        }
-        self
-    }
-}
-
-/// Unified diff hunk formatter.
-///
-/// The `Display` this renders out a single unified diff's hunk.
-pub struct UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
-    diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
-    ops: Vec<DiffOp>,
-    missing_newline_hint: bool,
-}
-
-impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized>
-    UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T>
-{
-    /// Creates a new hunk for some operations.
-    pub fn new(
-        ops: Vec<DiffOp>,
-        diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
-        missing_newline_hint: bool,
-    ) -> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T> {
-        UnifiedDiffHunk {
-            diff,
-            ops,
-            missing_newline_hint,
-        }
-    }
-
-    /// Returns the header for the hunk.
-    pub fn header(&self) -> UnifiedHunkHeader {
-        UnifiedHunkHeader::new(&self.ops)
-    }
-
-    /// Returns all operations in the hunk.
-    pub fn ops(&self) -> &[DiffOp] {
-        &self.ops
-    }
-
-    /// Returns the value of the `missing_newline_hint` flag.
-    pub fn missing_newline_hint(&self) -> bool {
-        self.missing_newline_hint
-    }
-
-    /// Iterates over all changes in a hunk.
-    pub fn iter_changes(&self) -> impl Iterator<Item = Change<'_, T>> + '_ {
-        // unclear why this needs Box::new here.  It seems to infer some really
-        // odd lifetimes I can't figure out how to work with.
-        (Box::new(
-            self.ops()
-                .iter()
-                .flat_map(move |op| self.diff.iter_changes(op)),
-        )) as Box<dyn Iterator<Item = _>>
-    }
-
-    /// Write the hunk as bytes to the output stream.
-    pub fn to_writer<W: io::Write>(&self, mut w: W) -> Result<(), io::Error> {
-        for (idx, change) in self.iter_changes().enumerate() {
-            if idx == 0 {
-                writeln!(w, "{}", self.header())?;
-            }
-            write!(w, "{}", change.tag())?;
-            w.write_all(change.value().as_bytes())?;
-            if !self.diff.newline_terminated() {
-                writeln!(w)?;
-            }
-            if self.diff.newline_terminated() && change.missing_newline() {
-                writeln!(w, "{}", MissingNewlineHint(self.missing_newline_hint))?;
-            }
-        }
-        Ok(())
-    }
-}
-
-impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> fmt::Display
-    for UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T>
-{
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        for (idx, change) in self.iter_changes().enumerate() {
-            if idx == 0 {
-                writeln!(f, "{}", self.header())?;
-            }
-            write!(f, "{}{}", change.tag(), change.to_string_lossy())?;
-            if !self.diff.newline_terminated() {
-                writeln!(f)?;
-            }
-            if self.diff.newline_terminated() && change.missing_newline() {
-                writeln!(f, "{}", MissingNewlineHint(self.missing_newline_hint))?;
-            }
-        }
-        Ok(())
-    }
-}
-
-impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> fmt::Display
-    for UnifiedDiff<'diff, 'old, 'new, 'bufs, T>
-{
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let mut header = self.header.as_ref();
-        for hunk in self.iter_hunks() {
-            if let Some((old_file, new_file)) = header.take() {
-                writeln!(f, "--- {}", old_file)?;
-                writeln!(f, "+++ {}", new_file)?;
-            }
-            write!(f, "{}", hunk)?;
-        }
-        Ok(())
-    }
-}
-
-/// Quick way to get a unified diff as string.
-///
-/// `n` configures [`UnifiedDiff::context_radius`] and
-/// `header` configures [`UnifiedDiff::header`] when not `None`.
-pub fn unified_diff<'old, 'new>(
-    alg: Algorithm,
-    old: &'old str,
-    new: &'new str,
-    n: usize,
-    header: Option<(&str, &str)>,
-) -> String {
-    TextDiff::configure()
-        .algorithm(alg)
-        .diff_lines(old, new)
-        .unified_diff()
-        .context_radius(n)
-        .header_opt(header)
-        .to_string()
-}
-
-#[test]
-fn test_unified_diff() {
-    let diff = TextDiff::from_lines(
-        "a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\nz\nA\nB\nC\nD\nE\nF\nG\nH\nI\nJ\nK\nL\nM\nN\nO\nP\nQ\nR\nS\nT\nU\nV\nW\nX\nY\nZ",
-        "a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\nS\nt\nu\nv\nw\nx\ny\nz\nA\nB\nC\nD\nE\nF\nG\nH\nI\nJ\nK\nL\nM\nN\no\nP\nQ\nR\nS\nT\nU\nV\nW\nX\nY\nZ",
-    );
-    insta::assert_snapshot!(&diff.unified_diff().header("a.txt", "b.txt").to_string());
-}
-#[test]
-fn test_empty_unified_diff() {
-    let diff = TextDiff::from_lines("abc", "abc");
-    assert_eq!(diff.unified_diff().header("a.txt", "b.txt").to_string(), "");
-}
-
-#[test]
-fn test_unified_diff_newline_hint() {
-    let diff = TextDiff::from_lines("a\n", "b");
-    insta::assert_snapshot!(&diff.unified_diff().header("a.txt", "b.txt").to_string());
-    insta::assert_snapshot!(&diff
-        .unified_diff()
-        .missing_newline_hint(false)
-        .header("a.txt", "b.txt")
-        .to_string());
-}