Some internal refactorings

2021-02-02 20:15:31 +01:00 · 2021-02-02 20:15:31 +01:00 · 34e5b3d571
commit 34e5b3d571
parent 4b85e70f91
8 changed files with 174 additions and 157 deletions
--- a/examples/terminal-inline.rs
+++ b/examples/terminal-inline.rs
@ -44,7 +44,7 @@ fn main() {
                    style(Line(change.new_index())).dim(),
                    s.apply_to(sign).bold(),
                );
-                for (emphasized, value) in change.iter_strings() {
+                for (emphasized, value) in change.iter_strings_lossy() {
                    if emphasized {
                        print!("{}", s.apply_to(value).underlined().on_black());
                    } else {
--- a/src/algorithms/capture.rs
+++ b/src/algorithms/capture.rs
@ -155,34 +155,6 @@ impl DiffOp {
    }
 }
 /// A [`DiffHook`] implementation that collects all diff operations in a
 /// vector.
 #[derive(Default, Clone)]
 pub struct Capture(Vec<DiffOp>);
 impl Capture {
    /// Returns a fresh capture hook.
    pub fn new() -> Self {
        Self::default()
    }
    /// Consumes the hook and hands out the captured ops as a vector.
    pub fn into_ops(self) -> Vec<DiffOp> {
        let Capture(ops) = self;
        ops
    }
    /// Isolate change clusters by eliminating ranges with no changes.
    ///
    /// This is equivalent to calling [`group_diff_ops`] on [`Capture::into_ops`].
    pub fn into_grouped_ops(self, n: usize) -> Vec<Vec<DiffOp>> {
        let ops = self.into_ops();
        group_diff_ops(ops, n)
    }
    /// Borrows the captured operations as a slice.
    pub fn ops(&self) -> &[DiffOp] {
        self.0.as_slice()
    }
 }
 /// Isolate change clusters by eliminating ranges with no changes.
 ///
 /// This will leave holes behind in long periods of equal ranges so that
@ -272,6 +244,34 @@ pub fn get_diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 {
    }
 }
 /// A [`DiffHook`] implementation that collects all diff operations in a
 /// vector.
 #[derive(Default, Clone)]
 pub struct Capture(Vec<DiffOp>);
 impl Capture {
    /// Returns a fresh capture hook.
    pub fn new() -> Self {
        Self::default()
    }
    /// Consumes the hook and hands out the captured ops as a vector.
    pub fn into_ops(self) -> Vec<DiffOp> {
        let Capture(ops) = self;
        ops
    }
    /// Isolate change clusters by eliminating ranges with no changes.
    ///
    /// This is equivalent to calling [`group_diff_ops`] on [`Capture::into_ops`].
    pub fn into_grouped_ops(self, n: usize) -> Vec<Vec<DiffOp>> {
        let ops = self.into_ops();
        group_diff_ops(ops, n)
    }
    /// Borrows the captured operations as a slice.
    pub fn ops(&self) -> &[DiffOp] {
        self.0.as_slice()
    }
 }
 impl DiffHook for Capture {
    type Error = Infallible;
--- a/src/algorithms/mod.rs
+++ b/src/algorithms/mod.rs
@ -23,9 +23,9 @@ mod replace;
 use std::hash::Hash;
 use std::ops::{Index, Range};
-pub use capture::*;
+pub use capture::{get_diff_ratio, group_diff_ops, Capture, DiffOp, DiffTag};
-pub use hook::*;
+pub use hook::DiffHook;
-pub use replace::*;
+pub use replace::Replace;
 // actual diffing algorithms
 pub mod myers;
--- a/src/text/abstraction.rs
+++ b/src/text/abstraction.rs
@ -6,6 +6,13 @@ use std::hash::Hash;
 use std::ops::Range;
 /// Reference to a [`DiffableStr`].
 ///
 /// This type exists because while the library only really provides ways to
 /// work with `&str` and `&[u8]` there are types that deref into those string
 /// slices such as `String` and `Vec<u8>`.
 ///
 /// This trait is used in the library whenever it's nice to be able to pass
 /// strings of different types in.
 pub trait DiffableStrRef {
    /// The type of the resolved [`DiffableStr`].
    type Output: DiffableStr + ?Sized;
@ -78,7 +85,7 @@ pub trait DiffableStr: Hash + PartialEq + PartialOrd + Ord + Eq + ToOwned {
    fn as_str(&self) -> Option<&str>;
    /// Decodes the string (potentially) lossy.
-    fn as_str_lossy(&self) -> Cow<'_, str>;
+    fn to_string_lossy(&self) -> Cow<'_, str>;
    /// Checks if the string ends in a newline.
    fn ends_with_newline(&self) -> bool;
@ -91,6 +98,11 @@ pub trait DiffableStr: Hash + PartialEq + PartialOrd + Ord + Eq + ToOwned {
    /// Returns the strings as slice of raw bytes.
    fn as_bytes(&self) -> &[u8];
    /// Returns `true` when the length reported by `len` is zero.
    fn is_empty(&self) -> bool {
        let length = self.len();
        length == 0
    }
 }
 impl DiffableStr for str {
@ -184,7 +196,7 @@ impl DiffableStr for str {
        Some(self)
    }
-    fn as_str_lossy(&self) -> Cow<'_, str> {
+    fn to_string_lossy(&self) -> Cow<'_, str> {
        Cow::Borrowed(self)
    }
@ -293,7 +305,7 @@ impl DiffableStr for [u8] {
        std::str::from_utf8(self).ok()
    }
-    fn as_str_lossy(&self) -> Cow<'_, str> {
+    fn to_string_lossy(&self) -> Cow<'_, str> {
        String::from_utf8_lossy(self)
    }
--- a/src/text/inline.rs
+++ b/src/text/inline.rs
@ -121,6 +121,10 @@ impl<'s, T: DiffableStr + ?Sized> InlineChange<'s, T> {
    ///
    /// Each item is a tuple in the form `(emphasized, value)` where `emphasized`
    /// is true if it should be highlighted as an inline diff.
    ///
    /// Depending on the type of the underlying [`DiffableStr`] this value is
    /// more or less useful.  If you always want to have a utf-8 string it's
    /// better to use the [`InlineChange::iter_strings_lossy`] method.
    pub fn values(&self) -> &[(bool, &'s T)] {
        &self.values
    }
@ -129,10 +133,10 @@ impl<'s, T: DiffableStr + ?Sized> InlineChange<'s, T> {
    ///
    /// Each item is a tuple in the form `(emphasized, value)` where `emphasized`
    /// is true if it should be highlighted as an inline diff.
-    pub fn iter_strings(&self) -> impl Iterator<Item = (bool, Cow<'_, str>)> {
+    pub fn iter_strings_lossy(&self) -> impl Iterator<Item = (bool, Cow<'_, str>)> {
        self.values()
            .iter()
-            .map(|(emphasized, raw_value)| (*emphasized, raw_value.as_str_lossy()))
+            .map(|(emphasized, raw_value)| (*emphasized, raw_value.to_string_lossy()))
    }
    /// Returns `true` if this change needs to be followed up by a
@ -156,7 +160,7 @@ impl<'s, T: DiffableStr + ?Sized> From<Change<'s, T>> for InlineChange<'s, T> {
 impl<'s, T: DiffableStr + ?Sized> fmt::Display for InlineChange<'s, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        for (emphasized, value) in self.iter_strings() {
+        for (emphasized, value) in self.iter_strings_lossy() {
            let marker = match (emphasized, self.tag) {
                (false, _) | (true, ChangeTag::Equal) => "",
                (true, ChangeTag::Delete) => "-",
--- a/src/text/mod.rs
+++ b/src/text/mod.rs
@ -87,22 +87,22 @@
 #![cfg(feature = "text")]
 use std::borrow::Cow;
 use std::cmp::Reverse;
-use std::collections::{BinaryHeap, HashMap};
+use std::collections::BinaryHeap;
 use std::fmt;
 use std::hash::Hash;
 mod abstraction;
 #[cfg(feature = "inline")]
 mod inline;
 mod udiff;
 mod utils;
 pub use self::abstraction::{DiffableStr, DiffableStrRef};
 #[cfg(feature = "inline")]
-pub use self::inline::*;
+pub use self::inline::InlineChange;
-pub use self::udiff::*;
+pub use self::udiff::{unified_diff, UnifiedDiff, UnifiedHunkHeader};
 pub use crate::text::abstraction::*;
 use self::utils::{upper_seq_ratio, QuickSeqRatio};
 use crate::algorithms::{
    capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, DiffOp, DiffTag,
 };
@ -250,15 +250,6 @@ impl TextDiffConfig {
    }
 }
 /// Captures diff op codes for textual diffs
 ///
 /// Stores both compared sequences as slices of borrowed `T` values, each
 /// held either borrowed or owned through a [`Cow`], next to a list of
 /// [`DiffOp`]s and an [`Algorithm`] value.
 pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
    // Items of the old side; each item borrows from the old input for `'old`.
    old: Cow<'bufs, [&'old T]>,
    // Items of the new side; each item borrows from the new input for `'new`.
    new: Cow<'bufs, [&'new T]>,
    // Diff operations; presumably computed over `old` and `new` -- confirm
    // against the constructor.
    ops: Vec<DiffOp>,
    // NOTE(review): looks like this records whether the items carry their
    // own trailing newline -- confirm against the code that sets it.
    newline_terminated: bool,
    // Presumably the algorithm that produced `ops` -- TODO confirm.
    algorithm: Algorithm,
 }
 /// The tag of a change.
 #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)]
 pub enum ChangeTag {
@ -270,6 +261,20 @@ pub enum ChangeTag {
    Insert,
 }
 impl fmt::Display for ChangeTag {
    /// Writes the single-character marker of the tag: a space for
    /// `Equal`, `-` for `Delete` and `+` for `Insert`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let marker = match *self {
            ChangeTag::Equal => ' ',
            ChangeTag::Delete => '-',
            ChangeTag::Insert => '+',
        };
        write!(f, "{}", marker)
    }
 }
 /// Represents the expanded textual change.
 ///
 /// This type is returned from the [`TextDiff::iter_changes`] method.  It
@ -289,7 +294,7 @@ impl<'s, T: DiffableStr + ?Sized> fmt::Display for Change<'s, T> {
        write!(
            f,
            "{}{}",
-            self.as_str_lossy(),
+            self.to_string_lossy(),
            if self.missing_newline { "\n" } else { "" }
        )
    }
@ -312,6 +317,10 @@ impl<'s, T: DiffableStr + ?Sized> Change<'s, T> {
    }
    /// Gives access to the raw value this change refers to.
    ///
    /// How useful the raw value is depends on the type of the underlying
    /// [`DiffableStr`].  If a utf-8 string is wanted in every case, the
    /// [`Change::as_str`] and [`Change::to_string_lossy`] methods are the
    /// better choice.
    pub fn value(&self) -> &'s T {
        let raw: &'s T = self.value;
        raw
    }
@ -322,8 +331,8 @@ impl<'s, T: DiffableStr + ?Sized> Change<'s, T> {
    }
    /// Returns the value (lossy) decoded as utf-8 string.
-    pub fn as_str_lossy(&self) -> Cow<'s, str> {
+    pub fn to_string_lossy(&self) -> Cow<'s, str> {
-        T::as_str_lossy(self.value)
+        T::to_string_lossy(self.value)
    }
    /// Returns `true` if this change needs to be followed up by a
@ -336,6 +345,15 @@ impl<'s, T: DiffableStr + ?Sized> Change<'s, T> {
    }
 }
 /// Captures diff op codes for textual diffs
 ///
 /// Stores both compared sequences as slices of borrowed `T` values, each
 /// held either borrowed or owned through a [`Cow`], next to a list of
 /// [`DiffOp`]s and an [`Algorithm`] value.
 pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
    // Items of the old side; each item borrows from the old input for `'old`.
    old: Cow<'bufs, [&'old T]>,
    // Items of the new side; each item borrows from the new input for `'new`.
    new: Cow<'bufs, [&'new T]>,
    // Diff operations; presumably computed over `old` and `new` -- confirm
    // against the constructor.
    ops: Vec<DiffOp>,
    // NOTE(review): looks like this records whether the items carry their
    // own trailing newline -- confirm against the code that sets it.
    newline_terminated: bool,
    // Presumably the algorithm that produced `ops` -- TODO confirm.
    algorithm: Algorithm,
 }
 impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
    /// Configures a text differ before diffing.
    pub fn configure() -> TextDiffConfig {
@ -571,58 +589,7 @@ impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'n
    /// is currently not defined and will likely change over time.
    #[cfg(feature = "inline")]
    pub fn iter_inline_changes(&self, op: &DiffOp) -> impl Iterator<Item = InlineChange<'_, T>> {
-        iter_inline_changes(self, op)
+        inline::iter_inline_changes(self, op)
    }
 }
 // Quick and dirty upper sequence ratio: only the two lengths are
 // inspected, never the items themselves.  Two empty sequences give 1.0.
 fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
    let shorter = seq1.len().min(seq2.len());
    let combined = seq1.len() + seq2.len();
    if combined != 0 {
        2.0 * shorter as f32 / combined as f32
    } else {
        1.0
    }
 }
 /// Order-insensitive estimate of how similar two sequences can be at best.
 ///
 /// Following the approach of Python's difflib, both sequences are treated
 /// as multisets: the wrapped map records how many times each item occurs
 /// in the reference sequence handed to `new`.  Matching items without
 /// regard to their position can only over-count, so the value produced by
 /// `calc` is an upper bound for the ratio.  Internal helper for
 /// [`get_close_matches`].
 struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>);
 impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> {
    /// Tallies the occurrences of every item in `seq`.
    pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> {
        let mut tally = HashMap::new();
        for item in seq.iter().copied() {
            let slot = tally.entry(item).or_insert(0);
            *slot += 1;
        }
        QuickSeqRatio(tally)
    }
    /// Computes the upper-bound ratio between the reference items and `seq`.
    ///
    /// The denominator is the number of distinct reference items plus the
    /// length of `seq`; when that sum is zero the ratio is `1.0`.
    pub fn calc(&self, seq: &[&T]) -> f32 {
        let total = self.0.len() + seq.len();
        if total == 0 {
            return 1.0;
        }
        // Remaining budget per item, seeded lazily from the reference
        // counts the first time an item shows up in `seq`.
        let mut remaining = HashMap::new();
        let mut hits = 0;
        for &item in seq {
            let left = match remaining.get(&item) {
                Some(count) => *count,
                None => self.0.get(&item).copied().unwrap_or(0),
            };
            remaining.insert(item, left - 1);
            if left > 0 {
                hits += 1;
            }
        }
        2.0 * hits as f32 / total as f32
    }
 }
@ -738,7 +705,7 @@ fn test_line_ops() {
            .flat_map(|op| byte_diff.iter_changes(op))
            .collect::<Vec<_>>();
        for (change, byte_change) in changes.iter().zip(byte_changes.iter()) {
-            assert_eq!(change.as_str_lossy(), byte_change.as_str_lossy());
+            assert_eq!(change.to_string_lossy(), byte_change.to_string_lossy());
        }
    }
 }
--- a/src/text/udiff.rs
+++ b/src/text/udiff.rs
@ -19,17 +19,28 @@
 //! The [`UnifiedDiff`] type supports both unicode and byte diffs for all
 //! types compatible with [`DiffableStr`].  You can pick between the two
 //! versions by using [`UnifiedDiff::to_string`] or [`UnifiedDiff::to_writer`].
-//! The former uses [`DiffableStr::as_str_lossy`], the latter uses
+//! The former uses [`DiffableStr::to_string_lossy`], the latter uses
 //! [`DiffableStr::as_bytes`] for each line.
 use std::ops::Range;
 use std::{fmt, io};
 use crate::algorithms::{Algorithm, DiffOp};
-use crate::text::{Change, ChangeTag, TextDiff};
+use crate::text::{Change, TextDiff};
 use super::DiffableStr;
 /// Display helper for the "no newline at end of file" marker.
 ///
 /// When the wrapped flag is `true` it renders a line break followed by
 /// `\ No newline at end of file`; when it is `false` it renders nothing.
 struct MissingNewlineHint(bool);
 impl fmt::Display for MissingNewlineHint {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let MissingNewlineHint(enabled) = *self;
        if !enabled {
            return Ok(());
        }
        f.write_str("\n\\ No newline at end of file")
    }
 }
 // Pair of `usize` values describing a range of a unified diff hunk.
 // NOTE(review): the meaning of the two fields is not visible here;
 // presumably a start and an end position -- confirm against the constructor.
 #[derive(Copy, Clone, Debug)]
 struct UnifiedDiffHunkRange(usize, usize);
@ -103,7 +114,7 @@ impl fmt::Display for UnifiedHunkHeader {
 /// The [`UnifiedDiff`] type supports both unicode and byte diffs for all
 /// types compatible with [`DiffableStr`].  You can pick between the two
 /// versions by using [`UnifiedDiff::to_string`] or [`UnifiedDiff::to_writer`].
-/// The former uses [`DiffableStr::as_str_lossy`], the latter uses
+/// The former uses [`DiffableStr::to_string_lossy`], the latter uses
 /// [`DiffableStr::as_bytes`] for each line.
 pub struct UnifiedDiff<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
    diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
@ -238,31 +249,17 @@ impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized>
    /// Write the hunk as bytes to the output stream.
    pub fn to_writer<W: io::Write>(&self, mut w: W) -> Result<(), io::Error> {
-        let mut wrote_header = false;
+        for (idx, change) in self.iter_changes().enumerate() {
-        for change in self.iter_changes() {
+            if idx == 0 {
            if !wrote_header {
                writeln!(w, "{}", self.header())?;
                wrote_header = true;
            }
-            write!(
+            write!(w, "{}", change.tag())?;
                w,
                "{}",
                match change.tag() {
                    ChangeTag::Equal => ' ',
                    ChangeTag::Delete => '-',
                    ChangeTag::Insert => '+',
                },
            )?;
            w.write_all(change.value().as_bytes())?;
-            if self.diff.newline_terminated() {
+            if !self.diff.newline_terminated() {
-                write!(w, "\n")?;
+                writeln!(w)?;
            }
            if change.missing_newline() {
-                if self.missing_newline_hint {
+                writeln!(w, "{}", MissingNewlineHint(self.missing_newline_hint))?;
                    writeln!(w, "\n\\ No newline at end of file")?;
                } else {
                    writeln!(w)?;
                }
            }
        }
        Ok(())
@ -273,34 +270,16 @@ impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> fmt::Display
    for UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T>
 {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let nl = if self.diff.newline_terminated() {
+        for (idx, change) in self.iter_changes().enumerate() {
-            ""
+            if idx == 0 {
        } else {
            "\n"
        };
        let mut wrote_header = false;
        for change in self.iter_changes() {
            if !wrote_header {
                writeln!(f, "{}", self.header())?;
                wrote_header = true;
            }
-            write!(
+            write!(f, "{}{}", change.tag(), change.to_string_lossy())?;
-                f,
+            if !self.diff.newline_terminated() {
-                "{}{}{}",
+                writeln!(f)?;
-                match change.tag() {
+            }
                    ChangeTag::Equal => ' ',
                    ChangeTag::Delete => '-',
                    ChangeTag::Insert => '+',
                },
                change.as_str_lossy(),
                nl
            )?;
            if change.missing_newline() {
-                if self.missing_newline_hint {
+                writeln!(f, "{}", MissingNewlineHint(self.missing_newline_hint))?;
                    writeln!(f, "\n\\ No newline at end of file")?;
                } else {
                    writeln!(f)?;
                }
            }
        }
        Ok(())
--- a/src/text/utils.rs
+++ b/src/text/utils.rs
@ -0,0 +1,55 @@
 use std::collections::HashMap;
 use std::hash::Hash;
 use super::DiffableStrRef;
 /// Quick and dirty upper sequence ratio.
 ///
 /// Only the lengths are inspected: the result is twice the shorter length
 /// divided by the combined length, or `1.0` when both sequences are empty.
 pub fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
    let (len1, len2) = (seq1.len(), seq2.len());
    match len1 + len2 {
        0 => 1.0,
        total => 2.0 * len1.min(len2) as f32 / total as f32,
    }
 }
 /// Order-insensitive estimate of how similar two sequences can be at best.
 ///
 /// Following the approach of Python's difflib, both sequences are treated
 /// as multisets: the wrapped map records how many times each item occurs
 /// in the reference sequence handed to [`QuickSeqRatio::new`].  Matching
 /// items without regard to their position can only over-count, so the
 /// value produced by [`QuickSeqRatio::calc`] is an upper bound for the
 /// ratio.  Internal helper for [`get_close_matches`].
 pub struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>);
 impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> {
    /// Tallies the occurrences of every item in `seq`.
    pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> {
        let mut tally = HashMap::new();
        for item in seq.iter().copied() {
            let slot = tally.entry(item).or_insert(0);
            *slot += 1;
        }
        QuickSeqRatio(tally)
    }
    /// Computes the upper-bound ratio between the reference items and `seq`.
    ///
    /// The denominator is the number of distinct reference items plus the
    /// length of `seq`; when that sum is zero the ratio is `1.0`.
    pub fn calc(&self, seq: &[&T]) -> f32 {
        let total = self.0.len() + seq.len();
        if total == 0 {
            return 1.0;
        }
        // Remaining budget per item, seeded lazily from the reference
        // counts the first time an item shows up in `seq`.
        let mut remaining = HashMap::new();
        let mut hits = 0;
        for &item in seq {
            let left = match remaining.get(&item) {
                Some(count) => *count,
                None => self.0.get(&item).copied().unwrap_or(0),
            };
            remaining.insert(item, left - 1);
            if left > 0 {
                hits += 1;
            }
        }
        2.0 * hits as f32 / total as f32
    }
 }