From 1991955c523ab1ef9017cb0de33861ccf4b17071 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Tue, 2 Feb 2021 22:44:52 +0100 Subject: [PATCH] Restructure crate layout --- README.md | 2 + examples/terminal-inline.rs | 3 +- examples/terminal.rs | 3 +- src/algorithms/capture.rs | 156 +------------------- src/algorithms/mod.rs | 34 ++--- src/lib.rs | 17 ++- src/text/inline.rs | 5 +- src/text/mod.rs | 104 +------------- src/text/udiff.rs | 4 +- src/types.rs | 278 ++++++++++++++++++++++++++++++++++++ 10 files changed, 323 insertions(+), 283 deletions(-) create mode 100644 src/types.rs diff --git a/README.md b/README.md index edbbb21..3fdd1a2 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,9 @@ fn main() { * Myer's diff * Patience diff +* Diffing on arbitrary comparable sequences * Line, word, character and grapheme level diffing +* Text and Byte diffing * Unified diff generation ## License and Links diff --git a/examples/terminal-inline.rs b/examples/terminal-inline.rs index 900fdd5..997cae8 100644 --- a/examples/terminal-inline.rs +++ b/examples/terminal-inline.rs @@ -3,7 +3,8 @@ use std::fs::read; use std::process::exit; use console::{style, Style}; -use similar::text::{ChangeTag, TextDiff}; +use similar::text::TextDiff; +use similar::ChangeTag; struct Line(Option); diff --git a/examples/terminal.rs b/examples/terminal.rs index 26cc49a..03ad4ef 100644 --- a/examples/terminal.rs +++ b/examples/terminal.rs @@ -1,5 +1,6 @@ use console::Style; -use similar::text::{ChangeTag, TextDiff}; +use similar::text::TextDiff; +use similar::ChangeTag; fn main() { let diff = TextDiff::from_lines( diff --git a/src/algorithms/capture.rs b/src/algorithms/capture.rs index e19e192..971c10b 100644 --- a/src/algorithms/capture.rs +++ b/src/algorithms/capture.rs @@ -1,159 +1,7 @@ -use crate::algorithms::hook::DiffHook; use std::convert::Infallible; -use std::ops::Range; -/// Utility enum to capture a diff operation. -/// -/// This is used by [`Capture`](crate::algorithms::Capture). -#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] -pub enum DiffOp { - /// A segment is equal (see [`DiffHook::equal`]) - Equal { - /// The starting index in the old sequence. - old_index: usize, - /// The starting index in the new sequence. - new_index: usize, - /// The length of the segment. - len: usize, - }, - /// A segment was deleted (see [`DiffHook::delete`]) - Delete { - /// The starting index in the old sequence. - old_index: usize, - /// The length of the old segment. - old_len: usize, - /// The starting index in the new sequence. - new_index: usize, - }, - /// A segment was inserted (see [`DiffHook::insert`]) - Insert { - /// The starting index in the old sequence. - old_index: usize, - /// The starting index in the new sequence. - new_index: usize, - /// The length of the new segment. - new_len: usize, - }, - /// A segment was replaced (see [`DiffHook::replace`]) - Replace { - /// The starting index in the old sequence. - old_index: usize, - /// The length of the old segment. - old_len: usize, - /// The starting index in the new sequence. - new_index: usize, - /// The length of the new segment. - new_len: usize, - }, -} - -/// The tag of a diff operation. -#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] -pub enum DiffTag { - /// The diff op encodes an equal segment. - Equal, - /// The diff op encodes a deleted segment. - Delete, - /// The diff op encodes an inserted segment. - Insert, - /// The diff op encodes a replaced segment. - Replace, -} - -impl DiffOp { - /// Returns the tag of the operation. - pub fn tag(self) -> DiffTag { - self.as_tag_tuple().0 - } - - /// Returns the old range. - pub fn old_range(&self) -> Range { - self.as_tag_tuple().1 - } - - /// Returns the new range. - pub fn new_range(&self) -> Range { - self.as_tag_tuple().2 - } - - /// Transform the op into a tuple of diff tag and ranges. - /// - /// This is useful when operating on slices. The returned format is - /// `(tag, i1..i2, j1..j2)`: - /// - /// * `Replace`: `a[i1..i2]` should be replaced by `b[j1..j2]` - /// * `Delete`: `a[i1..i2]` should be deleted (`j1 == j2` in this case). - /// * `Insert`: `b[j1..j2]` should be inserted at `a[i1..i2]` (`i1 == i2` in this case). - /// * `Equal`: `a[i1..i2]` is equal to `b[j1..j2]`. - pub fn as_tag_tuple(&self) -> (DiffTag, Range, Range) { - match *self { - DiffOp::Equal { - old_index, - new_index, - len, - } => ( - DiffTag::Equal, - old_index..old_index + len, - new_index..new_index + len, - ), - DiffOp::Delete { - old_index, - new_index, - old_len, - } => ( - DiffTag::Delete, - old_index..old_index + old_len, - new_index..new_index, - ), - DiffOp::Insert { - old_index, - new_index, - new_len, - } => ( - DiffTag::Insert, - old_index..old_index, - new_index..new_index + new_len, - ), - DiffOp::Replace { - old_index, - old_len, - new_index, - new_len, - } => ( - DiffTag::Replace, - old_index..old_index + old_len, - new_index..new_index + new_len, - ), - } - } - - /// Apply this operation to a diff hook. - pub fn apply_to_hook(&self, d: &mut D) -> Result<(), D::Error> { - match *self { - DiffOp::Equal { - old_index, - new_index, - len, - } => d.equal(old_index, new_index, len), - DiffOp::Delete { - old_index, - old_len, - new_index, - } => d.delete(old_index, old_len, new_index), - DiffOp::Insert { - old_index, - new_index, - new_len, - } => d.insert(old_index, new_index, new_len), - DiffOp::Replace { - old_index, - old_len, - new_index, - new_len, - } => d.replace(old_index, old_len, new_index, new_len), - } - } -} +use crate::algorithms::hook::DiffHook; +use crate::DiffOp; /// Isolate change clusters by eliminating ranges with no changes. /// diff --git a/src/algorithms/mod.rs b/src/algorithms/mod.rs index 4cd739f..2c4c1eb 100644 --- a/src/algorithms/mod.rs +++ b/src/algorithms/mod.rs @@ -14,6 +14,20 @@ //! //! Most of the crate operates on the [`Algorithm`] enum which abstracts over //! the different algorithms. +//! +//! # Example +//! +//! This is a simple example that shows how you can calculate the difference +//! between two sequences and capture the [`DiffOp`]s into a vector. +//! +//! ```rust +//! use similar::Algorithm; +//! use similar::algorithms::capture_diff_slices; +//! +//! let a = vec![1, 2, 3, 4, 5]; +//! let b = vec![1, 2, 3, 4, 7]; +//! let ops = capture_diff_slices(Algorithm::Myers, &a, &b); +//! ``` // general traits and utilities mod capture; @@ -23,7 +37,9 @@ mod replace; use std::hash::Hash; use std::ops::{Index, Range}; -pub use capture::{get_diff_ratio, group_diff_ops, Capture, DiffOp, DiffTag}; +use crate::types::{Algorithm, DiffOp}; + +pub use capture::{get_diff_ratio, group_diff_ops, Capture}; pub use hook::DiffHook; pub use replace::Replace; @@ -31,22 +47,6 @@ pub use replace::Replace; pub mod myers; pub mod patience; -/// An enum representing a diffing algorithm. -#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] -pub enum Algorithm { - /// Picks the myers algorithm from [`myers`] - Myers, - /// Picks the patience algorithm from [`patience`] - Patience, -} - -impl Default for Algorithm { - /// Returns the default algorithm ([`Algorithm::Myers`]). - fn default() -> Algorithm { - Algorithm::Myers - } -} - /// Creates a diff between old and new with the given algorithm. /// /// Diffs `old`, between indices `old_range` and `new` between indices `new_range`. diff --git a/src/lib.rs b/src/lib.rs index ae2d1a4..17e80dd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,8 @@ //! //! ```rust //! # #[cfg(feature = "text")] { -//! use similar::text::{ChangeTag, TextDiff}; +//! use similar::ChangeTag; +//! use similar::text::TextDiff; //! //! let diff = TextDiff::from_lines( //! "Hello World\nThis is the second line.\nThis is the third.", @@ -42,12 +43,18 @@ //! If the crate is used without default features it's removed. //! * `unicode`: when this feature is enabled the text diffing functionality //! gains the ability to diff on a grapheme instead of character level. This -//! is particularly useful when working with text containing emojis. -//! * `bytes`: when this feature is enabled the text module gains support for -//! working with byte slices. +//! is particularly useful when working with text containing emojis. This +//! pulls in some relatively complex dependencies for working with the unicode +//! database. +//! * `bytes`: this feature adds support for working with byte slices in the +//! [`text`] module in addition to unicode strings. This pulls in the +//! [`bstr`] dependency. //! * `inline`: this feature gives access to additional functionality of the -//! `text` module to provide inline information about which values changed +//! [`text`] module to provide inline information about which values changed //! in a line diff. This currently also enables the `unicode` feature. #![warn(missing_docs)] pub mod algorithms; pub mod text; + +mod types; +pub use self::types::*; diff --git a/src/text/inline.rs b/src/text/inline.rs index 0540cb4..4ed02ac 100644 --- a/src/text/inline.rs +++ b/src/text/inline.rs @@ -2,8 +2,9 @@ use std::borrow::Cow; use std::fmt; -use crate::algorithms::{capture_diff, get_diff_ratio, Algorithm, DiffOp, DiffTag}; -use crate::text::{Change, ChangeTag, DiffableStr, TextDiff}; +use crate::algorithms::{capture_diff, get_diff_ratio}; +use crate::text::{DiffableStr, TextDiff}; +use crate::types::{Algorithm, Change, ChangeTag, DiffOp, DiffTag}; use std::ops::Index; diff --git a/src/text/mod.rs b/src/text/mod.rs index 7e774a2..b8b19aa 100644 --- a/src/text/mod.rs +++ b/src/text/mod.rs @@ -43,7 +43,7 @@ //! Because very commonly two compared sequences will largely match this module //! splits it's functionality into two layers. The first is inherited from the //! general [`algorithms`](crate::algorithms) module: changes are encoded as -//! [diff operations](crate::algorithms::DiffOp). These are ranges of the +//! [diff operations](crate::DiffOp). These are ranges of the //! differences by index in the source sequence. Because this can be cumbersome //! to work with a separate method [`TextDiff::iter_changes`] is provided which //! expands all the changes on an item by item level encoded in an operation. @@ -88,8 +88,6 @@ use std::borrow::Cow; use std::cmp::Reverse; use std::collections::BinaryHeap; -use std::fmt; -use std::hash::Hash; mod abstraction; #[cfg(feature = "inline")] @@ -103,9 +101,8 @@ pub use self::inline::InlineChange; pub use self::udiff::{unified_diff, UnifiedDiff, UnifiedDiffHunk, UnifiedHunkHeader}; use self::utils::{upper_seq_ratio, QuickSeqRatio}; -use crate::algorithms::{ - capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, DiffOp, DiffTag, -}; +use crate::algorithms::{capture_diff_slices, get_diff_ratio, group_diff_ops}; +use crate::types::{Algorithm, Change, ChangeTag, DiffOp, DiffTag}; /// A builder type config for more complex uses of [`TextDiff`]. #[derive(Clone, Debug)] @@ -250,101 +247,6 @@ impl TextDiffConfig { } } -/// The tag of a change. -#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] -pub enum ChangeTag { - /// The change indicates equality (not a change) - Equal, - /// The change indicates deleted text. - Delete, - /// The change indicates inserted text. - Insert, -} - -impl fmt::Display for ChangeTag { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "{}", - match &self { - ChangeTag::Equal => ' ', - ChangeTag::Delete => '-', - ChangeTag::Insert => '+', - } - ) - } -} - -/// Represents the expanded textual change. -/// -/// This type is returned from the [`TextDiff::iter_changes`] method. It -/// exists so that it's more convenient to work with textual differences as -/// the underlying [`DiffOp`] does not know anything about strings. -#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] -pub struct Change<'s, T: DiffableStr + ?Sized> { - tag: ChangeTag, - old_index: Option, - new_index: Option, - value: &'s T, - missing_newline: bool, -} - -impl<'s, T: DiffableStr + ?Sized> fmt::Display for Change<'s, T> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "{}{}", - self.to_string_lossy(), - if self.missing_newline { "\n" } else { "" } - ) - } -} - -impl<'s, T: DiffableStr + ?Sized> Change<'s, T> { - /// Returns the change tag. - pub fn tag(&self) -> ChangeTag { - self.tag - } - - /// Returns the old index if available. - pub fn old_index(&self) -> Option { - self.old_index - } - - /// Returns the new index if available. - pub fn new_index(&self) -> Option { - self.new_index - } - - /// Returns the underlying changed value. - /// - /// Depending on the type of the underlying [`DiffableStr`] this value is - /// more or less useful. If you always want to have a utf-8 string it's - /// best to use the [`Change::as_str`] and [`Change::to_string_lossy`] methods. - pub fn value(&self) -> &'s T { - self.value - } - - /// Returns the value as string if it is utf-8. - pub fn as_str(&self) -> Option<&'s str> { - T::as_str(self.value) - } - - /// Returns the value (lossy) decoded as utf-8 string. - pub fn to_string_lossy(&self) -> Cow<'s, str> { - T::to_string_lossy(self.value) - } - - /// Returns `true` if this change needs to be followed up by a - /// missing newline. - /// - /// The [`std::fmt::Display`] implementation of [`Change`] will automatically - /// insert a newline after the value if this is true. - pub fn missing_newline(&self) -> bool { - self.missing_newline - } -} - /// Captures diff op codes for textual diffs pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> { old: Cow<'bufs, [&'old T]>, diff --git a/src/text/udiff.rs b/src/text/udiff.rs index 8c2f3b5..3c2d50e 100644 --- a/src/text/udiff.rs +++ b/src/text/udiff.rs @@ -25,8 +25,8 @@ use std::ops::Range; use std::{fmt, io}; -use crate::algorithms::{Algorithm, DiffOp}; -use crate::text::{Change, TextDiff}; +use crate::text::TextDiff; +use crate::types::{Algorithm, Change, DiffOp}; use super::DiffableStr; diff --git a/src/types.rs b/src/types.rs new file mode 100644 index 0000000..23c1cc3 --- /dev/null +++ b/src/types.rs @@ -0,0 +1,278 @@ +use std::fmt; +use std::ops::Range; + +use crate::algorithms::DiffHook; + +/// An enum representing a diffing algorithm. +#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub enum Algorithm { + /// Picks the myers algorithm from [`crate::algorithms::myers`] + Myers, + /// Picks the patience algorithm from [`crate::algorithms::patience`] + Patience, +} + +impl Default for Algorithm { + /// Returns the default algorithm ([`Algorithm::Myers`]). + fn default() -> Algorithm { + Algorithm::Myers + } +} + +/// The tag of a change. +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] +pub enum ChangeTag { + /// The change indicates equality (not a change) + Equal, + /// The change indicates deleted text. + Delete, + /// The change indicates inserted text. + Insert, +} + +impl fmt::Display for ChangeTag { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}", + match &self { + ChangeTag::Equal => ' ', + ChangeTag::Delete => '-', + ChangeTag::Insert => '+', + } + ) + } +} + +/// Represents the expanded textual change. +/// +/// This type is returned from the [`crate::text::TextDiff::iter_changes`] method. +/// It exists so that it's more convenient to work with textual differences as +/// the underlying [`DiffOp`] does not know anything about strings. +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] +pub struct Change<'s, T: ?Sized> { + pub(crate) tag: ChangeTag, + pub(crate) old_index: Option, + pub(crate) new_index: Option, + pub(crate) value: &'s T, + pub(crate) missing_newline: bool, +} + +impl<'s, T: ?Sized> Change<'s, T> { + /// Returns the change tag. + pub fn tag(&self) -> ChangeTag { + self.tag + } + + /// Returns the old index if available. + pub fn old_index(&self) -> Option { + self.old_index + } + + /// Returns the new index if available. + pub fn new_index(&self) -> Option { + self.new_index + } + + /// Returns the underlying changed value. + /// + /// Depending on the type of the underlying [`crate::text::DiffableStr`] + /// this value is more or less useful. If you always want to have a utf-8 + /// string it's best to use the [`Change::as_str`] and + /// [`Change::to_string_lossy`] methods. + pub fn value(&self) -> &'s T { + self.value + } +} + +/// Utility enum to capture a diff operation. +/// +/// This is used by [`Capture`](crate::algorithms::Capture). +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] +pub enum DiffOp { + /// A segment is equal (see [`DiffHook::equal`]) + Equal { + /// The starting index in the old sequence. + old_index: usize, + /// The starting index in the new sequence. + new_index: usize, + /// The length of the segment. + len: usize, + }, + /// A segment was deleted (see [`DiffHook::delete`]) + Delete { + /// The starting index in the old sequence. + old_index: usize, + /// The length of the old segment. + old_len: usize, + /// The starting index in the new sequence. + new_index: usize, + }, + /// A segment was inserted (see [`DiffHook::insert`]) + Insert { + /// The starting index in the old sequence. + old_index: usize, + /// The starting index in the new sequence. + new_index: usize, + /// The length of the new segment. + new_len: usize, + }, + /// A segment was replaced (see [`DiffHook::replace`]) + Replace { + /// The starting index in the old sequence. + old_index: usize, + /// The length of the old segment. + old_len: usize, + /// The starting index in the new sequence. + new_index: usize, + /// The length of the new segment. + new_len: usize, + }, +} + +/// The tag of a diff operation. +#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)] +pub enum DiffTag { + /// The diff op encodes an equal segment. + Equal, + /// The diff op encodes a deleted segment. + Delete, + /// The diff op encodes an inserted segment. + Insert, + /// The diff op encodes a replaced segment. + Replace, +} + +impl DiffOp { + /// Returns the tag of the operation. + pub fn tag(self) -> DiffTag { + self.as_tag_tuple().0 + } + + /// Returns the old range. + pub fn old_range(&self) -> Range { + self.as_tag_tuple().1 + } + + /// Returns the new range. + pub fn new_range(&self) -> Range { + self.as_tag_tuple().2 + } + + /// Transform the op into a tuple of diff tag and ranges. + /// + /// This is useful when operating on slices. The returned format is + /// `(tag, i1..i2, j1..j2)`: + /// + /// * `Replace`: `a[i1..i2]` should be replaced by `b[j1..j2]` + /// * `Delete`: `a[i1..i2]` should be deleted (`j1 == j2` in this case). + /// * `Insert`: `b[j1..j2]` should be inserted at `a[i1..i2]` (`i1 == i2` in this case). + /// * `Equal`: `a[i1..i2]` is equal to `b[j1..j2]`. + pub fn as_tag_tuple(&self) -> (DiffTag, Range, Range) { + match *self { + DiffOp::Equal { + old_index, + new_index, + len, + } => ( + DiffTag::Equal, + old_index..old_index + len, + new_index..new_index + len, + ), + DiffOp::Delete { + old_index, + new_index, + old_len, + } => ( + DiffTag::Delete, + old_index..old_index + old_len, + new_index..new_index, + ), + DiffOp::Insert { + old_index, + new_index, + new_len, + } => ( + DiffTag::Insert, + old_index..old_index, + new_index..new_index + new_len, + ), + DiffOp::Replace { + old_index, + old_len, + new_index, + new_len, + } => ( + DiffTag::Replace, + old_index..old_index + old_len, + new_index..new_index + new_len, + ), + } + } + + /// Apply this operation to a diff hook. + pub fn apply_to_hook(&self, d: &mut D) -> Result<(), D::Error> { + match *self { + DiffOp::Equal { + old_index, + new_index, + len, + } => d.equal(old_index, new_index, len), + DiffOp::Delete { + old_index, + old_len, + new_index, + } => d.delete(old_index, old_len, new_index), + DiffOp::Insert { + old_index, + new_index, + new_len, + } => d.insert(old_index, new_index, new_len), + DiffOp::Replace { + old_index, + old_len, + new_index, + new_len, + } => d.replace(old_index, old_len, new_index, new_len), + } + } +} + +#[cfg(feature = "text")] +mod text_additions { + use super::*; + use crate::text::DiffableStr; + use std::borrow::Cow; + + impl<'s, T: DiffableStr + ?Sized> Change<'s, T> { + /// Returns the value as string if it is utf-8. + pub fn as_str(&self) -> Option<&'s str> { + T::as_str(self.value) + } + + /// Returns the value (lossy) decoded as utf-8 string. + pub fn to_string_lossy(&self) -> Cow<'s, str> { + T::to_string_lossy(self.value) + } + + /// Returns `true` if this change needs to be followed up by a + /// missing newline. + /// + /// The [`std::fmt::Display`] implementation of [`Change`] will automatically + /// insert a newline after the value if this is true. + pub fn missing_newline(&self) -> bool { + self.missing_newline + } + } + + impl<'s, T: DiffableStr + ?Sized> fmt::Display for Change<'s, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "{}{}", + self.to_string_lossy(), + if self.missing_newline { "\n" } else { "" } + ) + } + } +}