diff --git a/src/algorithms/capture.rs b/src/algorithms/capture.rs index 971c10b..fa6a067 100644 --- a/src/algorithms/capture.rs +++ b/src/algorithms/capture.rs @@ -1,96 +1,6 @@ use std::convert::Infallible; -use crate::algorithms::hook::DiffHook; -use crate::DiffOp; - -/// Isolate change clusters by eliminating ranges with no changes. -/// -/// This will leave holes behind in long periods of equal ranges so that -/// you can build things like unified diffs. -pub fn group_diff_ops(mut ops: Vec, n: usize) -> Vec> { - if ops.is_empty() { - return vec![]; - } - - let mut pending_group = Vec::new(); - let mut rv = Vec::new(); - - if let Some(DiffOp::Equal { - old_index, - new_index, - len, - }) = ops.first_mut() - { - let offset = (*len).saturating_sub(n); - *old_index += offset; - *new_index += offset; - *len -= offset; - } - - if let Some(DiffOp::Equal { len, .. }) = ops.last_mut() { - *len -= (*len).saturating_sub(n); - } - - for op in ops.into_iter() { - if let DiffOp::Equal { - old_index, - new_index, - len, - } = op - { - // End the current group and start a new one whenever - // there is a large range with no changes. - if len > n * 2 { - pending_group.push(DiffOp::Equal { - old_index, - new_index, - len: n, - }); - rv.push(pending_group); - let offset = len.saturating_sub(n); - pending_group = vec![DiffOp::Equal { - old_index: old_index + offset, - new_index: new_index + offset, - len: len - offset, - }]; - continue; - } - } - pending_group.push(op); - } - - match &pending_group[..] { - &[] | &[DiffOp::Equal { .. }] => {} - _ => rv.push(pending_group), - } - - rv -} - -/// Return a measure of similarity in the range `0..=1`. -/// -/// A ratio of `1.0` means the two sequences are a complete match, a -/// ratio of `0.0` would indicate completely distinct sequences. The input -/// is the sequence of diff operations and the length of the old and new -/// sequence. 
-pub fn get_diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 { - let matches = ops - .iter() - .map(|op| { - if let DiffOp::Equal { len, .. } = *op { - len - } else { - 0 - } - }) - .sum::(); - let len = old_len + new_len; - if len == 0 { - 1.0 - } else { - 2.0 * matches as f32 / len as f32 - } -} +use crate::{group_diff_ops, DiffHook, DiffOp}; /// A [`DiffHook`] that captures all diff operations. #[derive(Default, Clone)] diff --git a/src/algorithms/mod.rs b/src/algorithms/mod.rs index 2c4c1eb..35b3347 100644 --- a/src/algorithms/mod.rs +++ b/src/algorithms/mod.rs @@ -6,108 +6,33 @@ //! direct access to these algorithms can be useful in some cases. //! //! All these algorithms provide a `diff` function which takes two indexable -//! objects (for instance slices) and a [`DiffHook`]. As the diff is generated -//! the diff hook is invoked. Note that the diff hook does not get access to -//! the actual values but only the indexes. This is why the diff hook is not -//! used outside of the raw algorithm implementations as for most situations -//! access to the values is useful of required. +//! objects (for instance slices) and a [`DiffHook`](crate::DiffHook). As the +//! diff is generated the diff hook is invoked. Note that the diff hook does +//! not get access to the actual values but only the indexes. This is why the +//! diff hook is not used outside of the raw algorithm implementations as for +//! most situations access to the values is useful or required. //! -//! Most of the crate operates on the [`Algorithm`] enum which abstracts over -//! the different algorithms. +//! A more generic interface for these algorithms is available on the toplevel +//! module. //! //! # Example //! //! This is a simple example that shows how you can calculate the difference -//! between two sequences and capture the [`DiffOp`]s into a vector. +//! between two sequences and capture the ops into a vector. //! //! ```rust -//! use similar::Algorithm; -//! 
use similar::algorithms::capture_diff_slices; +//! use similar::{Algorithm, capture_diff_slices}; //! //! let a = vec![1, 2, 3, 4, 5]; //! let b = vec![1, 2, 3, 4, 7]; //! let ops = capture_diff_slices(Algorithm::Myers, &a, &b); //! ``` -// general traits and utilities mod capture; -mod hook; mod replace; -use std::hash::Hash; -use std::ops::{Index, Range}; - -use crate::types::{Algorithm, DiffOp}; - -pub use capture::{get_diff_ratio, group_diff_ops, Capture}; -pub use hook::DiffHook; +pub use capture::Capture; pub use replace::Replace; -// actual diffing algorithms pub mod myers; pub mod patience; - -/// Creates a diff between old and new with the given algorithm. -/// -/// Diffs `old`, between indices `old_range` and `new` between indices `new_range`. -pub fn diff( - alg: Algorithm, - d: &mut D, - old: &Old, - old_range: Range, - new: &New, - new_range: Range, -) -> Result<(), D::Error> -where - Old: Index + ?Sized, - New: Index + ?Sized, - D: DiffHook, - Old::Output: Hash + Eq + Ord, - New::Output: PartialEq + Hash + Eq + Ord, -{ - match alg { - Algorithm::Myers => myers::diff(d, old, old_range, new, new_range), - Algorithm::Patience => patience::diff(d, old, old_range, new, new_range), - } -} - -/// Shortcut for diffing slices with a specific algorithm. -pub fn diff_slices(alg: Algorithm, d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error> -where - D: DiffHook, - T: Eq + Hash + Ord, -{ - diff(alg, d, old, 0..old.len(), new, 0..new.len()) -} - -/// Creates a diff between old and new with the given algorithm capturing the ops. -/// -/// This is like [`diff`] but instead of using an arbitrary hook this will -/// always use [`Replace`] + [`Capture`] and return the captured [`DiffOp`]s. 
-pub fn capture_diff( - alg: Algorithm, - old: &Old, - old_range: Range, - new: &New, - new_range: Range, -) -> Vec -where - Old: Index + ?Sized, - New: Index + ?Sized, - Old::Output: Hash + Eq + Ord, - New::Output: PartialEq + Hash + Eq + Ord, -{ - let mut d = Replace::new(Capture::new()); - diff(alg, &mut d, old, old_range, new, new_range).unwrap(); - d.into_inner().into_ops() -} - -/// Creates a diff between old and new with the given algorithm capturing the ops. -pub fn capture_diff_slices(alg: Algorithm, old: &[T], new: &[T]) -> Vec -where - T: Eq + Hash + Ord, -{ - let mut d = Replace::new(Capture::new()); - diff_slices(alg, &mut d, old, new).unwrap(); - d.into_inner().into_ops() -} diff --git a/src/algorithms/myers.rs b/src/algorithms/myers.rs index 2b5700c..23bb082 100644 --- a/src/algorithms/myers.rs +++ b/src/algorithms/myers.rs @@ -9,7 +9,7 @@ use std::cmp::{max, min}; use std::ops::{Index, Range}; -use crate::algorithms::DiffHook; +use crate::DiffHook; /// Myers' diff algorithm. /// diff --git a/src/algorithms/patience.rs b/src/algorithms/patience.rs index b70b8b2..269319e 100644 --- a/src/algorithms/patience.rs +++ b/src/algorithms/patience.rs @@ -10,7 +10,8 @@ use std::collections::HashMap; use std::hash::Hash; use std::ops::{Index, Range}; -use crate::algorithms::{myers, DiffHook, Replace}; +use crate::algorithms::{myers, Replace}; +use crate::DiffHook; /// Patience diff algorithm. /// diff --git a/src/algorithms/replace.rs b/src/algorithms/replace.rs index 9e70047..32485f7 100644 --- a/src/algorithms/replace.rs +++ b/src/algorithms/replace.rs @@ -1,4 +1,4 @@ -use crate::algorithms::DiffHook; +use crate::DiffHook; /// A [`DiffHook`] that combines deletions and insertions to give blocks /// of maximal length, and replacements when appropriate. 
diff --git a/src/common.rs b/src/common.rs new file mode 100644 index 0000000..1ba3fb7 --- /dev/null +++ b/src/common.rs @@ -0,0 +1,159 @@ +use std::hash::Hash; +use std::ops::{Index, Range}; + +use crate::algorithms::{myers, patience, Capture, Replace}; +use crate::{Algorithm, DiffHook, DiffOp}; + +/// Creates a diff between old and new with the given algorithm. +/// +/// Diffs `old`, between indices `old_range` and `new` between indices `new_range`. +pub fn diff( + alg: Algorithm, + d: &mut D, + old: &Old, + old_range: Range, + new: &New, + new_range: Range, +) -> Result<(), D::Error> +where + Old: Index + ?Sized, + New: Index + ?Sized, + D: DiffHook, + Old::Output: Hash + Eq + Ord, + New::Output: PartialEq + Hash + Eq + Ord, +{ + match alg { + Algorithm::Myers => myers::diff(d, old, old_range, new, new_range), + Algorithm::Patience => patience::diff(d, old, old_range, new, new_range), + } +} + +/// Shortcut for diffing slices with a specific algorithm. +pub fn diff_slices(alg: Algorithm, d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error> +where + D: DiffHook, + T: Eq + Hash + Ord, +{ + diff(alg, d, old, 0..old.len(), new, 0..new.len()) +} + +/// Creates a diff between old and new with the given algorithm capturing the ops. +/// +/// This is like [`diff`] but instead of using an arbitrary hook this will +/// always use [`Replace`] + [`Capture`] and return the captured [`DiffOp`]s. +pub fn capture_diff( + alg: Algorithm, + old: &Old, + old_range: Range, + new: &New, + new_range: Range, +) -> Vec +where + Old: Index + ?Sized, + New: Index + ?Sized, + Old::Output: Hash + Eq + Ord, + New::Output: PartialEq + Hash + Eq + Ord, +{ + let mut d = Replace::new(Capture::new()); + diff(alg, &mut d, old, old_range, new, new_range).unwrap(); + d.into_inner().into_ops() +} + +/// Creates a diff between old and new with the given algorithm capturing the ops. 
+pub fn capture_diff_slices(alg: Algorithm, old: &[T], new: &[T]) -> Vec +where + T: Eq + Hash + Ord, +{ + let mut d = Replace::new(Capture::new()); + diff_slices(alg, &mut d, old, new).unwrap(); + d.into_inner().into_ops() +} + +/// Return a measure of similarity in the range `0..=1`. +/// +/// A ratio of `1.0` means the two sequences are a complete match, a +/// ratio of `0.0` would indicate completely distinct sequences. The input +/// is the sequence of diff operations and the length of the old and new +/// sequence. +pub fn get_diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 { + let matches = ops + .iter() + .map(|op| { + if let DiffOp::Equal { len, .. } = *op { + len + } else { + 0 + } + }) + .sum::(); + let len = old_len + new_len; + if len == 0 { + 1.0 + } else { + 2.0 * matches as f32 / len as f32 + } +} + +/// Isolate change clusters by eliminating ranges with no changes. +/// +/// This will leave holes behind in long periods of equal ranges so that +/// you can build things like unified diffs. +pub fn group_diff_ops(mut ops: Vec, n: usize) -> Vec> { + if ops.is_empty() { + return vec![]; + } + + let mut pending_group = Vec::new(); + let mut rv = Vec::new(); + + if let Some(DiffOp::Equal { + old_index, + new_index, + len, + }) = ops.first_mut() + { + let offset = (*len).saturating_sub(n); + *old_index += offset; + *new_index += offset; + *len -= offset; + } + + if let Some(DiffOp::Equal { len, .. }) = ops.last_mut() { + *len -= (*len).saturating_sub(n); + } + + for op in ops.into_iter() { + if let DiffOp::Equal { + old_index, + new_index, + len, + } = op + { + // End the current group and start a new one whenever + // there is a large range with no changes. 
+ if len > n * 2 { + pending_group.push(DiffOp::Equal { + old_index, + new_index, + len: n, + }); + rv.push(pending_group); + let offset = len.saturating_sub(n); + pending_group = vec![DiffOp::Equal { + old_index: old_index + offset, + new_index: new_index + offset, + len: len - offset, + }]; + continue; + } + } + pending_group.push(op); + } + + match &pending_group[..] { + &[] | &[DiffOp::Equal { .. }] => {} + _ => rv.push(pending_group), + } + + rv +} diff --git a/src/algorithms/hook.rs b/src/hook.rs similarity index 100% rename from src/algorithms/hook.rs rename to src/hook.rs diff --git a/src/lib.rs b/src/lib.rs index 17e80dd..b9f1150 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -56,5 +56,9 @@ pub mod algorithms; pub mod text; +mod common; +mod hook; mod types; +pub use self::common::*; +pub use self::hook::*; pub use self::types::*; diff --git a/src/text/inline.rs b/src/text/inline.rs index d518f0e..a56c3e4 100644 --- a/src/text/inline.rs +++ b/src/text/inline.rs @@ -2,9 +2,9 @@ use std::borrow::Cow; use std::fmt; -use crate::algorithms::{capture_diff, get_diff_ratio}; use crate::text::{DiffableStr, TextDiff}; use crate::types::{Algorithm, Change, ChangeTag, DiffOp, DiffTag}; +use crate::{capture_diff, get_diff_ratio}; use std::ops::Index; @@ -139,8 +139,8 @@ impl<'s, T: DiffableStr + ?Sized> InlineChange<'s, T> { .map(|(emphasized, raw_value)| (*emphasized, raw_value.to_string_lossy())) } - /// Returns `true` if this change needs to be followed up by a - /// missing newline. + /// Returns `true` if this change does not end in a newline and must be + /// followed up by one if line based diffs are used. 
pub fn missing_newline(&self) -> bool { !self.values.last().map_or(true, |x| x.1.ends_with_newline()) } diff --git a/src/text/mod.rs b/src/text/mod.rs index b34b616..d6f4a40 100644 --- a/src/text/mod.rs +++ b/src/text/mod.rs @@ -101,8 +101,7 @@ pub use self::inline::InlineChange; pub use self::udiff::{unified_diff, UnifiedDiff, UnifiedDiffHunk, UnifiedHunkHeader}; use self::utils::{upper_seq_ratio, QuickSeqRatio}; -use crate::algorithms::{capture_diff_slices, get_diff_ratio, group_diff_ops}; -use crate::types::{Algorithm, Change, DiffOp}; +use crate::{capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, Change, DiffOp}; /// A builder type config for more complex uses of [`TextDiff`]. #[derive(Clone, Debug)] diff --git a/src/types.rs b/src/types.rs index ca258c9..c010275 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,7 +1,7 @@ use std::fmt; use std::ops::{Index, Range}; -use crate::algorithms::DiffHook; +use crate::hook::DiffHook; /// An enum representing a diffing algorithm. #[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] @@ -359,8 +359,8 @@ mod text_additions { T::to_string_lossy(self.value) } - /// Returns `true` if this change needs to be followed up by a - /// missing newline. + /// Returns `true` if this change does not end in a newline and must be + /// followed up by one if line based diffs are used. /// /// The [`std::fmt::Display`] implementation of [`Change`] will automatically /// insert a newline after the value if this is true.