Bigger refactorings in module layout

This commit is contained in:
Armin Ronacher 2021-02-03 00:05:54 +01:00
parent cab87515bd
commit 4770479fd7
11 changed files with 185 additions and 187 deletions

View file

@ -1,96 +1,6 @@
use std::convert::Infallible;
use crate::algorithms::hook::DiffHook;
use crate::DiffOp;
/// Isolate change clusters by eliminating ranges with no changes.
///
/// This will leave holes behind in long periods of equal ranges so that
/// you can build things like unified diffs.
///
/// `n` is the number of equal items that are kept on each side of a change:
///
/// * a leading [`DiffOp::Equal`] is shortened to its last `n` items,
/// * a trailing [`DiffOp::Equal`] is shortened to its first `n` items,
/// * any other [`DiffOp::Equal`] longer than `2 * n` is split: its first `n`
///   items close the current group and its last `n` items open the next one.
///
/// A final group that is empty or consists of a single equal op is dropped,
/// so an input without any changes yields an empty result.
pub fn group_diff_ops(mut ops: Vec<DiffOp>, n: usize) -> Vec<Vec<DiffOp>> {
    if ops.is_empty() {
        return vec![];
    }

    let mut pending_group = Vec::new();
    let mut rv = Vec::new();

    // Leading context: keep only the last `n` equal items, which means the
    // start indexes have to move forward by the number of dropped items.
    if let Some(DiffOp::Equal {
        old_index,
        new_index,
        len,
    }) = ops.first_mut()
    {
        let offset = (*len).saturating_sub(n);
        *old_index += offset;
        *new_index += offset;
        *len -= offset;
    }

    // Trailing context: keep only the first `n` equal items, the start
    // indexes stay where they are.
    if let Some(DiffOp::Equal { len, .. }) = ops.last_mut() {
        *len -= (*len).saturating_sub(n);
    }

    for op in ops {
        if let DiffOp::Equal {
            old_index,
            new_index,
            len,
        } = op
        {
            // End the current group and start a new one whenever
            // there is a large range with no changes.
            //
            // `len - n > n` is the same test as `len > n * 2` but it cannot
            // overflow when the caller passes a very large `n`.
            let offset = len.saturating_sub(n);
            if offset > n {
                pending_group.push(DiffOp::Equal {
                    old_index,
                    new_index,
                    len: n,
                });
                rv.push(pending_group);
                pending_group = vec![DiffOp::Equal {
                    old_index: old_index + offset,
                    new_index: new_index + offset,
                    len: len - offset,
                }];
                continue;
            }
        }
        pending_group.push(op);
    }

    // A group that only holds context (or nothing at all) carries no change
    // and is not emitted.
    match pending_group.as_slice() {
        [] | [DiffOp::Equal { .. }] => {}
        _ => rv.push(pending_group),
    }

    rv
}
/// Return a measure of similarity in the range `0..=1`.
///
/// The value is `2 * matches / (old_len + new_len)` where `matches` is the
/// summed length of all [`DiffOp::Equal`] ops in `ops`. A ratio of `1.0`
/// means the two sequences are a complete match, a ratio of `0.0` would
/// indicate completely distinct sequences. If both lengths are zero the
/// ratio is `1.0`.
pub fn get_diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 {
    // Only equal ops contribute to the number of matching items.
    let mut matched: usize = 0;
    for op in ops {
        if let DiffOp::Equal { len, .. } = *op {
            matched += len;
        }
    }

    let total = old_len + new_len;
    if total == 0 {
        // Two empty sequences are treated as identical.
        return 1.0;
    }
    2.0 * matched as f32 / total as f32
}
use crate::{group_diff_ops, DiffHook, DiffOp};
/// A [`DiffHook`] that captures all diff operations.
#[derive(Default, Clone)]

View file

@ -6,108 +6,33 @@
//! direct access to these algorithms can be useful in some cases.
//!
//! All these algorithms provide a `diff` function which takes two indexable
//! objects (for instance slices) and a [`DiffHook`]. As the diff is generated
//! the diff hook is invoked. Note that the diff hook does not get access to
//! the actual values but only the indexes. This is why the diff hook is not
//! used outside of the raw algorithm implementations as for most situations
//! access to the values is useful or required.
//! objects (for instance slices) and a [`DiffHook`](crate::DiffHook). As the
//! diff is generated the diff hook is invoked. Note that the diff hook does
//! not get access to the actual values but only the indexes. This is why the
//! diff hook is not used outside of the raw algorithm implementations as for
//! most situations access to the values is useful or required.
//!
//! Most of the crate operates on the [`Algorithm`] enum which abstracts over
//! the different algorithms.
//! A more generic interface for these algorithms is available on the toplevel
//! module.
//!
//! # Example
//!
//! This is a simple example that shows how you can calculate the difference
//! between two sequences and capture the [`DiffOp`]s into a vector.
//! between two sequences and capture the ops into a vector.
//!
//! ```rust
//! use similar::Algorithm;
//! use similar::algorithms::capture_diff_slices;
//! use similar::{Algorithm, capture_diff_slices};
//!
//! let a = vec![1, 2, 3, 4, 5];
//! let b = vec![1, 2, 3, 4, 7];
//! let ops = capture_diff_slices(Algorithm::Myers, &a, &b);
//! ```
// general traits and utilities
mod capture;
mod hook;
mod replace;
use std::hash::Hash;
use std::ops::{Index, Range};
use crate::types::{Algorithm, DiffOp};
pub use capture::{get_diff_ratio, group_diff_ops, Capture};
pub use hook::DiffHook;
pub use capture::Capture;
pub use replace::Replace;
// actual diffing algorithms
pub mod myers;
pub mod patience;
/// Creates a diff between old and new with the given algorithm.
///
/// Compares the items of `old` inside `old_range` with the items of `new`
/// inside `new_range`. The call is forwarded to the implementation selected
/// by `alg` (`myers::diff` or `patience::diff`) together with the hook `d`,
/// and the result of that implementation is returned unchanged.
pub fn diff<Old, New, D>(
    alg: Algorithm,
    d: &mut D,
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> Result<(), D::Error>
where
    D: DiffHook,
    Old: Index<usize> + ?Sized,
    Old::Output: Hash + Eq + Ord,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
{
    // Every algorithm takes the same arguments, so this is a plain dispatch.
    match alg {
        Algorithm::Patience => patience::diff(d, old, old_range, new, new_range),
        Algorithm::Myers => myers::diff(d, old, old_range, new, new_range),
    }
}
/// Shortcut for diffing slices with a specific algorithm.
///
/// Equivalent to calling [`diff`] with the full index range of both slices.
pub fn diff_slices<D, T>(alg: Algorithm, d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
where
    T: Eq + Hash + Ord,
    D: DiffHook,
{
    let old_range = 0..old.len();
    let new_range = 0..new.len();
    diff(alg, d, old, old_range, new, new_range)
}
/// Creates a diff between old and new with the given algorithm capturing the ops.
///
/// This is like [`diff`] but instead of using an arbitrary hook this will
/// always use a [`Capture`] wrapped in a [`Replace`] and return the ops that
/// were captured.
///
/// # Panics
///
/// Panics if the underlying [`diff`] call returns an error.
pub fn capture_diff<Old, New>(
    alg: Algorithm,
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> Vec<DiffOp>
where
    Old: Index<usize> + ?Sized,
    Old::Output: Hash + Eq + Ord,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
{
    let mut hook = Replace::new(Capture::new());
    diff(alg, &mut hook, old, old_range, new, new_range).unwrap();
    // Unwrap the `Replace` layer to reach the `Capture` holding the ops.
    let capture = hook.into_inner();
    capture.into_ops()
}
/// Creates a diff between old and new with the given algorithm capturing the ops.
///
/// Slice flavored variant: runs [`diff_slices`] with a [`Capture`] wrapped in
/// a [`Replace`] as hook and returns the captured ops.
///
/// # Panics
///
/// Panics if the underlying [`diff_slices`] call returns an error.
pub fn capture_diff_slices<T>(alg: Algorithm, old: &[T], new: &[T]) -> Vec<DiffOp>
where
    T: Eq + Hash + Ord,
{
    let mut hook = Replace::new(Capture::new());
    diff_slices(alg, &mut hook, old, new).unwrap();
    // Unwrap the `Replace` layer to reach the `Capture` holding the ops.
    let capture = hook.into_inner();
    capture.into_ops()
}

View file

@ -9,7 +9,7 @@
use std::cmp::{max, min};
use std::ops::{Index, Range};
use crate::algorithms::DiffHook;
use crate::DiffHook;
/// Myers' diff algorithm.
///

View file

@ -10,7 +10,8 @@ use std::collections::HashMap;
use std::hash::Hash;
use std::ops::{Index, Range};
use crate::algorithms::{myers, DiffHook, Replace};
use crate::algorithms::{myers, Replace};
use crate::DiffHook;
/// Patience diff algorithm.
///

View file

@ -1,4 +1,4 @@
use crate::algorithms::DiffHook;
use crate::DiffHook;
/// A [`DiffHook`] that combines deletions and insertions to give blocks
/// of maximal length, and replacements when appropriate.

159
src/common.rs Normal file
View file

@ -0,0 +1,159 @@
use std::hash::Hash;
use std::ops::{Index, Range};
use crate::algorithms::{myers, patience, Capture, Replace};
use crate::{Algorithm, DiffHook, DiffOp};
/// Creates a diff between old and new with the given algorithm.
///
/// Compares the items of `old` inside `old_range` with the items of `new`
/// inside `new_range`. The call is forwarded to the implementation selected
/// by `alg` (`myers::diff` or `patience::diff`) together with the hook `d`,
/// and the result of that implementation is returned unchanged.
pub fn diff<Old, New, D>(
    alg: Algorithm,
    d: &mut D,
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> Result<(), D::Error>
where
    D: DiffHook,
    Old: Index<usize> + ?Sized,
    Old::Output: Hash + Eq + Ord,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
{
    // Every algorithm takes the same arguments, so this is a plain dispatch.
    match alg {
        Algorithm::Patience => patience::diff(d, old, old_range, new, new_range),
        Algorithm::Myers => myers::diff(d, old, old_range, new, new_range),
    }
}
/// Shortcut for diffing slices with a specific algorithm.
///
/// Equivalent to calling [`diff`] with the full index range of both slices.
pub fn diff_slices<D, T>(alg: Algorithm, d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
where
    T: Eq + Hash + Ord,
    D: DiffHook,
{
    let old_range = 0..old.len();
    let new_range = 0..new.len();
    diff(alg, d, old, old_range, new, new_range)
}
/// Creates a diff between old and new with the given algorithm capturing the ops.
///
/// This is like [`diff`] but instead of using an arbitrary hook this will
/// always use a [`Capture`] wrapped in a [`Replace`] and return the captured
/// [`DiffOp`]s.
///
/// # Panics
///
/// Panics if the underlying [`diff`] call returns an error.
pub fn capture_diff<Old, New>(
    alg: Algorithm,
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> Vec<DiffOp>
where
    Old: Index<usize> + ?Sized,
    Old::Output: Hash + Eq + Ord,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
{
    let mut hook = Replace::new(Capture::new());
    diff(alg, &mut hook, old, old_range, new, new_range).unwrap();
    // Unwrap the `Replace` layer to reach the `Capture` holding the ops.
    let capture = hook.into_inner();
    capture.into_ops()
}
/// Creates a diff between old and new with the given algorithm capturing the ops.
///
/// Slice flavored variant: runs [`diff_slices`] with a [`Capture`] wrapped in
/// a [`Replace`] as hook and returns the captured [`DiffOp`]s.
///
/// # Panics
///
/// Panics if the underlying [`diff_slices`] call returns an error.
pub fn capture_diff_slices<T>(alg: Algorithm, old: &[T], new: &[T]) -> Vec<DiffOp>
where
    T: Eq + Hash + Ord,
{
    let mut hook = Replace::new(Capture::new());
    diff_slices(alg, &mut hook, old, new).unwrap();
    // Unwrap the `Replace` layer to reach the `Capture` holding the ops.
    let capture = hook.into_inner();
    capture.into_ops()
}
/// Return a measure of similarity in the range `0..=1`.
///
/// The value is `2 * matches / (old_len + new_len)` where `matches` is the
/// summed length of all [`DiffOp::Equal`] ops in `ops`. A ratio of `1.0`
/// means the two sequences are a complete match, a ratio of `0.0` would
/// indicate completely distinct sequences. If both lengths are zero the
/// ratio is `1.0`.
pub fn get_diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 {
    // Only equal ops contribute to the number of matching items.
    let mut matched: usize = 0;
    for op in ops {
        if let DiffOp::Equal { len, .. } = *op {
            matched += len;
        }
    }

    let total = old_len + new_len;
    if total == 0 {
        // Two empty sequences are treated as identical.
        return 1.0;
    }
    2.0 * matched as f32 / total as f32
}
/// Isolate change clusters by eliminating ranges with no changes.
///
/// This will leave holes behind in long periods of equal ranges so that
/// you can build things like unified diffs.
///
/// `n` is the number of equal items that are kept on each side of a change:
///
/// * a leading [`DiffOp::Equal`] is shortened to its last `n` items,
/// * a trailing [`DiffOp::Equal`] is shortened to its first `n` items,
/// * any other [`DiffOp::Equal`] longer than `2 * n` is split: its first `n`
///   items close the current group and its last `n` items open the next one.
///
/// A final group that is empty or consists of a single equal op is dropped,
/// so an input without any changes yields an empty result.
pub fn group_diff_ops(mut ops: Vec<DiffOp>, n: usize) -> Vec<Vec<DiffOp>> {
    if ops.is_empty() {
        return vec![];
    }

    let mut pending_group = Vec::new();
    let mut rv = Vec::new();

    // Leading context: keep only the last `n` equal items, which means the
    // start indexes have to move forward by the number of dropped items.
    if let Some(DiffOp::Equal {
        old_index,
        new_index,
        len,
    }) = ops.first_mut()
    {
        let offset = (*len).saturating_sub(n);
        *old_index += offset;
        *new_index += offset;
        *len -= offset;
    }

    // Trailing context: keep only the first `n` equal items, the start
    // indexes stay where they are.
    if let Some(DiffOp::Equal { len, .. }) = ops.last_mut() {
        *len -= (*len).saturating_sub(n);
    }

    for op in ops {
        if let DiffOp::Equal {
            old_index,
            new_index,
            len,
        } = op
        {
            // End the current group and start a new one whenever
            // there is a large range with no changes.
            //
            // `len - n > n` is the same test as `len > n * 2` but it cannot
            // overflow when the caller passes a very large `n`.
            let offset = len.saturating_sub(n);
            if offset > n {
                pending_group.push(DiffOp::Equal {
                    old_index,
                    new_index,
                    len: n,
                });
                rv.push(pending_group);
                pending_group = vec![DiffOp::Equal {
                    old_index: old_index + offset,
                    new_index: new_index + offset,
                    len: len - offset,
                }];
                continue;
            }
        }
        pending_group.push(op);
    }

    // A group that only holds context (or nothing at all) carries no change
    // and is not emitted.
    match pending_group.as_slice() {
        [] | [DiffOp::Equal { .. }] => {}
        _ => rv.push(pending_group),
    }

    rv
}

View file

@ -56,5 +56,9 @@
pub mod algorithms;
pub mod text;
mod common;
mod hook;
mod types;
pub use self::common::*;
pub use self::hook::*;
pub use self::types::*;

View file

@ -2,9 +2,9 @@
use std::borrow::Cow;
use std::fmt;
use crate::algorithms::{capture_diff, get_diff_ratio};
use crate::text::{DiffableStr, TextDiff};
use crate::types::{Algorithm, Change, ChangeTag, DiffOp, DiffTag};
use crate::{capture_diff, get_diff_ratio};
use std::ops::Index;
@ -139,8 +139,8 @@ impl<'s, T: DiffableStr + ?Sized> InlineChange<'s, T> {
.map(|(emphasized, raw_value)| (*emphasized, raw_value.to_string_lossy()))
}
/// Returns `true` if this change needs to be followed up by a
/// missing newline.
/// Returns `true` if this change does not end in a newline and must be
/// followed up by one if line based diffs are used.
pub fn missing_newline(&self) -> bool {
    // Only the final value decides; without any values nothing is missing.
    match self.values.last() {
        Some(last) => !last.1.ends_with_newline(),
        None => false,
    }
}

View file

@ -101,8 +101,7 @@ pub use self::inline::InlineChange;
pub use self::udiff::{unified_diff, UnifiedDiff, UnifiedDiffHunk, UnifiedHunkHeader};
use self::utils::{upper_seq_ratio, QuickSeqRatio};
use crate::algorithms::{capture_diff_slices, get_diff_ratio, group_diff_ops};
use crate::types::{Algorithm, Change, DiffOp};
use crate::{capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, Change, DiffOp};
/// A builder type config for more complex uses of [`TextDiff`].
#[derive(Clone, Debug)]

View file

@ -1,7 +1,7 @@
use std::fmt;
use std::ops::{Index, Range};
use crate::algorithms::DiffHook;
use crate::hook::DiffHook;
/// An enum representing a diffing algorithm.
#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)]
@ -359,8 +359,8 @@ mod text_additions {
T::to_string_lossy(self.value)
}
/// Returns `true` if this change needs to be followed up by a
/// missing newline.
/// Returns `true` if this change does not end in a newline and must be
/// followed up by one if line based diffs are used.
///
/// The [`std::fmt::Display`] implementation of [`Change`] will automatically
/// insert a newline after the value if this is true.