Bigger refactorings in module layout
This commit is contained in:
parent
cab87515bd
commit
4770479fd7
11 changed files with 185 additions and 187 deletions
|
|
@ -1,96 +1,6 @@
|
|||
use std::convert::Infallible;
|
||||
|
||||
use crate::algorithms::hook::DiffHook;
|
||||
use crate::DiffOp;
|
||||
|
||||
/// Isolate change clusters by eliminating ranges with no changes.
|
||||
///
|
||||
/// This will leave holes behind in long periods of equal ranges so that
|
||||
/// you can build things like unified diffs.
|
||||
pub fn group_diff_ops(mut ops: Vec<DiffOp>, n: usize) -> Vec<Vec<DiffOp>> {
|
||||
if ops.is_empty() {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let mut pending_group = Vec::new();
|
||||
let mut rv = Vec::new();
|
||||
|
||||
if let Some(DiffOp::Equal {
|
||||
old_index,
|
||||
new_index,
|
||||
len,
|
||||
}) = ops.first_mut()
|
||||
{
|
||||
let offset = (*len).saturating_sub(n);
|
||||
*old_index += offset;
|
||||
*new_index += offset;
|
||||
*len -= offset;
|
||||
}
|
||||
|
||||
if let Some(DiffOp::Equal { len, .. }) = ops.last_mut() {
|
||||
*len -= (*len).saturating_sub(n);
|
||||
}
|
||||
|
||||
for op in ops.into_iter() {
|
||||
if let DiffOp::Equal {
|
||||
old_index,
|
||||
new_index,
|
||||
len,
|
||||
} = op
|
||||
{
|
||||
// End the current group and start a new one whenever
|
||||
// there is a large range with no changes.
|
||||
if len > n * 2 {
|
||||
pending_group.push(DiffOp::Equal {
|
||||
old_index,
|
||||
new_index,
|
||||
len: n,
|
||||
});
|
||||
rv.push(pending_group);
|
||||
let offset = len.saturating_sub(n);
|
||||
pending_group = vec![DiffOp::Equal {
|
||||
old_index: old_index + offset,
|
||||
new_index: new_index + offset,
|
||||
len: len - offset,
|
||||
}];
|
||||
continue;
|
||||
}
|
||||
}
|
||||
pending_group.push(op);
|
||||
}
|
||||
|
||||
match &pending_group[..] {
|
||||
&[] | &[DiffOp::Equal { .. }] => {}
|
||||
_ => rv.push(pending_group),
|
||||
}
|
||||
|
||||
rv
|
||||
}
|
||||
|
||||
/// Return a measure of similarity in the range `0..=1`.
|
||||
///
|
||||
/// A ratio of `1.0` means the two sequences are a complete match, a
|
||||
/// ratio of `0.0` would indicate completely distinct sequences. The input
|
||||
/// is the sequence of diff operations and the length of the old and new
|
||||
/// sequence.
|
||||
pub fn get_diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 {
|
||||
let matches = ops
|
||||
.iter()
|
||||
.map(|op| {
|
||||
if let DiffOp::Equal { len, .. } = *op {
|
||||
len
|
||||
} else {
|
||||
0
|
||||
}
|
||||
})
|
||||
.sum::<usize>();
|
||||
let len = old_len + new_len;
|
||||
if len == 0 {
|
||||
1.0
|
||||
} else {
|
||||
2.0 * matches as f32 / len as f32
|
||||
}
|
||||
}
|
||||
use crate::{group_diff_ops, DiffHook, DiffOp};
|
||||
|
||||
/// A [`DiffHook`] that captures all diff operations.
|
||||
#[derive(Default, Clone)]
|
||||
|
|
|
|||
|
|
@ -6,108 +6,33 @@
|
|||
//! direct access to these algorithms can be useful in some cases.
|
||||
//!
|
||||
//! All these algorithms provide a `diff` function which takes two indexable
|
||||
//! objects (for instance slices) and a [`DiffHook`]. As the diff is generated
|
||||
//! the diff hook is invoked. Note that the diff hook does not get access to
|
||||
//! the actual values but only the indexes. This is why the diff hook is not
|
||||
//! used outside of the raw algorithm implementations as for most situations
|
||||
//! access to the values is useful or required.
|
||||
//! objects (for instance slices) and a [`DiffHook`](crate::DiffHook). As the
|
||||
//! diff is generated the diff hook is invoked. Note that the diff hook does
|
||||
//! not get access to the actual values but only the indexes. This is why the
|
||||
//! diff hook is not used outside of the raw algorithm implementations as for
|
||||
//! most situations access to the values is useful or required.
|
||||
//!
|
||||
//! Most of the crate operates on the [`Algorithm`] enum which abstracts over
|
||||
//! the different algorithms.
|
||||
//! A more generic interface for these algorithms is available on the top-level
|
||||
//! module.
|
||||
//!
|
||||
//! # Example
|
||||
//!
|
||||
//! This is a simple example that shows how you can calculate the difference
|
||||
//! between two sequences and capture the [`DiffOp`]s into a vector.
|
||||
//! between two sequences and capture the ops into a vector.
|
||||
//!
|
||||
//! ```rust
|
||||
//! use similar::Algorithm;
|
||||
//! use similar::algorithms::capture_diff_slices;
|
||||
//! use similar::{Algorithm, capture_diff_slices};
|
||||
//!
|
||||
//! let a = vec![1, 2, 3, 4, 5];
|
||||
//! let b = vec![1, 2, 3, 4, 7];
|
||||
//! let ops = capture_diff_slices(Algorithm::Myers, &a, &b);
|
||||
//! ```
|
||||
|
||||
// general traits and utilities
|
||||
mod capture;
|
||||
mod hook;
|
||||
mod replace;
|
||||
|
||||
use std::hash::Hash;
|
||||
use std::ops::{Index, Range};
|
||||
|
||||
use crate::types::{Algorithm, DiffOp};
|
||||
|
||||
pub use capture::{get_diff_ratio, group_diff_ops, Capture};
|
||||
pub use hook::DiffHook;
|
||||
pub use capture::Capture;
|
||||
pub use replace::Replace;
|
||||
|
||||
// actual diffing algorithms
|
||||
pub mod myers;
|
||||
pub mod patience;
|
||||
|
||||
/// Creates a diff between old and new with the given algorithm.
|
||||
///
|
||||
/// Diffs `old`, between indices `old_range` and `new` between indices `new_range`.
|
||||
pub fn diff<Old, New, D>(
|
||||
alg: Algorithm,
|
||||
d: &mut D,
|
||||
old: &Old,
|
||||
old_range: Range<usize>,
|
||||
new: &New,
|
||||
new_range: Range<usize>,
|
||||
) -> Result<(), D::Error>
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
D: DiffHook,
|
||||
Old::Output: Hash + Eq + Ord,
|
||||
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
||||
{
|
||||
match alg {
|
||||
Algorithm::Myers => myers::diff(d, old, old_range, new, new_range),
|
||||
Algorithm::Patience => patience::diff(d, old, old_range, new, new_range),
|
||||
}
|
||||
}
|
||||
|
||||
/// Shortcut for diffing slices with a specific algorithm.
|
||||
pub fn diff_slices<D, T>(alg: Algorithm, d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
|
||||
where
|
||||
D: DiffHook,
|
||||
T: Eq + Hash + Ord,
|
||||
{
|
||||
diff(alg, d, old, 0..old.len(), new, 0..new.len())
|
||||
}
|
||||
|
||||
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
||||
///
|
||||
/// This is like [`diff`] but instead of using an arbitrary hook this will
|
||||
/// always use [`Replace`] + [`Capture`] and return the captured [`DiffOp`]s.
|
||||
pub fn capture_diff<Old, New>(
|
||||
alg: Algorithm,
|
||||
old: &Old,
|
||||
old_range: Range<usize>,
|
||||
new: &New,
|
||||
new_range: Range<usize>,
|
||||
) -> Vec<DiffOp>
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
Old::Output: Hash + Eq + Ord,
|
||||
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
||||
{
|
||||
let mut d = Replace::new(Capture::new());
|
||||
diff(alg, &mut d, old, old_range, new, new_range).unwrap();
|
||||
d.into_inner().into_ops()
|
||||
}
|
||||
|
||||
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
||||
pub fn capture_diff_slices<T>(alg: Algorithm, old: &[T], new: &[T]) -> Vec<DiffOp>
|
||||
where
|
||||
T: Eq + Hash + Ord,
|
||||
{
|
||||
let mut d = Replace::new(Capture::new());
|
||||
diff_slices(alg, &mut d, old, new).unwrap();
|
||||
d.into_inner().into_ops()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
use std::cmp::{max, min};
|
||||
use std::ops::{Index, Range};
|
||||
|
||||
use crate::algorithms::DiffHook;
|
||||
use crate::DiffHook;
|
||||
|
||||
/// Myers' diff algorithm.
|
||||
///
|
||||
|
|
|
|||
|
|
@ -10,7 +10,8 @@ use std::collections::HashMap;
|
|||
use std::hash::Hash;
|
||||
use std::ops::{Index, Range};
|
||||
|
||||
use crate::algorithms::{myers, DiffHook, Replace};
|
||||
use crate::algorithms::{myers, Replace};
|
||||
use crate::DiffHook;
|
||||
|
||||
/// Patience diff algorithm.
|
||||
///
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use crate::algorithms::DiffHook;
|
||||
use crate::DiffHook;
|
||||
|
||||
/// A [`DiffHook`] that combines deletions and insertions to give blocks
|
||||
/// of maximal length, and replacements when appropriate.
|
||||
|
|
|
|||
159
src/common.rs
Normal file
159
src/common.rs
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
use std::hash::Hash;
|
||||
use std::ops::{Index, Range};
|
||||
|
||||
use crate::algorithms::{myers, patience, Capture, Replace};
|
||||
use crate::{Algorithm, DiffHook, DiffOp};
|
||||
|
||||
/// Creates a diff between old and new with the given algorithm.
|
||||
///
|
||||
/// Diffs `old`, between indices `old_range` and `new` between indices `new_range`.
|
||||
pub fn diff<Old, New, D>(
|
||||
alg: Algorithm,
|
||||
d: &mut D,
|
||||
old: &Old,
|
||||
old_range: Range<usize>,
|
||||
new: &New,
|
||||
new_range: Range<usize>,
|
||||
) -> Result<(), D::Error>
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
D: DiffHook,
|
||||
Old::Output: Hash + Eq + Ord,
|
||||
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
||||
{
|
||||
match alg {
|
||||
Algorithm::Myers => myers::diff(d, old, old_range, new, new_range),
|
||||
Algorithm::Patience => patience::diff(d, old, old_range, new, new_range),
|
||||
}
|
||||
}
|
||||
|
||||
/// Shortcut for diffing slices with a specific algorithm.
|
||||
pub fn diff_slices<D, T>(alg: Algorithm, d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
|
||||
where
|
||||
D: DiffHook,
|
||||
T: Eq + Hash + Ord,
|
||||
{
|
||||
diff(alg, d, old, 0..old.len(), new, 0..new.len())
|
||||
}
|
||||
|
||||
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
||||
///
|
||||
/// This is like [`diff`] but instead of using an arbitrary hook this will
|
||||
/// always use [`Replace`] + [`Capture`] and return the captured [`DiffOp`]s.
|
||||
pub fn capture_diff<Old, New>(
|
||||
alg: Algorithm,
|
||||
old: &Old,
|
||||
old_range: Range<usize>,
|
||||
new: &New,
|
||||
new_range: Range<usize>,
|
||||
) -> Vec<DiffOp>
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
Old::Output: Hash + Eq + Ord,
|
||||
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
||||
{
|
||||
let mut d = Replace::new(Capture::new());
|
||||
diff(alg, &mut d, old, old_range, new, new_range).unwrap();
|
||||
d.into_inner().into_ops()
|
||||
}
|
||||
|
||||
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
||||
pub fn capture_diff_slices<T>(alg: Algorithm, old: &[T], new: &[T]) -> Vec<DiffOp>
|
||||
where
|
||||
T: Eq + Hash + Ord,
|
||||
{
|
||||
let mut d = Replace::new(Capture::new());
|
||||
diff_slices(alg, &mut d, old, new).unwrap();
|
||||
d.into_inner().into_ops()
|
||||
}
|
||||
|
||||
/// Return a measure of similarity in the range `0..=1`.
|
||||
///
|
||||
/// A ratio of `1.0` means the two sequences are a complete match, a
|
||||
/// ratio of `0.0` would indicate completely distinct sequences. The input
|
||||
/// is the sequence of diff operations and the length of the old and new
|
||||
/// sequence.
|
||||
pub fn get_diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 {
|
||||
let matches = ops
|
||||
.iter()
|
||||
.map(|op| {
|
||||
if let DiffOp::Equal { len, .. } = *op {
|
||||
len
|
||||
} else {
|
||||
0
|
||||
}
|
||||
})
|
||||
.sum::<usize>();
|
||||
let len = old_len + new_len;
|
||||
if len == 0 {
|
||||
1.0
|
||||
} else {
|
||||
2.0 * matches as f32 / len as f32
|
||||
}
|
||||
}
|
||||
|
||||
/// Isolate change clusters by eliminating ranges with no changes.
|
||||
///
|
||||
/// This will leave holes behind in long periods of equal ranges so that
|
||||
/// you can build things like unified diffs.
|
||||
pub fn group_diff_ops(mut ops: Vec<DiffOp>, n: usize) -> Vec<Vec<DiffOp>> {
|
||||
if ops.is_empty() {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let mut pending_group = Vec::new();
|
||||
let mut rv = Vec::new();
|
||||
|
||||
if let Some(DiffOp::Equal {
|
||||
old_index,
|
||||
new_index,
|
||||
len,
|
||||
}) = ops.first_mut()
|
||||
{
|
||||
let offset = (*len).saturating_sub(n);
|
||||
*old_index += offset;
|
||||
*new_index += offset;
|
||||
*len -= offset;
|
||||
}
|
||||
|
||||
if let Some(DiffOp::Equal { len, .. }) = ops.last_mut() {
|
||||
*len -= (*len).saturating_sub(n);
|
||||
}
|
||||
|
||||
for op in ops.into_iter() {
|
||||
if let DiffOp::Equal {
|
||||
old_index,
|
||||
new_index,
|
||||
len,
|
||||
} = op
|
||||
{
|
||||
// End the current group and start a new one whenever
|
||||
// there is a large range with no changes.
|
||||
if len > n * 2 {
|
||||
pending_group.push(DiffOp::Equal {
|
||||
old_index,
|
||||
new_index,
|
||||
len: n,
|
||||
});
|
||||
rv.push(pending_group);
|
||||
let offset = len.saturating_sub(n);
|
||||
pending_group = vec![DiffOp::Equal {
|
||||
old_index: old_index + offset,
|
||||
new_index: new_index + offset,
|
||||
len: len - offset,
|
||||
}];
|
||||
continue;
|
||||
}
|
||||
}
|
||||
pending_group.push(op);
|
||||
}
|
||||
|
||||
match &pending_group[..] {
|
||||
&[] | &[DiffOp::Equal { .. }] => {}
|
||||
_ => rv.push(pending_group),
|
||||
}
|
||||
|
||||
rv
|
||||
}
|
||||
|
|
@ -56,5 +56,9 @@
|
|||
pub mod algorithms;
|
||||
pub mod text;
|
||||
|
||||
mod common;
|
||||
mod hook;
|
||||
mod types;
|
||||
pub use self::common::*;
|
||||
pub use self::hook::*;
|
||||
pub use self::types::*;
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@
|
|||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
|
||||
use crate::algorithms::{capture_diff, get_diff_ratio};
|
||||
use crate::text::{DiffableStr, TextDiff};
|
||||
use crate::types::{Algorithm, Change, ChangeTag, DiffOp, DiffTag};
|
||||
use crate::{capture_diff, get_diff_ratio};
|
||||
|
||||
use std::ops::Index;
|
||||
|
||||
|
|
@ -139,8 +139,8 @@ impl<'s, T: DiffableStr + ?Sized> InlineChange<'s, T> {
|
|||
.map(|(emphasized, raw_value)| (*emphasized, raw_value.to_string_lossy()))
|
||||
}
|
||||
|
||||
/// Returns `true` if this change needs to be followed up by a
|
||||
/// missing newline.
|
||||
/// Returns `true` if this change does not end in a newline and must be
|
||||
/// followed up by one if line based diffs are used.
|
||||
pub fn missing_newline(&self) -> bool {
|
||||
!self.values.last().map_or(true, |x| x.1.ends_with_newline())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -101,8 +101,7 @@ pub use self::inline::InlineChange;
|
|||
pub use self::udiff::{unified_diff, UnifiedDiff, UnifiedDiffHunk, UnifiedHunkHeader};
|
||||
|
||||
use self::utils::{upper_seq_ratio, QuickSeqRatio};
|
||||
use crate::algorithms::{capture_diff_slices, get_diff_ratio, group_diff_ops};
|
||||
use crate::types::{Algorithm, Change, DiffOp};
|
||||
use crate::{capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, Change, DiffOp};
|
||||
|
||||
/// A builder type config for more complex uses of [`TextDiff`].
|
||||
#[derive(Clone, Debug)]
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use std::fmt;
|
||||
use std::ops::{Index, Range};
|
||||
|
||||
use crate::algorithms::DiffHook;
|
||||
use crate::hook::DiffHook;
|
||||
|
||||
/// An enum representing a diffing algorithm.
|
||||
#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)]
|
||||
|
|
@ -359,8 +359,8 @@ mod text_additions {
|
|||
T::to_string_lossy(self.value)
|
||||
}
|
||||
|
||||
/// Returns `true` if this change needs to be followed up by a
|
||||
/// missing newline.
|
||||
/// Returns `true` if this change does not end in a newline and must be
|
||||
/// followed up by one if line based diffs are used.
|
||||
///
|
||||
/// The [`std::fmt::Display`] implementation of [`Change`] will automatically
|
||||
/// insert a newline after the value if this is true.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue