Bigger refactorings in module layout

This commit is contained in:
Armin Ronacher 2021-02-03 00:05:54 +01:00
parent cab87515bd
commit 4770479fd7
11 changed files with 185 additions and 187 deletions

View file

@ -1,96 +1,6 @@
use std::convert::Infallible;
use crate::algorithms::hook::DiffHook;
use crate::DiffOp;
/// Isolate change clusters by eliminating ranges with no changes.
///
/// Long runs of equal items are cut out so that only up to `n` equal items
/// of context remain on either side of a cluster of changes.  This leaves
/// holes behind in long periods of equal ranges so that you can build things
/// like unified diffs.
///
/// * `ops` is the full sequence of diff operations.
/// * `n` is the number of equal items kept as context around each cluster.
///
/// Returns one `Vec<DiffOp>` per cluster.  An empty input, or an input that
/// consists of a single equal range, produces no groups at all.
pub fn group_diff_ops(mut ops: Vec<DiffOp>, n: usize) -> Vec<Vec<DiffOp>> {
    if ops.is_empty() {
        return vec![];
    }

    let mut pending_group = Vec::new();
    let mut rv = Vec::new();

    // A leading equal range only contributes its last `n` items as context.
    if let Some(DiffOp::Equal {
        old_index,
        new_index,
        len,
    }) = ops.first_mut()
    {
        let offset = (*len).saturating_sub(n);
        *old_index += offset;
        *new_index += offset;
        *len -= offset;
    }

    // A trailing equal range only contributes its first `n` items as context.
    if let Some(DiffOp::Equal { len, .. }) = ops.last_mut() {
        *len -= (*len).saturating_sub(n);
    }

    // An equal range longer than this is split into trailing context for the
    // current group and leading context for the next one.  The multiplication
    // saturates so that huge context sizes (for instance `usize::MAX` to ask
    // for "everything") cannot overflow.
    let split_threshold = n.saturating_mul(2);

    for op in ops {
        if let DiffOp::Equal {
            old_index,
            new_index,
            len,
        } = op
        {
            // End the current group and start a new one whenever
            // there is a large range with no changes.
            if len > split_threshold {
                pending_group.push(DiffOp::Equal {
                    old_index,
                    new_index,
                    len: n,
                });
                rv.push(pending_group);
                let offset = len.saturating_sub(n);
                pending_group = vec![DiffOp::Equal {
                    old_index: old_index + offset,
                    new_index: new_index + offset,
                    len: len - offset,
                }];
                continue;
            }
        }
        pending_group.push(op);
    }

    // A final group that is empty or holds nothing but context is dropped.
    if !matches!(pending_group.as_slice(), [] | [DiffOp::Equal { .. }]) {
        rv.push(pending_group);
    }
    rv
}
/// Compute how similar two sequences are, as a value in the range `0..=1`.
///
/// `ops` is the sequence of diff operations between the two sequences and
/// `old_len` / `new_len` are their lengths.  The result is twice the number
/// of items covered by equal operations divided by the combined length, so
/// `1.0` means a complete match and `0.0` means nothing matched.  Two empty
/// sequences are considered identical and give `1.0`.
pub fn get_diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 {
    // Only equal ranges contribute to the number of matching items.
    let matching_items: usize = ops
        .iter()
        .filter_map(|op| match op {
            DiffOp::Equal { len, .. } => Some(*len),
            _ => None,
        })
        .sum();

    let combined_len = old_len + new_len;
    if combined_len == 0 {
        return 1.0;
    }
    (2.0 * matching_items as f32) / combined_len as f32
}
use crate::{group_diff_ops, DiffHook, DiffOp};
/// A [`DiffHook`] that captures all diff operations.
#[derive(Default, Clone)]

View file

@ -1,109 +0,0 @@
/// A trait for reacting to an edit script that turns the "old" version
/// into the "new" version.
///
/// Every method has a default implementation, so an implementor only needs
/// to override the callbacks it is interested in.
pub trait DiffHook: Sized {
    /// The error produced from the hook methods.
    type Error;

    /// Called when a section of length `len` is equal in both versions,
    /// starting at `old_index` in the old version and at `new_index` in
    /// the new version.
    ///
    /// The default implementation does nothing and returns `Ok(())`.
    fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
        let _ = (old_index, new_index, len);
        Ok(())
    }

    /// Called when a section of length `old_len`, starting at `old_index`,
    /// needs to be deleted from the old version.
    ///
    /// The default implementation does nothing and returns `Ok(())`.
    fn delete(
        &mut self,
        old_index: usize,
        old_len: usize,
        new_index: usize,
    ) -> Result<(), Self::Error> {
        let _ = (old_index, old_len, new_index);
        Ok(())
    }

    /// Called when a section of the new version, of length `new_len` and
    /// starting at `new_index`, needs to be inserted at position
    /// `old_index`.
    ///
    /// The default implementation does nothing and returns `Ok(())`.
    fn insert(
        &mut self,
        old_index: usize,
        new_index: usize,
        new_len: usize,
    ) -> Result<(), Self::Error> {
        let _ = (old_index, new_index, new_len);
        Ok(())
    }

    /// Called when a section of the old version, starting at index
    /// `old_index` and of length `old_len`, needs to be replaced with a
    /// section of length `new_len`, starting at `new_index`, of the new
    /// version.
    ///
    /// The default implementation invokes `delete` followed by `insert`;
    /// if `delete` fails its error is returned and `insert` is not called.
    ///
    /// You can use the [`Replace`](crate::algorithms::Replace) hook to
    /// automatically generate these.
    fn replace(
        &mut self,
        old_index: usize,
        old_len: usize,
        new_index: usize,
        new_len: usize,
    ) -> Result<(), Self::Error> {
        self.delete(old_index, old_len, new_index)
            .and_then(|()| self.insert(old_index, new_index, new_len))
    }

    /// Always called at the end of the algorithm.
    ///
    /// The default implementation does nothing and returns `Ok(())`.
    fn finish(&mut self) -> Result<(), Self::Error> {
        Ok(())
    }
}
/// Lets a mutable reference to a hook be used wherever a hook is expected.
///
/// Every method forwards to the same method on the referenced hook with
/// unchanged arguments and returns its result.
impl<'a, D: DiffHook + 'a> DiffHook for &'a mut D {
    type Error = D::Error;

    fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
        (**self).equal(old_index, new_index, len)
    }

    fn delete(
        &mut self,
        old_index: usize,
        old_len: usize,
        new_index: usize,
    ) -> Result<(), Self::Error> {
        (**self).delete(old_index, old_len, new_index)
    }

    fn insert(
        &mut self,
        old_index: usize,
        new_index: usize,
        new_len: usize,
    ) -> Result<(), Self::Error> {
        (**self).insert(old_index, new_index, new_len)
    }

    // Forwarded explicitly so that a `replace` override on `D` is honoured
    // rather than the trait's default implementation.
    fn replace(
        &mut self,
        old_index: usize,
        old_len: usize,
        new_index: usize,
        new_len: usize,
    ) -> Result<(), Self::Error> {
        (**self).replace(old_index, old_len, new_index, new_len)
    }

    fn finish(&mut self) -> Result<(), Self::Error> {
        (**self).finish()
    }
}

View file

@ -6,108 +6,33 @@
//! direct access to these algorithms can be useful in some cases.
//!
//! All these algorithms provide a `diff` function which takes two indexable
//! objects (for instance slices) and a [`DiffHook`]. As the diff is generated
//! the diff hook is invoked. Note that the diff hook does not get access to
//! the actual values but only the indexes. This is why the diff hook is not
//! used outside of the raw algorithm implementations as for most situations
//! access to the values is useful or required.
//! objects (for instance slices) and a [`DiffHook`](crate::DiffHook). As the
//! diff is generated the diff hook is invoked. Note that the diff hook does
//! not get access to the actual values but only the indexes. This is why the
//! diff hook is not used outside of the raw algorithm implementations as for
//! most situations access to the values is useful or required.
//!
//! Most of the crate operates on the [`Algorithm`] enum which abstracts over
//! the different algorithms.
//! A more generic interface for these algorithms is available on the toplevel
//! module.
//!
//! # Example
//!
//! This is a simple example that shows how you can calculate the difference
//! between two sequences and capture the [`DiffOp`]s into a vector.
//! between two sequences and capture the ops into a vector.
//!
//! ```rust
//! use similar::Algorithm;
//! use similar::algorithms::capture_diff_slices;
//! use similar::{Algorithm, capture_diff_slices};
//!
//! let a = vec![1, 2, 3, 4, 5];
//! let b = vec![1, 2, 3, 4, 7];
//! let ops = capture_diff_slices(Algorithm::Myers, &a, &b);
//! ```
// general traits and utilities
mod capture;
mod hook;
mod replace;
use std::hash::Hash;
use std::ops::{Index, Range};
use crate::types::{Algorithm, DiffOp};
pub use capture::{get_diff_ratio, group_diff_ops, Capture};
pub use hook::DiffHook;
pub use capture::Capture;
pub use replace::Replace;
// actual diffing algorithms
pub mod myers;
pub mod patience;
/// Runs the diffing algorithm selected by `alg` over two indexable inputs.
///
/// The elements of `old` within `old_range` are compared against the
/// elements of `new` within `new_range`, and the hook `d` is handed to the
/// chosen algorithm's own `diff` function.  Whatever that function returns
/// is returned unchanged.
pub fn diff<Old, New, D>(
    alg: Algorithm,
    d: &mut D,
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> Result<(), D::Error>
where
    D: DiffHook,
    Old: Index<usize> + ?Sized,
    Old::Output: Hash + Eq + Ord,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
{
    // Pure dispatch: each variant maps onto the module of the same name.
    match alg {
        Algorithm::Patience => patience::diff(d, old, old_range, new, new_range),
        Algorithm::Myers => myers::diff(d, old, old_range, new, new_range),
    }
}
/// Diffs two whole slices with the algorithm selected by `alg`.
///
/// This is a shortcut for calling [`diff`] with ranges that cover all of
/// `old` and all of `new`; the hook `d` and the result are passed through.
pub fn diff_slices<D, T>(alg: Algorithm, d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
where
    T: Eq + Hash + Ord,
    D: DiffHook,
{
    let old_range = 0..old.len();
    let new_range = 0..new.len();
    diff(alg, d, old, old_range, new, new_range)
}
/// Diffs `old` against `new` with the given algorithm and returns the
/// captured operations.
///
/// This is like [`diff`] but instead of using an arbitrary hook it always
/// wraps a [`Capture`] in a [`Replace`] hook and returns the [`DiffOp`]s the
/// capture collected.
///
/// # Panics
///
/// Panics if [`diff`] returns an error, because the result is unwrapped.
pub fn capture_diff<Old, New>(
    alg: Algorithm,
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> Vec<DiffOp>
where
    Old: Index<usize> + ?Sized,
    Old::Output: Hash + Eq + Ord,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
{
    let mut hook = Replace::new(Capture::new());
    diff(alg, &mut hook, old, old_range, new, new_range).unwrap();
    // Peel off the `Replace` wrapper to reach the capture and its ops.
    let capture = hook.into_inner();
    capture.into_ops()
}
/// Creates a diff between old and new with the given algorithm capturing the ops.
pub fn capture_diff_slices<T>(alg: Algorithm, old: &[T], new: &[T]) -> Vec<DiffOp>
where
T: Eq + Hash + Ord,
{
let mut d = Replace::new(Capture::new());
diff_slices(alg, &mut d, old, new).unwrap();
d.into_inner().into_ops()
}

View file

@ -9,7 +9,7 @@
use std::cmp::{max, min};
use std::ops::{Index, Range};
use crate::algorithms::DiffHook;
use crate::DiffHook;
/// Myers' diff algorithm.
///

View file

@ -10,7 +10,8 @@ use std::collections::HashMap;
use std::hash::Hash;
use std::ops::{Index, Range};
use crate::algorithms::{myers, DiffHook, Replace};
use crate::algorithms::{myers, Replace};
use crate::DiffHook;
/// Patience diff algorithm.
///

View file

@ -1,4 +1,4 @@
use crate::algorithms::DiffHook;
use crate::DiffHook;
/// A [`DiffHook`] that combines deletions and insertions to give blocks
/// of maximal length, and replacements when appropriate.