Bigger refactorings in module layout
This commit is contained in:
parent
cab87515bd
commit
4770479fd7
11 changed files with 185 additions and 187 deletions
|
|
@ -1,96 +1,6 @@
|
|||
use std::convert::Infallible;
|
||||
|
||||
use crate::algorithms::hook::DiffHook;
|
||||
use crate::DiffOp;
|
||||
|
||||
/// Isolate change clusters by eliminating ranges with no changes.
|
||||
///
|
||||
/// This will leave holes behind in long periods of equal ranges so that
|
||||
/// you can build things like unified diffs.
|
||||
pub fn group_diff_ops(mut ops: Vec<DiffOp>, n: usize) -> Vec<Vec<DiffOp>> {
|
||||
if ops.is_empty() {
|
||||
return vec![];
|
||||
}
|
||||
|
||||
let mut pending_group = Vec::new();
|
||||
let mut rv = Vec::new();
|
||||
|
||||
if let Some(DiffOp::Equal {
|
||||
old_index,
|
||||
new_index,
|
||||
len,
|
||||
}) = ops.first_mut()
|
||||
{
|
||||
let offset = (*len).saturating_sub(n);
|
||||
*old_index += offset;
|
||||
*new_index += offset;
|
||||
*len -= offset;
|
||||
}
|
||||
|
||||
if let Some(DiffOp::Equal { len, .. }) = ops.last_mut() {
|
||||
*len -= (*len).saturating_sub(n);
|
||||
}
|
||||
|
||||
for op in ops.into_iter() {
|
||||
if let DiffOp::Equal {
|
||||
old_index,
|
||||
new_index,
|
||||
len,
|
||||
} = op
|
||||
{
|
||||
// End the current group and start a new one whenever
|
||||
// there is a large range with no changes.
|
||||
if len > n * 2 {
|
||||
pending_group.push(DiffOp::Equal {
|
||||
old_index,
|
||||
new_index,
|
||||
len: n,
|
||||
});
|
||||
rv.push(pending_group);
|
||||
let offset = len.saturating_sub(n);
|
||||
pending_group = vec![DiffOp::Equal {
|
||||
old_index: old_index + offset,
|
||||
new_index: new_index + offset,
|
||||
len: len - offset,
|
||||
}];
|
||||
continue;
|
||||
}
|
||||
}
|
||||
pending_group.push(op);
|
||||
}
|
||||
|
||||
match &pending_group[..] {
|
||||
&[] | &[DiffOp::Equal { .. }] => {}
|
||||
_ => rv.push(pending_group),
|
||||
}
|
||||
|
||||
rv
|
||||
}
|
||||
|
||||
/// Return a measure of similarity in the range `0..=1`.
|
||||
///
|
||||
/// A ratio of `1.0` means the two sequences are a complete match, a
|
||||
/// ratio of `0.0` would indicate completely distinct sequences. The input
|
||||
/// is the sequence of diff operations and the length of the old and new
|
||||
/// sequence.
|
||||
pub fn get_diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 {
|
||||
let matches = ops
|
||||
.iter()
|
||||
.map(|op| {
|
||||
if let DiffOp::Equal { len, .. } = *op {
|
||||
len
|
||||
} else {
|
||||
0
|
||||
}
|
||||
})
|
||||
.sum::<usize>();
|
||||
let len = old_len + new_len;
|
||||
if len == 0 {
|
||||
1.0
|
||||
} else {
|
||||
2.0 * matches as f32 / len as f32
|
||||
}
|
||||
}
|
||||
use crate::{group_diff_ops, DiffHook, DiffOp};
|
||||
|
||||
/// A [`DiffHook`] that captures all diff operations.
|
||||
#[derive(Default, Clone)]
|
||||
|
|
|
|||
|
|
@ -1,109 +0,0 @@
|
|||
/// A trait for reacting to an edit script that turns the "old" version
/// into the "new" version.
///
/// Every method has a default implementation, so implementors only need to
/// override the callbacks they care about.
pub trait DiffHook: Sized {
    /// The error produced from the hook methods.
    type Error;

    /// Called when the items starting at `old_index` (in the old version)
    /// and `new_index` (in the new version) begin a section of length `len`
    /// that is equal in both versions.
    ///
    /// The default implementation does nothing.
    fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
        let _ = (old_index, new_index, len);
        Ok(())
    }

    /// Called when a section of length `old_len`, starting at `old_index`,
    /// needs to be deleted from the old version.
    ///
    /// The default implementation does nothing.
    fn delete(
        &mut self,
        old_index: usize,
        old_len: usize,
        new_index: usize,
    ) -> Result<(), Self::Error> {
        let _ = (old_index, old_len, new_index);
        Ok(())
    }

    /// Called when a section of the new version, of length `new_len` and
    /// starting at `new_index`, needs to be inserted at position `old_index`.
    ///
    /// The default implementation does nothing.
    fn insert(
        &mut self,
        old_index: usize,
        new_index: usize,
        new_len: usize,
    ) -> Result<(), Self::Error> {
        let _ = (old_index, new_index, new_len);
        Ok(())
    }

    /// Called when a section of the old version, starting at index
    /// `old_index` and of length `old_len`, needs to be replaced with a
    /// section of length `new_len`, starting at `new_index`, of the new
    /// version.
    ///
    /// The default implementation invokes `delete` followed by `insert`; if
    /// `delete` fails its error is returned and `insert` is not called.
    ///
    /// You can use the [`Replace`](crate::algorithms::Replace) hook to
    /// automatically generate these.
    fn replace(
        &mut self,
        old_index: usize,
        old_len: usize,
        new_index: usize,
        new_len: usize,
    ) -> Result<(), Self::Error> {
        self.delete(old_index, old_len, new_index)
            .and_then(|()| self.insert(old_index, new_index, new_len))
    }

    /// Always called at the end of the algorithm.
    ///
    /// The default implementation does nothing.
    fn finish(&mut self) -> Result<(), Self::Error> {
        Ok(())
    }
}
|
||||
|
||||
/// Forwarding implementation: a mutable reference to a hook is itself a
/// hook, and every callback is passed on unchanged to the referenced hook.
impl<'a, D: DiffHook + 'a> DiffHook for &'a mut D {
    type Error = D::Error;

    fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
        D::equal(&mut **self, old_index, new_index, len)
    }

    fn delete(
        &mut self,
        old_index: usize,
        old_len: usize,
        new_index: usize,
    ) -> Result<(), Self::Error> {
        D::delete(&mut **self, old_index, old_len, new_index)
    }

    fn insert(
        &mut self,
        old_index: usize,
        new_index: usize,
        new_len: usize,
    ) -> Result<(), Self::Error> {
        D::insert(&mut **self, old_index, new_index, new_len)
    }

    // Forwarded explicitly so that an overridden `replace` on `D` is used
    // rather than the trait's default delete-then-insert behavior.
    fn replace(
        &mut self,
        old_index: usize,
        old_len: usize,
        new_index: usize,
        new_len: usize,
    ) -> Result<(), Self::Error> {
        D::replace(&mut **self, old_index, old_len, new_index, new_len)
    }

    fn finish(&mut self) -> Result<(), Self::Error> {
        D::finish(&mut **self)
    }
}
|
||||
|
|
@ -6,108 +6,33 @@
|
|||
//! direct access to these algorithms can be useful in some cases.
|
||||
//!
|
||||
//! All these algorithms provide a `diff` function which takes two indexable
|
||||
//! objects (for instance slices) and a [`DiffHook`]. As the diff is generated
|
||||
//! the diff hook is invoked. Note that the diff hook does not get access to
|
||||
//! the actual values but only the indexes. This is why the diff hook is not
|
||||
//! used outside of the raw algorithm implementations as for most situations
|
||||
//! access to the values is useful or required.
|
||||
//! objects (for instance slices) and a [`DiffHook`](crate::DiffHook). As the
|
||||
//! diff is generated the diff hook is invoked. Note that the diff hook does
|
||||
//! not get access to the actual values but only the indexes. This is why the
|
||||
//! diff hook is not used outside of the raw algorithm implementations as for
|
||||
//! most situations access to the values is useful or required.
|
||||
//!
|
||||
//! Most of the crate operates on the [`Algorithm`] enum which abstracts over
|
||||
//! the different algorithms.
|
||||
//! A more generic interface for these algorithms is available on the toplevel
|
||||
//! module.
|
||||
//!
|
||||
//! # Example
|
||||
//!
|
||||
//! This is a simple example that shows how you can calculate the difference
|
||||
//! between two sequences and capture the [`DiffOp`]s into a vector.
|
||||
//! between two sequences and capture the ops into a vector.
|
||||
//!
|
||||
//! ```rust
|
||||
//! use similar::Algorithm;
|
||||
//! use similar::algorithms::capture_diff_slices;
|
||||
//! use similar::{Algorithm, capture_diff_slices};
|
||||
//!
|
||||
//! let a = vec![1, 2, 3, 4, 5];
|
||||
//! let b = vec![1, 2, 3, 4, 7];
|
||||
//! let ops = capture_diff_slices(Algorithm::Myers, &a, &b);
|
||||
//! ```
|
||||
|
||||
// general traits and utilities
|
||||
mod capture;
|
||||
mod hook;
|
||||
mod replace;
|
||||
|
||||
use std::hash::Hash;
|
||||
use std::ops::{Index, Range};
|
||||
|
||||
use crate::types::{Algorithm, DiffOp};
|
||||
|
||||
pub use capture::{get_diff_ratio, group_diff_ops, Capture};
|
||||
pub use hook::DiffHook;
|
||||
pub use capture::Capture;
|
||||
pub use replace::Replace;
|
||||
|
||||
// actual diffing algorithms
|
||||
pub mod myers;
|
||||
pub mod patience;
|
||||
|
||||
/// Creates a diff between old and new with the given algorithm.
|
||||
///
|
||||
/// Diffs `old`, between indices `old_range` and `new` between indices `new_range`.
|
||||
pub fn diff<Old, New, D>(
|
||||
alg: Algorithm,
|
||||
d: &mut D,
|
||||
old: &Old,
|
||||
old_range: Range<usize>,
|
||||
new: &New,
|
||||
new_range: Range<usize>,
|
||||
) -> Result<(), D::Error>
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
D: DiffHook,
|
||||
Old::Output: Hash + Eq + Ord,
|
||||
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
||||
{
|
||||
match alg {
|
||||
Algorithm::Myers => myers::diff(d, old, old_range, new, new_range),
|
||||
Algorithm::Patience => patience::diff(d, old, old_range, new, new_range),
|
||||
}
|
||||
}
|
||||
|
||||
/// Shortcut for diffing slices with a specific algorithm.
|
||||
pub fn diff_slices<D, T>(alg: Algorithm, d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
|
||||
where
|
||||
D: DiffHook,
|
||||
T: Eq + Hash + Ord,
|
||||
{
|
||||
diff(alg, d, old, 0..old.len(), new, 0..new.len())
|
||||
}
|
||||
|
||||
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
||||
///
|
||||
/// This is like [`diff`] but instead of using an arbitrary hook this will
|
||||
/// always use [`Replace`] + [`Capture`] and return the captured [`DiffOp`]s.
|
||||
pub fn capture_diff<Old, New>(
|
||||
alg: Algorithm,
|
||||
old: &Old,
|
||||
old_range: Range<usize>,
|
||||
new: &New,
|
||||
new_range: Range<usize>,
|
||||
) -> Vec<DiffOp>
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
Old::Output: Hash + Eq + Ord,
|
||||
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
||||
{
|
||||
let mut d = Replace::new(Capture::new());
|
||||
diff(alg, &mut d, old, old_range, new, new_range).unwrap();
|
||||
d.into_inner().into_ops()
|
||||
}
|
||||
|
||||
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
||||
pub fn capture_diff_slices<T>(alg: Algorithm, old: &[T], new: &[T]) -> Vec<DiffOp>
|
||||
where
|
||||
T: Eq + Hash + Ord,
|
||||
{
|
||||
let mut d = Replace::new(Capture::new());
|
||||
diff_slices(alg, &mut d, old, new).unwrap();
|
||||
d.into_inner().into_ops()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
use std::cmp::{max, min};
|
||||
use std::ops::{Index, Range};
|
||||
|
||||
use crate::algorithms::DiffHook;
|
||||
use crate::DiffHook;
|
||||
|
||||
/// Myers' diff algorithm.
|
||||
///
|
||||
|
|
|
|||
|
|
@ -10,7 +10,8 @@ use std::collections::HashMap;
|
|||
use std::hash::Hash;
|
||||
use std::ops::{Index, Range};
|
||||
|
||||
use crate::algorithms::{myers, DiffHook, Replace};
|
||||
use crate::algorithms::{myers, Replace};
|
||||
use crate::DiffHook;
|
||||
|
||||
/// Patience diff algorithm.
|
||||
///
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
use crate::algorithms::DiffHook;
|
||||
use crate::DiffHook;
|
||||
|
||||
/// A [`DiffHook`] that combines deletions and insertions to give blocks
|
||||
/// of maximal length, and replacements when appropriate.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue