Added deadlines to all algorithms (#18)
This adds a deadline to all algorithms which lets one bail in a fixed amount of time to avoid pathological cases.
This commit is contained in:
parent
99386e8106
commit
74e2805a95
11 changed files with 412 additions and 57 deletions
|
|
@ -6,6 +6,8 @@ All notable changes to similar are documented here.
|
||||||
|
|
||||||
* Performance improvements for the LCS algorithm.
|
* Performance improvements for the LCS algorithm.
|
||||||
* Small performance improvements by adding an early opt-out for and inline highlighting.
|
* Small performance improvements by adding an early opt-out for and inline highlighting.
|
||||||
|
* Added deadlines to all diffing algorithms to bail early.
|
||||||
|
* Deprecated slice diffing methods in the individual algorithm modules.
|
||||||
|
|
||||||
## 1.2.2
|
## 1.2.2
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -94,7 +94,7 @@ impl DiffHook for Capture {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_capture_hook_grouping() {
|
fn test_capture_hook_grouping() {
|
||||||
use crate::algorithms::{myers, Replace};
|
use crate::algorithms::{diff_slices, Algorithm, Replace};
|
||||||
|
|
||||||
let rng = (1..100).collect::<Vec<_>>();
|
let rng = (1..100).collect::<Vec<_>>();
|
||||||
let mut rng_new = rng.clone();
|
let mut rng_new = rng.clone();
|
||||||
|
|
@ -104,7 +104,7 @@ fn test_capture_hook_grouping() {
|
||||||
rng_new[34] = 1000;
|
rng_new[34] = 1000;
|
||||||
|
|
||||||
let mut d = Replace::new(Capture::new());
|
let mut d = Replace::new(Capture::new());
|
||||||
myers::diff_slices(&mut d, &rng, &rng_new).unwrap();
|
diff_slices(Algorithm::Myers, &mut d, &rng, &rng_new).unwrap();
|
||||||
|
|
||||||
let ops = d.into_inner().into_grouped_ops(3);
|
let ops = d.into_inner().into_grouped_ops(3);
|
||||||
let tags = ops
|
let tags = ops
|
||||||
|
|
|
||||||
|
|
@ -4,12 +4,18 @@
|
||||||
//! * space `O(MN)`
|
//! * space `O(MN)`
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::ops::{Index, Range};
|
use std::ops::{Index, Range};
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
use crate::algorithms::DiffHook;
|
use crate::algorithms::DiffHook;
|
||||||
|
|
||||||
/// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm.
|
/// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm.
|
||||||
///
|
///
|
||||||
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
|
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
|
||||||
|
///
|
||||||
|
/// This diff is done with an optional deadline that defines the maximal
|
||||||
|
/// execution time permitted before it bails and falls back to a very bad
|
||||||
|
/// approximation. Deadlines with LCS do not make a lot of sense and should
|
||||||
|
/// not be used.
|
||||||
pub fn diff<Old, New, D>(
|
pub fn diff<Old, New, D>(
|
||||||
d: &mut D,
|
d: &mut D,
|
||||||
old: &Old,
|
old: &Old,
|
||||||
|
|
@ -17,6 +23,29 @@ pub fn diff<Old, New, D>(
|
||||||
new: &New,
|
new: &New,
|
||||||
new_range: Range<usize>,
|
new_range: Range<usize>,
|
||||||
) -> Result<(), D::Error>
|
) -> Result<(), D::Error>
|
||||||
|
where
|
||||||
|
Old: Index<usize> + ?Sized,
|
||||||
|
New: Index<usize> + ?Sized,
|
||||||
|
D: DiffHook,
|
||||||
|
New::Output: PartialEq<Old::Output>,
|
||||||
|
{
|
||||||
|
diff_deadline(d, old, old_range, new, new_range, None)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm.
|
||||||
|
///
|
||||||
|
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
|
||||||
|
///
|
||||||
|
/// This diff is done with an optional deadline that defines the maximal
|
||||||
|
/// execution time permitted before it bails and falls back to an approximation.
|
||||||
|
pub fn diff_deadline<Old, New, D>(
|
||||||
|
d: &mut D,
|
||||||
|
old: &Old,
|
||||||
|
old_range: Range<usize>,
|
||||||
|
new: &New,
|
||||||
|
new_range: Range<usize>,
|
||||||
|
deadline: Option<Instant>,
|
||||||
|
) -> Result<(), D::Error>
|
||||||
where
|
where
|
||||||
Old: Index<usize> + ?Sized,
|
Old: Index<usize> + ?Sized,
|
||||||
New: Index<usize> + ?Sized,
|
New: Index<usize> + ?Sized,
|
||||||
|
|
@ -44,11 +73,12 @@ where
|
||||||
.take_while(|x| new[x.1] == old[x.0])
|
.take_while(|x| new[x.1] == old[x.0])
|
||||||
.count();
|
.count();
|
||||||
|
|
||||||
let table = make_table(
|
let maybe_table = make_table(
|
||||||
old,
|
old,
|
||||||
prefix_len..(old_range.len() - suffix_len),
|
prefix_len..(old_range.len() - suffix_len),
|
||||||
new,
|
new,
|
||||||
prefix_len..(new_range.len() - suffix_len),
|
prefix_len..(new_range.len() - suffix_len),
|
||||||
|
deadline,
|
||||||
);
|
);
|
||||||
let mut old_idx = 0;
|
let mut old_idx = 0;
|
||||||
let mut new_idx = 0;
|
let mut new_idx = 0;
|
||||||
|
|
@ -59,6 +89,7 @@ where
|
||||||
d.equal(old_range.start, new_range.start, prefix_len)?;
|
d.equal(old_range.start, new_range.start, prefix_len)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(table) = maybe_table {
|
||||||
while new_idx < new_len && old_idx < old_len {
|
while new_idx < new_len && old_idx < old_len {
|
||||||
let old_orig_idx = old_range.start + prefix_len + old_idx;
|
let old_orig_idx = old_range.start + prefix_len + old_idx;
|
||||||
let new_orig_idx = new_range.start + prefix_len + new_idx;
|
let new_orig_idx = new_range.start + prefix_len + new_idx;
|
||||||
|
|
@ -77,6 +108,12 @@ where
|
||||||
new_idx += 1;
|
new_idx += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
let old_orig_idx = old_range.start + prefix_len + old_idx;
|
||||||
|
let new_orig_idx = new_range.start + prefix_len + new_idx;
|
||||||
|
d.delete(old_orig_idx, old_len, new_orig_idx)?;
|
||||||
|
d.insert(old_orig_idx, new_orig_idx, new_len)?;
|
||||||
|
}
|
||||||
|
|
||||||
if old_idx < old_len {
|
if old_idx < old_len {
|
||||||
d.delete(
|
d.delete(
|
||||||
|
|
@ -107,6 +144,10 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Shortcut for diffing slices.
|
/// Shortcut for diffing slices.
|
||||||
|
#[deprecated(
|
||||||
|
since = "1.4.0",
|
||||||
|
note = "slice utility function is now only available via similar::algorithms::diff_slices"
|
||||||
|
)]
|
||||||
pub fn diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
|
pub fn diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
|
||||||
where
|
where
|
||||||
D: DiffHook,
|
D: DiffHook,
|
||||||
|
|
@ -120,7 +161,8 @@ fn make_table<Old, New>(
|
||||||
old_range: Range<usize>,
|
old_range: Range<usize>,
|
||||||
new: &New,
|
new: &New,
|
||||||
new_range: Range<usize>,
|
new_range: Range<usize>,
|
||||||
) -> BTreeMap<(usize, usize), u32>
|
deadline: Option<Instant>,
|
||||||
|
) -> Option<BTreeMap<(usize, usize), u32>>
|
||||||
where
|
where
|
||||||
Old: Index<usize> + ?Sized,
|
Old: Index<usize> + ?Sized,
|
||||||
New: Index<usize> + ?Sized,
|
New: Index<usize> + ?Sized,
|
||||||
|
|
@ -131,6 +173,13 @@ where
|
||||||
let mut table = BTreeMap::new();
|
let mut table = BTreeMap::new();
|
||||||
|
|
||||||
for i in (0..new_len).rev() {
|
for i in (0..new_len).rev() {
|
||||||
|
// are we running for too long? give up on the table
|
||||||
|
if let Some(deadline) = deadline {
|
||||||
|
if Instant::now() > deadline {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for j in (0..old_len).rev() {
|
for j in (0..old_len).rev() {
|
||||||
let val = if new[i] == old[j] {
|
let val = if new[i] == old[j] {
|
||||||
table.get(&(i + 1, j + 1)).map_or(0, |&x| x) + 1
|
table.get(&(i + 1, j + 1)).map_or(0, |&x| x) + 1
|
||||||
|
|
@ -146,12 +195,12 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
table
|
Some(table)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_table() {
|
fn test_table() {
|
||||||
let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3);
|
let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3, None).unwrap();
|
||||||
let expected = {
|
let expected = {
|
||||||
let mut m = BTreeMap::new();
|
let mut m = BTreeMap::new();
|
||||||
m.insert((1, 0), 1);
|
m.insert((1, 0), 1);
|
||||||
|
|
@ -168,7 +217,7 @@ fn test_diff() {
|
||||||
let b: &[usize] = &[0, 1, 2, 9, 4];
|
let b: &[usize] = &[0, 1, 2, 9, 4];
|
||||||
|
|
||||||
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
||||||
diff_slices(&mut d, a, b).unwrap();
|
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
|
||||||
insta::assert_debug_snapshot!(d.into_inner().ops());
|
insta::assert_debug_snapshot!(d.into_inner().ops());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -178,7 +227,7 @@ fn test_contiguous() {
|
||||||
let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];
|
let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];
|
||||||
|
|
||||||
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
||||||
diff_slices(&mut d, a, b).unwrap();
|
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
|
||||||
insta::assert_debug_snapshot!(d.into_inner().ops());
|
insta::assert_debug_snapshot!(d.into_inner().ops());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -188,6 +237,6 @@ fn test_pat() {
|
||||||
let b: &[usize] = &[0, 1, 4, 5, 8, 9];
|
let b: &[usize] = &[0, 1, 4, 5, 8, 9];
|
||||||
|
|
||||||
let mut d = crate::algorithms::Capture::new();
|
let mut d = crate::algorithms::Capture::new();
|
||||||
diff_slices(&mut d, a, b).unwrap();
|
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
|
||||||
insta::assert_debug_snapshot!(d.ops());
|
insta::assert_debug_snapshot!(d.ops());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,7 @@ mod replace;
|
||||||
|
|
||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
use std::ops::{Index, Range};
|
use std::ops::{Index, Range};
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
pub use capture::Capture;
|
pub use capture::Capture;
|
||||||
pub use hook::{DiffHook, NoFinishHook};
|
pub use hook::{DiffHook, NoFinishHook};
|
||||||
|
|
@ -62,6 +63,34 @@ pub fn diff<Old, New, D>(
|
||||||
new: &New,
|
new: &New,
|
||||||
new_range: Range<usize>,
|
new_range: Range<usize>,
|
||||||
) -> Result<(), D::Error>
|
) -> Result<(), D::Error>
|
||||||
|
where
|
||||||
|
Old: Index<usize> + ?Sized,
|
||||||
|
New: Index<usize> + ?Sized,
|
||||||
|
D: DiffHook,
|
||||||
|
Old::Output: Hash + Eq + Ord,
|
||||||
|
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
||||||
|
{
|
||||||
|
diff_deadline(alg, d, old, old_range, new, new_range, None)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a diff between old and new with the given algorithm with deadline.
|
||||||
|
///
|
||||||
|
/// Diffs `old`, between indices `old_range` and `new` between indices `new_range`.
|
||||||
|
///
|
||||||
|
/// This diff is done with an optional deadline that defines the maximal
|
||||||
|
/// execution time permitted before it bails and falls back to an approximation.
|
||||||
|
/// Note that not all algorithms behave well if they reach the deadline (LCS
|
||||||
|
/// for instance produces a very simplistic diff when the deadline is reached
|
||||||
|
/// in all cases).
|
||||||
|
pub fn diff_deadline<Old, New, D>(
|
||||||
|
alg: Algorithm,
|
||||||
|
d: &mut D,
|
||||||
|
old: &Old,
|
||||||
|
old_range: Range<usize>,
|
||||||
|
new: &New,
|
||||||
|
new_range: Range<usize>,
|
||||||
|
deadline: Option<Instant>,
|
||||||
|
) -> Result<(), D::Error>
|
||||||
where
|
where
|
||||||
Old: Index<usize> + ?Sized,
|
Old: Index<usize> + ?Sized,
|
||||||
New: Index<usize> + ?Sized,
|
New: Index<usize> + ?Sized,
|
||||||
|
|
@ -70,9 +99,9 @@ where
|
||||||
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
||||||
{
|
{
|
||||||
match alg {
|
match alg {
|
||||||
Algorithm::Myers => myers::diff(d, old, old_range, new, new_range),
|
Algorithm::Myers => myers::diff_deadline(d, old, old_range, new, new_range, deadline),
|
||||||
Algorithm::Patience => patience::diff(d, old, old_range, new, new_range),
|
Algorithm::Patience => patience::diff_deadline(d, old, old_range, new, new_range, deadline),
|
||||||
Algorithm::Lcs => lcs::diff(d, old, old_range, new, new_range),
|
Algorithm::Lcs => lcs::diff_deadline(d, old, old_range, new, new_range, deadline),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -84,3 +113,18 @@ where
|
||||||
{
|
{
|
||||||
diff(alg, d, old, 0..old.len(), new, 0..new.len())
|
diff(alg, d, old, 0..old.len(), new, 0..new.len())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Shortcut for diffing slices with a specific algorithm and an optional deadline.
|
||||||
|
pub fn diff_slices_deadline<D, T>(
|
||||||
|
alg: Algorithm,
|
||||||
|
d: &mut D,
|
||||||
|
old: &[T],
|
||||||
|
new: &[T],
|
||||||
|
deadline: Option<Instant>,
|
||||||
|
) -> Result<(), D::Error>
|
||||||
|
where
|
||||||
|
D: DiffHook,
|
||||||
|
T: Eq + Hash + Ord,
|
||||||
|
{
|
||||||
|
diff_deadline(alg, d, old, 0..old.len(), new, 0..new.len(), deadline)
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -8,8 +8,19 @@
|
||||||
//!
|
//!
|
||||||
//! The implementation of this algorithm is based on the implementation by
|
//! The implementation of this algorithm is based on the implementation by
|
||||||
//! Brandon Williams.
|
//! Brandon Williams.
|
||||||
|
//!
|
||||||
|
//! # Heuristics
|
||||||
|
//!
|
||||||
|
//! At present this implementation of Myers' does not implement any more advanced
|
||||||
|
//! heuristics that would solve some pathological cases. For instance, passing two
|
||||||
|
//! large and completely distinct sequences to the algorithm will make it spin
|
||||||
|
//! without making reasonable progress. Currently the only protection in the
|
||||||
|
//! library against this is to pass a deadline to the diffing algorithm.
|
||||||
|
//!
|
||||||
|
//! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15).
|
||||||
|
|
||||||
use std::ops::{Index, IndexMut, Range};
|
use std::ops::{Index, IndexMut, Range};
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
use crate::algorithms::DiffHook;
|
use crate::algorithms::DiffHook;
|
||||||
|
|
||||||
|
|
@ -23,6 +34,29 @@ pub fn diff<Old, New, D>(
|
||||||
new: &New,
|
new: &New,
|
||||||
new_range: Range<usize>,
|
new_range: Range<usize>,
|
||||||
) -> Result<(), D::Error>
|
) -> Result<(), D::Error>
|
||||||
|
where
|
||||||
|
Old: Index<usize> + ?Sized,
|
||||||
|
New: Index<usize> + ?Sized,
|
||||||
|
D: DiffHook,
|
||||||
|
New::Output: PartialEq<Old::Output>,
|
||||||
|
{
|
||||||
|
diff_deadline(d, old, old_range, new, new_range, None)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Myers' diff algorithm with deadline.
|
||||||
|
///
|
||||||
|
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
|
||||||
|
///
|
||||||
|
/// This diff is done with an optional deadline that defines the maximal
|
||||||
|
/// execution time permitted before it bails and falls back to an approximation.
|
||||||
|
pub fn diff_deadline<Old, New, D>(
|
||||||
|
d: &mut D,
|
||||||
|
old: &Old,
|
||||||
|
old_range: Range<usize>,
|
||||||
|
new: &New,
|
||||||
|
new_range: Range<usize>,
|
||||||
|
deadline: Option<Instant>,
|
||||||
|
) -> Result<(), D::Error>
|
||||||
where
|
where
|
||||||
Old: Index<usize> + ?Sized,
|
Old: Index<usize> + ?Sized,
|
||||||
New: Index<usize> + ?Sized,
|
New: Index<usize> + ?Sized,
|
||||||
|
|
@ -32,11 +66,17 @@ where
|
||||||
let max_d = max_d(old_range.len(), new_range.len());
|
let max_d = max_d(old_range.len(), new_range.len());
|
||||||
let mut vf = V::new(max_d);
|
let mut vf = V::new(max_d);
|
||||||
let mut vb = V::new(max_d);
|
let mut vb = V::new(max_d);
|
||||||
conquer(d, old, old_range, new, new_range, &mut vf, &mut vb)?;
|
conquer(
|
||||||
|
d, old, old_range, new, new_range, &mut vf, &mut vb, deadline,
|
||||||
|
)?;
|
||||||
d.finish()
|
d.finish()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Shortcut for diffing slices.
|
/// Shortcut for diffing slices.
|
||||||
|
#[deprecated(
|
||||||
|
since = "1.4.0",
|
||||||
|
note = "slice utility function is now only available via similar::algorithms::diff_slices"
|
||||||
|
)]
|
||||||
pub fn diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
|
pub fn diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
|
||||||
where
|
where
|
||||||
D: DiffHook,
|
D: DiffHook,
|
||||||
|
|
@ -172,7 +212,8 @@ fn find_middle_snake<Old, New>(
|
||||||
new_range: Range<usize>,
|
new_range: Range<usize>,
|
||||||
vf: &mut V,
|
vf: &mut V,
|
||||||
vb: &mut V,
|
vb: &mut V,
|
||||||
) -> Snake
|
deadline: Option<Instant>,
|
||||||
|
) -> Option<Snake>
|
||||||
where
|
where
|
||||||
Old: Index<usize> + ?Sized,
|
Old: Index<usize> + ?Sized,
|
||||||
New: Index<usize> + ?Sized,
|
New: Index<usize> + ?Sized,
|
||||||
|
|
@ -197,6 +238,13 @@ where
|
||||||
assert!(vb.len() >= d_max);
|
assert!(vb.len() >= d_max);
|
||||||
|
|
||||||
for d in 0..d_max as isize {
|
for d in 0..d_max as isize {
|
||||||
|
// are we running for too long?
|
||||||
|
if let Some(deadline) = deadline {
|
||||||
|
if Instant::now() > deadline {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Forward path
|
// Forward path
|
||||||
for k in (-d..=d).rev().step_by(2) {
|
for k in (-d..=d).rev().step_by(2) {
|
||||||
let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) {
|
let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) {
|
||||||
|
|
@ -230,10 +278,10 @@ where
|
||||||
// TODO optimize this so we don't have to compare against n
|
// TODO optimize this so we don't have to compare against n
|
||||||
if vf[k] + vb[-(k - delta)] >= n {
|
if vf[k] + vb[-(k - delta)] >= n {
|
||||||
// Return the snake
|
// Return the snake
|
||||||
return Snake {
|
return Some(Snake {
|
||||||
x_start: x0 + old_range.start,
|
x_start: x0 + old_range.start,
|
||||||
y_start: y0 + new_range.start,
|
y_start: y0 + new_range.start,
|
||||||
};
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -266,10 +314,10 @@ where
|
||||||
// TODO optimize this so we don't have to compare against n
|
// TODO optimize this so we don't have to compare against n
|
||||||
if vb[k] + vf[-(k - delta)] >= n {
|
if vb[k] + vf[-(k - delta)] >= n {
|
||||||
// Return the snake
|
// Return the snake
|
||||||
return Snake {
|
return Some(Snake {
|
||||||
x_start: n - x + old_range.start,
|
x_start: n - x + old_range.start,
|
||||||
y_start: m - y + new_range.start,
|
y_start: m - y + new_range.start,
|
||||||
};
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -277,9 +325,11 @@ where
|
||||||
// TODO: Maybe there's an opportunity to optimize and bail early?
|
// TODO: Maybe there's an opportunity to optimize and bail early?
|
||||||
}
|
}
|
||||||
|
|
||||||
unreachable!("unable to find a middle snake");
|
// deadline reached
|
||||||
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
fn conquer<Old, New, D>(
|
fn conquer<Old, New, D>(
|
||||||
d: &mut D,
|
d: &mut D,
|
||||||
old: &Old,
|
old: &Old,
|
||||||
|
|
@ -288,6 +338,7 @@ fn conquer<Old, New, D>(
|
||||||
mut new_range: Range<usize>,
|
mut new_range: Range<usize>,
|
||||||
vf: &mut V,
|
vf: &mut V,
|
||||||
vb: &mut V,
|
vb: &mut V,
|
||||||
|
deadline: Option<Instant>,
|
||||||
) -> Result<(), D::Error>
|
) -> Result<(), D::Error>
|
||||||
where
|
where
|
||||||
Old: Index<usize> + ?Sized,
|
Old: Index<usize> + ?Sized,
|
||||||
|
|
@ -326,12 +377,30 @@ where
|
||||||
new_range.start,
|
new_range.start,
|
||||||
new_range.end - new_range.start,
|
new_range.end - new_range.start,
|
||||||
)?;
|
)?;
|
||||||
} else {
|
} else if let Some(snake) = find_middle_snake(
|
||||||
let snake = find_middle_snake(old, old_range.clone(), new, new_range.clone(), vf, vb);
|
old,
|
||||||
|
old_range.clone(),
|
||||||
|
new,
|
||||||
|
new_range.clone(),
|
||||||
|
vf,
|
||||||
|
vb,
|
||||||
|
deadline,
|
||||||
|
) {
|
||||||
let (old_a, old_b) = split_at(old_range, snake.x_start);
|
let (old_a, old_b) = split_at(old_range, snake.x_start);
|
||||||
let (new_a, new_b) = split_at(new_range, snake.y_start);
|
let (new_a, new_b) = split_at(new_range, snake.y_start);
|
||||||
conquer(d, old, old_a, new, new_a, vf, vb)?;
|
conquer(d, old, old_a, new, new_a, vf, vb, deadline)?;
|
||||||
conquer(d, old, old_b, new, new_b, vf, vb)?;
|
conquer(d, old, old_b, new, new_b, vf, vb, deadline)?;
|
||||||
|
} else {
|
||||||
|
d.delete(
|
||||||
|
old_range.start,
|
||||||
|
old_range.end - old_range.start,
|
||||||
|
new_range.start,
|
||||||
|
)?;
|
||||||
|
d.insert(
|
||||||
|
old_range.start,
|
||||||
|
new_range.start,
|
||||||
|
new_range.end - new_range.start,
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
if common_suffix_len > 0 {
|
if common_suffix_len > 0 {
|
||||||
|
|
@ -348,7 +417,7 @@ fn test_find_middle_snake() {
|
||||||
let max_d = max_d(a.len(), b.len());
|
let max_d = max_d(a.len(), b.len());
|
||||||
let mut vf = V::new(max_d);
|
let mut vf = V::new(max_d);
|
||||||
let mut vb = V::new(max_d);
|
let mut vb = V::new(max_d);
|
||||||
let snake = find_middle_snake(a, 0..a.len(), b, 0..b.len(), &mut vf, &mut vb);
|
let snake = find_middle_snake(a, 0..a.len(), b, 0..b.len(), &mut vf, &mut vb, None).unwrap();
|
||||||
assert_eq!(snake.x_start, 4);
|
assert_eq!(snake.x_start, 4);
|
||||||
assert_eq!(snake.y_start, 1);
|
assert_eq!(snake.y_start, 1);
|
||||||
}
|
}
|
||||||
|
|
@ -359,7 +428,7 @@ fn test_diff() {
|
||||||
let b: &[usize] = &[0, 1, 2, 9, 4];
|
let b: &[usize] = &[0, 1, 2, 9, 4];
|
||||||
|
|
||||||
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
||||||
diff_slices(&mut d, a, b).unwrap();
|
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
|
||||||
insta::assert_debug_snapshot!(d.into_inner().ops());
|
insta::assert_debug_snapshot!(d.into_inner().ops());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -369,7 +438,7 @@ fn test_contiguous() {
|
||||||
let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];
|
let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];
|
||||||
|
|
||||||
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
||||||
diff_slices(&mut d, a, b).unwrap();
|
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
|
||||||
insta::assert_debug_snapshot!(d.into_inner().ops());
|
insta::assert_debug_snapshot!(d.into_inner().ops());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -379,6 +448,45 @@ fn test_pat() {
|
||||||
let b: &[usize] = &[0, 1, 4, 5, 8, 9];
|
let b: &[usize] = &[0, 1, 4, 5, 8, 9];
|
||||||
|
|
||||||
let mut d = crate::algorithms::Capture::new();
|
let mut d = crate::algorithms::Capture::new();
|
||||||
diff_slices(&mut d, a, b).unwrap();
|
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
|
||||||
insta::assert_debug_snapshot!(d.ops());
|
insta::assert_debug_snapshot!(d.ops());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_deadline_reached() {
|
||||||
|
use std::ops::Index;
|
||||||
|
use std::time::Duration;
|
||||||
|
|
||||||
|
let a = (0..100).collect::<Vec<_>>();
|
||||||
|
let mut b = (0..100).collect::<Vec<_>>();
|
||||||
|
b[10] = 99;
|
||||||
|
b[50] = 99;
|
||||||
|
b[25] = 99;
|
||||||
|
|
||||||
|
struct SlowIndex<'a>(&'a [usize]);
|
||||||
|
|
||||||
|
impl<'a> Index<usize> for SlowIndex<'a> {
|
||||||
|
type Output = usize;
|
||||||
|
|
||||||
|
fn index(&self, index: usize) -> &Self::Output {
|
||||||
|
std::thread::sleep(Duration::from_millis(1));
|
||||||
|
&self.0[index]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let slow_a = SlowIndex(&a);
|
||||||
|
let slow_b = SlowIndex(&b);
|
||||||
|
|
||||||
|
// don't give it enough time to do anything interesting
|
||||||
|
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
||||||
|
diff_deadline(
|
||||||
|
&mut d,
|
||||||
|
&slow_a,
|
||||||
|
0..a.len(),
|
||||||
|
&slow_b,
|
||||||
|
0..b.len(),
|
||||||
|
Some(Instant::now() + Duration::from_millis(50)),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
insta::assert_debug_snapshot!(d.into_inner().ops());
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ use std::collections::hash_map::Entry;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
use std::ops::{Index, Range};
|
use std::ops::{Index, Range};
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
use crate::algorithms::{myers, DiffHook, NoFinishHook, Replace};
|
use crate::algorithms::{myers, DiffHook, NoFinishHook, Replace};
|
||||||
|
|
||||||
|
|
@ -25,6 +26,30 @@ pub fn diff<Old, New, D>(
|
||||||
new: &New,
|
new: &New,
|
||||||
new_range: Range<usize>,
|
new_range: Range<usize>,
|
||||||
) -> Result<(), D::Error>
|
) -> Result<(), D::Error>
|
||||||
|
where
|
||||||
|
Old: Index<usize> + ?Sized,
|
||||||
|
New: Index<usize> + ?Sized,
|
||||||
|
Old::Output: Hash + Eq,
|
||||||
|
New::Output: PartialEq<Old::Output> + Hash + Eq,
|
||||||
|
D: DiffHook,
|
||||||
|
{
|
||||||
|
diff_deadline(d, old, old_range, new, new_range, None)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Patience diff algorithm with deadline.
|
||||||
|
///
|
||||||
|
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
|
||||||
|
///
|
||||||
|
/// This diff is done with an optional deadline that defines the maximal
|
||||||
|
/// execution time permitted before it bails and falls back to an approximation.
|
||||||
|
pub fn diff_deadline<Old, New, D>(
|
||||||
|
d: &mut D,
|
||||||
|
old: &Old,
|
||||||
|
old_range: Range<usize>,
|
||||||
|
new: &New,
|
||||||
|
new_range: Range<usize>,
|
||||||
|
deadline: Option<Instant>,
|
||||||
|
) -> Result<(), D::Error>
|
||||||
where
|
where
|
||||||
Old: Index<usize> + ?Sized,
|
Old: Index<usize> + ?Sized,
|
||||||
New: Index<usize> + ?Sized,
|
New: Index<usize> + ?Sized,
|
||||||
|
|
@ -45,18 +70,24 @@ where
|
||||||
new_current: new_range.start,
|
new_current: new_range.start,
|
||||||
new_end: new_range.end,
|
new_end: new_range.end,
|
||||||
new_indexes: &new_indexes,
|
new_indexes: &new_indexes,
|
||||||
|
deadline,
|
||||||
});
|
});
|
||||||
myers::diff(
|
myers::diff_deadline(
|
||||||
&mut d,
|
&mut d,
|
||||||
&old_indexes,
|
&old_indexes,
|
||||||
0..old_indexes.len(),
|
0..old_indexes.len(),
|
||||||
&new_indexes,
|
&new_indexes,
|
||||||
0..new_indexes.len(),
|
0..new_indexes.len(),
|
||||||
|
deadline,
|
||||||
)?;
|
)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Shortcut for diffing slices.
|
/// Shortcut for diffing slices.
|
||||||
|
#[deprecated(
|
||||||
|
since = "1.4.0",
|
||||||
|
note = "slice utility function is now only available via similar::algorithms::diff_slices"
|
||||||
|
)]
|
||||||
pub fn diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
|
pub fn diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
|
||||||
where
|
where
|
||||||
D: DiffHook,
|
D: DiffHook,
|
||||||
|
|
@ -128,6 +159,7 @@ struct Patience<'old, 'new, 'd, Old: ?Sized, New: ?Sized, D> {
|
||||||
new_current: usize,
|
new_current: usize,
|
||||||
new_end: usize,
|
new_end: usize,
|
||||||
new_indexes: &'new [Indexable<'new, New>],
|
new_indexes: &'new [Indexable<'new, New>],
|
||||||
|
deadline: Option<Instant>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'old, 'new, 'd, Old, New, D> DiffHook for Patience<'old, 'new, 'd, Old, New, D>
|
impl<'old, 'new, 'd, Old, New, D> DiffHook for Patience<'old, 'new, 'd, Old, New, D>
|
||||||
|
|
@ -153,12 +185,13 @@ where
|
||||||
self.d.equal(a0, b0, self.old_current - a0)?;
|
self.d.equal(a0, b0, self.old_current - a0)?;
|
||||||
}
|
}
|
||||||
let mut no_finish_d = NoFinishHook::new(&mut self.d);
|
let mut no_finish_d = NoFinishHook::new(&mut self.d);
|
||||||
myers::diff(
|
myers::diff_deadline(
|
||||||
&mut no_finish_d,
|
&mut no_finish_d,
|
||||||
self.old,
|
self.old,
|
||||||
self.old_current..self.old_indexes[old].index,
|
self.old_current..self.old_indexes[old].index,
|
||||||
self.new,
|
self.new,
|
||||||
self.new_current..self.new_indexes[new].index,
|
self.new_current..self.new_indexes[new].index,
|
||||||
|
self.deadline,
|
||||||
)?;
|
)?;
|
||||||
self.old_current = self.old_indexes[old].index;
|
self.old_current = self.old_indexes[old].index;
|
||||||
self.new_current = self.new_indexes[new].index;
|
self.new_current = self.new_indexes[new].index;
|
||||||
|
|
@ -167,12 +200,13 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
fn finish(&mut self) -> Result<(), D::Error> {
|
fn finish(&mut self) -> Result<(), D::Error> {
|
||||||
myers::diff(
|
myers::diff_deadline(
|
||||||
self.d,
|
self.d,
|
||||||
self.old,
|
self.old,
|
||||||
self.old_current..self.old_end,
|
self.old_current..self.old_end,
|
||||||
self.new,
|
self.new,
|
||||||
self.new_current..self.new_end,
|
self.new_current..self.new_end,
|
||||||
|
self.deadline,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -183,7 +217,7 @@ fn test_patience() {
|
||||||
let b: &[usize] = &[10, 1, 2, 2, 8, 9, 4, 4, 7, 47, 18];
|
let b: &[usize] = &[10, 1, 2, 2, 8, 9, 4, 4, 7, 47, 18];
|
||||||
|
|
||||||
let mut d = Replace::new(crate::algorithms::Capture::new());
|
let mut d = Replace::new(crate::algorithms::Capture::new());
|
||||||
diff_slices(&mut d, a, b).unwrap();
|
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(d.into_inner().ops());
|
insta::assert_debug_snapshot!(d.into_inner().ops());
|
||||||
}
|
}
|
||||||
|
|
@ -195,7 +229,7 @@ fn test_patience_out_of_bounds_bug() {
|
||||||
let b: &[usize] = &[1, 2, 3];
|
let b: &[usize] = &[1, 2, 3];
|
||||||
|
|
||||||
let mut d = Replace::new(crate::algorithms::Capture::new());
|
let mut d = Replace::new(crate::algorithms::Capture::new());
|
||||||
diff_slices(&mut d, a, b).unwrap();
|
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(d.into_inner().ops());
|
insta::assert_debug_snapshot!(d.into_inner().ops());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -134,7 +134,7 @@ impl<D: DiffHook> DiffHook for Replace<D> {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_mayers_replace() {
|
fn test_mayers_replace() {
|
||||||
use crate::algorithms::myers;
|
use crate::algorithms::{diff_slices, Algorithm};
|
||||||
let a: &[&str] = &[
|
let a: &[&str] = &[
|
||||||
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n",
|
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n",
|
||||||
"a\n",
|
"a\n",
|
||||||
|
|
@ -159,7 +159,7 @@ fn test_mayers_replace() {
|
||||||
];
|
];
|
||||||
|
|
||||||
let mut d = Replace::new(crate::algorithms::Capture::new());
|
let mut d = Replace::new(crate::algorithms::Capture::new());
|
||||||
myers::diff_slices(&mut d, a, b).unwrap();
|
diff_slices(Algorithm::Myers, &mut d, a, b).unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(&d.into_inner().ops(), @r###"
|
insta::assert_debug_snapshot!(&d.into_inner().ops(), @r###"
|
||||||
[
|
[
|
||||||
|
|
@ -196,11 +196,13 @@ fn test_mayers_replace() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_replace() {
|
fn test_replace() {
|
||||||
|
use crate::algorithms::{diff_slices, Algorithm};
|
||||||
|
|
||||||
let a: &[usize] = &[0, 1, 2, 3, 4];
|
let a: &[usize] = &[0, 1, 2, 3, 4];
|
||||||
let b: &[usize] = &[0, 1, 2, 7, 8, 9];
|
let b: &[usize] = &[0, 1, 2, 7, 8, 9];
|
||||||
|
|
||||||
let mut d = Replace::new(crate::algorithms::Capture::new());
|
let mut d = Replace::new(crate::algorithms::Capture::new());
|
||||||
crate::algorithms::myers::diff_slices(&mut d, a, b).unwrap();
|
diff_slices(Algorithm::Myers, &mut d, a, b).unwrap();
|
||||||
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"
|
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"
|
||||||
[
|
[
|
||||||
Equal {
|
Equal {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,22 @@
|
||||||
|
---
|
||||||
|
source: src/algorithms/myers.rs
|
||||||
|
expression: d.into_inner().ops()
|
||||||
|
---
|
||||||
|
[
|
||||||
|
Equal {
|
||||||
|
old_index: 0,
|
||||||
|
new_index: 0,
|
||||||
|
len: 10,
|
||||||
|
},
|
||||||
|
Replace {
|
||||||
|
old_index: 10,
|
||||||
|
old_len: 41,
|
||||||
|
new_index: 10,
|
||||||
|
new_len: 41,
|
||||||
|
},
|
||||||
|
Equal {
|
||||||
|
old_index: 51,
|
||||||
|
new_index: 51,
|
||||||
|
len: 49,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
@ -1,13 +1,15 @@
|
||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
use std::ops::{Index, Range};
|
use std::ops::{Index, Range};
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
use crate::algorithms::{diff, diff_slices, Capture, Replace};
|
use crate::algorithms::{diff_deadline, diff_slices_deadline, Capture, Replace};
|
||||||
use crate::{Algorithm, DiffOp};
|
use crate::{Algorithm, DiffOp};
|
||||||
|
|
||||||
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
||||||
///
|
///
|
||||||
/// This is like [`diff`] but instead of using an arbitrary hook this will
|
/// This is like [`diff`](crate::algorithms::diff) but instead of using an
|
||||||
/// always use [`Replace`] + [`Capture`] and return the captured [`DiffOp`]s.
|
/// arbitrary hook this will always use [`Replace`] + [`Capture`] and return the
|
||||||
|
/// captured [`DiffOp`]s.
|
||||||
pub fn capture_diff<Old, New>(
|
pub fn capture_diff<Old, New>(
|
||||||
alg: Algorithm,
|
alg: Algorithm,
|
||||||
old: &Old,
|
old: &Old,
|
||||||
|
|
@ -15,6 +17,26 @@ pub fn capture_diff<Old, New>(
|
||||||
new: &New,
|
new: &New,
|
||||||
new_range: Range<usize>,
|
new_range: Range<usize>,
|
||||||
) -> Vec<DiffOp>
|
) -> Vec<DiffOp>
|
||||||
|
where
|
||||||
|
Old: Index<usize> + ?Sized,
|
||||||
|
New: Index<usize> + ?Sized,
|
||||||
|
Old::Output: Hash + Eq + Ord,
|
||||||
|
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
||||||
|
{
|
||||||
|
capture_diff_deadline(alg, old, old_range, new, new_range, None)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
||||||
|
///
|
||||||
|
/// Works like [`capture_diff`] but with an optional deadline.
|
||||||
|
pub fn capture_diff_deadline<Old, New>(
|
||||||
|
alg: Algorithm,
|
||||||
|
old: &Old,
|
||||||
|
old_range: Range<usize>,
|
||||||
|
new: &New,
|
||||||
|
new_range: Range<usize>,
|
||||||
|
deadline: Option<Instant>,
|
||||||
|
) -> Vec<DiffOp>
|
||||||
where
|
where
|
||||||
Old: Index<usize> + ?Sized,
|
Old: Index<usize> + ?Sized,
|
||||||
New: Index<usize> + ?Sized,
|
New: Index<usize> + ?Sized,
|
||||||
|
|
@ -22,17 +44,32 @@ where
|
||||||
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
||||||
{
|
{
|
||||||
let mut d = Replace::new(Capture::new());
|
let mut d = Replace::new(Capture::new());
|
||||||
diff(alg, &mut d, old, old_range, new, new_range).unwrap();
|
diff_deadline(alg, &mut d, old, old_range, new, new_range, deadline).unwrap();
|
||||||
d.into_inner().into_ops()
|
d.into_inner().into_ops()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
||||||
pub fn capture_diff_slices<T>(alg: Algorithm, old: &[T], new: &[T]) -> Vec<DiffOp>
|
pub fn capture_diff_slices<T>(alg: Algorithm, old: &[T], new: &[T]) -> Vec<DiffOp>
|
||||||
|
where
|
||||||
|
T: Eq + Hash + Ord,
|
||||||
|
{
|
||||||
|
capture_diff_slices_deadline(alg, old, new, None)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
||||||
|
///
|
||||||
|
/// Works like [`capture_diff_slices`] but with an optional deadline.
|
||||||
|
pub fn capture_diff_slices_deadline<T>(
|
||||||
|
alg: Algorithm,
|
||||||
|
old: &[T],
|
||||||
|
new: &[T],
|
||||||
|
deadline: Option<Instant>,
|
||||||
|
) -> Vec<DiffOp>
|
||||||
where
|
where
|
||||||
T: Eq + Hash + Ord,
|
T: Eq + Hash + Ord,
|
||||||
{
|
{
|
||||||
let mut d = Replace::new(Capture::new());
|
let mut d = Replace::new(Capture::new());
|
||||||
diff_slices(alg, &mut d, old, new).unwrap();
|
diff_slices_deadline(alg, &mut d, old, new, deadline).unwrap();
|
||||||
d.into_inner().into_ops()
|
d.into_inner().into_ops()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
16
src/lib.rs
16
src/lib.rs
|
|
@ -106,6 +106,22 @@
|
||||||
//! As the [`TextDiff::grouped_ops`] method can isolate clusters of changes
|
//! As the [`TextDiff::grouped_ops`] method can isolate clusters of changes
|
||||||
//! this even works for very long files if paired with this method.
|
//! this even works for very long files if paired with this method.
|
||||||
//!
|
//!
|
||||||
|
//! # Deadlines and Performance
|
||||||
|
//!
|
||||||
|
//! For large and very distinct inputs the algorithms as implemented can take
|
||||||
|
//! a very, very long time to execute. Too long to make sense in practice.
|
||||||
|
//! To work around this issue all diffing algorithms also provide a version
|
||||||
|
//! that accepts a deadline which is the point in time as defined by an
|
||||||
|
//! [`Instant`](std::time::Instant) after which the algorithm should give up.
|
||||||
|
//! What giving up means depends on the algorithm. For instance due to the
|
||||||
|
//! recursive, divide and conquer nature of Myers' diff you will still get a
|
||||||
|
//! pretty decent diff in many cases when a deadline is reached. Whereas on the
|
||||||
|
//! other hand the LCS diff is unlikely to give any decent results in such a
|
||||||
|
//! situation.
|
||||||
|
//!
|
||||||
|
//! The [`TextDiff`] type also lets you configure a deadline and/or timeout
|
||||||
|
//! when performing a text diff.
|
||||||
|
//!
|
||||||
//! # Feature Flags
|
//! # Feature Flags
|
||||||
//!
|
//!
|
||||||
//! The crate by default does not have any dependencies however for some use
|
//! The crate by default does not have any dependencies however for some use
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::cmp::Reverse;
|
use std::cmp::Reverse;
|
||||||
use std::collections::BinaryHeap;
|
use std::collections::BinaryHeap;
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
|
||||||
mod abstraction;
|
mod abstraction;
|
||||||
#[cfg(feature = "inline")]
|
#[cfg(feature = "inline")]
|
||||||
|
|
@ -15,7 +16,22 @@ pub use self::inline::InlineChange;
|
||||||
use self::utils::{upper_seq_ratio, QuickSeqRatio};
|
use self::utils::{upper_seq_ratio, QuickSeqRatio};
|
||||||
use crate::iter::{AllChangesIter, ChangesIter};
|
use crate::iter::{AllChangesIter, ChangesIter};
|
||||||
use crate::udiff::UnifiedDiff;
|
use crate::udiff::UnifiedDiff;
|
||||||
use crate::{capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, DiffOp};
|
use crate::{capture_diff_slices_deadline, get_diff_ratio, group_diff_ops, Algorithm, DiffOp};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
enum Deadline {
|
||||||
|
Absolute(Instant),
|
||||||
|
Relative(Duration),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Deadline {
|
||||||
|
fn into_instant(self) -> Instant {
|
||||||
|
match self {
|
||||||
|
Deadline::Absolute(instant) => instant,
|
||||||
|
Deadline::Relative(duration) => Instant::now() + duration,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A builder type config for more complex uses of [`TextDiff`].
|
/// A builder type config for more complex uses of [`TextDiff`].
|
||||||
///
|
///
|
||||||
|
|
@ -24,6 +40,7 @@ use crate::{capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, Diff
|
||||||
pub struct TextDiffConfig {
|
pub struct TextDiffConfig {
|
||||||
algorithm: Algorithm,
|
algorithm: Algorithm,
|
||||||
newline_terminated: Option<bool>,
|
newline_terminated: Option<bool>,
|
||||||
|
deadline: Option<Deadline>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for TextDiffConfig {
|
impl Default for TextDiffConfig {
|
||||||
|
|
@ -31,6 +48,7 @@ impl Default for TextDiffConfig {
|
||||||
TextDiffConfig {
|
TextDiffConfig {
|
||||||
algorithm: Algorithm::default(),
|
algorithm: Algorithm::default(),
|
||||||
newline_terminated: None,
|
newline_terminated: None,
|
||||||
|
deadline: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -44,6 +62,24 @@ impl TextDiffConfig {
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Sets a deadline for the diff operation.
|
||||||
|
///
|
||||||
|
/// By default a diff will take as long as it takes. For certain diff
|
||||||
|
/// algorithms like Myers' and Patience a maximum running time can be
|
||||||
|
/// defined after which the algorithm gives up and approximates.
|
||||||
|
pub fn deadline(&mut self, deadline: Instant) -> &mut Self {
|
||||||
|
self.deadline = Some(Deadline::Absolute(deadline));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sets a timeout for the diff operation.
|
||||||
|
///
|
||||||
|
/// This is like [`deadline`](Self::deadline) but accepts a duration.
|
||||||
|
pub fn timeout(&mut self, timeout: Duration) -> &mut Self {
|
||||||
|
self.deadline = Some(Deadline::Relative(timeout));
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// Changes the newline termination flag.
|
/// Changes the newline termination flag.
|
||||||
///
|
///
|
||||||
/// The default is automatic based on input. This flag controls the
|
/// The default is automatic based on input. This flag controls the
|
||||||
|
|
@ -291,7 +327,12 @@ impl TextDiffConfig {
|
||||||
new: Cow<'bufs, [&'new T]>,
|
new: Cow<'bufs, [&'new T]>,
|
||||||
newline_terminated: bool,
|
newline_terminated: bool,
|
||||||
) -> TextDiff<'old, 'new, 'bufs, T> {
|
) -> TextDiff<'old, 'new, 'bufs, T> {
|
||||||
let ops = capture_diff_slices(self.algorithm, &old, &new);
|
let ops = capture_diff_slices_deadline(
|
||||||
|
self.algorithm,
|
||||||
|
&old,
|
||||||
|
&new,
|
||||||
|
self.deadline.map(|x| x.into_instant()),
|
||||||
|
);
|
||||||
TextDiff {
|
TextDiff {
|
||||||
old,
|
old,
|
||||||
new,
|
new,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue