Added deadlines to all algorithms (#18)

This adds a deadline to all algorithms which lets one bail in a fixed amount of
time to avoid pathological cases.
This commit is contained in:
Armin Ronacher 2021-02-20 10:12:06 +01:00 committed by GitHub
parent 99386e8106
commit 74e2805a95
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 412 additions and 57 deletions

View file

@ -6,6 +6,8 @@ All notable changes to similar are documented here.
* Performance improvements for the LCS algorithm.
* Small performance improvements by adding an early opt-out for inline highlighting.
* Added deadlines to all diffing algorithms to bail early.
* Deprecated slice diffing methods in the individual algorithm modules.
## 1.2.2

View file

@ -94,7 +94,7 @@ impl DiffHook for Capture {
#[test]
fn test_capture_hook_grouping() {
use crate::algorithms::{myers, Replace};
use crate::algorithms::{diff_slices, Algorithm, Replace};
let rng = (1..100).collect::<Vec<_>>();
let mut rng_new = rng.clone();
@ -104,7 +104,7 @@ fn test_capture_hook_grouping() {
rng_new[34] = 1000;
let mut d = Replace::new(Capture::new());
myers::diff_slices(&mut d, &rng, &rng_new).unwrap();
diff_slices(Algorithm::Myers, &mut d, &rng, &rng_new).unwrap();
let ops = d.into_inner().into_grouped_ops(3);
let tags = ops

View file

@ -4,12 +4,18 @@
//! * space `O(MN)`
use std::collections::BTreeMap;
use std::ops::{Index, Range};
use std::time::Instant;
use crate::algorithms::DiffHook;
/// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm.
///
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
///
/// This diff is done with an optional deadline that defines the maximal
/// execution time permitted before it bails and falls back to a very bad
/// approximation. Deadlines with LCS do not make a lot of sense and should
/// not be used.
pub fn diff<Old, New, D>(
d: &mut D,
old: &Old,
@ -17,6 +23,29 @@ pub fn diff<Old, New, D>(
new: &New,
new_range: Range<usize>,
) -> Result<(), D::Error>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
D: DiffHook,
New::Output: PartialEq<Old::Output>,
{
diff_deadline(d, old, old_range, new, new_range, None)
}
/// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm.
///
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
///
/// This diff is done with an optional deadline that defines the maximal
/// execution time permitted before it bails and falls back to an approximation.
pub fn diff_deadline<Old, New, D>(
d: &mut D,
old: &Old,
old_range: Range<usize>,
new: &New,
new_range: Range<usize>,
deadline: Option<Instant>,
) -> Result<(), D::Error>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
@ -44,11 +73,12 @@ where
.take_while(|x| new[x.1] == old[x.0])
.count();
let table = make_table(
let maybe_table = make_table(
old,
prefix_len..(old_range.len() - suffix_len),
new,
prefix_len..(new_range.len() - suffix_len),
deadline,
);
let mut old_idx = 0;
let mut new_idx = 0;
@ -59,23 +89,30 @@ where
d.equal(old_range.start, new_range.start, prefix_len)?;
}
while new_idx < new_len && old_idx < old_len {
if let Some(table) = maybe_table {
while new_idx < new_len && old_idx < old_len {
let old_orig_idx = old_range.start + prefix_len + old_idx;
let new_orig_idx = new_range.start + prefix_len + new_idx;
if new[new_orig_idx] == old[old_orig_idx] {
d.equal(old_orig_idx, new_orig_idx, 1)?;
old_idx += 1;
new_idx += 1;
} else if table.get(&(new_idx, old_idx + 1)).map_or(0, |&x| x)
>= table.get(&(new_idx + 1, old_idx)).map_or(0, |&x| x)
{
d.delete(old_orig_idx, 1, new_orig_idx)?;
old_idx += 1;
} else {
d.insert(old_orig_idx, new_orig_idx, 1)?;
new_idx += 1;
}
}
} else {
let old_orig_idx = old_range.start + prefix_len + old_idx;
let new_orig_idx = new_range.start + prefix_len + new_idx;
if new[new_orig_idx] == old[old_orig_idx] {
d.equal(old_orig_idx, new_orig_idx, 1)?;
old_idx += 1;
new_idx += 1;
} else if table.get(&(new_idx, old_idx + 1)).map_or(0, |&x| x)
>= table.get(&(new_idx + 1, old_idx)).map_or(0, |&x| x)
{
d.delete(old_orig_idx, 1, new_orig_idx)?;
old_idx += 1;
} else {
d.insert(old_orig_idx, new_orig_idx, 1)?;
new_idx += 1;
}
d.delete(old_orig_idx, old_len, new_orig_idx)?;
d.insert(old_orig_idx, new_orig_idx, new_len)?;
}
if old_idx < old_len {
@ -107,6 +144,10 @@ where
}
/// Shortcut for diffing slices.
#[deprecated(
since = "1.4.0",
note = "slice utility function is now only available via similar::algorithms::diff_slices"
)]
pub fn diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
where
D: DiffHook,
@ -120,7 +161,8 @@ fn make_table<Old, New>(
old_range: Range<usize>,
new: &New,
new_range: Range<usize>,
) -> BTreeMap<(usize, usize), u32>
deadline: Option<Instant>,
) -> Option<BTreeMap<(usize, usize), u32>>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
@ -131,6 +173,13 @@ where
let mut table = BTreeMap::new();
for i in (0..new_len).rev() {
// are we running for too long? give up on the table
if let Some(deadline) = deadline {
if Instant::now() > deadline {
return None;
}
}
for j in (0..old_len).rev() {
let val = if new[i] == old[j] {
table.get(&(i + 1, j + 1)).map_or(0, |&x| x) + 1
@ -146,12 +195,12 @@ where
}
}
table
Some(table)
}
#[test]
fn test_table() {
let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3);
let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3, None).unwrap();
let expected = {
let mut m = BTreeMap::new();
m.insert((1, 0), 1);
@ -168,7 +217,7 @@ fn test_diff() {
let b: &[usize] = &[0, 1, 2, 9, 4];
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
diff_slices(&mut d, a, b).unwrap();
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
insta::assert_debug_snapshot!(d.into_inner().ops());
}
@ -178,7 +227,7 @@ fn test_contiguous() {
let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
diff_slices(&mut d, a, b).unwrap();
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
insta::assert_debug_snapshot!(d.into_inner().ops());
}
@ -188,6 +237,6 @@ fn test_pat() {
let b: &[usize] = &[0, 1, 4, 5, 8, 9];
let mut d = crate::algorithms::Capture::new();
diff_slices(&mut d, a, b).unwrap();
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
insta::assert_debug_snapshot!(d.ops());
}

View file

@ -39,6 +39,7 @@ mod replace;
use std::hash::Hash;
use std::ops::{Index, Range};
use std::time::Instant;
pub use capture::Capture;
pub use hook::{DiffHook, NoFinishHook};
@ -62,6 +63,34 @@ pub fn diff<Old, New, D>(
new: &New,
new_range: Range<usize>,
) -> Result<(), D::Error>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
D: DiffHook,
Old::Output: Hash + Eq + Ord,
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
{
diff_deadline(alg, d, old, old_range, new, new_range, None)
}
/// Creates a diff between old and new with the given algorithm with deadline.
///
/// Diffs `old`, between indices `old_range` and `new` between indices `new_range`.
///
/// This diff is done with an optional deadline that defines the maximal
/// execution time permitted before it bails and falls back to an approximation.
/// Note that not all algorithms behave well if they reach the deadline (LCS
/// for instance produces a very simplistic diff when the deadline is reached
/// in all cases).
pub fn diff_deadline<Old, New, D>(
alg: Algorithm,
d: &mut D,
old: &Old,
old_range: Range<usize>,
new: &New,
new_range: Range<usize>,
deadline: Option<Instant>,
) -> Result<(), D::Error>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
@ -70,9 +99,9 @@ where
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
{
match alg {
Algorithm::Myers => myers::diff(d, old, old_range, new, new_range),
Algorithm::Patience => patience::diff(d, old, old_range, new, new_range),
Algorithm::Lcs => lcs::diff(d, old, old_range, new, new_range),
Algorithm::Myers => myers::diff_deadline(d, old, old_range, new, new_range, deadline),
Algorithm::Patience => patience::diff_deadline(d, old, old_range, new, new_range, deadline),
Algorithm::Lcs => lcs::diff_deadline(d, old, old_range, new, new_range, deadline),
}
}
@ -84,3 +113,18 @@ where
{
diff(alg, d, old, 0..old.len(), new, 0..new.len())
}
/// Shortcut for diffing two whole slices with a specific algorithm and an
/// optional deadline.
///
/// Every element of `old` is compared against every element of `new`; the
/// resulting operations are reported to the hook `d`. When `deadline` is
/// `Some`, it is forwarded unchanged to the selected algorithm.
pub fn diff_slices_deadline<D, T>(
    alg: Algorithm,
    d: &mut D,
    old: &[T],
    new: &[T],
    deadline: Option<Instant>,
) -> Result<(), D::Error>
where
    D: DiffHook,
    T: Eq + Hash + Ord,
{
    // Diff the full extent of both slices.
    let whole_old = 0..old.len();
    let whole_new = 0..new.len();
    diff_deadline(alg, d, old, whole_old, new, whole_new, deadline)
}

View file

@ -8,8 +8,19 @@
//!
//! The implementation of this algorithm is based on the implementation by
//! Brandon Williams.
//!
//! # Heuristics
//!
//! At present this implementation of Myers' does not implement any more advanced
//! heuristics that would solve some pathological cases. For instance, passing two
//! large and completely distinct sequences to the algorithm will make it spin
//! without making reasonable progress. Currently the only protection in the
//! library against this is to pass a deadline to the diffing algorithm.
//!
//! For potential improvements here see [similar#15](https://github.com/mitsuhiko/similar/issues/15).
use std::ops::{Index, IndexMut, Range};
use std::time::Instant;
use crate::algorithms::DiffHook;
@ -23,6 +34,29 @@ pub fn diff<Old, New, D>(
new: &New,
new_range: Range<usize>,
) -> Result<(), D::Error>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
D: DiffHook,
New::Output: PartialEq<Old::Output>,
{
diff_deadline(d, old, old_range, new, new_range, None)
}
/// Myers' diff algorithm with deadline.
///
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
///
/// This diff is done with an optional deadline that defines the maximal
/// execution time permitted before it bails and falls back to an approximation.
pub fn diff_deadline<Old, New, D>(
d: &mut D,
old: &Old,
old_range: Range<usize>,
new: &New,
new_range: Range<usize>,
deadline: Option<Instant>,
) -> Result<(), D::Error>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
@ -32,11 +66,17 @@ where
let max_d = max_d(old_range.len(), new_range.len());
let mut vf = V::new(max_d);
let mut vb = V::new(max_d);
conquer(d, old, old_range, new, new_range, &mut vf, &mut vb)?;
conquer(
d, old, old_range, new, new_range, &mut vf, &mut vb, deadline,
)?;
d.finish()
}
/// Shortcut for diffing slices.
#[deprecated(
since = "1.4.0",
note = "slice utility function is now only available via similar::algorithms::diff_slices"
)]
pub fn diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
where
D: DiffHook,
@ -172,7 +212,8 @@ fn find_middle_snake<Old, New>(
new_range: Range<usize>,
vf: &mut V,
vb: &mut V,
) -> Snake
deadline: Option<Instant>,
) -> Option<Snake>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
@ -197,6 +238,13 @@ where
assert!(vb.len() >= d_max);
for d in 0..d_max as isize {
// are we running for too long?
if let Some(deadline) = deadline {
if Instant::now() > deadline {
break;
}
}
// Forward path
for k in (-d..=d).rev().step_by(2) {
let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) {
@ -230,10 +278,10 @@ where
// TODO optimize this so we don't have to compare against n
if vf[k] + vb[-(k - delta)] >= n {
// Return the snake
return Snake {
return Some(Snake {
x_start: x0 + old_range.start,
y_start: y0 + new_range.start,
};
});
}
}
}
@ -266,10 +314,10 @@ where
// TODO optimize this so we don't have to compare against n
if vb[k] + vf[-(k - delta)] >= n {
// Return the snake
return Snake {
return Some(Snake {
x_start: n - x + old_range.start,
y_start: m - y + new_range.start,
};
});
}
}
}
@ -277,9 +325,11 @@ where
// TODO: Maybe there's an opportunity to optimize and bail early?
}
unreachable!("unable to find a middle snake");
// deadline reached
None
}
#[allow(clippy::too_many_arguments)]
fn conquer<Old, New, D>(
d: &mut D,
old: &Old,
@ -288,6 +338,7 @@ fn conquer<Old, New, D>(
mut new_range: Range<usize>,
vf: &mut V,
vb: &mut V,
deadline: Option<Instant>,
) -> Result<(), D::Error>
where
Old: Index<usize> + ?Sized,
@ -326,12 +377,30 @@ where
new_range.start,
new_range.end - new_range.start,
)?;
} else {
let snake = find_middle_snake(old, old_range.clone(), new, new_range.clone(), vf, vb);
} else if let Some(snake) = find_middle_snake(
old,
old_range.clone(),
new,
new_range.clone(),
vf,
vb,
deadline,
) {
let (old_a, old_b) = split_at(old_range, snake.x_start);
let (new_a, new_b) = split_at(new_range, snake.y_start);
conquer(d, old, old_a, new, new_a, vf, vb)?;
conquer(d, old, old_b, new, new_b, vf, vb)?;
conquer(d, old, old_a, new, new_a, vf, vb, deadline)?;
conquer(d, old, old_b, new, new_b, vf, vb, deadline)?;
} else {
d.delete(
old_range.start,
old_range.end - old_range.start,
new_range.start,
)?;
d.insert(
old_range.start,
new_range.start,
new_range.end - new_range.start,
)?;
}
if common_suffix_len > 0 {
@ -348,7 +417,7 @@ fn test_find_middle_snake() {
let max_d = max_d(a.len(), b.len());
let mut vf = V::new(max_d);
let mut vb = V::new(max_d);
let snake = find_middle_snake(a, 0..a.len(), b, 0..b.len(), &mut vf, &mut vb);
let snake = find_middle_snake(a, 0..a.len(), b, 0..b.len(), &mut vf, &mut vb, None).unwrap();
assert_eq!(snake.x_start, 4);
assert_eq!(snake.y_start, 1);
}
@ -359,7 +428,7 @@ fn test_diff() {
let b: &[usize] = &[0, 1, 2, 9, 4];
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
diff_slices(&mut d, a, b).unwrap();
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
insta::assert_debug_snapshot!(d.into_inner().ops());
}
@ -369,7 +438,7 @@ fn test_contiguous() {
let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
diff_slices(&mut d, a, b).unwrap();
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
insta::assert_debug_snapshot!(d.into_inner().ops());
}
@ -379,6 +448,45 @@ fn test_pat() {
let b: &[usize] = &[0, 1, 4, 5, 8, 9];
let mut d = crate::algorithms::Capture::new();
diff_slices(&mut d, a, b).unwrap();
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
insta::assert_debug_snapshot!(d.ops());
}
#[test]
fn test_deadline_reached() {
// Exercises `diff_deadline` with a deadline that is short relative to the
// (artificially slow) element access, and snapshots the ops it reports.
use std::ops::Index;
use std::time::Duration;
// Two 100-element sequences that differ only at indices 10, 25 and 50.
let a = (0..100).collect::<Vec<_>>();
let mut b = (0..100).collect::<Vec<_>>();
b[10] = 99;
b[50] = 99;
b[25] = 99;
// Slice wrapper that sleeps for a millisecond on every element access so
// that the 50ms budget below runs out quickly.
struct SlowIndex<'a>(&'a [usize]);
impl<'a> Index<usize> for SlowIndex<'a> {
type Output = usize;
fn index(&self, index: usize) -> &Self::Output {
std::thread::sleep(Duration::from_millis(1));
&self.0[index]
}
}
let slow_a = SlowIndex(&a);
let slow_b = SlowIndex(&b);
// don't give it enough time to do anything interesting
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
diff_deadline(
&mut d,
&slow_a,
0..a.len(),
&slow_b,
0..b.len(),
// Absolute deadline: 50ms from the moment the call is set up.
Some(Instant::now() + Duration::from_millis(50)),
)
.unwrap();
// The captured ops are compared against the stored snapshot.
insta::assert_debug_snapshot!(d.into_inner().ops());
}

View file

@ -12,6 +12,7 @@ use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::hash::Hash;
use std::ops::{Index, Range};
use std::time::Instant;
use crate::algorithms::{myers, DiffHook, NoFinishHook, Replace};
@ -25,6 +26,30 @@ pub fn diff<Old, New, D>(
new: &New,
new_range: Range<usize>,
) -> Result<(), D::Error>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
Old::Output: Hash + Eq,
New::Output: PartialEq<Old::Output> + Hash + Eq,
D: DiffHook,
{
diff_deadline(d, old, old_range, new, new_range, None)
}
/// Patience diff algorithm with deadline.
///
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
///
/// This diff is done with an optional deadline that defines the maximal
/// execution time permitted before it bails and falls back to an approximation.
pub fn diff_deadline<Old, New, D>(
d: &mut D,
old: &Old,
old_range: Range<usize>,
new: &New,
new_range: Range<usize>,
deadline: Option<Instant>,
) -> Result<(), D::Error>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
@ -45,18 +70,24 @@ where
new_current: new_range.start,
new_end: new_range.end,
new_indexes: &new_indexes,
deadline,
});
myers::diff(
myers::diff_deadline(
&mut d,
&old_indexes,
0..old_indexes.len(),
&new_indexes,
0..new_indexes.len(),
deadline,
)?;
Ok(())
}
/// Shortcut for diffing slices.
#[deprecated(
since = "1.4.0",
note = "slice utility function is now only available via similar::algorithms::diff_slices"
)]
pub fn diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
where
D: DiffHook,
@ -128,6 +159,7 @@ struct Patience<'old, 'new, 'd, Old: ?Sized, New: ?Sized, D> {
new_current: usize,
new_end: usize,
new_indexes: &'new [Indexable<'new, New>],
deadline: Option<Instant>,
}
impl<'old, 'new, 'd, Old, New, D> DiffHook for Patience<'old, 'new, 'd, Old, New, D>
@ -153,12 +185,13 @@ where
self.d.equal(a0, b0, self.old_current - a0)?;
}
let mut no_finish_d = NoFinishHook::new(&mut self.d);
myers::diff(
myers::diff_deadline(
&mut no_finish_d,
self.old,
self.old_current..self.old_indexes[old].index,
self.new,
self.new_current..self.new_indexes[new].index,
self.deadline,
)?;
self.old_current = self.old_indexes[old].index;
self.new_current = self.new_indexes[new].index;
@ -167,12 +200,13 @@ where
}
fn finish(&mut self) -> Result<(), D::Error> {
myers::diff(
myers::diff_deadline(
self.d,
self.old,
self.old_current..self.old_end,
self.new,
self.new_current..self.new_end,
self.deadline,
)
}
}
@ -183,7 +217,7 @@ fn test_patience() {
let b: &[usize] = &[10, 1, 2, 2, 8, 9, 4, 4, 7, 47, 18];
let mut d = Replace::new(crate::algorithms::Capture::new());
diff_slices(&mut d, a, b).unwrap();
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
insta::assert_debug_snapshot!(d.into_inner().ops());
}
@ -195,7 +229,7 @@ fn test_patience_out_of_bounds_bug() {
let b: &[usize] = &[1, 2, 3];
let mut d = Replace::new(crate::algorithms::Capture::new());
diff_slices(&mut d, a, b).unwrap();
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
insta::assert_debug_snapshot!(d.into_inner().ops());
}

View file

@ -134,7 +134,7 @@ impl<D: DiffHook> DiffHook for Replace<D> {
#[test]
fn test_mayers_replace() {
use crate::algorithms::myers;
use crate::algorithms::{diff_slices, Algorithm};
let a: &[&str] = &[
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n",
"a\n",
@ -159,7 +159,7 @@ fn test_mayers_replace() {
];
let mut d = Replace::new(crate::algorithms::Capture::new());
myers::diff_slices(&mut d, a, b).unwrap();
diff_slices(Algorithm::Myers, &mut d, a, b).unwrap();
insta::assert_debug_snapshot!(&d.into_inner().ops(), @r###"
[
@ -196,11 +196,13 @@ fn test_mayers_replace() {
#[test]
fn test_replace() {
use crate::algorithms::{diff_slices, Algorithm};
let a: &[usize] = &[0, 1, 2, 3, 4];
let b: &[usize] = &[0, 1, 2, 7, 8, 9];
let mut d = Replace::new(crate::algorithms::Capture::new());
crate::algorithms::myers::diff_slices(&mut d, a, b).unwrap();
diff_slices(Algorithm::Myers, &mut d, a, b).unwrap();
insta::assert_debug_snapshot!(d.into_inner().ops(), @r###"
[
Equal {

View file

@ -0,0 +1,22 @@
---
source: src/algorithms/myers.rs
expression: d.into_inner().ops()
---
[
Equal {
old_index: 0,
new_index: 0,
len: 10,
},
Replace {
old_index: 10,
old_len: 41,
new_index: 10,
new_len: 41,
},
Equal {
old_index: 51,
new_index: 51,
len: 49,
},
]

View file

@ -1,13 +1,15 @@
use std::hash::Hash;
use std::ops::{Index, Range};
use std::time::Instant;
use crate::algorithms::{diff, diff_slices, Capture, Replace};
use crate::algorithms::{diff_deadline, diff_slices_deadline, Capture, Replace};
use crate::{Algorithm, DiffOp};
/// Creates a diff between old and new with the given algorithm capturing the ops.
///
/// This is like [`diff`] but instead of using an arbitrary hook this will
/// always use [`Replace`] + [`Capture`] and return the captured [`DiffOp`]s.
/// This is like [`diff`](crate::algorithms::diff) but instead of using an
/// arbitrary hook this will always use [`Replace`] + [`Capture`] and return the
/// captured [`DiffOp`]s.
pub fn capture_diff<Old, New>(
alg: Algorithm,
old: &Old,
@ -15,6 +17,26 @@ pub fn capture_diff<Old, New>(
new: &New,
new_range: Range<usize>,
) -> Vec<DiffOp>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
Old::Output: Hash + Eq + Ord,
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
{
capture_diff_deadline(alg, old, old_range, new, new_range, None)
}
/// Creates a diff between old and new with the given algorithm capturing the ops.
///
/// Works like [`capture_diff`] but with an optional deadline.
pub fn capture_diff_deadline<Old, New>(
alg: Algorithm,
old: &Old,
old_range: Range<usize>,
new: &New,
new_range: Range<usize>,
deadline: Option<Instant>,
) -> Vec<DiffOp>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
@ -22,17 +44,32 @@ where
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
{
let mut d = Replace::new(Capture::new());
diff(alg, &mut d, old, old_range, new, new_range).unwrap();
diff_deadline(alg, &mut d, old, old_range, new, new_range, deadline).unwrap();
d.into_inner().into_ops()
}
/// Creates a diff between the slices `old` and `new` with the given algorithm,
/// capturing the ops.
///
/// This delegates to `capture_diff_slices_deadline` with no deadline (`None`).
pub fn capture_diff_slices<T>(alg: Algorithm, old: &[T], new: &[T]) -> Vec<DiffOp>
where
T: Eq + Hash + Ord,
{
capture_diff_slices_deadline(alg, old, new, None)
}
/// Creates a diff between old and new with the given algorithm capturing the ops.
///
/// Works like [`capture_diff_slices`] but with an optional deadline.
pub fn capture_diff_slices_deadline<T>(
alg: Algorithm,
old: &[T],
new: &[T],
deadline: Option<Instant>,
) -> Vec<DiffOp>
where
T: Eq + Hash + Ord,
{
let mut d = Replace::new(Capture::new());
diff_slices(alg, &mut d, old, new).unwrap();
diff_slices_deadline(alg, &mut d, old, new, deadline).unwrap();
d.into_inner().into_ops()
}

View file

@ -106,6 +106,22 @@
//! As the [`TextDiff::grouped_ops`] method can isolate clusters of changes
//! this even works for very long files if paired with this method.
//!
//! # Deadlines and Performance
//!
//! For large and very distinct inputs the algorithms as implemented can take
//! a very, very long time to execute. Too long to make sense in practice.
//! To work around this issue all diffing algorithms also provide a version
//! that accepts a deadline which is the point in time as defined by an
//! [`Instant`](std::time::Instant) after which the algorithm should give up.
//! What giving up means depends on the algorithm. For instance due to the
//! recursive, divide and conquer nature of Myers' diff you will still get a
//! pretty decent diff in many cases when a deadline is reached. Whereas on the
//! other hand the LCS diff is unlikely to give any decent results in such a
//! situation.
//!
//! The [`TextDiff`] type also lets you configure a deadline and/or timeout
//! when performing a text diff.
//!
//! # Feature Flags
//!
//! The crate by default does not have any dependencies however for some use

View file

@ -2,6 +2,7 @@
use std::borrow::Cow;
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::time::{Duration, Instant};
mod abstraction;
#[cfg(feature = "inline")]
@ -15,7 +16,22 @@ pub use self::inline::InlineChange;
use self::utils::{upper_seq_ratio, QuickSeqRatio};
use crate::iter::{AllChangesIter, ChangesIter};
use crate::udiff::UnifiedDiff;
use crate::{capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, DiffOp};
use crate::{capture_diff_slices_deadline, get_diff_ratio, group_diff_ops, Algorithm, DiffOp};
/// How the time limit of a diff operation was specified.
#[derive(Debug, Clone, Copy)]
enum Deadline {
    /// A fixed point in time.
    Absolute(Instant),
    /// A duration that is added to the current time when the deadline is
    /// resolved via [`Deadline::into_instant`].
    Relative(Duration),
}

impl Deadline {
    /// Resolves the deadline into a concrete [`Instant`].
    ///
    /// An absolute deadline is returned unchanged; a relative one is measured
    /// from the moment this method is called.
    fn into_instant(self) -> Instant {
        match self {
            Self::Relative(timeout) => Instant::now() + timeout,
            Self::Absolute(at) => at,
        }
    }
}
/// A builder type config for more complex uses of [`TextDiff`].
///
@ -24,6 +40,7 @@ use crate::{capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, Diff
pub struct TextDiffConfig {
// Diffing algorithm used when the diff is computed.
algorithm: Algorithm,
// Newline termination flag; `None` is the default ("automatic based on input").
newline_terminated: Option<bool>,
// Optional time limit for the diff; `None` (the default) means no limit.
deadline: Option<Deadline>,
}
impl Default for TextDiffConfig {
@ -31,6 +48,7 @@ impl Default for TextDiffConfig {
TextDiffConfig {
algorithm: Algorithm::default(),
newline_terminated: None,
deadline: None,
}
}
}
@ -44,6 +62,24 @@ impl TextDiffConfig {
self
}
/// Sets a deadline for the diff operation.
///
/// By default a diff will take as long as it takes. For certain diff
/// algorithms like Myers' and Patience a maximum running time can be
/// defined after which the algorithm gives up and approximates.
///
/// The deadline is stored as an absolute point in time and replaces any
/// deadline or timeout configured earlier.
pub fn deadline(&mut self, deadline: Instant) -> &mut Self {
self.deadline = Some(Deadline::Absolute(deadline));
self
}
/// Sets a timeout for the diff operation.
///
/// This is like [`deadline`](Self::deadline) but accepts a duration.
/// The duration is stored as a relative deadline and replaces any deadline
/// or timeout configured earlier.
pub fn timeout(&mut self, timeout: Duration) -> &mut Self {
self.deadline = Some(Deadline::Relative(timeout));
self
}
/// Changes the newline termination flag.
///
/// The default is automatic based on input. This flag controls the
@ -291,7 +327,12 @@ impl TextDiffConfig {
new: Cow<'bufs, [&'new T]>,
newline_terminated: bool,
) -> TextDiff<'old, 'new, 'bufs, T> {
let ops = capture_diff_slices(self.algorithm, &old, &new);
let ops = capture_diff_slices_deadline(
self.algorithm,
&old,
&new,
self.deadline.map(|x| x.into_instant()),
);
TextDiff {
old,
new,