Added deadlines to all algorithms (#18)

This adds an optional deadline to all algorithms, which lets callers bail out
after a fixed amount of time to avoid pathological cases.
This commit is contained in:
Armin Ronacher 2021-02-20 10:12:06 +01:00 committed by GitHub
parent 99386e8106
commit 74e2805a95
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 412 additions and 57 deletions

View file

@ -4,12 +4,18 @@
//! * space `O(MN)`
use std::collections::BTreeMap;
use std::ops::{Index, Range};
use std::time::Instant;
use crate::algorithms::DiffHook;
/// Hunt-McIlroy / Hunt-Szymanski LCS diff algorithm.
///
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
///
/// This diff is done with an optional deadline that defines the maximal
/// execution time permitted before it bails and falls back to a very bad
/// approximation. Deadlines with LCS do not make a lot of sense and should
/// not be used.
pub fn diff<Old, New, D>(
d: &mut D,
old: &Old,
@ -17,6 +23,29 @@ pub fn diff<Old, New, D>(
new: &New,
new_range: Range<usize>,
) -> Result<(), D::Error>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
D: DiffHook,
New::Output: PartialEq<Old::Output>,
{
diff_deadline(d, old, old_range, new, new_range, None)
}
/// Hunt-McIlroy / Hunt-Szymanski LCS diff algorithm.
///
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
///
/// This diff is done with an optional deadline that defines the maximal
/// execution time permitted before it bails and falls back to an approximation.
pub fn diff_deadline<Old, New, D>(
d: &mut D,
old: &Old,
old_range: Range<usize>,
new: &New,
new_range: Range<usize>,
deadline: Option<Instant>,
) -> Result<(), D::Error>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
@ -44,11 +73,12 @@ where
.take_while(|x| new[x.1] == old[x.0])
.count();
let table = make_table(
let maybe_table = make_table(
old,
prefix_len..(old_range.len() - suffix_len),
new,
prefix_len..(new_range.len() - suffix_len),
deadline,
);
let mut old_idx = 0;
let mut new_idx = 0;
@ -59,23 +89,30 @@ where
d.equal(old_range.start, new_range.start, prefix_len)?;
}
while new_idx < new_len && old_idx < old_len {
if let Some(table) = maybe_table {
while new_idx < new_len && old_idx < old_len {
let old_orig_idx = old_range.start + prefix_len + old_idx;
let new_orig_idx = new_range.start + prefix_len + new_idx;
if new[new_orig_idx] == old[old_orig_idx] {
d.equal(old_orig_idx, new_orig_idx, 1)?;
old_idx += 1;
new_idx += 1;
} else if table.get(&(new_idx, old_idx + 1)).map_or(0, |&x| x)
>= table.get(&(new_idx + 1, old_idx)).map_or(0, |&x| x)
{
d.delete(old_orig_idx, 1, new_orig_idx)?;
old_idx += 1;
} else {
d.insert(old_orig_idx, new_orig_idx, 1)?;
new_idx += 1;
}
}
} else {
let old_orig_idx = old_range.start + prefix_len + old_idx;
let new_orig_idx = new_range.start + prefix_len + new_idx;
if new[new_orig_idx] == old[old_orig_idx] {
d.equal(old_orig_idx, new_orig_idx, 1)?;
old_idx += 1;
new_idx += 1;
} else if table.get(&(new_idx, old_idx + 1)).map_or(0, |&x| x)
>= table.get(&(new_idx + 1, old_idx)).map_or(0, |&x| x)
{
d.delete(old_orig_idx, 1, new_orig_idx)?;
old_idx += 1;
} else {
d.insert(old_orig_idx, new_orig_idx, 1)?;
new_idx += 1;
}
d.delete(old_orig_idx, old_len, new_orig_idx)?;
d.insert(old_orig_idx, new_orig_idx, new_len)?;
}
if old_idx < old_len {
@ -107,6 +144,10 @@ where
}
/// Shortcut for diffing slices.
#[deprecated(
since = "1.4.0",
note = "slice utility function is now only available via similar::algorithms::diff_slices"
)]
pub fn diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
where
D: DiffHook,
@ -120,7 +161,8 @@ fn make_table<Old, New>(
old_range: Range<usize>,
new: &New,
new_range: Range<usize>,
) -> BTreeMap<(usize, usize), u32>
deadline: Option<Instant>,
) -> Option<BTreeMap<(usize, usize), u32>>
where
Old: Index<usize> + ?Sized,
New: Index<usize> + ?Sized,
@ -131,6 +173,13 @@ where
let mut table = BTreeMap::new();
for i in (0..new_len).rev() {
// are we running for too long? give up on the table
if let Some(deadline) = deadline {
if Instant::now() > deadline {
return None;
}
}
for j in (0..old_len).rev() {
let val = if new[i] == old[j] {
table.get(&(i + 1, j + 1)).map_or(0, |&x| x) + 1
@ -146,12 +195,12 @@ where
}
}
table
Some(table)
}
#[test]
fn test_table() {
let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3);
let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3, None).unwrap();
let expected = {
let mut m = BTreeMap::new();
m.insert((1, 0), 1);
@ -168,7 +217,7 @@ fn test_diff() {
let b: &[usize] = &[0, 1, 2, 9, 4];
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
diff_slices(&mut d, a, b).unwrap();
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
insta::assert_debug_snapshot!(d.into_inner().ops());
}
@ -178,7 +227,7 @@ fn test_contiguous() {
let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
diff_slices(&mut d, a, b).unwrap();
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
insta::assert_debug_snapshot!(d.into_inner().ops());
}
@ -188,6 +237,6 @@ fn test_pat() {
let b: &[usize] = &[0, 1, 4, 5, 8, 9];
let mut d = crate::algorithms::Capture::new();
diff_slices(&mut d, a, b).unwrap();
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
insta::assert_debug_snapshot!(d.ops());
}