Added deadlines to all algorithms (#18)
This adds an optional deadline to all algorithms, which lets a caller bail out after a fixed amount of time to avoid pathological cases.
This commit is contained in:
parent
99386e8106
commit
74e2805a95
11 changed files with 412 additions and 57 deletions
|
|
@ -4,12 +4,18 @@
|
|||
//! * space `O(MN)`
|
||||
use std::collections::BTreeMap;
|
||||
use std::ops::{Index, Range};
|
||||
use std::time::Instant;
|
||||
|
||||
use crate::algorithms::DiffHook;
|
||||
|
||||
/// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm.
|
||||
///
|
||||
/// Diff `old`, between indices `old_range`, and `new`, between indices `new_range`.
|
||||
///
|
||||
/// This diff is done with an optional deadline that defines the maximal
|
||||
/// execution time permitted before it bails and falls back to a very bad
|
||||
/// approximation. Deadlines with LCS do not make a lot of sense and should
|
||||
/// not be used.
|
||||
pub fn diff<Old, New, D>(
|
||||
d: &mut D,
|
||||
old: &Old,
|
||||
|
|
@ -17,6 +23,29 @@ pub fn diff<Old, New, D>(
|
|||
new: &New,
|
||||
new_range: Range<usize>,
|
||||
) -> Result<(), D::Error>
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
D: DiffHook,
|
||||
New::Output: PartialEq<Old::Output>,
|
||||
{
|
||||
diff_deadline(d, old, old_range, new, new_range, None)
|
||||
}
|
||||
|
||||
/// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm.
|
||||
///
|
||||
/// Diff `old`, between indices `old_range`, and `new`, between indices `new_range`.
|
||||
///
|
||||
/// This diff is done with an optional deadline that defines the maximal
|
||||
/// execution time permitted before it bails and falls back to an approximation.
|
||||
pub fn diff_deadline<Old, New, D>(
|
||||
d: &mut D,
|
||||
old: &Old,
|
||||
old_range: Range<usize>,
|
||||
new: &New,
|
||||
new_range: Range<usize>,
|
||||
deadline: Option<Instant>,
|
||||
) -> Result<(), D::Error>
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
|
|
@ -44,11 +73,12 @@ where
|
|||
.take_while(|x| new[x.1] == old[x.0])
|
||||
.count();
|
||||
|
||||
let table = make_table(
|
||||
let maybe_table = make_table(
|
||||
old,
|
||||
prefix_len..(old_range.len() - suffix_len),
|
||||
new,
|
||||
prefix_len..(new_range.len() - suffix_len),
|
||||
deadline,
|
||||
);
|
||||
let mut old_idx = 0;
|
||||
let mut new_idx = 0;
|
||||
|
|
@ -59,23 +89,30 @@ where
|
|||
d.equal(old_range.start, new_range.start, prefix_len)?;
|
||||
}
|
||||
|
||||
while new_idx < new_len && old_idx < old_len {
|
||||
if let Some(table) = maybe_table {
|
||||
while new_idx < new_len && old_idx < old_len {
|
||||
let old_orig_idx = old_range.start + prefix_len + old_idx;
|
||||
let new_orig_idx = new_range.start + prefix_len + new_idx;
|
||||
|
||||
if new[new_orig_idx] == old[old_orig_idx] {
|
||||
d.equal(old_orig_idx, new_orig_idx, 1)?;
|
||||
old_idx += 1;
|
||||
new_idx += 1;
|
||||
} else if table.get(&(new_idx, old_idx + 1)).map_or(0, |&x| x)
|
||||
>= table.get(&(new_idx + 1, old_idx)).map_or(0, |&x| x)
|
||||
{
|
||||
d.delete(old_orig_idx, 1, new_orig_idx)?;
|
||||
old_idx += 1;
|
||||
} else {
|
||||
d.insert(old_orig_idx, new_orig_idx, 1)?;
|
||||
new_idx += 1;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let old_orig_idx = old_range.start + prefix_len + old_idx;
|
||||
let new_orig_idx = new_range.start + prefix_len + new_idx;
|
||||
|
||||
if new[new_orig_idx] == old[old_orig_idx] {
|
||||
d.equal(old_orig_idx, new_orig_idx, 1)?;
|
||||
old_idx += 1;
|
||||
new_idx += 1;
|
||||
} else if table.get(&(new_idx, old_idx + 1)).map_or(0, |&x| x)
|
||||
>= table.get(&(new_idx + 1, old_idx)).map_or(0, |&x| x)
|
||||
{
|
||||
d.delete(old_orig_idx, 1, new_orig_idx)?;
|
||||
old_idx += 1;
|
||||
} else {
|
||||
d.insert(old_orig_idx, new_orig_idx, 1)?;
|
||||
new_idx += 1;
|
||||
}
|
||||
d.delete(old_orig_idx, old_len, new_orig_idx)?;
|
||||
d.insert(old_orig_idx, new_orig_idx, new_len)?;
|
||||
}
|
||||
|
||||
if old_idx < old_len {
|
||||
|
|
@ -107,6 +144,10 @@ where
|
|||
}
|
||||
|
||||
/// Shortcut for diffing slices.
|
||||
#[deprecated(
|
||||
since = "1.4.0",
|
||||
note = "slice utility function is now only available via similar::algorithms::diff_slices"
|
||||
)]
|
||||
pub fn diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
|
||||
where
|
||||
D: DiffHook,
|
||||
|
|
@ -120,7 +161,8 @@ fn make_table<Old, New>(
|
|||
old_range: Range<usize>,
|
||||
new: &New,
|
||||
new_range: Range<usize>,
|
||||
) -> BTreeMap<(usize, usize), u32>
|
||||
deadline: Option<Instant>,
|
||||
) -> Option<BTreeMap<(usize, usize), u32>>
|
||||
where
|
||||
Old: Index<usize> + ?Sized,
|
||||
New: Index<usize> + ?Sized,
|
||||
|
|
@ -131,6 +173,13 @@ where
|
|||
let mut table = BTreeMap::new();
|
||||
|
||||
for i in (0..new_len).rev() {
|
||||
// are we running for too long? give up on the table
|
||||
if let Some(deadline) = deadline {
|
||||
if Instant::now() > deadline {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
for j in (0..old_len).rev() {
|
||||
let val = if new[i] == old[j] {
|
||||
table.get(&(i + 1, j + 1)).map_or(0, |&x| x) + 1
|
||||
|
|
@ -146,12 +195,12 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
table
|
||||
Some(table)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_table() {
|
||||
let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3);
|
||||
let table = make_table(&vec![2, 3], 0..2, &vec![0, 1, 2], 0..3, None).unwrap();
|
||||
let expected = {
|
||||
let mut m = BTreeMap::new();
|
||||
m.insert((1, 0), 1);
|
||||
|
|
@ -168,7 +217,7 @@ fn test_diff() {
|
|||
let b: &[usize] = &[0, 1, 2, 9, 4];
|
||||
|
||||
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
||||
diff_slices(&mut d, a, b).unwrap();
|
||||
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
|
||||
insta::assert_debug_snapshot!(d.into_inner().ops());
|
||||
}
|
||||
|
||||
|
|
@ -178,7 +227,7 @@ fn test_contiguous() {
|
|||
let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];
|
||||
|
||||
let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
|
||||
diff_slices(&mut d, a, b).unwrap();
|
||||
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
|
||||
insta::assert_debug_snapshot!(d.into_inner().ops());
|
||||
}
|
||||
|
||||
|
|
@ -188,6 +237,6 @@ fn test_pat() {
|
|||
let b: &[usize] = &[0, 1, 4, 5, 8, 9];
|
||||
|
||||
let mut d = crate::algorithms::Capture::new();
|
||||
diff_slices(&mut d, a, b).unwrap();
|
||||
diff(&mut d, a, 0..a.len(), b, 0..b.len()).unwrap();
|
||||
insta::assert_debug_snapshot!(d.ops());
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue