Add compaction support (#22)
This commit is contained in:
parent
ddb73d8351
commit
7e628d78d8
9 changed files with 712 additions and 19 deletions
|
|
@ -37,6 +37,10 @@ bstr = { version = "0.2.14", optional = true, default-features = false }
|
||||||
name = "assert"
|
name = "assert"
|
||||||
required-features = ["text", "inline"]
|
required-features = ["text", "inline"]
|
||||||
|
|
||||||
|
[[example]]
|
||||||
|
name = "patience"
|
||||||
|
required-features = ["text", "inline"]
|
||||||
|
|
||||||
[[example]]
|
[[example]]
|
||||||
name = "terminal"
|
name = "terminal"
|
||||||
required-features = ["text"]
|
required-features = ["text"]
|
||||||
|
|
|
||||||
48
examples/patience.rs
Normal file
48
examples/patience.rs
Normal file
|
|
@ -0,0 +1,48 @@
|
||||||
|
use similar::{Algorithm, TextDiff};
|
||||||
|
|
||||||
|
const OLD: &str = r#"
|
||||||
|
[
|
||||||
|
(
|
||||||
|
Major,
|
||||||
|
2,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Minor,
|
||||||
|
20,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Value,
|
||||||
|
0,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
"#;
|
||||||
|
const NEW: &str = r#"
|
||||||
|
[
|
||||||
|
(
|
||||||
|
Major,
|
||||||
|
2,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Minor,
|
||||||
|
0,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Value,
|
||||||
|
0,
|
||||||
|
),
|
||||||
|
(
|
||||||
|
Value,
|
||||||
|
1,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
"#;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
println!(
|
||||||
|
"{}",
|
||||||
|
TextDiff::configure()
|
||||||
|
.algorithm(Algorithm::Patience)
|
||||||
|
.diff_lines(OLD, NEW)
|
||||||
|
.unified_diff()
|
||||||
|
);
|
||||||
|
}
|
||||||
355
src/algorithms/compact.rs
Normal file
355
src/algorithms/compact.rs
Normal file
|
|
@ -0,0 +1,355 @@
|
||||||
|
//! Implements basic compacting. This is based on the compaction logic from
|
||||||
|
//! diffy by Brandon Williams.
|
||||||
|
use std::ops::Index;
|
||||||
|
|
||||||
|
use crate::{DiffOp, DiffTag};
|
||||||
|
|
||||||
|
use super::utils::{common_prefix_len, common_suffix_len};
|
||||||
|
use super::DiffHook;
|
||||||
|
|
||||||
|
/// Performs semantic cleanup operations on a diff.
|
||||||
|
///
|
||||||
|
/// This merges similar ops together but also tries to move hunks up and
|
||||||
|
/// down the diff with the desire to connect as many hunks as possible.
|
||||||
|
/// It still needs to be combined with [`Replace`](crate::algorithms::Replace)
|
||||||
|
/// to get actual replace diff ops out.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Compact<'old, 'new, Old: ?Sized, New: ?Sized, D> {
|
||||||
|
d: D,
|
||||||
|
ops: Vec<DiffOp>,
|
||||||
|
old: &'old Old,
|
||||||
|
new: &'new New,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'old, 'new, Old, New, D> Compact<'old, 'new, Old, New, D>
|
||||||
|
where
|
||||||
|
D: DiffHook,
|
||||||
|
Old: Index<usize> + ?Sized + 'old,
|
||||||
|
New: Index<usize> + ?Sized + 'new,
|
||||||
|
New::Output: PartialEq<Old::Output>,
|
||||||
|
{
|
||||||
|
/// Creates a new compact hook wrapping another hook.
|
||||||
|
pub fn new(d: D, old: &'old Old, new: &'new New) -> Self {
|
||||||
|
Compact {
|
||||||
|
d,
|
||||||
|
ops: Vec::new(),
|
||||||
|
old,
|
||||||
|
new,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Extracts the inner hook.
|
||||||
|
pub fn into_inner(self) -> D {
|
||||||
|
self.d
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'old, 'new, Old: ?Sized, New: ?Sized, D: DiffHook> AsRef<D>
|
||||||
|
for Compact<'old, 'new, Old, New, D>
|
||||||
|
{
|
||||||
|
fn as_ref(&self) -> &D {
|
||||||
|
&self.d
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'old, 'new, Old: ?Sized, New: ?Sized, D: DiffHook> AsMut<D>
|
||||||
|
for Compact<'old, 'new, Old, New, D>
|
||||||
|
{
|
||||||
|
fn as_mut(&mut self) -> &mut D {
|
||||||
|
&mut self.d
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'old, 'new, Old, New, D> DiffHook for Compact<'old, 'new, Old, New, D>
|
||||||
|
where
|
||||||
|
D: DiffHook,
|
||||||
|
Old: Index<usize> + ?Sized + 'old,
|
||||||
|
New: Index<usize> + ?Sized + 'new,
|
||||||
|
New::Output: PartialEq<Old::Output>,
|
||||||
|
{
|
||||||
|
type Error = D::Error;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn equal(&mut self, old_index: usize, new_index: usize, len: usize) -> Result<(), Self::Error> {
|
||||||
|
self.ops.push(DiffOp::Equal {
|
||||||
|
old_index,
|
||||||
|
new_index,
|
||||||
|
len,
|
||||||
|
});
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn delete(
|
||||||
|
&mut self,
|
||||||
|
old_index: usize,
|
||||||
|
old_len: usize,
|
||||||
|
new_index: usize,
|
||||||
|
) -> Result<(), Self::Error> {
|
||||||
|
self.ops.push(DiffOp::Delete {
|
||||||
|
old_index,
|
||||||
|
old_len,
|
||||||
|
new_index,
|
||||||
|
});
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn insert(
|
||||||
|
&mut self,
|
||||||
|
old_index: usize,
|
||||||
|
new_index: usize,
|
||||||
|
new_len: usize,
|
||||||
|
) -> Result<(), Self::Error> {
|
||||||
|
self.ops.push(DiffOp::Insert {
|
||||||
|
old_index,
|
||||||
|
new_index,
|
||||||
|
new_len,
|
||||||
|
});
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn finish(&mut self) -> Result<(), Self::Error> {
|
||||||
|
cleanup_diff_ops(self.old, self.new, &mut self.ops);
|
||||||
|
for op in &self.ops {
|
||||||
|
op.apply_to_hook(&mut self.d)?;
|
||||||
|
}
|
||||||
|
self.d.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Walks through all edits and shifts them up and then down, trying to see if
|
||||||
|
// they run into similar edits which can be merged.
|
||||||
|
pub fn cleanup_diff_ops<Old, New>(old: &Old, new: &New, ops: &mut Vec<DiffOp>)
|
||||||
|
where
|
||||||
|
Old: Index<usize> + ?Sized,
|
||||||
|
New: Index<usize> + ?Sized,
|
||||||
|
New::Output: PartialEq<Old::Output>,
|
||||||
|
{
|
||||||
|
// First attempt to compact all Deletions
|
||||||
|
let mut pointer = 0;
|
||||||
|
while let Some(&op) = ops.get(pointer) {
|
||||||
|
if let DiffTag::Delete = op.tag() {
|
||||||
|
pointer = shift_diff_ops_up(ops, old, new, pointer);
|
||||||
|
pointer = shift_diff_ops_down(ops, old, new, pointer);
|
||||||
|
}
|
||||||
|
pointer += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Then attempt to compact all Insertions
|
||||||
|
let mut pointer = 0;
|
||||||
|
while let Some(&op) = ops.get(pointer) {
|
||||||
|
if let DiffTag::Insert = op.tag() {
|
||||||
|
pointer = shift_diff_ops_up(ops, old, new, pointer);
|
||||||
|
pointer = shift_diff_ops_down(ops, old, new, pointer);
|
||||||
|
}
|
||||||
|
pointer += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn shift_diff_ops_up<Old, New>(
|
||||||
|
ops: &mut Vec<DiffOp>,
|
||||||
|
old: &Old,
|
||||||
|
new: &New,
|
||||||
|
mut pointer: usize,
|
||||||
|
) -> usize
|
||||||
|
where
|
||||||
|
Old: Index<usize> + ?Sized,
|
||||||
|
New: Index<usize> + ?Sized,
|
||||||
|
New::Output: PartialEq<Old::Output>,
|
||||||
|
{
|
||||||
|
while let Some(&prev_op) = pointer.checked_sub(1).and_then(|idx| ops.get(idx)) {
|
||||||
|
let this_op = ops[pointer];
|
||||||
|
match (this_op.tag(), prev_op.tag()) {
|
||||||
|
// Shift Inserts Upwards
|
||||||
|
(DiffTag::Insert, DiffTag::Equal) => {
|
||||||
|
let suffix_len =
|
||||||
|
common_suffix_len(old, prev_op.old_range(), new, this_op.new_range());
|
||||||
|
if suffix_len > 0 {
|
||||||
|
if let Some(DiffTag::Equal) = ops.get(pointer + 1).map(|x| x.tag()) {
|
||||||
|
ops[pointer + 1].grow_left(suffix_len);
|
||||||
|
} else {
|
||||||
|
ops.insert(
|
||||||
|
pointer + 1,
|
||||||
|
DiffOp::Equal {
|
||||||
|
old_index: prev_op.old_range().end - suffix_len,
|
||||||
|
new_index: this_op.new_range().end - suffix_len,
|
||||||
|
len: suffix_len,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
ops[pointer].shift_left(suffix_len);
|
||||||
|
ops[pointer - 1].shrink_left(suffix_len);
|
||||||
|
|
||||||
|
if ops[pointer - 1].is_empty() {
|
||||||
|
ops.remove(pointer - 1);
|
||||||
|
pointer -= 1;
|
||||||
|
}
|
||||||
|
} else if ops[pointer - 1].is_empty() {
|
||||||
|
ops.remove(pointer - 1);
|
||||||
|
pointer -= 1;
|
||||||
|
} else {
|
||||||
|
// We can't shift upwards anymore
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Shift Deletions Upwards
|
||||||
|
(DiffTag::Delete, DiffTag::Equal) => {
|
||||||
|
// check common suffix for the amount we can shift
|
||||||
|
let suffix_len =
|
||||||
|
common_suffix_len(old, prev_op.old_range(), new, this_op.new_range());
|
||||||
|
if suffix_len != 0 {
|
||||||
|
if let Some(DiffTag::Equal) = ops.get(pointer + 1).map(|x| x.tag()) {
|
||||||
|
ops[pointer + 1].grow_left(suffix_len);
|
||||||
|
} else {
|
||||||
|
let old_range = prev_op.old_range();
|
||||||
|
ops.insert(
|
||||||
|
pointer + 1,
|
||||||
|
DiffOp::Equal {
|
||||||
|
old_index: old_range.end - suffix_len,
|
||||||
|
new_index: this_op.new_range().end - suffix_len,
|
||||||
|
len: old_range.len() - suffix_len,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
ops[pointer].shift_left(suffix_len);
|
||||||
|
ops[pointer - 1].shrink_left(suffix_len);
|
||||||
|
|
||||||
|
if ops[pointer - 1].is_empty() {
|
||||||
|
ops.remove(pointer - 1);
|
||||||
|
pointer -= 1;
|
||||||
|
}
|
||||||
|
} else if ops[pointer - 1].is_empty() {
|
||||||
|
ops.remove(pointer - 1);
|
||||||
|
pointer -= 1;
|
||||||
|
} else {
|
||||||
|
// We can't shift upwards anymore
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Swap the Delete and Insert
|
||||||
|
(DiffTag::Insert, DiffTag::Delete) | (DiffTag::Delete, DiffTag::Insert) => {
|
||||||
|
ops.swap(pointer - 1, pointer);
|
||||||
|
pointer -= 1;
|
||||||
|
}
|
||||||
|
// Merge the two ranges
|
||||||
|
(DiffTag::Insert, DiffTag::Insert) => {
|
||||||
|
ops[pointer - 1].grow_right(this_op.new_range().len());
|
||||||
|
ops.remove(pointer);
|
||||||
|
pointer -= 1;
|
||||||
|
}
|
||||||
|
(DiffTag::Delete, DiffTag::Delete) => {
|
||||||
|
ops[pointer - 1].grow_right(this_op.old_range().len());
|
||||||
|
ops.remove(pointer);
|
||||||
|
pointer -= 1;
|
||||||
|
}
|
||||||
|
_ => unreachable!("unexpected tag"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pointer
|
||||||
|
}
|
||||||
|
|
||||||
|
fn shift_diff_ops_down<Old, New>(
|
||||||
|
ops: &mut Vec<DiffOp>,
|
||||||
|
old: &Old,
|
||||||
|
new: &New,
|
||||||
|
mut pointer: usize,
|
||||||
|
) -> usize
|
||||||
|
where
|
||||||
|
Old: Index<usize> + ?Sized,
|
||||||
|
New: Index<usize> + ?Sized,
|
||||||
|
New::Output: PartialEq<Old::Output>,
|
||||||
|
{
|
||||||
|
while let Some(&next_op) = pointer.checked_add(1).and_then(|idx| ops.get(idx)) {
|
||||||
|
let this_op = ops[pointer];
|
||||||
|
match (this_op.tag(), next_op.tag()) {
|
||||||
|
// Shift Inserts Downwards
|
||||||
|
(DiffTag::Insert, DiffTag::Equal) => {
|
||||||
|
let prefix_len =
|
||||||
|
common_prefix_len(old, next_op.old_range(), new, this_op.new_range());
|
||||||
|
if prefix_len > 0 {
|
||||||
|
if let Some(DiffTag::Equal) = pointer
|
||||||
|
.checked_sub(1)
|
||||||
|
.and_then(|x| ops.get(x))
|
||||||
|
.map(|x| x.tag())
|
||||||
|
{
|
||||||
|
ops[pointer - 1].grow_right(prefix_len);
|
||||||
|
} else {
|
||||||
|
ops.insert(
|
||||||
|
pointer,
|
||||||
|
DiffOp::Equal {
|
||||||
|
old_index: next_op.old_range().start,
|
||||||
|
new_index: this_op.new_range().start,
|
||||||
|
len: prefix_len,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
pointer += 1;
|
||||||
|
}
|
||||||
|
ops[pointer].shift_right(prefix_len);
|
||||||
|
ops[pointer + 1].shrink_right(prefix_len);
|
||||||
|
|
||||||
|
if ops[pointer + 1].is_empty() {
|
||||||
|
ops.remove(pointer + 1);
|
||||||
|
}
|
||||||
|
} else if ops[pointer + 1].is_empty() {
|
||||||
|
ops.remove(pointer + 1);
|
||||||
|
} else {
|
||||||
|
// We can't shift upwards anymore
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Shift Deletions Downwards
|
||||||
|
(DiffTag::Delete, DiffTag::Equal) => {
|
||||||
|
// check common suffix for the amount we can shift
|
||||||
|
let prefix_len =
|
||||||
|
common_prefix_len(old, next_op.old_range(), new, this_op.new_range());
|
||||||
|
if prefix_len > 0 {
|
||||||
|
if let Some(DiffTag::Equal) = pointer
|
||||||
|
.checked_sub(1)
|
||||||
|
.and_then(|x| ops.get(x))
|
||||||
|
.map(|x| x.tag())
|
||||||
|
{
|
||||||
|
ops[pointer - 1].grow_right(prefix_len);
|
||||||
|
} else {
|
||||||
|
ops.insert(
|
||||||
|
pointer,
|
||||||
|
DiffOp::Equal {
|
||||||
|
old_index: next_op.old_range().start,
|
||||||
|
new_index: this_op.new_range().start,
|
||||||
|
len: prefix_len,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
pointer += 1;
|
||||||
|
}
|
||||||
|
ops[pointer].shift_right(prefix_len);
|
||||||
|
ops[pointer + 1].shrink_right(prefix_len);
|
||||||
|
|
||||||
|
if ops[pointer + 1].is_empty() {
|
||||||
|
ops.remove(pointer + 1);
|
||||||
|
}
|
||||||
|
} else if ops[pointer + 1].is_empty() {
|
||||||
|
ops.remove(pointer + 1);
|
||||||
|
} else {
|
||||||
|
// We can't shift downwards anymore
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Swap the Delete and Insert
|
||||||
|
(DiffTag::Insert, DiffTag::Delete) | (DiffTag::Delete, DiffTag::Insert) => {
|
||||||
|
ops.swap(pointer, pointer + 1);
|
||||||
|
pointer += 1;
|
||||||
|
}
|
||||||
|
// Merge the two ranges
|
||||||
|
(DiffTag::Insert, DiffTag::Insert) => {
|
||||||
|
ops[pointer].grow_right(next_op.new_range().len());
|
||||||
|
ops.remove(pointer + 1);
|
||||||
|
}
|
||||||
|
(DiffTag::Delete, DiffTag::Delete) => {
|
||||||
|
ops[pointer].grow_right(next_op.old_range().len());
|
||||||
|
ops.remove(pointer + 1);
|
||||||
|
}
|
||||||
|
_ => unreachable!("unexpected tag"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pointer
|
||||||
|
}
|
||||||
|
|
@ -34,17 +34,20 @@
|
||||||
//! [`capture_diff_slices`](crate::capture_diff_slices).
|
//! [`capture_diff_slices`](crate::capture_diff_slices).
|
||||||
|
|
||||||
mod capture;
|
mod capture;
|
||||||
|
mod compact;
|
||||||
mod hook;
|
mod hook;
|
||||||
mod replace;
|
mod replace;
|
||||||
mod utils;
|
pub(crate) mod utils;
|
||||||
|
|
||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
use std::ops::{Index, Range};
|
use std::ops::{Index, Range};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
pub use capture::Capture;
|
pub use capture::Capture;
|
||||||
|
pub use compact::Compact;
|
||||||
pub use hook::{DiffHook, NoFinishHook};
|
pub use hook::{DiffHook, NoFinishHook};
|
||||||
pub use replace::Replace;
|
pub use replace::Replace;
|
||||||
|
pub use utils::IdentifyDistinct;
|
||||||
|
|
||||||
#[doc(no_inline)]
|
#[doc(no_inline)]
|
||||||
pub use crate::Algorithm;
|
pub use crate::Algorithm;
|
||||||
|
|
|
||||||
|
|
@ -65,8 +65,8 @@ where
|
||||||
New::Output: PartialEq<Old::Output>,
|
New::Output: PartialEq<Old::Output>,
|
||||||
{
|
{
|
||||||
let max_d = max_d(old_range.len(), new_range.len());
|
let max_d = max_d(old_range.len(), new_range.len());
|
||||||
let mut vf = V::new(max_d);
|
|
||||||
let mut vb = V::new(max_d);
|
let mut vb = V::new(max_d);
|
||||||
|
let mut vf = V::new(max_d);
|
||||||
conquer(
|
conquer(
|
||||||
d, old, old_range, new, new_range, &mut vf, &mut vb, deadline,
|
d, old, old_range, new, new_range, &mut vf, &mut vb, deadline,
|
||||||
)?;
|
)?;
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
use std::collections::hash_map::Entry;
|
use std::collections::hash_map::Entry;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::hash::Hash;
|
use std::hash::{Hash, Hasher};
|
||||||
use std::ops::{Index, Range};
|
use std::ops::{Add, Index, Range};
|
||||||
|
|
||||||
/// Utility function to check if a range is empty that works on older rust versions
|
/// Utility function to check if a range is empty that works on older rust versions
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
|
|
@ -142,6 +142,175 @@ where
|
||||||
.count()
|
.count()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A vector that is addressed with indexes starting at `offset` instead of 0.
struct OffsetLookup<Int> {
    // Index that maps to the first element of `vec`.
    offset: usize,
    vec: Vec<Int>,
}

impl<Int> Index<usize> for OffsetLookup<Int> {
    type Output = Int;

    /// Returns the element stored for `index`.
    ///
    /// Panics when `index` is smaller than the offset (in builds with
    /// overflow checks) or lies beyond the stored elements.
    #[inline(always)]
    fn index(&self, index: usize) -> &Self::Output {
        let slot = index - self.offset;
        &self.vec[slot]
    }
}
|
||||||
|
|
||||||
|
/// A utility struct to convert distinct items to unique integers.
|
||||||
|
///
|
||||||
|
/// This can be helpful on larger inputs to speed up the comparisons
|
||||||
|
/// performed by doing a first pass where the data set gets reduced
|
||||||
|
/// to (small) integers.
|
||||||
|
///
|
||||||
|
/// The idea is that instead of passing two sequences to a diffling algorithm
|
||||||
|
/// you first pass it via [`IdentifyDistinct`]:
|
||||||
|
///
|
||||||
|
/// ```rust
|
||||||
|
/// use similar::capture_diff;
|
||||||
|
/// use similar::algorithms::{Algorithm, IdentifyDistinct};
|
||||||
|
///
|
||||||
|
/// let old = &["foo", "bar", "baz"][..];
|
||||||
|
/// let new = &["foo", "blah", "baz"][..];
|
||||||
|
/// let h = IdentifyDistinct::<u32>::new(old, 0..old.len(), new, 0..new.len());
|
||||||
|
/// let ops = capture_diff(
|
||||||
|
/// Algorithm::Myers,
|
||||||
|
/// h.old_lookup(),
|
||||||
|
/// h.old_range(),
|
||||||
|
/// h.new_lookup(),
|
||||||
|
/// h.new_range(),
|
||||||
|
/// );
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// The indexes are the same as with the passed source ranges.
|
||||||
|
pub struct IdentifyDistinct<Int> {
|
||||||
|
old: OffsetLookup<Int>,
|
||||||
|
new: OffsetLookup<Int>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Int> IdentifyDistinct<Int>
|
||||||
|
where
|
||||||
|
Int: Add<Output = Int> + From<u8> + Default + Copy,
|
||||||
|
{
|
||||||
|
/// Creates an int hasher for two sequences.
|
||||||
|
pub fn new<Old, New>(
|
||||||
|
old: &Old,
|
||||||
|
old_range: Range<usize>,
|
||||||
|
new: &New,
|
||||||
|
new_range: Range<usize>,
|
||||||
|
) -> Self
|
||||||
|
where
|
||||||
|
Old: Index<usize> + ?Sized,
|
||||||
|
Old::Output: Eq + Hash,
|
||||||
|
New: Index<usize> + ?Sized,
|
||||||
|
New::Output: Eq + Hash + PartialEq<Old::Output>,
|
||||||
|
{
|
||||||
|
enum Key<'old, 'new, Old: ?Sized, New: ?Sized> {
|
||||||
|
Old(&'old Old),
|
||||||
|
New(&'new New),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'old, 'new, Old, New> Hash for Key<'old, 'new, Old, New>
|
||||||
|
where
|
||||||
|
Old: Hash + ?Sized,
|
||||||
|
New: Hash + ?Sized,
|
||||||
|
{
|
||||||
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||||
|
match *self {
|
||||||
|
Key::Old(val) => val.hash(state),
|
||||||
|
Key::New(val) => val.hash(state),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'old, 'new, Old, New> PartialEq for Key<'old, 'new, Old, New>
|
||||||
|
where
|
||||||
|
Old: Eq + ?Sized,
|
||||||
|
New: Eq + PartialEq<Old> + ?Sized,
|
||||||
|
{
|
||||||
|
#[inline(always)]
|
||||||
|
fn eq(&self, other: &Self) -> bool {
|
||||||
|
match (self, other) {
|
||||||
|
(Key::Old(a), Key::Old(b)) => a == b,
|
||||||
|
(Key::New(a), Key::New(b)) => a == b,
|
||||||
|
(Key::Old(a), Key::New(b)) | (Key::New(b), Key::Old(a)) => b == a,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'old, 'new, Old, New> Eq for Key<'old, 'new, Old, New>
|
||||||
|
where
|
||||||
|
Old: Eq + ?Sized,
|
||||||
|
New: Eq + PartialEq<Old> + ?Sized,
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut map = HashMap::new();
|
||||||
|
let mut old_seq = Vec::new();
|
||||||
|
let mut new_seq = Vec::new();
|
||||||
|
let mut next_id = Int::default();
|
||||||
|
let step = Int::from(1);
|
||||||
|
let old_start = old_range.start;
|
||||||
|
let new_start = new_range.start;
|
||||||
|
|
||||||
|
for idx in old_range {
|
||||||
|
let item = Key::Old(&old[idx]);
|
||||||
|
let id = match map.entry(item) {
|
||||||
|
Entry::Occupied(o) => *o.get(),
|
||||||
|
Entry::Vacant(v) => {
|
||||||
|
let id = next_id;
|
||||||
|
next_id = next_id + step;
|
||||||
|
*v.insert(id)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
old_seq.push(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
for idx in new_range {
|
||||||
|
let item = Key::New(&new[idx]);
|
||||||
|
let id = match map.entry(item) {
|
||||||
|
Entry::Occupied(o) => *o.get(),
|
||||||
|
Entry::Vacant(v) => {
|
||||||
|
let id = next_id;
|
||||||
|
next_id = next_id + step;
|
||||||
|
*v.insert(id)
|
||||||
|
}
|
||||||
|
};
|
||||||
|
new_seq.push(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
IdentifyDistinct {
|
||||||
|
old: OffsetLookup {
|
||||||
|
offset: old_start,
|
||||||
|
vec: old_seq,
|
||||||
|
},
|
||||||
|
new: OffsetLookup {
|
||||||
|
offset: new_start,
|
||||||
|
vec: new_seq,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a lookup for the old side.
|
||||||
|
pub fn old_lookup(&self) -> &impl Index<usize, Output = Int> {
|
||||||
|
&self.old
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a lookup for the new side.
|
||||||
|
pub fn new_lookup(&self) -> &impl Index<usize, Output = Int> {
|
||||||
|
&self.new
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convenience method to get back the old range.
|
||||||
|
pub fn old_range(&self) -> Range<usize> {
|
||||||
|
self.old.offset..self.old.offset + self.old.vec.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convenience method to get back the new range.
|
||||||
|
pub fn new_range(&self) -> Range<usize> {
|
||||||
|
self.new.offset..self.new.offset + self.new.vec.len()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_unique() {
|
fn test_unique() {
|
||||||
let u = unique(&vec!['a', 'b', 'c', 'd', 'd', 'b'], 0..6)
|
let u = unique(&vec!['a', 'b', 'c', 'd', 'd', 'b'], 0..6)
|
||||||
|
|
@ -151,6 +320,24 @@ fn test_unique() {
|
||||||
assert_eq!(u, vec![('a', 0), ('c', 2)]);
|
assert_eq!(u, vec![('a', 0), ('c', 2)]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Ids follow first appearance: old items get 0, 1, 2; "blah" is new and
// gets 3, while "foo" and "baz" reuse the ids from the old side.
#[test]
fn test_int_hasher() {
    let old = ["", "foo", "bar", "baz"];
    let new = ["", "foo", "blah", "baz"];
    let ih = IdentifyDistinct::<u8>::new(&old[..], 1..4, &new[..], 1..4);

    let expected_old: [(usize, u8); 3] = [(1, 0), (2, 1), (3, 2)];
    for &(idx, id) in expected_old.iter() {
        assert_eq!(ih.old_lookup()[idx], id);
    }

    let expected_new: [(usize, u8); 3] = [(1, 0), (2, 3), (3, 2)];
    for &(idx, id) in expected_new.iter() {
        assert_eq!(ih.new_lookup()[idx], id);
    }

    assert_eq!(ih.old_range(), 1..4);
    assert_eq!(ih.new_range(), 1..4);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_common_prefix_len() {
|
fn test_common_prefix_len() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
|
|
|
||||||
|
|
@ -2,14 +2,14 @@ use std::hash::Hash;
|
||||||
use std::ops::{Index, Range};
|
use std::ops::{Index, Range};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use crate::algorithms::{diff_deadline, diff_slices_deadline, Capture, Replace};
|
use crate::algorithms::{diff_deadline, Capture, Compact, Replace};
|
||||||
use crate::{Algorithm, DiffOp};
|
use crate::{Algorithm, DiffOp};
|
||||||
|
|
||||||
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
||||||
///
|
///
|
||||||
/// This is like [`diff`](crate::algorithms::diff) but instead of using an
|
/// This is like [`diff`](crate::algorithms::diff) but instead of using an
|
||||||
/// arbitrary hook this will always use [`Replace`] + [`Capture`] and return the
|
/// arbitrary hook this will always use [`Compact`] + [`Replace`] + [`Capture`]
|
||||||
/// captured [`DiffOp`]s.
|
/// and return the captured [`DiffOp`]s.
|
||||||
pub fn capture_diff<Old, New>(
|
pub fn capture_diff<Old, New>(
|
||||||
alg: Algorithm,
|
alg: Algorithm,
|
||||||
old: &Old,
|
old: &Old,
|
||||||
|
|
@ -43,9 +43,9 @@ where
|
||||||
Old::Output: Hash + Eq + Ord,
|
Old::Output: Hash + Eq + Ord,
|
||||||
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
New::Output: PartialEq<Old::Output> + Hash + Eq + Ord,
|
||||||
{
|
{
|
||||||
let mut d = Replace::new(Capture::new());
|
let mut d = Compact::new(Replace::new(Capture::new()), old, new);
|
||||||
diff_deadline(alg, &mut d, old, old_range, new, new_range, deadline).unwrap();
|
diff_deadline(alg, &mut d, old, old_range, new, new_range, deadline).unwrap();
|
||||||
d.into_inner().into_ops()
|
d.into_inner().into_inner().into_ops()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
/// Creates a diff between old and new with the given algorithm capturing the ops.
|
||||||
|
|
@ -68,9 +68,7 @@ pub fn capture_diff_slices_deadline<T>(
|
||||||
where
|
where
|
||||||
T: Eq + Hash + Ord,
|
T: Eq + Hash + Ord,
|
||||||
{
|
{
|
||||||
let mut d = Replace::new(Capture::new());
|
capture_diff_deadline(alg, old, 0..old.len(), new, 0..new.len(), deadline)
|
||||||
diff_slices_deadline(alg, &mut d, old, new, deadline).unwrap();
|
|
||||||
d.into_inner().into_ops()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return a measure of similarity in the range `0..=1`.
|
/// Return a measure of similarity in the range `0..=1`.
|
||||||
|
|
|
||||||
|
|
@ -14,9 +14,10 @@ pub use self::abstraction::{DiffableStr, DiffableStrRef};
|
||||||
pub use self::inline::InlineChange;
|
pub use self::inline::InlineChange;
|
||||||
|
|
||||||
use self::utils::{upper_seq_ratio, QuickSeqRatio};
|
use self::utils::{upper_seq_ratio, QuickSeqRatio};
|
||||||
|
use crate::algorithms::IdentifyDistinct;
|
||||||
use crate::iter::{AllChangesIter, ChangesIter};
|
use crate::iter::{AllChangesIter, ChangesIter};
|
||||||
use crate::udiff::UnifiedDiff;
|
use crate::udiff::UnifiedDiff;
|
||||||
use crate::{capture_diff_slices_deadline, get_diff_ratio, group_diff_ops, Algorithm, DiffOp};
|
use crate::{capture_diff_deadline, get_diff_ratio, group_diff_ops, Algorithm, DiffOp};
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
enum Deadline {
|
enum Deadline {
|
||||||
|
|
@ -327,12 +328,27 @@ impl TextDiffConfig {
|
||||||
new: Cow<'bufs, [&'new T]>,
|
new: Cow<'bufs, [&'new T]>,
|
||||||
newline_terminated: bool,
|
newline_terminated: bool,
|
||||||
) -> TextDiff<'old, 'new, 'bufs, T> {
|
) -> TextDiff<'old, 'new, 'bufs, T> {
|
||||||
let ops = capture_diff_slices_deadline(
|
let deadline = self.deadline.map(|x| x.into_instant());
|
||||||
self.algorithm,
|
let ops = if old.len() > 100 || new.len() > 100 {
|
||||||
&old,
|
let ih = IdentifyDistinct::<u32>::new(&old[..], 0..old.len(), &new[..], 0..new.len());
|
||||||
&new,
|
capture_diff_deadline(
|
||||||
self.deadline.map(|x| x.into_instant()),
|
self.algorithm,
|
||||||
);
|
ih.old_lookup(),
|
||||||
|
ih.old_range(),
|
||||||
|
ih.new_lookup(),
|
||||||
|
ih.new_range(),
|
||||||
|
deadline,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
capture_diff_deadline(
|
||||||
|
self.algorithm,
|
||||||
|
&old[..],
|
||||||
|
0..old.len(),
|
||||||
|
&new[..],
|
||||||
|
0..new.len(),
|
||||||
|
deadline,
|
||||||
|
)
|
||||||
|
};
|
||||||
TextDiff {
|
TextDiff {
|
||||||
old,
|
old,
|
||||||
new,
|
new,
|
||||||
|
|
|
||||||
82
src/types.rs
82
src/types.rs
|
|
@ -1,6 +1,7 @@
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::ops::{Index, Range};
|
use std::ops::{Index, Range};
|
||||||
|
|
||||||
|
use crate::algorithms::utils::is_empty_range;
|
||||||
use crate::algorithms::DiffHook;
|
use crate::algorithms::DiffHook;
|
||||||
use crate::iter::ChangesIter;
|
use crate::iter::ChangesIter;
|
||||||
|
|
||||||
|
|
@ -343,6 +344,87 @@ impl DiffOp {
|
||||||
.chain(Some((ChangeTag::Insert, &new[new_index..new_index + new_len])).into_iter()),
|
.chain(Some((ChangeTag::Insert, &new[new_index..new_index + new_len])).into_iter()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn is_empty(&self) -> bool {
|
||||||
|
let (_, old, new) = self.as_tag_tuple();
|
||||||
|
is_empty_range(&old) && is_empty_range(&new)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn shift_left(&mut self, adjust: usize) {
|
||||||
|
self.adjust((adjust, true), (0, false));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn shift_right(&mut self, adjust: usize) {
|
||||||
|
self.adjust((adjust, false), (0, false));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn grow_left(&mut self, adjust: usize) {
|
||||||
|
self.adjust((adjust, true), (adjust, false));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn grow_right(&mut self, adjust: usize) {
|
||||||
|
self.adjust((0, false), (adjust, false));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn shrink_left(&mut self, adjust: usize) {
|
||||||
|
self.adjust((0, false), (adjust, true));
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn shrink_right(&mut self, adjust: usize) {
|
||||||
|
self.adjust((adjust, false), (adjust, true));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn adjust(&mut self, adjust_offset: (usize, bool), adjust_len: (usize, bool)) {
|
||||||
|
#[inline(always)]
|
||||||
|
fn modify(val: &mut usize, adj: (usize, bool)) {
|
||||||
|
if adj.1 {
|
||||||
|
*val -= adj.0;
|
||||||
|
} else {
|
||||||
|
*val += adj.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
match self {
|
||||||
|
DiffOp::Equal {
|
||||||
|
old_index,
|
||||||
|
new_index,
|
||||||
|
len,
|
||||||
|
} => {
|
||||||
|
modify(old_index, adjust_offset);
|
||||||
|
modify(new_index, adjust_offset);
|
||||||
|
modify(len, adjust_len);
|
||||||
|
}
|
||||||
|
DiffOp::Delete {
|
||||||
|
old_index,
|
||||||
|
old_len,
|
||||||
|
new_index,
|
||||||
|
} => {
|
||||||
|
modify(old_index, adjust_offset);
|
||||||
|
modify(old_len, adjust_len);
|
||||||
|
modify(new_index, adjust_offset);
|
||||||
|
}
|
||||||
|
DiffOp::Insert {
|
||||||
|
old_index,
|
||||||
|
new_index,
|
||||||
|
new_len,
|
||||||
|
} => {
|
||||||
|
modify(old_index, adjust_offset);
|
||||||
|
modify(new_index, adjust_offset);
|
||||||
|
modify(new_len, adjust_len);
|
||||||
|
}
|
||||||
|
DiffOp::Replace {
|
||||||
|
old_index,
|
||||||
|
old_len,
|
||||||
|
new_index,
|
||||||
|
new_len,
|
||||||
|
} => {
|
||||||
|
modify(old_index, adjust_offset);
|
||||||
|
modify(old_len, adjust_len);
|
||||||
|
modify(new_index, adjust_offset);
|
||||||
|
modify(new_len, adjust_len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "text")]
|
#[cfg(feature = "text")]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue