Refactor patience unique code to be more readable

This commit is contained in:
Armin Ronacher 2021-02-21 19:38:40 +01:00
parent c32556cfee
commit b468bf94e3
2 changed files with 56 additions and 24 deletions

View file

@ -14,7 +14,7 @@ use std::time::Instant;
use crate::algorithms::{myers, DiffHook, NoFinishHook, Replace}; use crate::algorithms::{myers, DiffHook, NoFinishHook, Replace};
use super::utils::{unique, Indexable}; use super::utils::{unique, UniqueItem};
/// Patience diff algorithm. /// Patience diff algorithm.
/// ///
@ -101,11 +101,11 @@ struct Patience<'old, 'new, 'd, Old: ?Sized, New: ?Sized, D> {
old: &'old Old, old: &'old Old,
old_current: usize, old_current: usize,
old_end: usize, old_end: usize,
old_indexes: &'old [Indexable<'old, Old>], old_indexes: &'old [UniqueItem<'old, Old>],
new: &'new New, new: &'new New,
new_current: usize, new_current: usize,
new_end: usize, new_end: usize,
new_indexes: &'new [Indexable<'new, New>], new_indexes: &'new [UniqueItem<'new, New>],
deadline: Option<Instant>, deadline: Option<Instant>,
} }
@ -121,8 +121,8 @@ where
for (old, new) in (old..old + len).zip(new..new + len) { for (old, new) in (old..old + len).zip(new..new + len) {
let a0 = self.old_current; let a0 = self.old_current;
let b0 = self.new_current; let b0 = self.new_current;
while self.old_current < self.old_indexes[old].index() while self.old_current < self.old_indexes[old].original_index()
&& self.new_current < self.new_indexes[new].index() && self.new_current < self.new_indexes[new].original_index()
&& self.new[self.new_current] == self.old[self.old_current] && self.new[self.new_current] == self.old[self.old_current]
{ {
self.old_current += 1; self.old_current += 1;
@ -135,13 +135,13 @@ where
myers::diff_deadline( myers::diff_deadline(
&mut no_finish_d, &mut no_finish_d,
self.old, self.old,
self.old_current..self.old_indexes[old].index(), self.old_current..self.old_indexes[old].original_index(),
self.new, self.new,
self.new_current..self.new_indexes[new].index(), self.new_current..self.new_indexes[new].original_index(),
self.deadline, self.deadline,
)?; )?;
self.old_current = self.old_indexes[old].index(); self.old_current = self.old_indexes[old].original_index();
self.new_current = self.new_indexes[new].index(); self.new_current = self.new_indexes[new].original_index();
} }
Ok(()) Ok(())
} }

View file

@ -1,5 +1,6 @@
use std::collections::hash_map::Entry; use std::collections::hash_map::Entry;
use std::collections::HashMap; use std::collections::HashMap;
use std::fmt::Debug;
use std::hash::Hash; use std::hash::Hash;
use std::ops::{Index, Range}; use std::ops::{Index, Range};
@ -10,46 +11,68 @@ pub fn is_empty_range<T: PartialOrd<T>>(range: &Range<T>) -> bool {
!(range.start < range.end) !(range.start < range.end)
} }
pub struct Indexable<'a, Idx: ?Sized> { /// Represents an item in the vector returned by [`unique`].
///
/// It compares like the underlying item it was created from but
/// carries the index it was originally created from.
pub struct UniqueItem<'a, Idx: ?Sized> {
lookup: &'a Idx, lookup: &'a Idx,
index: usize, index: usize,
} }
impl<'a, Idx: ?Sized> Indexable<'a, Idx> { impl<'a, Idx: ?Sized> UniqueItem<'a, Idx>
/// Returns the index. where
pub fn index(&self) -> usize { Idx: Index<usize>,
{
/// Returns the value.
#[inline(always)]
pub fn value(&self) -> &Idx::Output {
&self.lookup[self.index]
}
/// Returns the original index.
#[inline(always)]
pub fn original_index(&self) -> usize {
self.index self.index
} }
} }
impl<'a, Idx: Index<usize> + 'a> std::fmt::Debug for Indexable<'a, Idx> impl<'a, Idx: Index<usize> + 'a> Debug for UniqueItem<'a, Idx>
where where
Idx::Output: std::fmt::Debug, Idx::Output: Debug,
{ {
fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(fmt, "{:?}", &self.lookup[self.index]) f.debug_struct("UniqueItem")
.field("value", &self.value())
.field("original_index", &self.original_index())
.finish()
} }
} }
impl<'a, 'b, A, B> PartialEq<Indexable<'a, A>> for Indexable<'b, B> impl<'a, 'b, A, B> PartialEq<UniqueItem<'a, A>> for UniqueItem<'b, B>
where where
A: Index<usize> + 'b + ?Sized, A: Index<usize> + 'b + ?Sized,
B: Index<usize> + 'b + ?Sized, B: Index<usize> + 'b + ?Sized,
B::Output: PartialEq<A::Output>, B::Output: PartialEq<A::Output>,
{ {
fn eq(&self, b: &Indexable<'a, A>) -> bool { #[inline(always)]
self.lookup[self.index] == b.lookup[b.index] fn eq(&self, other: &UniqueItem<'a, A>) -> bool {
self.value() == other.value()
} }
} }
pub fn unique<Idx>(seq: &Idx, range: Range<usize>) -> Vec<Indexable<Idx>> /// Returns only unique items in the sequence as a vector.
///
/// Each item is wrapped in a [`UniqueItem`] so that both the value and the
/// index can be extracted.
pub fn unique<Idx>(lookup: &Idx, range: Range<usize>) -> Vec<UniqueItem<Idx>>
where where
Idx: Index<usize> + ?Sized, Idx: Index<usize> + ?Sized,
Idx::Output: Hash + Eq, Idx::Output: Hash + Eq,
{ {
let mut by_item = HashMap::new(); let mut by_item = HashMap::new();
for index in range { for index in range {
match by_item.entry(&seq[index]) { match by_item.entry(&lookup[index]) {
Entry::Vacant(entry) => { Entry::Vacant(entry) => {
entry.insert(Some(index)); entry.insert(Some(index));
} }
@ -64,8 +87,17 @@ where
let mut rv = by_item let mut rv = by_item
.into_iter() .into_iter()
.filter_map(|(_, x)| x) .filter_map(|(_, x)| x)
.map(|index| Indexable { lookup: seq, index }) .map(|index| UniqueItem { lookup, index })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
rv.sort_by(|a, b| a.index.cmp(&b.index)); rv.sort_by_key(|a| a.original_index());
rv rv
} }
/// Checks that `unique` keeps only the items that occur exactly once in the
/// requested range, each paired with its original index and ordered by it.
#[test]
fn test_unique() {
    let input = vec!['a', 'b', 'c', 'd', 'd', 'b'];
    let mut found = Vec::new();
    for item in unique(&input, 0..6) {
        found.push((*item.value(), item.original_index()));
    }
    // 'b' and 'd' each appear twice in the input, so only 'a' and 'c' remain.
    assert_eq!(found, vec![('a', 0), ('c', 2)]);
}