Some internal refactorings

This commit is contained in:
Armin Ronacher 2021-02-02 20:15:31 +01:00
parent 4b85e70f91
commit 34e5b3d571
8 changed files with 174 additions and 157 deletions

View file

@ -87,22 +87,22 @@
#![cfg(feature = "text")]
use std::borrow::Cow;
use std::cmp::Reverse;
use std::collections::{BinaryHeap, HashMap};
use std::collections::BinaryHeap;
use std::fmt;
use std::hash::Hash;
mod abstraction;
#[cfg(feature = "inline")]
mod inline;
mod udiff;
mod utils;
pub use self::abstraction::{DiffableStr, DiffableStrRef};
#[cfg(feature = "inline")]
pub use self::inline::*;
pub use self::udiff::*;
pub use crate::text::abstraction::*;
pub use self::inline::InlineChange;
pub use self::udiff::{unified_diff, UnifiedDiff, UnifiedHunkHeader};
use self::utils::{upper_seq_ratio, QuickSeqRatio};
use crate::algorithms::{
capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, DiffOp, DiffTag,
};
@ -250,15 +250,6 @@ impl TextDiffConfig {
}
}
/// Captures diff op codes for textual diffs
///
/// The struct keeps both compared sides as sequences of `&T` items next to
/// the op codes, so that the ops can later be related back to the items.
pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
// Items of the old side: each element is a `&'old T`; the slice holding
// them is either borrowed for `'bufs` or owned (`Cow`).
old: Cow<'bufs, [&'old T]>,
// Items of the new side, stored the same way as `old`.
new: Cow<'bufs, [&'new T]>,
// The captured diff op codes.
ops: Vec<DiffOp>,
// NOTE(review): presumably records whether the items still carry their
// trailing newline — confirm against the code that builds this struct.
newline_terminated: bool,
// NOTE(review): presumably the algorithm the ops were computed with — confirm.
algorithm: Algorithm,
}
/// The tag of a change.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)]
pub enum ChangeTag {
@ -270,6 +261,20 @@ pub enum ChangeTag {
Insert,
}
/// Renders the tag as its single-character diff marker: a space for
/// `Equal`, `-` for `Delete` and `+` for `Insert`.
impl fmt::Display for ChangeTag {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Pick the marker first so the write call stays a one-liner.
        let marker = match *self {
            ChangeTag::Equal => ' ',
            ChangeTag::Delete => '-',
            ChangeTag::Insert => '+',
        };
        write!(f, "{}", marker)
    }
}
/// Represents the expanded textual change.
///
/// This type is returned from the [`TextDiff::iter_changes`] method. It
@ -289,7 +294,7 @@ impl<'s, T: DiffableStr + ?Sized> fmt::Display for Change<'s, T> {
write!(
f,
"{}{}",
self.as_str_lossy(),
self.to_string_lossy(),
if self.missing_newline { "\n" } else { "" }
)
}
@ -312,6 +317,10 @@ impl<'s, T: DiffableStr + ?Sized> Change<'s, T> {
}
/// Returns the underlying changed value.
///
/// The value is handed out as a reference to the raw `T` (with the `'s`
/// lifetime), not as a string. Depending on the type of the underlying
/// [`DiffableStr`] this value is more or less useful. If you always want
/// to have a UTF-8 string it's best to use the [`Change::as_str`] and
/// [`Change::to_string_lossy`] methods.
pub fn value(&self) -> &'s T {
self.value
}
@ -322,8 +331,8 @@ impl<'s, T: DiffableStr + ?Sized> Change<'s, T> {
}
/// Returns the value (lossy) decoded as utf-8 string.
pub fn as_str_lossy(&self) -> Cow<'s, str> {
T::as_str_lossy(self.value)
pub fn to_string_lossy(&self) -> Cow<'s, str> {
T::to_string_lossy(self.value)
}
/// Returns `true` if this change needs to be followed up by a
@ -336,6 +345,15 @@ impl<'s, T: DiffableStr + ?Sized> Change<'s, T> {
}
}
/// Captures diff op codes for textual diffs
///
/// The struct keeps both compared sides as sequences of `&T` items next to
/// the op codes, so that the ops can later be related back to the items.
pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
// Items of the old side: each element is a `&'old T`; the slice holding
// them is either borrowed for `'bufs` or owned (`Cow`).
old: Cow<'bufs, [&'old T]>,
// Items of the new side, stored the same way as `old`.
new: Cow<'bufs, [&'new T]>,
// The captured diff op codes.
ops: Vec<DiffOp>,
// NOTE(review): presumably records whether the items still carry their
// trailing newline — confirm against the code that builds this struct.
newline_terminated: bool,
// NOTE(review): presumably the algorithm the ops were computed with — confirm.
algorithm: Algorithm,
}
impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
/// Configures a text differ before diffing.
pub fn configure() -> TextDiffConfig {
@ -571,58 +589,7 @@ impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'n
/// is currently not defined and will likely change over time.
#[cfg(feature = "inline")]
pub fn iter_inline_changes(&self, op: &DiffOp) -> impl Iterator<Item = InlineChange<'_, T>> {
iter_inline_changes(self, op)
}
}
/// Quick and dirty upper bound for the ratio of two sequences.
///
/// Only the lengths are inspected: the result is twice the length of the
/// shorter sequence divided by the combined length of both, or `1.0` when
/// both sequences are empty.
fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
    let (len1, len2) = (seq1.len(), seq2.len());
    match len1 + len2 {
        // Two empty sequences count as a perfect match.
        0 => 1.0,
        total => 2.0 * len1.min(len2) as f32 / total as f32,
    }
}
/// Internal utility to calculate an upper bound for a ratio for
/// [`get_close_matches`]. This is based on Python's difflib approach
/// of considering the two sets to be multisets.
///
/// It counts the number of matches without regard to order, which is an
/// obvious upper bound.
///
/// The wrapped map associates each distinct item of the sequence passed to
/// `new` with the number of times it occurs there.
struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>);
impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> {
/// Builds the helper from `seq` by tallying how often each item occurs.
pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> {
    // Fold the sequence into an item -> occurrence-count table.
    let tally = seq.iter().fold(HashMap::new(), |mut acc, &item| {
        *acc.entry(item).or_insert(0) += 1;
        acc
    });
    QuickSeqRatio(tally)
}
pub fn calc(&self, seq: &[&T]) -> f32 {
let n = self.0.len() + seq.len();
if n == 0 {
return 1.0;
}
let mut available = HashMap::new();
let mut matches = 0;
for &word in seq {
let x = if let Some(count) = available.get(&word) {
*count
} else {
self.0.get(&word).copied().unwrap_or(0)
};
available.insert(word, x - 1);
if x > 0 {
matches += 1;
}
}
2.0 * matches as f32 / n as f32
inline::iter_inline_changes(self, op)
}
}
@ -738,7 +705,7 @@ fn test_line_ops() {
.flat_map(|op| byte_diff.iter_changes(op))
.collect::<Vec<_>>();
for (change, byte_change) in changes.iter().zip(byte_changes.iter()) {
assert_eq!(change.as_str_lossy(), byte_change.as_str_lossy());
assert_eq!(change.to_string_lossy(), byte_change.to_string_lossy());
}
}
}