Expose get_diff_ratio

This commit is contained in:
Armin Ronacher 2021-02-01 14:05:44 +01:00
parent 8089e56f07
commit e53427b56f
4 changed files with 36 additions and 24 deletions

View file

@ -247,6 +247,31 @@ pub fn group_diff_ops(mut ops: Vec<DiffOp>, n: usize) -> Vec<Vec<DiffOp>> {
rv
}
/// Computes a similarity score for a diff from its operations.
///
/// The score is twice the number of items covered by [`DiffOp::Equal`]
/// operations divided by the combined length of both sequences. For ops
/// that actually describe sequences of the given lengths this yields
/// `1.0` for identical sequences and `0.0` when nothing matches.
///
/// `ops` is the list of diff operations, `old_len` and `new_len` are the
/// lengths of the old and new sequence. When both lengths are zero the
/// sequences are considered a complete match and `1.0` is returned.
pub fn get_diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 {
    // Only `Equal` ops contribute; every other op counts as zero matches.
    let equal_items: usize = ops
        .iter()
        .filter_map(|op| match *op {
            DiffOp::Equal { len, .. } => Some(len),
            _ => None,
        })
        .sum();

    let combined_len = old_len + new_len;
    if combined_len == 0 {
        // Two empty sequences: avoid dividing by zero and report a full match.
        return 1.0;
    }

    2.0 * equal_items as f32 / combined_len as f32
}
impl DiffHook for Capture {
type Error = Infallible;

View file

@ -1,10 +1,10 @@
#![cfg(feature = "inline")]
use std::fmt;
use crate::algorithms::{capture_diff, Algorithm, DiffOp, DiffTag};
use crate::algorithms::{capture_diff, get_diff_ratio, Algorithm, DiffOp, DiffTag};
use crate::text::{Change, ChangeTag, TextDiff};
use super::{diff_ratio, split_unicode_words};
use super::split_unicode_words;
use std::ops::Index;
@ -201,7 +201,7 @@ pub(crate) fn iter_inline_changes<'diff>(
0..new_lookup.len(),
);
if diff_ratio(&ops, old_lookup.len(), new_lookup.len()) < 0.5 {
if get_diff_ratio(&ops, old_lookup.len(), new_lookup.len()) < 0.5 {
return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box<dyn Iterator<Item = _>>;
}

View file

@ -82,7 +82,9 @@ mod udiff;
pub use self::inline::*;
pub use self::udiff::*;
use crate::algorithms::{capture_diff_slices, group_diff_ops, Algorithm, DiffOp, DiffTag};
use crate::algorithms::{
capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, DiffOp, DiffTag,
};
/// A builder type config for more complex uses of [`TextDiff`].
#[derive(Clone, Debug)]
@ -394,7 +396,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
/// assert_eq!(diff.ratio(), 0.75);
/// ```
pub fn ratio(&self) -> f32 {
// Hands the diff's ops plus the lengths of the old and new sequences to
// the ratio helper and returns its result unchanged.
diff_ratio(self.ops(), self.old.len(), self.new.len())
get_diff_ratio(self.ops(), self.old.len(), self.new.len())
}
/// Iterates over the changes the op expands to.
@ -601,25 +603,6 @@ fn split_graphemes(s: &str) -> impl Iterator<Item = &str> {
unicode_segmentation::UnicodeSegmentation::graphemes(s, true)
}
// Similarity ratio of a diff: twice the number of items covered by `Equal`
// ops divided by the combined length of both sequences. Two empty sequences
// (combined length zero) are reported as a full match of `1.0`.
fn diff_ratio(ops: &[DiffOp], s1_len: usize, s2_len: usize) -> f32 {
    // Accumulate the length of every `Equal` op; other ops add nothing.
    let mut matched = 0usize;
    for op in ops {
        if let DiffOp::Equal { len, .. } = *op {
            matched += len;
        }
    }

    let total = s1_len + s2_len;
    match total {
        0 => 1.0,
        _ => 2.0 * matched as f32 / total as f32,
    }
}
// quick and dirty way to get an upper sequence ratio.
fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
let n = seq1.len() + seq2.len();