Expose get_diff_ratio
This commit is contained in:
parent
8089e56f07
commit
e53427b56f
4 changed files with 36 additions and 24 deletions
|
|
@ -2,6 +2,10 @@
|
|||
|
||||
All notable changes to similar are documented here.
|
||||
|
||||
## 0.6.0
|
||||
|
||||
* Add `get_diff_ratio`.
|
||||
|
||||
## 0.5.0
|
||||
|
||||
* Add `DiffOp::apply_to_hook` to apply a captured op to a diff hook.
|
||||
|
|
|
|||
|
|
@ -247,6 +247,31 @@ pub fn group_diff_ops(mut ops: Vec<DiffOp>, n: usize) -> Vec<Vec<DiffOp>> {
|
|||
rv
|
||||
}
|
||||
|
||||
/// Return a measure of similarity in the range `0..=1`.
|
||||
///
|
||||
/// A ratio of `1.0` means the two sequences are a complete match, a
|
||||
/// ratio of `0.0` would indicate completely distinct sequences. The input
|
||||
/// is the sequence of diff operations and the length of the old and new
|
||||
/// sequence.
|
||||
pub fn get_diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 {
|
||||
let matches = ops
|
||||
.iter()
|
||||
.map(|op| {
|
||||
if let DiffOp::Equal { len, .. } = *op {
|
||||
len
|
||||
} else {
|
||||
0
|
||||
}
|
||||
})
|
||||
.sum::<usize>();
|
||||
let len = old_len + new_len;
|
||||
if len == 0 {
|
||||
1.0
|
||||
} else {
|
||||
2.0 * matches as f32 / len as f32
|
||||
}
|
||||
}
|
||||
|
||||
impl DiffHook for Capture {
|
||||
type Error = Infallible;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
#![cfg(feature = "inline")]
|
||||
use std::fmt;
|
||||
|
||||
use crate::algorithms::{capture_diff, Algorithm, DiffOp, DiffTag};
|
||||
use crate::algorithms::{capture_diff, get_diff_ratio, Algorithm, DiffOp, DiffTag};
|
||||
use crate::text::{Change, ChangeTag, TextDiff};
|
||||
|
||||
use super::{diff_ratio, split_unicode_words};
|
||||
use super::split_unicode_words;
|
||||
|
||||
use std::ops::Index;
|
||||
|
||||
|
|
@ -201,7 +201,7 @@ pub(crate) fn iter_inline_changes<'diff>(
|
|||
0..new_lookup.len(),
|
||||
);
|
||||
|
||||
if diff_ratio(&ops, old_lookup.len(), new_lookup.len()) < 0.5 {
|
||||
if get_diff_ratio(&ops, old_lookup.len(), new_lookup.len()) < 0.5 {
|
||||
return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box<dyn Iterator<Item = _>>;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -82,7 +82,9 @@ mod udiff;
|
|||
pub use self::inline::*;
|
||||
pub use self::udiff::*;
|
||||
|
||||
use crate::algorithms::{capture_diff_slices, group_diff_ops, Algorithm, DiffOp, DiffTag};
|
||||
use crate::algorithms::{
|
||||
capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, DiffOp, DiffTag,
|
||||
};
|
||||
|
||||
/// A builder type config for more complex uses of [`TextDiff`].
|
||||
#[derive(Clone, Debug)]
|
||||
|
|
@ -394,7 +396,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
|
|||
/// assert_eq!(diff.ratio(), 0.75);
|
||||
/// ```
|
||||
pub fn ratio(&self) -> f32 {
|
||||
diff_ratio(self.ops(), self.old.len(), self.new.len())
|
||||
get_diff_ratio(self.ops(), self.old.len(), self.new.len())
|
||||
}
|
||||
|
||||
/// Iterates over the changes the op expands to.
|
||||
|
|
@ -601,25 +603,6 @@ fn split_graphemes(s: &str) -> impl Iterator<Item = &str> {
|
|||
unicode_segmentation::UnicodeSegmentation::graphemes(s, true)
|
||||
}
|
||||
|
||||
fn diff_ratio(ops: &[DiffOp], s1_len: usize, s2_len: usize) -> f32 {
|
||||
let matches = ops
|
||||
.iter()
|
||||
.map(|op| {
|
||||
if let DiffOp::Equal { len, .. } = *op {
|
||||
len
|
||||
} else {
|
||||
0
|
||||
}
|
||||
})
|
||||
.sum::<usize>();
|
||||
let len = s1_len + s2_len;
|
||||
if len == 0 {
|
||||
1.0
|
||||
} else {
|
||||
2.0 * matches as f32 / len as f32
|
||||
}
|
||||
}
|
||||
|
||||
// quick and dirty way to get an upper sequence ratio.
|
||||
fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
|
||||
let n = seq1.len() + seq2.len();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue