From e53427b56fabe345e76d3bb788e329991a39ccc9 Mon Sep 17 00:00:00 2001 From: Armin Ronacher Date: Mon, 1 Feb 2021 14:05:44 +0100 Subject: [PATCH] Expose get_diff_ratio --- CHANGELOG.md | 4 ++++ src/algorithms/capture.rs | 25 +++++++++++++++++++++++++ src/text/inline.rs | 6 +++--- src/text/mod.rs | 25 ++++--------------------- 4 files changed, 36 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb728e2..49dd024 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to similar are documented here. +## 0.6.0 + +* Add `get_diff_ratio`. + ## 0.5.0 * Add `DiffOp::apply_to_hook` to apply a captured op to a diff hook. diff --git a/src/algorithms/capture.rs b/src/algorithms/capture.rs index e97925e..264a566 100644 --- a/src/algorithms/capture.rs +++ b/src/algorithms/capture.rs @@ -247,6 +247,31 @@ pub fn group_diff_ops(mut ops: Vec, n: usize) -> Vec> { rv } +/// Return a measure of similarity in the range `0..=1`. +/// +/// A ratio of `1.0` means the two sequences are a complete match, a +/// ratio of `0.0` would indicate completely distinct sequences. The input +/// is the sequence of diff operations and the length of the old and new +/// sequence. +pub fn get_diff_ratio(ops: &[DiffOp], old_len: usize, new_len: usize) -> f32 { + let matches = ops + .iter() + .map(|op| { + if let DiffOp::Equal { len, .. 
} = *op { + len + } else { + 0 + } + }) + .sum::<usize>(); + let len = old_len + new_len; + if len == 0 { + 1.0 + } else { + 2.0 * matches as f32 / len as f32 + } +} + impl DiffHook for Capture { type Error = Infallible; diff --git a/src/text/inline.rs b/src/text/inline.rs index 65ef1fa..8e0feba 100644 --- a/src/text/inline.rs +++ b/src/text/inline.rs @@ -1,10 +1,10 @@ #![cfg(feature = "inline")] use std::fmt; -use crate::algorithms::{capture_diff, Algorithm, DiffOp, DiffTag}; +use crate::algorithms::{capture_diff, get_diff_ratio, Algorithm, DiffOp, DiffTag}; use crate::text::{Change, ChangeTag, TextDiff}; -use super::{diff_ratio, split_unicode_words}; +use super::split_unicode_words; use std::ops::Index; @@ -201,7 +201,7 @@ pub(crate) fn iter_inline_changes<'diff>( 0..new_lookup.len(), ); - if diff_ratio(&ops, old_lookup.len(), new_lookup.len()) < 0.5 { + if get_diff_ratio(&ops, old_lookup.len(), new_lookup.len()) < 0.5 { return Box::new(diff.iter_changes(op).map(|x| x.into())) as Box>; } diff --git a/src/text/mod.rs b/src/text/mod.rs index 75bd860..b95589d 100644 --- a/src/text/mod.rs +++ b/src/text/mod.rs @@ -82,7 +82,9 @@ mod udiff; pub use self::inline::*; pub use self::udiff::*; -use crate::algorithms::{capture_diff_slices, group_diff_ops, Algorithm, DiffOp, DiffTag}; +use crate::algorithms::{ + capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, DiffOp, DiffTag, +}; /// A builder type config for more complex uses of [`TextDiff`]. #[derive(Clone, Debug)] @@ -394,7 +396,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> { /// assert_eq!(diff.ratio(), 0.75); /// ``` pub fn ratio(&self) -> f32 { - diff_ratio(self.ops(), self.old.len(), self.new.len()) + get_diff_ratio(self.ops(), self.old.len(), self.new.len()) } /// Iterates over the changes the op expands to. 
@@ -601,25 +603,6 @@ fn split_graphemes(s: &str) -> impl Iterator { unicode_segmentation::UnicodeSegmentation::graphemes(s, true) } -fn diff_ratio(ops: &[DiffOp], s1_len: usize, s2_len: usize) -> f32 { - let matches = ops - .iter() - .map(|op| { - if let DiffOp::Equal { len, .. } = *op { - len - } else { - 0 - } - }) - .sum::<usize>(); - let len = s1_len + s2_len; - if len == 0 { - 1.0 - } else { - 2.0 * matches as f32 / len as f32 - } -} - // quick and dirty way to get an upper sequence ratio. fn upper_seq_ratio(seq1: &[T], seq2: &[T]) -> f32 { let n = seq1.len() + seq2.len();