From fdecdbc6c9bc6ce7ead8d3a96af6c2dbbd50fcb5 Mon Sep 17 00:00:00 2001 From: Barrett Ruth Date: Thu, 11 Sep 2025 15:18:29 -0500 Subject: [PATCH] fp --- examples/floating_point.rs | 65 +++++++ src/algorithms/lcs.rs | 30 +++ src/algorithms/mod.rs | 1 + src/algorithms/myers.rs | 370 ++++++++++++++++++++++++++++++++++++- src/algorithms/patience.rs | 30 +++ src/algorithms/utils.rs | 106 +++++++++++ src/common.rs | 103 +++++++++++ 7 files changed, 704 insertions(+), 1 deletion(-) create mode 100644 examples/floating_point.rs diff --git a/examples/floating_point.rs b/examples/floating_point.rs new file mode 100644 index 0000000..de71f7f --- /dev/null +++ b/examples/floating_point.rs @@ -0,0 +1,65 @@ +use likewise::{capture_diff_slices_fp, capture_diff_slices_fp_f64, Algorithm, ChangeTag}; + +fn main() { + // Example 1: Sensor readings with measurement tolerance + println!("=== Sensor Reading Comparison ==="); + let baseline_readings = vec![23.1, 24.5, 22.8, 25.2, 26.0]; + let current_readings = vec![23.12, 24.48, 22.85, 25.18, 26.03]; + + // Strict comparison would show many differences due to measurement noise + // FP comparison with 0.05 tolerance treats small variations as equal + let ops = capture_diff_slices_fp( + Algorithm::Myers, + &baseline_readings, + &current_readings, + 0.05 + ); + + println!("Baseline: {:?}", baseline_readings); + println!("Current: {:?}", current_readings); + println!("With epsilon=0.05: {} diff operations", ops.len()); + + for op in &ops { + let changes: Vec<_> = op.iter_changes(&baseline_readings, &current_readings).collect(); + for change in changes { + match change.tag() { + ChangeTag::Equal => println!(" ✓ Equal: {}", change.value()), + ChangeTag::Delete => println!(" - Remove: {}", change.value()), + ChangeTag::Insert => println!(" + Add: {}", change.value()), + } + } + } + + // Example 2: Different epsilon values + println!("\n=== Epsilon Sensitivity ==="); + let old = vec![1.0, 2.0, 3.0]; + let new = vec![1.001, 2.0, 2.999]; + + println!("Old: 
{:?}", old); + println!("New: {:?}", new); + + for &epsilon in &[0.0001, 0.001, 0.01] { + let ops = capture_diff_slices_fp(Algorithm::Myers, &old, &new, epsilon); + let all_equal = ops.len() == 1 && matches!(ops[0], likewise::DiffOp::Equal { len: 3, .. }); + println!(" epsilon={}: {} ({})", epsilon, ops.len(), + if all_equal { "all equal" } else { "differences found" }); + } + + // Example 3: Edge cases with NaN and infinity + println!("\n=== Edge Cases ==="); + let old_edge = vec![0.0, -0.0, f32::NAN, f32::INFINITY, f32::NEG_INFINITY]; + let new_edge = vec![-0.0, 0.0, f32::NAN, f32::INFINITY, f32::NEG_INFINITY]; + + let ops = capture_diff_slices_fp(Algorithm::Myers, &old_edge, &new_edge, 0.001); + println!("Old: {:?}", old_edge); + println!("New: {:?}", new_edge); + println!("Edge case handling: {} operations", ops.len()); + + // Example 4: f64 precision + println!("\n=== High Precision f64 ==="); + let old_f64 = vec![1.0000000000001, 2.0, 3.141592653589793]; + let new_f64 = vec![1.0000000000002, 2.0, 3.141592653589794]; + + let ops_f64 = capture_diff_slices_fp_f64(Algorithm::Myers, &old_f64, &new_f64, 1e-12); + println!("f64 comparison with epsilon=1e-12: {} operations", ops_f64.len()); +} \ No newline at end of file diff --git a/src/algorithms/lcs.rs b/src/algorithms/lcs.rs index 1c6a43b..b052bad 100644 --- a/src/algorithms/lcs.rs +++ b/src/algorithms/lcs.rs @@ -292,3 +292,33 @@ fn test_bad_range_regression() { ] ); } + +pub fn diff_fp_deadline<D>( + d: &mut D, + old: &[f32], + old_range: Range<usize>, + new: &[f32], + new_range: Range<usize>, + epsilon: f32, + deadline: Option<Instant>, +) -> Result<(), D::Error> +where + D: DiffHook, +{ + crate::algorithms::myers::diff_fp_deadline(d, old, old_range, new, new_range, epsilon, deadline) +} + +pub fn diff_fp_f64_deadline<D>( + d: &mut D, + old: &[f64], + old_range: Range<usize>, + new: &[f64], + new_range: Range<usize>, + epsilon: f64, + deadline: Option<Instant>, +) -> Result<(), D::Error> +where + D: DiffHook, +{ + 
crate::algorithms::myers::diff_fp_f64_deadline(d, old, old_range, new, new_range, epsilon, deadline) +} diff --git a/src/algorithms/mod.rs b/src/algorithms/mod.rs index 0a43afc..0e252e1 100644 --- a/src/algorithms/mod.rs +++ b/src/algorithms/mod.rs @@ -132,3 +132,4 @@ where { diff_deadline(alg, d, old, 0..old.len(), new, 0..new.len(), deadline) } + diff --git a/src/algorithms/myers.rs b/src/algorithms/myers.rs index c6c1bf7..2fda69f 100644 --- a/src/algorithms/myers.rs +++ b/src/algorithms/myers.rs @@ -21,7 +21,7 @@ use std::ops::{Index, IndexMut, Range}; -use crate::algorithms::utils::{common_prefix_len, common_suffix_len, is_empty_range}; +use crate::algorithms::utils::{common_prefix_len, common_suffix_len, common_prefix_len_fp, common_suffix_len_fp, common_prefix_len_fp_f64, common_suffix_len_fp_f64, is_empty_range}; use crate::algorithms::DiffHook; use crate::deadline_support::{deadline_exceeded, Instant}; @@ -440,3 +440,371 @@ fn test_finish_called() { diff(&mut d, slice, 0..slice.len(), slice, 0..slice.len()).unwrap(); assert!(d.0); } + +/// Myers' diff algorithm with f32 epsilon comparison. +pub fn diff_fp_deadline<D>( + d: &mut D, + old: &[f32], + old_range: Range<usize>, + new: &[f32], + new_range: Range<usize>, + epsilon: f32, + deadline: Option<Instant>, +) -> Result<(), D::Error> +where + D: DiffHook, +{ + let max_d = max_d(old_range.len(), new_range.len()); + let mut vb = V::new(max_d); + let mut vf = V::new(max_d); + conquer_fp(d, old, old_range, new, new_range, epsilon, &mut vf, &mut vb, deadline)?; + d.finish() +} + +/// Myers' diff algorithm with f64 epsilon comparison. 
+pub fn diff_fp_f64_deadline<D>( + d: &mut D, + old: &[f64], + old_range: Range<usize>, + new: &[f64], + new_range: Range<usize>, + epsilon: f64, + deadline: Option<Instant>, +) -> Result<(), D::Error> +where + D: DiffHook, +{ + let max_d = max_d(old_range.len(), new_range.len()); + let mut vb = V::new(max_d); + let mut vf = V::new(max_d); + conquer_fp_f64(d, old, old_range, new, new_range, epsilon, &mut vf, &mut vb, deadline)?; + d.finish() +} + +fn find_middle_snake_fp( + old: &[f32], + old_range: Range<usize>, + new: &[f32], + new_range: Range<usize>, + epsilon: f32, + vf: &mut V, + vb: &mut V, + deadline: Option<Instant>, +) -> Option<(usize, usize)> { + let n = old_range.len(); + let m = new_range.len(); + + let delta = n as isize - m as isize; + let odd = delta & 1 == 1; + + vf[1] = 0; + vb[1] = 0; + + let d_max = max_d(n, m); + assert!(vf.len() >= d_max); + assert!(vb.len() >= d_max); + + for d in 0..d_max as isize { + if deadline_exceeded(deadline) { + break; + } + + // Forward path + for k in (-d..=d).rev().step_by(2) { + let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) { + vf[k + 1] + } else { + vf[k - 1] + 1 + }; + let y = (x as isize - k) as usize; + + let (x0, y0) = (x, y); + if x < old_range.len() && y < new_range.len() { + let advance = common_prefix_len_fp( + old, + old_range.start + x..old_range.end, + new, + new_range.start + y..new_range.end, + epsilon, + ); + x += advance; + } + + vf[k] = x; + + if odd && (k - delta).abs() <= (d - 1) { + if vf[k] + vb[-(k - delta)] >= n { + return Some((x0 + old_range.start, y0 + new_range.start)); + } + } + } + + // Backward path + for k in (-d..=d).rev().step_by(2) { + let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) { + vb[k + 1] + } else { + vb[k - 1] + 1 + }; + let mut y = (x as isize - k) as usize; + + if x < n && y < m { + let advance = common_suffix_len_fp( + old, + old_range.start..old_range.start + n - x, + new, + new_range.start..new_range.start + m - y, + epsilon, + ); + x += advance; + y += advance; + } + + vb[k] = x; + + if 
!odd && (k - delta).abs() <= d { + if vb[k] + vf[-(k - delta)] >= n { + return Some((n - x + old_range.start, m - y + new_range.start)); + } + } + } + } + + None +} + +fn find_middle_snake_fp_f64( + old: &[f64], + old_range: Range<usize>, + new: &[f64], + new_range: Range<usize>, + epsilon: f64, + vf: &mut V, + vb: &mut V, + deadline: Option<Instant>, +) -> Option<(usize, usize)> { + let n = old_range.len(); + let m = new_range.len(); + + let delta = n as isize - m as isize; + let odd = delta & 1 == 1; + + vf[1] = 0; + vb[1] = 0; + + let d_max = max_d(n, m); + assert!(vf.len() >= d_max); + assert!(vb.len() >= d_max); + + for d in 0..d_max as isize { + if deadline_exceeded(deadline) { + break; + } + + // Forward path + for k in (-d..=d).rev().step_by(2) { + let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) { + vf[k + 1] + } else { + vf[k - 1] + 1 + }; + let y = (x as isize - k) as usize; + + let (x0, y0) = (x, y); + if x < old_range.len() && y < new_range.len() { + let advance = common_prefix_len_fp_f64( + old, + old_range.start + x..old_range.end, + new, + new_range.start + y..new_range.end, + epsilon, + ); + x += advance; + } + + vf[k] = x; + + if odd && (k - delta).abs() <= (d - 1) { + if vf[k] + vb[-(k - delta)] >= n { + return Some((x0 + old_range.start, y0 + new_range.start)); + } + } + } + + // Backward path + for k in (-d..=d).rev().step_by(2) { + let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) { + vb[k + 1] + } else { + vb[k - 1] + 1 + }; + let mut y = (x as isize - k) as usize; + + if x < n && y < m { + let advance = common_suffix_len_fp_f64( + old, + old_range.start..old_range.start + n - x, + new, + new_range.start..new_range.start + m - y, + epsilon, + ); + x += advance; + y += advance; + } + + vb[k] = x; + + if !odd && (k - delta).abs() <= d { + if vb[k] + vf[-(k - delta)] >= n { + return Some((n - x + old_range.start, m - y + new_range.start)); + } + } + } + } + + None +} + +#[allow(clippy::too_many_arguments)] +fn conquer_fp<D>( + d: &mut D, + old: 
&[f32], + mut old_range: Range<usize>, + new: &[f32], + mut new_range: Range<usize>, + epsilon: f32, + vf: &mut V, + vb: &mut V, + deadline: Option<Instant>, +) -> Result<(), D::Error> +where + D: DiffHook, +{ + // Check for common prefix + let common_prefix_len = common_prefix_len_fp(old, old_range.clone(), new, new_range.clone(), epsilon); + if common_prefix_len > 0 { + d.equal(old_range.start, new_range.start, common_prefix_len)?; + } + old_range.start += common_prefix_len; + new_range.start += common_prefix_len; + + // Check for common suffix + let common_suffix_len = common_suffix_len_fp(old, old_range.clone(), new, new_range.clone(), epsilon); + let common_suffix = ( + old_range.end - common_suffix_len, + new_range.end - common_suffix_len, + ); + old_range.end -= common_suffix_len; + new_range.end -= common_suffix_len; + + if is_empty_range(&old_range) && is_empty_range(&new_range) { + // Do nothing + } else if is_empty_range(&new_range) { + d.delete(old_range.start, old_range.len(), new_range.start)?; + } else if is_empty_range(&old_range) { + d.insert(old_range.start, new_range.start, new_range.len())?; + } else if let Some((x_start, y_start)) = find_middle_snake_fp( + old, + old_range.clone(), + new, + new_range.clone(), + epsilon, + vf, + vb, + deadline, + ) { + let (old_a, old_b) = split_at(old_range, x_start); + let (new_a, new_b) = split_at(new_range, y_start); + conquer_fp(d, old, old_a, new, new_a, epsilon, vf, vb, deadline)?; + conquer_fp(d, old, old_b, new, new_b, epsilon, vf, vb, deadline)?; + } else { + d.delete( + old_range.start, + old_range.end - old_range.start, + new_range.start, + )?; + d.insert( + old_range.start, + new_range.start, + new_range.end - new_range.start, + )?; + } + + if common_suffix_len > 0 { + d.equal(common_suffix.0, common_suffix.1, common_suffix_len)?; + } + + Ok(()) +} + +#[allow(clippy::too_many_arguments)] +fn conquer_fp_f64<D>( + d: &mut D, + old: &[f64], + mut old_range: Range<usize>, + new: &[f64], + mut new_range: Range<usize>, + epsilon: f64, + vf: 
&mut V, + vb: &mut V, + deadline: Option<Instant>, +) -> Result<(), D::Error> +where + D: DiffHook, +{ + // Check for common prefix + let common_prefix_len = common_prefix_len_fp_f64(old, old_range.clone(), new, new_range.clone(), epsilon); + if common_prefix_len > 0 { + d.equal(old_range.start, new_range.start, common_prefix_len)?; + } + old_range.start += common_prefix_len; + new_range.start += common_prefix_len; + + // Check for common suffix + let common_suffix_len = common_suffix_len_fp_f64(old, old_range.clone(), new, new_range.clone(), epsilon); + let common_suffix = ( + old_range.end - common_suffix_len, + new_range.end - common_suffix_len, + ); + old_range.end -= common_suffix_len; + new_range.end -= common_suffix_len; + + if is_empty_range(&old_range) && is_empty_range(&new_range) { + // Do nothing + } else if is_empty_range(&new_range) { + d.delete(old_range.start, old_range.len(), new_range.start)?; + } else if is_empty_range(&old_range) { + d.insert(old_range.start, new_range.start, new_range.len())?; + } else if let Some((x_start, y_start)) = find_middle_snake_fp_f64( + old, + old_range.clone(), + new, + new_range.clone(), + epsilon, + vf, + vb, + deadline, + ) { + let (old_a, old_b) = split_at(old_range, x_start); + let (new_a, new_b) = split_at(new_range, y_start); + conquer_fp_f64(d, old, old_a, new, new_a, epsilon, vf, vb, deadline)?; + conquer_fp_f64(d, old, old_b, new, new_b, epsilon, vf, vb, deadline)?; + } else { + d.delete( + old_range.start, + old_range.end - old_range.start, + new_range.start, + )?; + d.insert( + old_range.start, + new_range.start, + new_range.end - new_range.start, + )?; + } + + if common_suffix_len > 0 { + d.equal(common_suffix.0, common_suffix.1, common_suffix_len)?; + } + + Ok(()) +} diff --git a/src/algorithms/patience.rs b/src/algorithms/patience.rs index c207c03..c5d757e 100644 --- a/src/algorithms/patience.rs +++ b/src/algorithms/patience.rs @@ -196,3 +196,33 @@ fn test_finish_called() { diff(&mut d, slice, 0..slice.len(), 
slice, 0..slice.len()).unwrap(); assert!(d.0); } + +pub fn diff_fp_deadline<D>( + d: &mut D, + old: &[f32], + old_range: Range<usize>, + new: &[f32], + new_range: Range<usize>, + epsilon: f32, + deadline: Option<Instant>, +) -> Result<(), D::Error> +where + D: DiffHook, +{ + crate::algorithms::myers::diff_fp_deadline(d, old, old_range, new, new_range, epsilon, deadline) +} + +pub fn diff_fp_f64_deadline<D>( + d: &mut D, + old: &[f64], + old_range: Range<usize>, + new: &[f64], + new_range: Range<usize>, + epsilon: f64, + deadline: Option<Instant>, +) -> Result<(), D::Error> +where + D: DiffHook, +{ + crate::algorithms::myers::diff_fp_f64_deadline(d, old, old_range, new, new_range, epsilon, deadline) +} diff --git a/src/algorithms/utils.rs b/src/algorithms/utils.rs index 13501d0..e5c7305 100644 --- a/src/algorithms/utils.rs +++ b/src/algorithms/utils.rs @@ -117,6 +117,46 @@ where .count() } +/// Given two f32 lookups and ranges calculates the length of the common prefix with epsilon comparison. +pub fn common_prefix_len_fp( + old: &[f32], + old_range: Range<usize>, + new: &[f32], + new_range: Range<usize>, + epsilon: f32, +) -> usize { + if is_empty_range(&old_range) || is_empty_range(&new_range) { + return 0; + } + new_range + .zip(old_range) + .take_while( + #[inline(always)] + |x| fp_equal_f32(new[x.0], old[x.1], epsilon), + ) + .count() +} + +/// Given two f64 lookups and ranges calculates the length of the common prefix with epsilon comparison. +pub fn common_prefix_len_fp_f64( + old: &[f64], + old_range: Range<usize>, + new: &[f64], + new_range: Range<usize>, + epsilon: f64, +) -> usize { + if is_empty_range(&old_range) || is_empty_range(&new_range) { + return 0; + } + new_range + .zip(old_range) + .take_while( + #[inline(always)] + |x| fp_equal_f64(new[x.0], old[x.1], epsilon), + ) + .count() +} + /// Given two lookups and ranges calculates the length of common suffix. 
pub fn common_suffix_len<Old, New>( old: &Old, @@ -142,6 +182,72 @@ where .count() } +/// Given two f32 lookups and ranges calculates the length of common suffix with epsilon comparison. +pub fn common_suffix_len_fp( + old: &[f32], + old_range: Range<usize>, + new: &[f32], + new_range: Range<usize>, + epsilon: f32, +) -> usize { + if is_empty_range(&old_range) || is_empty_range(&new_range) { + return 0; + } + new_range + .rev() + .zip(old_range.rev()) + .take_while( + #[inline(always)] + |x| fp_equal_f32(new[x.0], old[x.1], epsilon), + ) + .count() +} + +/// Given two f64 lookups and ranges calculates the length of common suffix with epsilon comparison. +pub fn common_suffix_len_fp_f64( + old: &[f64], + old_range: Range<usize>, + new: &[f64], + new_range: Range<usize>, + epsilon: f64, +) -> usize { + if is_empty_range(&old_range) || is_empty_range(&new_range) { + return 0; + } + new_range + .rev() + .zip(old_range.rev()) + .take_while( + #[inline(always)] + |x| fp_equal_f64(new[x.0], old[x.1], epsilon), + ) + .count() +} + +/// Compare two f32 values with epsilon tolerance. +#[inline(always)] +pub fn fp_equal_f32(left: f32, right: f32, epsilon: f32) -> bool { + if left.is_nan() && right.is_nan() { + return true; + } + if left.is_nan() || right.is_nan() { + return false; + } + (left - right).abs() <= epsilon +} + +/// Compare two f64 values with epsilon tolerance. +#[inline(always)] +pub fn fp_equal_f64(left: f64, right: f64, epsilon: f64) -> bool { + if left.is_nan() && right.is_nan() { + return true; + } + if left.is_nan() || right.is_nan() { + return false; + } + (left - right).abs() <= epsilon +} + struct OffsetLookup<Int> { offset: usize, vec: Vec<Int>, diff --git a/src/common.rs b/src/common.rs index 8ce9a92..51f76c0 100644 --- a/src/common.rs +++ b/src/common.rs @@ -71,6 +71,86 @@ where capture_diff_deadline(alg, old, 0..old.len(), new, 0..new.len(), deadline) } +/// Creates a diff between old and new f32 slices with epsilon comparison. 
+pub fn capture_diff_slices_fp( + alg: Algorithm, + old: &[f32], + new: &[f32], + epsilon: f32, +) -> Vec<DiffOp> { + capture_diff_slices_fp_deadline(alg, old, new, epsilon, None) +} + +/// Creates a diff between old and new f64 slices with epsilon comparison. +pub fn capture_diff_slices_fp_f64( + alg: Algorithm, + old: &[f64], + new: &[f64], + epsilon: f64, +) -> Vec<DiffOp> { + capture_diff_slices_fp_f64_deadline(alg, old, new, epsilon, None) +} + +/// Creates a diff between old and new f32 slices with epsilon comparison and deadline. +pub fn capture_diff_slices_fp_deadline( + alg: Algorithm, + old: &[f32], + new: &[f32], + epsilon: f32, + deadline: Option<Instant>, +) -> Vec<DiffOp> { + capture_diff_fp_deadline(alg, old, 0..old.len(), new, 0..new.len(), epsilon, deadline) +} + +/// Creates a diff between old and new f64 slices with epsilon comparison and deadline. +pub fn capture_diff_slices_fp_f64_deadline( + alg: Algorithm, + old: &[f64], + new: &[f64], + epsilon: f64, + deadline: Option<Instant>, +) -> Vec<DiffOp> { + capture_diff_fp_f64_deadline(alg, old, 0..old.len(), new, 0..new.len(), epsilon, deadline) +} + +fn capture_diff_fp_deadline( + alg: Algorithm, + old: &[f32], + old_range: Range<usize>, + new: &[f32], + new_range: Range<usize>, + epsilon: f32, + deadline: Option<Instant>, +) -> Vec<DiffOp> { + let mut d = Compact::new(Replace::new(Capture::new()), old, new); + let result = match alg { + Algorithm::Myers => crate::algorithms::myers::diff_fp_deadline(&mut d, old, old_range, new, new_range, epsilon, deadline), + Algorithm::Patience => crate::algorithms::patience::diff_fp_deadline(&mut d, old, old_range, new, new_range, epsilon, deadline), + Algorithm::Lcs => crate::algorithms::lcs::diff_fp_deadline(&mut d, old, old_range, new, new_range, epsilon, deadline), + }; + result.unwrap(); + d.into_inner().into_inner().into_ops() +} + +fn capture_diff_fp_f64_deadline( + alg: Algorithm, + old: &[f64], + old_range: Range<usize>, + new: &[f64], + new_range: Range<usize>, + epsilon: f64, + deadline: Option<Instant>, +) -> Vec<DiffOp> { + let mut d = 
Compact::new(Replace::new(Capture::new()), old, new); + let result = match alg { + Algorithm::Myers => crate::algorithms::myers::diff_fp_f64_deadline(&mut d, old, old_range, new, new_range, epsilon, deadline), + Algorithm::Patience => crate::algorithms::patience::diff_fp_f64_deadline(&mut d, old, old_range, new, new_range, epsilon, deadline), + Algorithm::Lcs => crate::algorithms::lcs::diff_fp_f64_deadline(&mut d, old, old_range, new, new_range, epsilon, deadline), + }; + result.unwrap(); + d.into_inner().into_inner().into_ops() +} + /// Return a measure of similarity in the range `0..=1`. /// /// A ratio of `1.0` means the two sequences are a complete match, a @@ -183,3 +263,26 @@ fn test_non_string_iter_change() { ] ); } + +#[test] +fn test_fp_epsilon() { + let old = vec![1.0, 2.0, 3.0]; + let new = vec![1.001, 2.0, 2.999]; + + let ops_tight = capture_diff_slices_fp(Algorithm::Myers, &old, &new, 0.0001); + assert!(ops_tight.len() > 1); + + let ops_loose = capture_diff_slices_fp(Algorithm::Myers, &old, &new, 0.01); + assert_eq!(ops_loose.len(), 1); +} + +#[test] +fn test_fp_nan() { + let old = vec![f32::NAN]; + let new = vec![f32::NAN]; + + let ops = capture_diff_slices_fp(Algorithm::Myers, &old, &new, 0.001); + assert_eq!(ops.len(), 1); +} + +