Moved code around for unified diff handling

2021-01-30 22:42:01 +01:00 · 2021-01-30 22:42:01 +01:00 · 861c53889d
commit 861c53889d
parent 503d912262
10 changed files with 49 additions and 50 deletions
--- a/src/text/mod.rs
+++ b/src/text/mod.rs
@ -0,0 +1,784 @@
+//! Text diffing utilities.
+//!
+//! This provides helpful utilities for text (and more specifically line) diff
+//! operations.  The main type you want to work with is [`TextDiff`] which
+//! uses the underlying diff algorithms to expose a convenient API to work with
+//! texts.
+//!
+//! It can produce a unified diff and also let you iterate over the changeset
+//! directly if you want.
+//!
+//! Text diffing is available by default but can be disabled by turning off the
+//! default features.  The feature to enable to get it back is `text`.
+//!
+//! ## Examples
+//!
+//! A super simple example for how to generate a unified diff with a few lines
+//! of context around the changes:
+//!
+//! ```rust
+//! # use similar::text::TextDiff;
+//! # let old_text = "";
+//! # let new_text = "";
+//! let diff = TextDiff::from_lines(old_text, new_text);
+//! let unified_diff = diff.unified_diff().header("old_file", "new_file").to_string();
+//! ```
+//!
+//! This is another example that iterates over the actual changes:
+//!
+//! ```rust
+//! # use similar::text::TextDiff;
+//! # let old_text = "";
+//! # let new_text = "";
+//! let diff = TextDiff::from_lines(old_text, new_text);
+//! for op in diff.ops() {
+//!     for change in diff.iter_changes(op) {
+//!         println!("{:?}", change);
+//!     }
+//! }
+//! ```
+//!
+//! ## Ops vs Changes
+//!
+//! Because two compared sequences will very commonly largely match, this
+//! module splits its functionality into two layers.  The first is inherited
+//! from the general [`algorithms`](crate::algorithms) module: changes are
+//! encoded as [diff operations](crate::algorithms::DiffOp).  These are ranges
+//! of the differences by index in the source sequence.  Because this can be
+//! cumbersome to work with, a separate method [`TextDiff::iter_changes`] is
+//! provided which expands an operation into its changes item by item.
+//!
+//! Because the [`TextDiff::grouped_ops`] method can isolate clusters of changes
+//! this even works for very long files if paired with this method.
+#![cfg(feature = "text")]
+use std::borrow::Cow;
+use std::cmp::Reverse;
+use std::collections::{BinaryHeap, HashMap};
+
+mod udiff;
+pub use self::udiff::*;
+
+use crate::algorithms::{capture_diff_slices, group_diff_ops, Algorithm, DiffOp, DiffTag};
+
+/// A builder type config for more complex uses of [`TextDiff`].
+///
+/// Obtained through [`TextDiff::configure`].  The setters return `&mut Self`
+/// so they can be chained before one of the `diff_*` methods is invoked.
+#[derive(Clone, Debug)]
+pub struct TextDiffConfig {
+    // algorithm handed to `capture_diff_slices` when the diff is created
+    algorithm: Algorithm,
+    // explicit override of the newline flag; `None` lets each `diff_*`
+    // method pick its own default
+    newline_terminated: Option<bool>,
+}
+
+impl Default for TextDiffConfig {
+    /// Uses the default algorithm and leaves the newline flag undecided so
+    /// that the `diff_*` method in use picks it.
+    fn default() -> Self {
+        Self {
+            newline_terminated: None,
+            algorithm: Algorithm::default(),
+        }
+    }
+}
+
+impl TextDiffConfig {
+    /// Selects the diffing algorithm.
+    ///
+    /// The default algorithm is [`Algorithm::Myers`].
+    pub fn algorithm(&mut self, alg: Algorithm) -> &mut Self {
+        self.algorithm = alg;
+        self
+    }
+
+    /// Overrides the newline termination flag.
+    ///
+    /// Without an override the flag is chosen automatically: line diffs
+    /// ([`TextDiffConfig::diff_lines`]) enable it, every other `diff_*`
+    /// method disables it.  The flag controls the behavior of
+    /// [`TextDiff::iter_changes`] and unified diff generation with regards
+    /// to newlines.  When the flag is `false` newlines are added by the
+    /// unified diff writer.  Otherwise the newlines from the source
+    /// sequences are reused.
+    pub fn newline_terminated(&mut self, yes: bool) -> &mut Self {
+        self.newline_terminated = Some(yes);
+        self
+    }
+
+    /// Creates a diff of lines.
+    ///
+    /// Both texts are split into lines; each line keeps the newline
+    /// sequence it was terminated with.
+    pub fn diff_lines<'old, 'new, 'bufs>(
+        &self,
+        old: &'old str,
+        new: &'new str,
+    ) -> TextDiff<'old, 'new, 'bufs> {
+        let old_lines: Vec<&'old str> = split_lines(old).collect();
+        let new_lines: Vec<&'new str> = split_lines(new).collect();
+        self.diff(Cow::Owned(old_lines), Cow::Owned(new_lines), true)
+    }
+
+    /// Creates a diff of words.
+    pub fn diff_words<'old, 'new, 'bufs>(
+        &self,
+        old: &'old str,
+        new: &'new str,
+    ) -> TextDiff<'old, 'new, 'bufs> {
+        let old_words: Vec<&'old str> = split_words(old).collect();
+        let new_words: Vec<&'new str> = split_words(new).collect();
+        self.diff(Cow::Owned(old_words), Cow::Owned(new_words), false)
+    }
+
+    /// Creates a diff of characters.
+    pub fn diff_chars<'old, 'new, 'bufs>(
+        &self,
+        old: &'old str,
+        new: &'new str,
+    ) -> TextDiff<'old, 'new, 'bufs> {
+        let old_chars: Vec<&'old str> = split_chars(old).collect();
+        let new_chars: Vec<&'new str> = split_chars(new).collect();
+        self.diff(Cow::Owned(old_chars), Cow::Owned(new_chars), false)
+    }
+
+    /// Creates a diff of graphemes.
+    ///
+    /// This requires the `unicode` feature.
+    #[cfg(feature = "unicode")]
+    pub fn diff_graphemes<'old, 'new, 'bufs>(
+        &self,
+        old: &'old str,
+        new: &'new str,
+    ) -> TextDiff<'old, 'new, 'bufs> {
+        let old_graphemes: Vec<&'old str> = split_graphemes(old).collect();
+        let new_graphemes: Vec<&'new str> = split_graphemes(new).collect();
+        self.diff(Cow::Owned(old_graphemes), Cow::Owned(new_graphemes), false)
+    }
+
+    /// Creates a diff of arbitrary slices.
+    ///
+    /// The slices are borrowed rather than copied.
+    pub fn diff_slices<'old, 'new, 'bufs>(
+        &self,
+        old: &'bufs [&'old str],
+        new: &'bufs [&'new str],
+    ) -> TextDiff<'old, 'new, 'bufs> {
+        let (old_items, new_items) = (Cow::Borrowed(old), Cow::Borrowed(new));
+        self.diff(old_items, new_items, false)
+    }
+
+    /// Shared constructor behind all `diff_*` methods.
+    ///
+    /// `newline_terminated` is the default of the calling method; it only
+    /// applies when no explicit override was configured.
+    fn diff<'old, 'new, 'bufs>(
+        &self,
+        old: Cow<'bufs, [&'old str]>,
+        new: Cow<'bufs, [&'new str]>,
+        newline_terminated: bool,
+    ) -> TextDiff<'old, 'new, 'bufs> {
+        let algorithm = self.algorithm;
+        let newline_terminated = self.newline_terminated.unwrap_or(newline_terminated);
+        let ops = capture_diff_slices(algorithm, &old, &new);
+        TextDiff {
+            old,
+            new,
+            ops,
+            newline_terminated,
+            algorithm,
+        }
+    }
+}
+
+/// Captures diff op codes for textual diffs.
+///
+/// Created through the `from_*` constructors or via [`TextDiff::configure`].
+pub struct TextDiff<'old, 'new, 'bufs> {
+    // items of the old text; owned when produced by one of the splitters,
+    // borrowed when the caller handed in slices
+    old: Cow<'bufs, [&'old str]>,
+    // items of the new text, same ownership rules as `old`
+    new: Cow<'bufs, [&'new str]>,
+    // operations computed once when the diff is created
+    ops: Vec<DiffOp>,
+    // whether items are expected to carry their own trailing newline
+    newline_terminated: bool,
+    // algorithm that produced `ops`
+    algorithm: Algorithm,
+}
+
+/// The tag of a change.
+#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)]
+pub enum ChangeTag {
+    /// The item is present in both the old and the new sequence.
+    Equal,
+    /// The item is only present in the old sequence.
+    Delete,
+    /// The item is only present in the new sequence.
+    Insert,
+}
+
+/// Represents the expanded textual change.
+///
+/// This type is returned from the [`TextDiff::iter_changes`] method.  It
+/// exists so that it's more convenient to work with textual differences as
+/// the underlying [`DiffOp`] does not know anything about strings.
+#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)]
+pub struct Change<'s> {
+    // kind of change
+    tag: ChangeTag,
+    // index into the old slices; `None` for insertions and virtual changes
+    old_index: Option<usize>,
+    // index into the new slices; `None` for deletions and virtual changes
+    new_index: Option<usize>,
+    // the item the change refers to
+    value: &'s str,
+}
+
+impl<'s> Change<'s> {
+    /// The kind of change this is.
+    pub fn tag(&self) -> ChangeTag {
+        self.tag
+    }
+
+    /// Index of the item in the old sequence, if it has one.
+    pub fn old_index(&self) -> Option<usize> {
+        self.old_index
+    }
+
+    /// Index of the item in the new sequence, if it has one.
+    pub fn new_index(&self) -> Option<usize> {
+        self.new_index
+    }
+
+    /// The item this change refers to.
+    pub fn value(&self) -> &'s str {
+        self.value
+    }
+
+    /// Returns `true` for virtual changes.
+    ///
+    /// A virtual change has no counterpart in either input; it is
+    /// recognized by having neither an old nor a new index.  Virtual
+    /// changes currently only exist for newline handling: when the
+    /// [`TextDiff::newline_terminated`] flag is set the newlines of the
+    /// input are part of the changes, and a missing trailing newline would
+    /// make processing inconsistent.  For that case a trailing virtual
+    /// newline is emitted, which callers can detect through this method.
+    pub fn is_virtual(&self) -> bool {
+        matches!((self.old_index, self.new_index), (None, None))
+    }
+}
+
+// The change emitted by `TextDiff::iter_changes` when a newline terminated
+// diff is missing a trailing newline.  Both indexes are `None`, which is what
+// `Change::is_virtual` checks for.
+const VIRTUAL_NEWLINE_CHANGE: Change<'static> = Change {
+    tag: ChangeTag::Equal,
+    old_index: None,
+    new_index: None,
+    value: "\n",
+};
+
+impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
+    /// Configures a text differ before diffing.
+    pub fn configure() -> TextDiffConfig {
+        TextDiffConfig::default()
+    }
+
+    /// Creates a diff of lines.
+    ///
+    /// Equivalent to `TextDiff::configure().diff_lines(old, new)`.
+    pub fn from_lines(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
+        Self::configure().diff_lines(old, new)
+    }
+
+    /// Creates a diff of words.
+    ///
+    /// Equivalent to `TextDiff::configure().diff_words(old, new)`.
+    pub fn from_words(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
+        Self::configure().diff_words(old, new)
+    }
+
+    /// Creates a diff of chars.
+    ///
+    /// Equivalent to `TextDiff::configure().diff_chars(old, new)`.
+    pub fn from_chars(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
+        Self::configure().diff_chars(old, new)
+    }
+
+    /// Creates a diff of graphemes.
+    ///
+    /// Equivalent to `TextDiff::configure().diff_graphemes(old, new)`.
+    ///
+    /// This requires the `unicode` feature.
+    #[cfg(feature = "unicode")]
+    pub fn from_graphemes(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
+        Self::configure().diff_graphemes(old, new)
+    }
+
+    /// Creates a diff of arbitrary slices.
+    ///
+    /// Equivalent to `TextDiff::configure().diff_slices(old, new)`.
+    pub fn from_slices(
+        old: &'bufs [&'old str],
+        new: &'bufs [&'new str],
+    ) -> TextDiff<'old, 'new, 'bufs> {
+        Self::configure().diff_slices(old, new)
+    }
+
+    /// The name of the algorithm that created the diff.
+    pub fn algorithm(&self) -> Algorithm {
+        self.algorithm
+    }
+
+    /// Returns `true` if items in the slice are newline terminated.
+    ///
+    /// This flag is used by the unified diff writer to determine if extra
+    /// newlines have to be added.
+    pub fn newline_terminated(&self) -> bool {
+        self.newline_terminated
+    }
+
+    /// Returns all old slices.
+    pub fn old_slices(&self) -> &[&'old str] {
+        &self.old
+    }
+
+    /// Returns all new slices.
+    pub fn new_slices(&self) -> &[&'new str] {
+        &self.new
+    }
+
+    /// Return a measure of the sequences' similarity in the range `0..=1`.
+    ///
+    /// A ratio of `1.0` means the two sequences are a complete match, a
+    /// ratio of `0.0` would indicate completely distinct sequences.
+    ///
+    /// The value is twice the number of items covered by equal operations
+    /// divided by the combined number of items of both sequences.  Two
+    /// empty sequences have a ratio of `1.0`.
+    ///
+    /// ```rust
+    /// # use similar::text::TextDiff;
+    /// let diff = TextDiff::from_chars("abcd", "bcde");
+    /// assert_eq!(diff.ratio(), 0.75);
+    /// ```
+    pub fn ratio(&self) -> f32 {
+        // only equal operations contribute matched items
+        let matches = self
+            .ops()
+            .iter()
+            .map(|op| {
+                if let DiffOp::Equal { len, .. } = *op {
+                    len
+                } else {
+                    0
+                }
+            })
+            .sum::<usize>();
+        let len = self.old.len() + self.new.len();
+        if len == 0 {
+            1.0
+        } else {
+            2.0 * matches as f32 / len as f32
+        }
+    }
+
+    /// Iterates over the changes the op expands to.
+    ///
+    /// This method is a convenient way to automatically resolve the different
+    /// ways in which a change could be encoded (insert/delete vs replace), look
+    /// up the value from the appropriate slice and also handle correct index
+    /// handling.
+    ///
+    /// A replace operation is expanded into all of its deletions followed by
+    /// all of its insertions.
+    ///
+    /// In addition it has some custom handling to insert "virtual" newlines
+    /// for diffs where [`TextDiff::newline_terminated`] is `true` but the
+    /// diff does not end in newlines in the right places.  For more information
+    /// see [`Change::is_virtual`].
+    pub fn iter_changes(&self, op: &DiffOp) -> impl Iterator<Item = Change> {
+        let newline_terminated = self.newline_terminated;
+        let (tag, old_range, new_range) = op.as_tag_tuple();
+        let mut old_index = old_range.start;
+        let mut new_index = new_range.start;
+        // the remaining items of the operation; consumed from the front
+        let mut old_slices = &self.old_slices()[op.old_range()];
+        let mut new_slices = &self.new_slices()[op.new_range()];
+
+        // figure out if a virtual newline has to be inserted.  For a replace
+        // this only covers the deleted half; the inserted half is checked
+        // again further down once its last item is reached.
+        let mut virtual_newline = if newline_terminated {
+            let last_element = match tag {
+                DiffTag::Equal | DiffTag::Delete | DiffTag::Replace => old_slices.last(),
+                DiffTag::Insert => new_slices.last(),
+            };
+            if !last_element.map_or(false, |x| x.ends_with(&['\r', '\n'][..])) {
+                Some(VIRTUAL_NEWLINE_CHANGE)
+            } else {
+                None
+            }
+        } else {
+            None
+        };
+
+        // the indexes are bumped before the change is built, hence the
+        // `- 1` when they are reported
+        std::iter::from_fn(move || match tag {
+            DiffTag::Equal => {
+                if let Some((&first, rest)) = old_slices.split_first() {
+                    old_slices = rest;
+                    old_index += 1;
+                    new_index += 1;
+                    Some(Change {
+                        tag: ChangeTag::Equal,
+                        old_index: Some(old_index - 1),
+                        new_index: Some(new_index - 1),
+                        value: first,
+                    })
+                } else {
+                    // `take` makes sure the virtual newline is emitted once
+                    virtual_newline.take()
+                }
+            }
+            DiffTag::Delete => {
+                if let Some((&first, rest)) = old_slices.split_first() {
+                    old_slices = rest;
+                    old_index += 1;
+                    Some(Change {
+                        tag: ChangeTag::Delete,
+                        old_index: Some(old_index - 1),
+                        new_index: None,
+                        value: first,
+                    })
+                } else {
+                    virtual_newline.take()
+                }
+            }
+            DiffTag::Insert => {
+                if let Some((&first, rest)) = new_slices.split_first() {
+                    new_slices = rest;
+                    new_index += 1;
+                    Some(Change {
+                        tag: ChangeTag::Insert,
+                        old_index: None,
+                        new_index: Some(new_index - 1),
+                        value: first,
+                    })
+                } else {
+                    virtual_newline.take()
+                }
+            }
+            DiffTag::Replace => {
+                // deletions first, then a pending virtual newline, then the
+                // insertions
+                if let Some((&first, rest)) = old_slices.split_first() {
+                    old_slices = rest;
+                    old_index += 1;
+                    Some(Change {
+                        tag: ChangeTag::Delete,
+                        old_index: Some(old_index - 1),
+                        new_index: None,
+                        value: first,
+                    })
+                } else if let Some(virtual_newline) = virtual_newline.take() {
+                    Some(virtual_newline)
+                } else if let Some((&first, rest)) = new_slices.split_first() {
+                    new_slices = rest;
+                    new_index += 1;
+                    // check for another virtual newline
+                    if newline_terminated && rest.is_empty() && !first.ends_with(&['\r', '\n'][..])
+                    {
+                        virtual_newline = Some(VIRTUAL_NEWLINE_CHANGE);
+                    }
+                    Some(Change {
+                        tag: ChangeTag::Insert,
+                        old_index: None,
+                        new_index: Some(new_index - 1),
+                        value: first,
+                    })
+                } else {
+                    None
+                }
+            }
+        })
+    }
+
+    /// Returns the captured diff ops.
+    pub fn ops(&self) -> &[DiffOp] {
+        &self.ops
+    }
+
+    /// Isolate change clusters by eliminating ranges with no changes.
+    ///
+    /// This is equivalent to calling [`group_diff_ops`] on [`TextDiff::ops`].
+    /// The captured ops are copied for this, `n` is passed on unchanged.
+    pub fn grouped_ops(&self, n: usize) -> Vec<Vec<DiffOp>> {
+        group_diff_ops(self.ops().to_vec(), n)
+    }
+
+    /// Utility to return a unified diff formatter.
+    pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
+        UnifiedDiff::from_text_diff(self)
+    }
+}
+
+/// Splits a string into lines.
+///
+/// Every yielded line keeps the newline sequence that terminated it.  All
+/// common newline sequences are recognized (`\r\n`, `\n` as well as `\r`).
+/// Text after the last newline is yielded as a final, unterminated line; an
+/// empty string yields nothing.
+fn split_lines(s: &str) -> impl Iterator<Item = &str> {
+    let mut rest = s;
+
+    std::iter::from_fn(move || {
+        if rest.is_empty() {
+            return None;
+        }
+        let bytes = rest.as_bytes();
+        // `\r` and `\n` are single bytes in UTF-8, so the cut positions
+        // below are always valid char boundaries
+        let cut = match bytes.iter().position(|&b| b == b'\r' || b == b'\n') {
+            Some(pos) if bytes[pos] == b'\r' && bytes.get(pos + 1) == Some(&b'\n') => pos + 2,
+            Some(pos) => pos + 1,
+            None => rest.len(),
+        };
+        let (line, tail) = rest.split_at(cut);
+        rest = tail;
+        Some(line)
+    })
+}
+
+/// Splits text into words with whitespace attached.
+///
+/// Each item is a (possibly empty) run of non-whitespace characters followed
+/// by the whitespace run after it, so joining all items gives back the
+/// input.  Leading whitespace forms an item of its own.
+fn split_words(s: &str) -> impl Iterator<Item = &str> {
+    let mut rest = s;
+
+    std::iter::from_fn(move || {
+        if rest.is_empty() {
+            return None;
+        }
+        // length of the word itself ...
+        let word_len = rest.find(char::is_whitespace).unwrap_or(rest.len());
+        // ... and of the whitespace trailing it
+        let after_word = &rest[word_len..];
+        let space_len = after_word
+            .find(|c: char| !c.is_whitespace())
+            .unwrap_or(after_word.len());
+        let (token, tail) = rest.split_at(word_len + space_len);
+        rest = tail;
+        Some(token)
+    })
+}
+
+/// Splits text into characters.
+///
+/// Every item is the sub slice of `s` holding exactly one `char`.
+fn split_chars(s: &str) -> impl Iterator<Item = &str> {
+    s.char_indices().map(move |(start, ch)| {
+        let end = start + ch.len_utf8();
+        &s[start..end]
+    })
+}
+
+/// Splits text into graphemes.
+///
+/// Only available with the `unicode` feature.  Segmentation is delegated to
+/// the `unicode_segmentation` crate; the `true` argument presumably selects
+/// extended grapheme clusters (confirm against that crate's documentation).
+#[cfg(feature = "unicode")]
+fn split_graphemes(s: &str) -> impl Iterator<Item = &str> {
+    unicode_segmentation::UnicodeSegmentation::graphemes(s, true)
+}
+
+// Cheap upper bound for the ratio of two sequences that only looks at their
+// lengths: at best every item of the shorter sequence finds a match.
+fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
+    let (len1, len2) = (seq1.len(), seq2.len());
+    match len1 + len2 {
+        0 => 1.0,
+        total => 2.0 * len1.min(len2) as f32 / total as f32,
+    }
+}
+
+/// Internal utility to calculate an upper bound for a ratio for
+/// [`get_close_matches`].  This is based on Python's difflib approach
+/// of considering the two sets to be multisets.
+///
+/// It counts the number of matches without regard to order, which is an
+/// obvious upper bound.
+struct QuickSeqRatio<'a> {
+    /// How often each item occurs in the reference sequence.
+    counts: HashMap<&'a str, i32>,
+    /// Length of the reference sequence, duplicates included.
+    len: usize,
+}
+
+impl<'a> QuickSeqRatio<'a> {
+    /// Counts the items of the reference sequence `seq`.
+    pub fn new(seq: &[&'a str]) -> QuickSeqRatio<'a> {
+        let mut counts = HashMap::new();
+        for &word in seq {
+            *counts.entry(word).or_insert(0) += 1;
+        }
+        QuickSeqRatio {
+            counts,
+            len: seq.len(),
+        }
+    }
+
+    /// Returns the upper bound for the ratio between the reference
+    /// sequence and `seq`.  Two empty sequences give `1.0`.
+    pub fn calc(&self, seq: &[&str]) -> f32 {
+        // the ratio is relative to the combined length of both sequences.
+        // The number of distinct items must not be used here: it
+        // under-counts sequences with repeated items and lets the result
+        // grow beyond 1.0.
+        let n = self.len + seq.len();
+        if n == 0 {
+            return 1.0;
+        }
+
+        // per item: how many occurrences of the reference sequence are
+        // still unmatched.  Entries are created lazily from `counts`.
+        let mut available = HashMap::new();
+        let mut matches = 0;
+        for &word in seq {
+            let x = if let Some(count) = available.get(&word) {
+                *count
+            } else {
+                self.counts.get(&word).copied().unwrap_or(0)
+            };
+            available.insert(word, x - 1);
+            if x > 0 {
+                matches += 1;
+            }
+        }
+
+        2.0 * matches as f32 / n as f32
+    }
+}
+
+/// Use the text differ to find `n` close matches.
+///
+/// `word` is compared character by character against every entry of
+/// `possibilities`.  `cutoff` defines the threshold which needs to be
+/// reached for a word to be considered similar.  See [`TextDiff::ratio`]
+/// for more information.  At most `n` matches are returned, best match
+/// first.
+///
+/// ```
+/// # use similar::text::get_close_matches;
+/// let matches = get_close_matches(
+///     "appel",
+///     &["ape", "apple", "peach", "puppy"][..],
+///     3,
+///     0.6
+/// );
+/// assert_eq!(matches, vec!["apple", "ape"]);
+/// ```
+pub fn get_close_matches<'a>(
+    word: &str,
+    possibilities: &[&'a str],
+    n: usize,
+    cutoff: f32,
+) -> Vec<&'a str> {
+    let word_chars: Vec<&str> = split_chars(word).collect();
+    let quick_ratio = QuickSeqRatio::new(&word_chars);
+    let mut ranked = BinaryHeap::new();
+
+    for &candidate in possibilities {
+        let candidate_chars: Vec<&str> = split_chars(candidate).collect();
+
+        // the two cheap upper bounds rule out most candidates before the
+        // real diff has to be calculated
+        if upper_seq_ratio(&word_chars, &candidate_chars) < cutoff
+            || quick_ratio.calc(&candidate_chars) < cutoff
+        {
+            continue;
+        }
+
+        let ratio = TextDiff::from_slices(&word_chars, &candidate_chars).ratio();
+        if ratio >= cutoff {
+            // the heap hands out the largest element first.  The word itself
+            // is put in reversed so that matches with the same ratio are
+            // ordered lexicographically.
+            let score = (ratio * u32::MAX as f32) as u32;
+            ranked.push((score, Reverse(candidate)));
+        }
+    }
+
+    std::iter::from_fn(|| ranked.pop())
+        .take(n)
+        .map(|(_, Reverse(candidate))| candidate)
+        .collect()
+}
+
+// `split_lines` keeps `\n`, `\r` and `\r\n` terminators, yields a trailing
+// unterminated line and yields nothing for empty input.
+#[test]
+fn test_split_lines() {
+    assert_eq!(
+        split_lines("first\nsecond\rthird\r\nfourth\nlast").collect::<Vec<_>>(),
+        vec!["first\n", "second\r", "third\r\n", "fourth\n", "last"]
+    );
+    assert_eq!(split_lines("\n\n").collect::<Vec<_>>(), vec!["\n", "\n"]);
+    assert_eq!(split_lines("\n").collect::<Vec<_>>(), vec!["\n"]);
+    assert!(split_lines("").collect::<Vec<_>>().is_empty());
+}
+
+// `split_words` attaches the whitespace run following a word to that word.
+#[test]
+fn test_split_words() {
+    assert_eq!(
+        split_words("foo    bar baz\n\n  aha").collect::<Vec<_>>(),
+        ["foo    ", "bar ", "baz\n\n  ", "aha"]
+    );
+}
+
+// `split_chars` works on `char`s, so the variation selector following the
+// snowflake becomes an item of its own.
+#[test]
+fn test_split_chars() {
+    assert_eq!(
+        split_chars("abcfö❄️").collect::<Vec<_>>(),
+        vec!["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"]
+    );
+}
+
+// Unlike `split_chars`, `split_graphemes` keeps the snowflake and its
+// variation selector together.  Only built with the `unicode` feature.
+#[test]
+#[cfg(feature = "unicode")]
+fn test_split_graphemes() {
+    assert_eq!(
+        split_graphemes("abcfö❄️").collect::<Vec<_>>(),
+        vec!["a", "b", "c", "f", "ö", "❄️"]
+    );
+}
+
+// Snapshots the raw ops of a line diff in which the middle line changed.
+#[test]
+fn test_captured_ops() {
+    let diff = TextDiff::from_lines(
+        "Hello World\nsome stuff here\nsome more stuff here\n",
+        "Hello World\nsome amazing stuff here\nsome more stuff here\n",
+    );
+    insta::assert_debug_snapshot!(&diff.ops());
+}
+
+// Snapshots the rendered unified diff (header included) of a line diff in
+// which the middle line changed.
+#[test]
+fn test_unified_diff() {
+    let diff = TextDiff::from_lines(
+        "Hello World\nsome stuff here\nsome more stuff here\n",
+        "Hello World\nsome amazing stuff here\nsome more stuff here\n",
+    );
+    // line diffs turn the newline flag on by default
+    assert_eq!(diff.newline_terminated(), true);
+    insta::assert_snapshot!(&diff
+        .unified_diff()
+        .context_radius(3)
+        .header("old", "new")
+        .to_string());
+}
+
+// Snapshots the changes all ops of a line diff expand to.  Every line is
+// newline terminated here, so no virtual changes are expected.
+#[test]
+fn test_line_ops() {
+    let diff = TextDiff::from_lines(
+        "Hello World\nsome stuff here\nsome more stuff here\n",
+        "Hello World\nsome amazing stuff here\nsome more stuff here\n",
+    );
+    assert_eq!(diff.newline_terminated(), true);
+    let changes = diff
+        .ops()
+        .iter()
+        .flat_map(|op| diff.iter_changes(op))
+        .collect::<Vec<_>>();
+    insta::assert_debug_snapshot!(&changes);
+}
+
+// The old text ends without a newline, so a virtual newline change has to
+// show up after the deletion of its last line.
+#[test]
+fn test_virtual_newlines() {
+    let diff = TextDiff::from_lines("a\nb", "a\nc\n");
+    assert_eq!(diff.newline_terminated(), true);
+    let changes = diff
+        .ops()
+        .iter()
+        .flat_map(|op| diff.iter_changes(op))
+        .collect::<Vec<_>>();
+    insta::assert_debug_snapshot!(&changes);
+}
+
+// Snapshots the raw ops of a character level diff.
+#[test]
+fn test_char_diff() {
+    let diff = TextDiff::from_chars("Hello World", "Hallo Welt");
+    insta::assert_debug_snapshot!(diff.ops());
+}
+
+// Three of four characters match on both sides (2 * 3 / 8); two empty
+// inputs count as a complete match.
+#[test]
+fn test_ratio() {
+    let diff = TextDiff::from_chars("abcd", "bcde");
+    assert_eq!(diff.ratio(), 0.75);
+    let diff = TextDiff::from_chars("", "");
+    assert_eq!(diff.ratio(), 1.0);
+}
+
+// Matches come back best first; in the second case fewer than the requested
+// five candidates reach the cutoff, and the expected order shows equally
+// rated ones sorted lexicographically.
+#[test]
+fn test_get_close_matches() {
+    let matches = get_close_matches("appel", &["ape", "apple", "peach", "puppy"][..], 3, 0.6);
+    assert_eq!(matches, vec!["apple", "ape"]);
+    let matches = get_close_matches(
+        "hulo",
+        &[
+            "hi", "hulu", "hali", "hoho", "amaz", "zulo", "blah", "hopp", "uulo", "aulo",
+        ][..],
+        5,
+        0.7,
+    );
+    assert_eq!(matches, vec!["aulo", "hulu", "uulo", "zulo"]);
+}
--- a/src/text/snapshots/similartextcaptured_ops.snap
+++ b/src/text/snapshots/similartextcaptured_ops.snap
@ -0,0 +1,22 @@
+---
+source: src/text/mod.rs
+expression: "&diff.ops()"
+---
+[
+    Equal {
+        old_index: 0,
+        new_index: 0,
+        len: 1,
+    },
+    Replace {
+        old_index: 1,
+        old_len: 1,
+        new_index: 1,
+        new_len: 1,
+    },
+    Equal {
+        old_index: 2,
+        new_index: 2,
+        len: 1,
+    },
+]
--- a/src/text/snapshots/similartextchar_diff.snap
+++ b/src/text/snapshots/similartextchar_diff.snap
@ -0,0 +1,39 @@
+---
+source: src/text/mod.rs
+expression: diff.ops()
+---
+[
+    Equal {
+        old_index: 0,
+        new_index: 0,
+        len: 1,
+    },
+    Replace {
+        old_index: 1,
+        old_len: 1,
+        new_index: 1,
+        new_len: 1,
+    },
+    Equal {
+        old_index: 2,
+        new_index: 2,
+        len: 5,
+    },
+    Replace {
+        old_index: 7,
+        old_len: 2,
+        new_index: 7,
+        new_len: 1,
+    },
+    Equal {
+        old_index: 9,
+        new_index: 8,
+        len: 1,
+    },
+    Replace {
+        old_index: 10,
+        old_len: 1,
+        new_index: 9,
+        new_len: 1,
+    },
+]
--- a/src/text/snapshots/similartextline_ops.snap
+++ b/src/text/snapshots/similartextline_ops.snap
@ -0,0 +1,42 @@
+---
+source: src/text/mod.rs
+expression: "&changes"
+---
+[
+    Change {
+        tag: Equal,
+        old_index: Some(
+            0,
+        ),
+        new_index: Some(
+            0,
+        ),
+        value: "Hello World\n",
+    },
+    Change {
+        tag: Delete,
+        old_index: Some(
+            1,
+        ),
+        new_index: None,
+        value: "some stuff here\n",
+    },
+    Change {
+        tag: Insert,
+        old_index: None,
+        new_index: Some(
+            1,
+        ),
+        value: "some amazing stuff here\n",
+    },
+    Change {
+        tag: Equal,
+        old_index: Some(
+            2,
+        ),
+        new_index: Some(
+            2,
+        ),
+        value: "some more stuff here\n",
+    },
+]
--- a/src/text/snapshots/similartextunified_diff.snap
+++ b/src/text/snapshots/similartextunified_diff.snap
@ -0,0 +1,12 @@
+---
+source: src/text/mod.rs
+expression: "&diff.unified_diff().context_radius(3).header(\"old\", \"new\").to_string()"
+---
+--- old
+++ new
+@@ -0 +2 @@
+ Hello World
+-some stuff here
+some amazing stuff here
+ some more stuff here
+
--- a/src/text/snapshots/similartextvirtual_newlines.snap
+++ b/src/text/snapshots/similartextvirtual_newlines.snap
@ -0,0 +1,38 @@
+---
+source: src/text/mod.rs
+expression: "&changes"
+---
+[
+    Change {
+        tag: Equal,
+        old_index: Some(
+            0,
+        ),
+        new_index: Some(
+            0,
+        ),
+        value: "a\n",
+    },
+    Change {
+        tag: Delete,
+        old_index: Some(
+            1,
+        ),
+        new_index: None,
+        value: "b",
+    },
+    Change {
+        tag: Equal,
+        old_index: None,
+        new_index: None,
+        value: "\n",
+    },
+    Change {
+        tag: Insert,
+        old_index: None,
+        new_index: Some(
+            1,
+        ),
+        value: "c\n",
+    },
+]
--- a/src/text/udiff.rs
+++ b/src/text/udiff.rs
@ -0,0 +1,220 @@
+//! This module provides unified diff functionality.
+//!
+//! This module is available for as long as the `text` feature is enabled which
+//! is enabled by default.
+//!
+//! ```rust
+//! use similar::text::TextDiff;
+//! # let old_text = "";
+//! # let new_text = "";
+//! let text_diff = TextDiff::from_lines(old_text, new_text);
+//! print!("{}", text_diff
+//!     .unified_diff()
+//!     .context_radius(10)
+//!     .header("old_file", "new_file"));
+//! ```
+
+use std::fmt;
+use std::ops::Range;
+
+use crate::algorithms::{Algorithm, DiffOp};
+use crate::text::{Change, ChangeTag, TextDiff};
+
+/// Half open, zero based range of items on one side of a hunk.
+#[derive(Copy, Clone, Debug)]
+struct UnifiedDiffHunkRange(usize, usize);
+
+impl UnifiedDiffHunkRange {
+    fn new(range: Range<usize>) -> UnifiedDiffHunkRange {
+        UnifiedDiffHunkRange(range.start, range.end)
+    }
+
+    fn start(&self) -> usize {
+        self.0
+    }
+
+    fn end(&self) -> usize {
+        self.1
+    }
+}
+
+impl fmt::Display for UnifiedDiffHunkRange {
+    /// Writes the range the way unified diff hunk headers expect it:
+    /// `start` for a single line and `start,len` otherwise.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // line numbers in a unified diff start at one, the stored range is
+        // zero based
+        let mut beginning = self.start() + 1;
+        let len = self.end().saturating_sub(self.start());
+        if len == 1 {
+            write!(f, "{}", beginning)
+        } else {
+            if len == 0 {
+                // empty ranges begin at line just before the range.  This
+                // cannot underflow because of the `+ 1` above.
+                beginning -= 1;
+            }
+            write!(f, "{},{}", beginning, len)
+        }
+    }
+}
+
+/// Unified diff hunk header formatter.
+pub struct UnifiedHunkHeader {
+    old_range: UnifiedDiffHunkRange,
+    new_range: UnifiedDiffHunkRange,
+}
+
+impl UnifiedHunkHeader {
+    /// Creates a hunk header from a (non empty) slice of diff ops.
+    ///
+    /// On both sides the header spans from the start of the first op to the
+    /// end of the last op.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `ops` is empty.
+    pub fn new(ops: &[DiffOp]) -> UnifiedHunkHeader {
+        let first = &ops[0];
+        let last = &ops[ops.len() - 1];
+        let old_span = first.old_range().start..last.old_range().end;
+        let new_span = first.new_range().start..last.new_range().end;
+        UnifiedHunkHeader {
+            old_range: UnifiedDiffHunkRange::new(old_span),
+            new_range: UnifiedDiffHunkRange::new(new_span),
+        }
+    }
+}
+
+impl fmt::Display for UnifiedHunkHeader {
+    /// Writes the `@@ -old +new @@` line without a trailing newline.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "@@ -{} +{} @@", &self.old_range, &self.new_range)
+    }
+}
+
+/// Unified diff formatter.
+///
+/// The `Display` implementation renders a unified diff.  Created through
+/// [`TextDiff::unified_diff`] or [`UnifiedDiff::from_text_diff`].
+pub struct UnifiedDiff<'diff, 'old, 'new, 'bufs> {
+    // the diff that gets rendered
+    diff: &'diff TextDiff<'old, 'new, 'bufs>,
+    // passed to `TextDiff::grouped_ops` when the hunks are formed
+    context_radius: usize,
+    // old and new file name for the `---` / `+++` lines, if wanted
+    header: Option<(String, String)>,
+}
+
+impl<'diff, 'old, 'new, 'bufs> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
+    /// Creates a formatter from a text diff object.
+    ///
+    /// The formatter starts out with a context radius of `5` and without
+    /// a header.
+    pub fn from_text_diff(diff: &'diff TextDiff<'old, 'new, 'bufs>) -> Self {
+        Self {
+            header: None,
+            context_radius: 5,
+            diff,
+        }
+    }
+
+    /// Changes the context radius.  Defaults to `5`.
+    pub fn context_radius(&mut self, n: usize) -> &mut Self {
+        self.context_radius = n;
+        self
+    }
+
+    /// Sets a header to the diff.
+    ///
+    /// `a` is written after `---` and `b` after `+++`.
+    pub fn header(&mut self, a: &str, b: &str) -> &mut Self {
+        let names = (a.to_string(), b.to_string());
+        self.header = Some(names);
+        self
+    }
+
+    /// Iterates over all hunks as configured.
+    ///
+    /// Groups of ops that turn out empty do not produce a hunk.
+    pub fn iter_hunks(&self) -> impl Iterator<Item = UnifiedDiffHunk<'diff, 'old, 'new, 'bufs>> {
+        let diff = self.diff;
+        let groups = diff.grouped_ops(self.context_radius);
+        groups.into_iter().filter_map(move |ops| {
+            if ops.is_empty() {
+                None
+            } else {
+                Some(UnifiedDiffHunk::new(ops, diff))
+            }
+        })
+    }
+
+    /// Like [`UnifiedDiff::header`] but leaves the header untouched when
+    /// `None` is passed.
+    fn header_opt(&mut self, header: Option<(&str, &str)>) -> &mut Self {
+        match header {
+            Some((a, b)) => self.header(a, b),
+            None => self,
+        }
+    }
+}
+
+/// Unified diff hunk formatter.
+///
+/// The `Display` implementation renders a single hunk of a unified diff.
+pub struct UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
+    // the diff the ops belong to; needed to look up the changed items
+    diff: &'diff TextDiff<'old, 'new, 'bufs>,
+    // the operations that make up this hunk
+    ops: Vec<DiffOp>,
+}
+
+impl<'diff, 'old, 'new, 'bufs> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
+    /// Creates a new hunk for some operations.
+    ///
+    /// `ops` are expected to be operations of `diff`.
+    pub fn new(
+        ops: Vec<DiffOp>,
+        diff: &'diff TextDiff<'old, 'new, 'bufs>,
+    ) -> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
+        UnifiedDiffHunk { diff, ops }
+    }
+
+    /// Returns the header for the hunk.
+    ///
+    /// The header is calculated from the hunk's operations on every call.
+    pub fn header(&self) -> UnifiedHunkHeader {
+        UnifiedHunkHeader::new(&self.ops)
+    }
+
+    /// Returns all operations in the hunk.
+    pub fn ops(&self) -> &[DiffOp] {
+        &self.ops
+    }
+
+    /// Iterates over all changes in a hunk.
+    ///
+    /// This chains [`TextDiff::iter_changes`] over all operations of the
+    /// hunk, in order.
+    pub fn iter_changes(&self) -> impl Iterator<Item = Change<'_>> + '_ {
+        // unclear why this needs Box::new here.  It seems to infer some really
+        // odd lifetimes I can't figure out how to work with.
+        (Box::new(
+            self.ops()
+                .iter()
+                .flat_map(move |op| self.diff.iter_changes(op)),
+        )) as Box<dyn Iterator<Item = _>>
+    }
+}
+
+impl<'diff, 'old, 'new, 'bufs> fmt::Display for UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
+    /// Renders the hunk header followed by one prefixed line per change.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // items that do not bring their own newline need one appended
+        let nl = if self.diff.newline_terminated() {
+            ""
+        } else {
+            "\n"
+        };
+        writeln!(f, "{}", self.header())?;
+        for change in self.iter_changes() {
+            if change.is_virtual() {
+                // a virtual newline only terminates the line written just
+                // before it.  Giving it a prefix of its own would leave a
+                // stray character at the end of that line.
+                write!(f, "{}", change.value())?;
+                continue;
+            }
+            let marker = match change.tag() {
+                ChangeTag::Equal => ' ',
+                ChangeTag::Delete => '-',
+                ChangeTag::Insert => '+',
+            };
+            write!(f, "{}{}{}", marker, change.value(), nl)?;
+        }
+        Ok(())
+    }
+}
+
+impl<'diff, 'old, 'new, 'bufs> fmt::Display for UnifiedDiff<'diff, 'old, 'new, 'bufs> {
+    /// Renders all hunks.  A configured header is written once, right
+    /// before the first hunk; a diff without hunks renders as nothing.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        for (idx, hunk) in self.iter_hunks().enumerate() {
+            if idx == 0 {
+                if let Some((old_file, new_file)) = self.header.as_ref() {
+                    writeln!(f, "--- {}", old_file)?;
+                    writeln!(f, "+++ {}", new_file)?;
+                }
+            }
+            write!(f, "{}", hunk)?;
+        }
+        Ok(())
+    }
+}
+
+/// Quick way to get a unified diff as string.
+///
+/// Diffs `old` against `new` line by line with the algorithm `alg` and
+/// renders the result with a context radius of `n`.  If `header` is given
+/// its two names are used for the `---` and `+++` lines.
+pub fn unified_diff<'old, 'new>(
+    alg: Algorithm,
+    old: &'old str,
+    new: &'new str,
+    n: usize,
+    header: Option<(&str, &str)>,
+) -> String {
+    let text_diff = TextDiff::configure().algorithm(alg).diff_lines(old, new);
+    let mut formatter = text_diff.unified_diff();
+    formatter.context_radius(n).header_opt(header);
+    formatter.to_string()
+}