Add support for byte diffing (#7)

Restructured text diffing to also support bstr
2021-02-02 14:16:00 +01:00 · 2021-02-02 14:16:00 +01:00 · 4b85e70f91
commit 4b85e70f91
parent e53427b56f
11 changed files with 775 additions and 307 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -5,6 +5,8 @@ All notable changes to similar are documented here.
 ## 0.6.0

 * Add `get_diff_ratio`.
+* Add support for byte diffing and change the text interface to abstract
+  over `DiffableStr`.

 ## 0.5.0

--- a/Cargo.toml
+++ b/Cargo.toml
@ -19,7 +19,11 @@ all-features = true
 default = ["text"]
 text = []
 inline = ["unicode"]
-unicode = ["text", "unicode-segmentation"]
+
+# this annoyingly currently also turns on bstr and not just bstr/unicode
+# unclear if this is fixable
+unicode = ["text", "unicode-segmentation", "bstr/unicode"]
+bytes = ["bstr", "text"]

 [dev-dependencies]
 insta = "1.5.2"
@ -27,6 +31,7 @@ console = "0.14.0"

 [dependencies]
 unicode-segmentation = { version = "1.7.1", optional = true }
+bstr = { version = "0.2.14", optional = true, default-features = false }

 [[example]]
 name = "terminal"
@ -34,11 +39,11 @@ required-features = ["text"]

 [[example]]
 name = "terminal-inline"
-required-features = ["text", "inline"]
+required-features = ["text", "inline", "bytes"]

 [[example]]
 name = "udiff"
-required-features = ["text"]
+required-features = ["text", "bytes"]

 [[example]]
 name = "close-matches"
--- a/1
+++ b/1
@ -10,6 +10,7 @@ test:
 	@cargo test
 	@cargo test --all-features
 	@cargo test --no-default-features
+	@cargo test --no-default-features --features bytes

 format:
 	@rustup component add rustfmt 2> /dev/null
--- a/examples/terminal-inline.rs
+++ b/examples/terminal-inline.rs
@ -1,5 +1,5 @@
 use std::fmt;
-use std::fs::read_to_string;
+use std::fs::read;
 use std::process::exit;

 use console::{style, Style};
@ -23,8 +23,8 @@ fn main() {
        exit(1);
    }

-    let old = read_to_string(&args[1]).unwrap();
-    let new = read_to_string(&args[2]).unwrap();
+    let old = read(&args[1]).unwrap();
+    let new = read(&args[2]).unwrap();
    let diff = TextDiff::from_lines(&old, &new);

    for (idx, group) in diff.grouped_ops(3).iter().enumerate() {
@ -44,7 +44,7 @@ fn main() {
                    style(Line(change.new_index())).dim(),
                    s.apply_to(sign).bold(),
                );
-                for &(emphasized, value) in change.values() {
+                for (emphasized, value) in change.iter_strings() {
                    if emphasized {
                        print!("{}", s.apply_to(value).underlined().on_black());
                    } else {
--- a/examples/terminal.rs
+++ b/examples/terminal.rs
@ -14,7 +14,7 @@ fn main() {
                ChangeTag::Insert => ("+", Style::new().green()),
                ChangeTag::Equal => (" ", Style::new()),
            };
-            print!("{}{}", style.apply_to(sign).bold(), style.apply_to(change),);
+            print!("{}{}", style.apply_to(sign).bold(), style.apply_to(change));
        }
    }
 }
--- a/examples/udiff.rs
+++ b/examples/udiff.rs
@ -1,4 +1,5 @@
-use std::fs::read_to_string;
+use std::fs::read;
+use std::io;
 use std::process::exit;

 use similar::text::TextDiff;
@ -10,13 +11,14 @@ fn main() {
        exit(1);
    }

-    let old = read_to_string(&args[1]).unwrap();
-    let new = read_to_string(&args[2]).unwrap();
-    print!(
-        "{}",
-        TextDiff::from_lines(&old, &new).unified_diff().header(
+    let old = read(&args[1]).unwrap();
+    let new = read(&args[2]).unwrap();
+    TextDiff::from_lines(&old, &new)
+        .unified_diff()
+        .header(
            &args[1].as_os_str().to_string_lossy(),
-            &args[2].as_os_str().to_string_lossy()
+            &args[2].as_os_str().to_string_lossy(),
        )
-    );
+        .to_writer(io::stdout())
+        .unwrap();
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@ -43,6 +43,8 @@
 //! * `unicode`: when this feature is enabled the text diffing functionality
 //!   gains the ability to diff on a grapheme instead of character level.  This
 //!   is particularly useful when working with text containing emojis.
+//! * `bytes`: when this feature is enabled the text module gains support for
+//!   working with byte slices.
 //! * `inline`: this feature gives access to additional functionality of the
 //!   `text` module to provide inline information about which values changed
 //!   in a line diff.  This currently also enables the `unicode` feature.
--- a/src/text/abstraction.rs
+++ b/src/text/abstraction.rs
@ -0,0 +1,425 @@
+#[cfg(feature = "bytes")]
+use bstr::ByteSlice;
+
+use std::borrow::Cow;
+use std::hash::Hash;
+use std::ops::Range;
+
+/// Reference to a [`DiffableStr`].
+pub trait DiffableStrRef {
+    /// The type of the resolved [`DiffableStr`].
+    type Output: DiffableStr + ?Sized;
+
+    /// Resolves the reference.
+    fn as_diffable_str(&self) -> &Self::Output;
+}
+
+impl<T: DiffableStr + ?Sized> DiffableStrRef for T {
+    type Output = T;
+
+    fn as_diffable_str(&self) -> &T {
+        self
+    }
+}
+
+impl DiffableStrRef for String {
+    type Output = str;
+
+    fn as_diffable_str(&self) -> &str {
+        self.as_str()
+    }
+}
+
+impl<'a, T: DiffableStr + ?Sized> DiffableStrRef for Cow<'a, T> {
+    type Output = T;
+
+    fn as_diffable_str(&self) -> &T {
+        &self
+    }
+}
+
+#[cfg(feature = "bytes")]
+impl DiffableStrRef for Vec<u8> {
+    type Output = [u8];
+
+    fn as_diffable_str(&self) -> &[u8] {
+        self.as_slice()
+    }
+}
+
+/// All supported diffable strings.
+///
+/// The text module can work with different types of strings depending
+/// on how the crate is compiled.  Out of the box `&str` is always supported
+/// but with the `bytes` feature one can also work with `[u8]` slices for
+/// as long as they are ASCII compatible.
+pub trait DiffableStr: Hash + PartialEq + PartialOrd + Ord + Eq + ToOwned {
+    /// Splits the value into lines with the trailing newlines attached.
+    fn split_lines(&self) -> Vec<&Self>;
+
+    /// Splits the value into alternating runs of non-newline and newline characters.
+    fn split_lines_and_newlines(&self) -> Vec<&Self>;
+
+    /// Tokenizes into words.
+    fn split_words(&self) -> Vec<&Self>;
+
+    /// Splits the input into characters.
+    fn split_chars(&self) -> Vec<&Self>;
+
+    /// Splits into unicode words.
+    #[cfg(feature = "unicode")]
+    fn split_unicode_words(&self) -> Vec<&Self>;
+
+    /// Splits into unicode graphemes.
+    #[cfg(feature = "unicode")]
+    fn split_graphemes(&self) -> Vec<&Self>;
+
+    /// Returns the value as a string slice if it is valid utf-8.
+    fn as_str(&self) -> Option<&str>;
+
+    /// Decodes the string (potentially) lossy.
+    fn as_str_lossy(&self) -> Cow<'_, str>;
+
+    /// Checks if the string ends in a newline.
+    fn ends_with_newline(&self) -> bool;
+
+    /// The length of the string.
+    fn len(&self) -> usize;
+
+    /// Slices the string.
+    fn slice(&self, rng: Range<usize>) -> &Self;
+
+    /// Returns the string as a slice of raw bytes.
+    fn as_bytes(&self) -> &[u8];
+}
+
+impl DiffableStr for str {
+    fn split_lines(&self) -> Vec<&Self> {
+        let mut iter = self.char_indices().peekable();
+        let mut last_pos = 0;
+        let mut lines = vec![];
+
+        while let Some((idx, c)) = iter.next() {
+            if c == '\r' {
+                if iter.peek().map_or(false, |x| x.1 == '\n') {
+                    lines.push(&self[last_pos..=idx + 1]);
+                    iter.next();
+                    last_pos = idx + 2;
+                } else {
+                    lines.push(&self[last_pos..=idx]);
+                    last_pos = idx + 1;
+                }
+            } else if c == '\n' {
+                lines.push(&self[last_pos..=idx]);
+                last_pos = idx + 1;
+            }
+        }
+
+        if last_pos < self.len() {
+            lines.push(&self[last_pos..]);
+        }
+
+        lines
+    }
+
+    fn split_lines_and_newlines(&self) -> Vec<&Self> {
+        let mut rv = vec![];
+        let mut iter = self.char_indices().peekable();
+
+        while let Some((idx, c)) = iter.next() {
+            let is_newline = c == '\r' || c == '\n';
+            let start = idx;
+            let mut end = idx + c.len_utf8();
+            while let Some(&(_, next_char)) = iter.peek() {
+                if (next_char == '\r' || next_char == '\n') != is_newline {
+                    break;
+                }
+                iter.next();
+                end += next_char.len_utf8();
+            }
+            rv.push(&self[start..end]);
+        }
+
+        rv
+    }
+
+    fn split_words(&self) -> Vec<&Self> {
+        let mut iter = self.char_indices().peekable();
+        let mut rv = vec![];
+
+        while let Some((idx, c)) = iter.next() {
+            let is_whitespace = c.is_whitespace();
+            let start = idx;
+            let mut end = idx + c.len_utf8();
+            while let Some(&(_, next_char)) = iter.peek() {
+                if next_char.is_whitespace() != is_whitespace {
+                    break;
+                }
+                iter.next();
+                end += next_char.len_utf8();
+            }
+            rv.push(&self[start..end]);
+        }
+
+        rv
+    }
+
+    fn split_chars(&self) -> Vec<&Self> {
+        self.char_indices()
+            .map(move |(i, c)| &self[i..i + c.len_utf8()])
+            .collect()
+    }
+
+    #[cfg(feature = "unicode")]
+    fn split_unicode_words(&self) -> Vec<&Self> {
+        unicode_segmentation::UnicodeSegmentation::split_word_bounds(self).collect()
+    }
+
+    #[cfg(feature = "unicode")]
+    fn split_graphemes(&self) -> Vec<&Self> {
+        unicode_segmentation::UnicodeSegmentation::graphemes(self, true).collect()
+    }
+
+    fn as_str(&self) -> Option<&str> {
+        Some(self)
+    }
+
+    fn as_str_lossy(&self) -> Cow<'_, str> {
+        Cow::Borrowed(self)
+    }
+
+    fn ends_with_newline(&self) -> bool {
+        self.ends_with(&['\r', '\n'][..])
+    }
+
+    fn len(&self) -> usize {
+        str::len(self)
+    }
+
+    fn slice(&self, rng: Range<usize>) -> &Self {
+        &self[rng]
+    }
+
+    fn as_bytes(&self) -> &[u8] {
+        str::as_bytes(self)
+    }
+}
+
+#[cfg(feature = "bytes")]
+impl DiffableStr for [u8] {
+    fn split_lines(&self) -> Vec<&Self> {
+        let mut iter = self.char_indices().peekable();
+        let mut last_pos = 0;
+        let mut lines = vec![];
+
+        while let Some((_, end, c)) = iter.next() {
+            if c == '\r' {
+                if iter.peek().map_or(false, |x| x.2 == '\n') {
+                    lines.push(&self[last_pos..end + 1]);
+                    iter.next();
+                    last_pos = end + 1;
+                } else {
+                    lines.push(&self[last_pos..end]);
+                    last_pos = end;
+                }
+            } else if c == '\n' {
+                lines.push(&self[last_pos..end]);
+                last_pos = end;
+            }
+        }
+
+        if last_pos < self.len() {
+            lines.push(&self[last_pos..]);
+        }
+
+        lines
+    }
+
+    fn split_lines_and_newlines(&self) -> Vec<&Self> {
+        let mut rv = vec![];
+        let mut iter = self.char_indices().peekable();
+
+        while let Some((start, mut end, c)) = iter.next() {
+            let is_newline = c == '\r' || c == '\n';
+            while let Some(&(_, new_end, next_char)) = iter.peek() {
+                if (next_char == '\r' || next_char == '\n') != is_newline {
+                    break;
+                }
+                iter.next();
+                end = new_end;
+            }
+            rv.push(&self[start..end]);
+        }
+
+        rv
+    }
+
+    fn split_words(&self) -> Vec<&Self> {
+        let mut iter = self.char_indices().peekable();
+        let mut rv = vec![];
+
+        while let Some((start, mut end, c)) = iter.next() {
+            let is_whitespace = c.is_whitespace();
+            while let Some(&(_, new_end, next_char)) = iter.peek() {
+                if next_char.is_whitespace() != is_whitespace {
+                    break;
+                }
+                iter.next();
+                end = new_end;
+            }
+            rv.push(&self[start..end]);
+        }
+
+        rv
+    }
+
+    #[cfg(feature = "unicode")]
+    fn split_unicode_words(&self) -> Vec<&Self> {
+        self.words_with_breaks().map(|x| x.as_bytes()).collect()
+    }
+
+    #[cfg(feature = "unicode")]
+    fn split_graphemes(&self) -> Vec<&Self> {
+        self.graphemes().map(|x| x.as_bytes()).collect()
+    }
+
+    fn split_chars(&self) -> Vec<&Self> {
+        self.char_indices()
+            .map(move |(start, end, _)| &self[start..end])
+            .collect()
+    }
+
+    fn as_str(&self) -> Option<&str> {
+        std::str::from_utf8(self).ok()
+    }
+
+    fn as_str_lossy(&self) -> Cow<'_, str> {
+        String::from_utf8_lossy(self)
+    }
+
+    fn ends_with_newline(&self) -> bool {
+        matches!(self.last_byte(), Some(b'\r') | Some(b'\n'))
+    }
+
+    fn len(&self) -> usize {
+        <[u8]>::len(self)
+    }
+
+    fn slice(&self, rng: Range<usize>) -> &Self {
+        &self[rng]
+    }
+
+    fn as_bytes(&self) -> &[u8] {
+        self
+    }
+}
+
+#[test]
+fn test_split_lines() {
+    assert_eq!(
+        DiffableStr::split_lines("first\nsecond\rthird\r\nfourth\nlast"),
+        vec!["first\n", "second\r", "third\r\n", "fourth\n", "last"]
+    );
+    assert_eq!(DiffableStr::split_lines("\n\n"), vec!["\n", "\n"]);
+    assert_eq!(DiffableStr::split_lines("\n"), vec!["\n"]);
+    assert!(DiffableStr::split_lines("").is_empty());
+}
+
+#[test]
+fn test_split_words() {
+    assert_eq!(
+        DiffableStr::split_words("foo    bar baz\n\n  aha"),
+        ["foo", "    ", "bar", " ", "baz", "\n\n  ", "aha"]
+    );
+}
+
+#[test]
+fn test_split_chars() {
+    assert_eq!(
+        DiffableStr::split_chars("abcfö❄️"),
+        vec!["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"]
+    );
+}
+
+#[test]
+#[cfg(feature = "unicode")]
+fn test_split_graphemes() {
+    assert_eq!(
+        DiffableStr::split_graphemes("abcfö❄️"),
+        vec!["a", "b", "c", "f", "ö", "❄️"]
+    );
+}
+
+#[test]
+#[cfg(feature = "bytes")]
+fn test_split_lines_bytes() {
+    assert_eq!(
+        DiffableStr::split_lines("first\nsecond\rthird\r\nfourth\nlast".as_bytes()),
+        vec![
+            "first\n".as_bytes(),
+            "second\r".as_bytes(),
+            "third\r\n".as_bytes(),
+            "fourth\n".as_bytes(),
+            "last".as_bytes()
+        ]
+    );
+    assert_eq!(
+        DiffableStr::split_lines("\n\n".as_bytes()),
+        vec!["\n".as_bytes(), "\n".as_bytes()]
+    );
+    assert_eq!(
+        DiffableStr::split_lines("\n".as_bytes()),
+        vec!["\n".as_bytes()]
+    );
+    assert!(DiffableStr::split_lines("".as_bytes()).is_empty());
+}
+
+#[test]
+#[cfg(feature = "bytes")]
+fn test_split_words_bytes() {
+    assert_eq!(
+        DiffableStr::split_words("foo    bar baz\n\n  aha".as_bytes()),
+        [
+            &b"foo"[..],
+            &b"    "[..],
+            &b"bar"[..],
+            &b" "[..],
+            &b"baz"[..],
+            &b"\n\n  "[..],
+            &b"aha"[..]
+        ]
+    );
+}
+
+#[test]
+#[cfg(feature = "bytes")]
+fn test_split_chars_bytes() {
+    assert_eq!(
+        DiffableStr::split_chars("abcfö❄️".as_bytes()),
+        vec![
+            &b"a"[..],
+            &b"b"[..],
+            &b"c"[..],
+            &b"f"[..],
+            "ö".as_bytes(),
+            "❄".as_bytes(),
+            "\u{fe0f}".as_bytes()
+        ]
+    );
+}
+
+#[test]
+#[cfg(all(feature = "bytes", feature = "unicode"))]
+fn test_split_graphemes_bytes() {
+    assert_eq!(
+        DiffableStr::split_graphemes("abcfö❄️".as_bytes()),
+        vec![
+            &b"a"[..],
+            &b"b"[..],
+            &b"c"[..],
+            &b"f"[..],
+            "ö".as_bytes(),
+            "❄️".as_bytes()
+        ]
+    );
+}
--- a/src/text/inline.rs
+++ b/src/text/inline.rs
@ -1,24 +1,23 @@
 #![cfg(feature = "inline")]
+use std::borrow::Cow;
 use std::fmt;

 use crate::algorithms::{capture_diff, get_diff_ratio, Algorithm, DiffOp, DiffTag};
-use crate::text::{Change, ChangeTag, TextDiff};
-
-use super::split_unicode_words;
+use crate::text::{Change, ChangeTag, DiffableStr, TextDiff};

 use std::ops::Index;

-struct MultiLookup<'bufs, 's> {
-    strings: &'bufs [&'s str],
-    seqs: Vec<(&'s str, usize, usize)>,
+struct MultiLookup<'bufs, 's, T: DiffableStr + ?Sized> {
+    strings: &'bufs [&'s T],
+    seqs: Vec<(&'s T, usize, usize)>,
 }

-impl<'bufs, 's> MultiLookup<'bufs, 's> {
-    fn new(strings: &'bufs [&'s str]) -> MultiLookup<'bufs, 's> {
+impl<'bufs, 's, T: DiffableStr + ?Sized> MultiLookup<'bufs, 's, T> {
+    fn new(strings: &'bufs [&'s T]) -> MultiLookup<'bufs, 's, T> {
        let mut seqs = Vec::new();
        for (string_idx, string) in strings.iter().enumerate() {
            let mut offset = 0;
-            for word in split_unicode_words(string) {
+            for word in string.split_unicode_words() {
                seqs.push((word, string_idx, offset));
                offset += word.len();
            }
@ -30,7 +29,7 @@ impl<'bufs, 's> MultiLookup<'bufs, 's> {
        self.seqs.len()
    }

-    fn get_original_slices(&self, idx: usize, len: usize) -> Vec<(usize, &'s str)> {
+    fn get_original_slices(&self, idx: usize, len: usize) -> Vec<(usize, &'s T)> {
        let mut last = None;
        let mut rv = Vec::new();

@ -44,7 +43,8 @@ impl<'bufs, 's> MultiLookup<'bufs, 's> {
                    } else {
                        rv.push((
                            last_str_idx,
-                            &self.strings[last_str_idx][start_char_idx..start_char_idx + last_len],
+                            self.strings[last_str_idx]
+                                .slice(start_char_idx..start_char_idx + last_len),
                        ));
                        Some((str_idx, char_idx, s.len()))
                    }
@ -55,7 +55,7 @@ impl<'bufs, 's> MultiLookup<'bufs, 's> {
        if let Some((str_idx, start_char_idx, len)) = last {
            rv.push((
                str_idx,
-                &self.strings[str_idx][start_char_idx..start_char_idx + len],
+                self.strings[str_idx].slice(start_char_idx..start_char_idx + len),
            ));
        }

@ -63,43 +63,26 @@ impl<'bufs, 's> MultiLookup<'bufs, 's> {
    }
 }

-impl<'bufs, 's> Index<usize> for MultiLookup<'bufs, 's> {
-    type Output = str;
+impl<'bufs, 's, T: DiffableStr + ?Sized> Index<usize> for MultiLookup<'bufs, 's, T> {
+    type Output = T;

    fn index(&self, index: usize) -> &Self::Output {
        &self.seqs[index].0
    }
 }

-fn partition_newlines(s: &str) -> impl Iterator<Item = (&str, bool)> {
-    let mut iter = s.char_indices().peekable();
-
-    std::iter::from_fn(move || {
-        if let Some((idx, c)) = iter.next() {
-            let is_newline = c == '\r' || c == '\n';
-            let start = idx;
-            let mut end = idx + c.len_utf8();
-            while let Some(&(_, next_char)) = iter.peek() {
-                if (next_char == '\r' || next_char == '\n') != is_newline {
-                    break;
-                }
-                iter.next();
-                end += next_char.len_utf8();
-            }
-            Some((&s[start..end], is_newline))
-        } else {
-            None
-        }
-    })
-}
-
-fn push_values<'s>(v: &mut Vec<Vec<(bool, &'s str)>>, idx: usize, emphasized: bool, s: &'s str) {
+fn push_values<'s, T: DiffableStr + ?Sized>(
+    v: &mut Vec<Vec<(bool, &'s T)>>,
+    idx: usize,
+    emphasized: bool,
+    s: &'s T,
+) {
    v.resize_with(v.len().max(idx + 1), Vec::new);
    // newlines cause all kinds of wacky stuff if they end up highlighted.
    // because of this we want to unemphasize all newlines we encounter.
    if emphasized {
-        for (seg, is_nl) in partition_newlines(s) {
-            v[idx].push((!is_nl, seg));
+        for seg in s.split_lines_and_newlines() {
+            v[idx].push((!seg.ends_with_newline(), seg));
        }
    } else {
        v[idx].push((false, s));
@ -110,15 +93,15 @@ fn push_values<'s>(v: &mut Vec<Vec<(bool, &'s str)>>, idx: usize, emphasized: bo
 ///
 /// This is like [`Change`] but with inline highlight info.
 #[derive(Debug, PartialEq, Eq, Hash, Clone, Ord, PartialOrd)]
-pub struct InlineChange<'s> {
+pub struct InlineChange<'s, T: DiffableStr + ?Sized> {
    tag: ChangeTag,
    old_index: Option<usize>,
    new_index: Option<usize>,
-    values: Vec<(bool, &'s str)>,
+    values: Vec<(bool, &'s T)>,
    missing_newline: bool,
 }

-impl<'s> InlineChange<'s> {
+impl<'s, T: DiffableStr + ?Sized> InlineChange<'s, T> {
    /// Returns the change tag.
    pub fn tag(&self) -> ChangeTag {
        self.tag
@ -135,10 +118,23 @@ impl<'s> InlineChange<'s> {
    }

    /// Returns the changed values.
-    pub fn values(&self) -> &[(bool, &'s str)] {
+    ///
+    /// Each item is a tuple in the form `(emphasized, value)` where `emphasized`
+    /// is true if it should be highlighted as an inline diff.
+    pub fn values(&self) -> &[(bool, &'s T)] {
        &self.values
    }

+    /// Iterates over all (potentially lossy) utf-8 decoded values.
+    ///
+    /// Each item is a tuple in the form `(emphasized, value)` where `emphasized`
+    /// is true if it should be highlighted as an inline diff.
+    pub fn iter_strings(&self) -> impl Iterator<Item = (bool, Cow<'_, str>)> {
+        self.values()
+            .iter()
+            .map(|(emphasized, raw_value)| (*emphasized, raw_value.as_str_lossy()))
+    }
+
    /// Returns `true` if this change needs to be followed up by a
    /// missing newline.
    pub fn missing_newline(&self) -> bool {
@ -146,8 +142,8 @@ impl<'s> InlineChange<'s> {
    }
 }

-impl<'s> From<Change<'s>> for InlineChange<'s> {
-    fn from(change: Change<'s>) -> InlineChange<'s> {
+impl<'s, T: DiffableStr + ?Sized> From<Change<'s, T>> for InlineChange<'s, T> {
+    fn from(change: Change<'s, T>) -> InlineChange<'s, T> {
        InlineChange {
            tag: change.tag(),
            old_index: change.old_index(),
@ -158,9 +154,9 @@ impl<'s> From<Change<'s>> for InlineChange<'s> {
    }
 }

-impl<'s> fmt::Display for InlineChange<'s> {
+impl<'s, T: DiffableStr + ?Sized> fmt::Display for InlineChange<'s, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        for &(emphasized, value) in &self.values {
+        for (emphasized, value) in self.iter_strings() {
            let marker = match (emphasized, self.tag) {
                (false, _) | (true, ChangeTag::Equal) => "",
                (true, ChangeTag::Delete) => "-",
@ -175,10 +171,13 @@ impl<'s> fmt::Display for InlineChange<'s> {
    }
 }

-pub(crate) fn iter_inline_changes<'diff>(
-    diff: &'diff TextDiff,
+pub(crate) fn iter_inline_changes<'diff, T>(
+    diff: &'diff TextDiff<'_, '_, '_, T>,
    op: &DiffOp,
-) -> impl Iterator<Item = InlineChange<'diff>> {
+) -> impl Iterator<Item = InlineChange<'diff, T>>
+where
+    T: DiffableStr + ?Sized,
+{
    let newline_terminated = diff.newline_terminated;
    let (tag, old_range, new_range) = op.as_tag_tuple();

@ -267,7 +266,7 @@ pub(crate) fn iter_inline_changes<'diff>(

    if newline_terminated
        && !old_slices.is_empty()
-        && !old_slices[old_slices.len() - 1].ends_with(&['\r', '\n'][..])
+        && !old_slices[old_slices.len() - 1].ends_with_newline()
    {
        if let Some(last) = rv.last_mut() {
            last.missing_newline = true;
@ -287,7 +286,7 @@ pub(crate) fn iter_inline_changes<'diff>(

    if newline_terminated
        && !new_slices.is_empty()
-        && !new_slices[new_slices.len() - 1].ends_with(&['\r', '\n'][..])
+        && !new_slices[new_slices.len() - 1].ends_with_newline()
    {
        if let Some(last) = rv.last_mut() {
            last.missing_newline = true;
--- a/src/text/mod.rs
+++ b/src/text/mod.rs
@ -11,7 +11,7 @@
 //! Text diffing is available by default but can be disabled by turning off the
 //! default features.  The feature to enable to get it back is `text`.
 //!
-//! ## Examples
+//! # Examples
 //!
 //! A super simple example for how to generate a unified diff with three lines
 //! off context around the changes:
@ -38,7 +38,7 @@
 //! }
 //! ```
 //!
-//! ## Ops vs Changes
+//! # Ops vs Changes
 //!
 //! Because very commonly two compared sequences will largely match this module
 //! splits it's functionality into two layers.  The first is inherited from the
@ -51,7 +51,7 @@
 //! Because the [`TextDiff::grouped_ops`] method can isolate clusters of changes
 //! this even works for very long files if paired with this method.
 //!
-//! ## Trailing Newlines
+//! # Trailing Newlines
 //!
 //! When working with line diffs (and unified diffs in general) there are two
 //! "philosophies" to look at lines.  One is to diff lines without their newline
@ -68,11 +68,30 @@
 //! either rendering a virtual newline at that position or to indicate it in
 //! different ways.  For instance the unified diff code will render the special
 //! `\ No newline at end of file` marker.
+//!
+//! # Bytes vs Unicode
+//!
+//! This module concerns itself with a looser definition of "text" than you would
+//! normally see in Rust.  While by default it can only operate on [`str`] types,
+//! by enabling the `bytes` feature it gains support for byte slices with some
+//! caveats.
+//!
+//! A lot of text diff functionality assumes that what is being diffed constitutes
+//! text, but in the real world it can often be challenging to ensure that this is
+//! all valid utf-8.  Because of this the crate is built so that most functionality
+//! also still works with bytes for as long as they are roughly ASCII compatible.
+//!
+//! This means you will be successful in creating a unified diff from latin1
+//! encoded bytes but if you try to do the same with EBCDIC encoded bytes you
+//! will only get garbage.
 #![cfg(feature = "text")]
 use std::borrow::Cow;
 use std::cmp::Reverse;
 use std::collections::{BinaryHeap, HashMap};
 use std::fmt;
+use std::hash::Hash;
+
+mod abstraction;

 #[cfg(feature = "inline")]
 mod inline;
@ -82,6 +101,8 @@ mod udiff;
 pub use self::inline::*;
 pub use self::udiff::*;

+pub use crate::text::abstraction::*;
+
 use crate::algorithms::{
    capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, DiffOp, DiffTag,
 };
@ -127,14 +148,14 @@ impl TextDiffConfig {
    ///
    /// This splits the text `old` and `new` into lines preserving newlines
    /// in the input.
-    pub fn diff_lines<'old, 'new, 'bufs>(
+    pub fn diff_lines<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
        &self,
-        old: &'old str,
-        new: &'new str,
-    ) -> TextDiff<'old, 'new, 'bufs> {
+        old: &'old T,
+        new: &'new T,
+    ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
        self.diff(
-            Cow::Owned(split_lines(old).collect()),
-            Cow::Owned(split_lines(new).collect()),
+            Cow::Owned(old.as_diffable_str().split_lines()),
+            Cow::Owned(new.as_diffable_str().split_lines()),
            true,
        )
    }
@ -142,14 +163,27 @@ impl TextDiffConfig {
    /// Creates a diff of words.
    ///
    /// This splits the text into words and whitespace.
-    pub fn diff_words<'old, 'new, 'bufs>(
+    pub fn diff_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
        &self,
-        old: &'old str,
-        new: &'new str,
-    ) -> TextDiff<'old, 'new, 'bufs> {
+        old: &'old T,
+        new: &'new T,
+    ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
        self.diff(
-            Cow::Owned(split_words(old).collect()),
-            Cow::Owned(split_words(new).collect()),
+            Cow::Owned(old.as_diffable_str().split_words()),
+            Cow::Owned(new.as_diffable_str().split_words()),
+            false,
+        )
+    }
+
+    /// Creates a diff of characters.
+    pub fn diff_chars<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
+        &self,
+        old: &'old T,
+        new: &'new T,
+    ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+        self.diff(
+            Cow::Owned(old.as_diffable_str().split_chars()),
+            Cow::Owned(new.as_diffable_str().split_chars()),
            false,
        )
    }
@ -162,27 +196,14 @@ impl TextDiffConfig {
    ///
    /// This requires the `unicode` feature.
    #[cfg(feature = "unicode")]
-    pub fn diff_unicode_words<'old, 'new, 'bufs>(
+    pub fn diff_unicode_words<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
        &self,
-        old: &'old str,
-        new: &'new str,
-    ) -> TextDiff<'old, 'new, 'bufs> {
+        old: &'old T,
+        new: &'new T,
+    ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
        self.diff(
-            Cow::Owned(split_unicode_words(old).collect()),
-            Cow::Owned(split_unicode_words(new).collect()),
-            false,
-        )
-    }
-
-    /// Creates a diff of characters.
-    pub fn diff_chars<'old, 'new, 'bufs>(
-        &self,
-        old: &'old str,
-        new: &'new str,
-    ) -> TextDiff<'old, 'new, 'bufs> {
-        self.diff(
-            Cow::Owned(split_chars(old).collect()),
-            Cow::Owned(split_chars(new).collect()),
+            Cow::Owned(old.as_diffable_str().split_unicode_words()),
+            Cow::Owned(new.as_diffable_str().split_unicode_words()),
            false,
        )
    }
@ -191,33 +212,33 @@ impl TextDiffConfig {
    ///
    /// This requires the `unicode` feature.
    #[cfg(feature = "unicode")]
-    pub fn diff_graphemes<'old, 'new, 'bufs>(
+    pub fn diff_graphemes<'old, 'new, 'bufs, T: DiffableStrRef + ?Sized>(
        &self,
-        old: &'old str,
-        new: &'new str,
-    ) -> TextDiff<'old, 'new, 'bufs> {
+        old: &'old T,
+        new: &'new T,
+    ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
        self.diff(
-            Cow::Owned(split_graphemes(old).collect()),
-            Cow::Owned(split_graphemes(new).collect()),
+            Cow::Owned(old.as_diffable_str().split_graphemes()),
+            Cow::Owned(new.as_diffable_str().split_graphemes()),
            false,
        )
    }

    /// Creates a diff of arbitrary slices.
-    pub fn diff_slices<'old, 'new, 'bufs>(
+    pub fn diff_slices<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
        &self,
-        old: &'bufs [&'old str],
-        new: &'bufs [&'new str],
-    ) -> TextDiff<'old, 'new, 'bufs> {
+        old: &'bufs [&'old T],
+        new: &'bufs [&'new T],
+    ) -> TextDiff<'old, 'new, 'bufs, T> {
        self.diff(Cow::Borrowed(old), Cow::Borrowed(new), false)
    }

-    fn diff<'old, 'new, 'bufs>(
+    fn diff<'old, 'new, 'bufs, T: DiffableStr + ?Sized>(
        &self,
-        old: Cow<'bufs, [&'old str]>,
-        new: Cow<'bufs, [&'new str]>,
+        old: Cow<'bufs, [&'old T]>,
+        new: Cow<'bufs, [&'new T]>,
        newline_terminated: bool,
-    ) -> TextDiff<'old, 'new, 'bufs> {
+    ) -> TextDiff<'old, 'new, 'bufs, T> {
        let ops = capture_diff_slices(self.algorithm, &old, &new);
        TextDiff {
            old,
@ -230,9 +251,9 @@ impl TextDiffConfig {
 }

 /// Captures diff op codes for textual diffs
-pub struct TextDiff<'old, 'new, 'bufs> {
-    old: Cow<'bufs, [&'old str]>,
-    new: Cow<'bufs, [&'new str]>,
+pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
+    old: Cow<'bufs, [&'old T]>,
+    new: Cow<'bufs, [&'new T]>,
    ops: Vec<DiffOp>,
    newline_terminated: bool,
    algorithm: Algorithm,
@ -255,26 +276,26 @@ pub enum ChangeTag {
 /// exists so that it's more convenient to work with textual differences as
 /// the underlying [`DiffOp`] does not know anything about strings.
 #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, Ord, PartialOrd)]
-pub struct Change<'s> {
+pub struct Change<'s, T: DiffableStr + ?Sized> {
    tag: ChangeTag,
    old_index: Option<usize>,
    new_index: Option<usize>,
-    value: &'s str,
+    value: &'s T,
    missing_newline: bool,
 }

-impl<'s> fmt::Display for Change<'s> {
+impl<'s, T: DiffableStr + ?Sized> fmt::Display for Change<'s, T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "{}{}",
-            self.value(),
+            self.as_str_lossy(),
            if self.missing_newline { "\n" } else { "" }
        )
    }
 }

-impl<'s> Change<'s> {
+impl<'s, T: DiffableStr + ?Sized> Change<'s, T> {
    /// Returns the change tag.
    pub fn tag(&self) -> ChangeTag {
        self.tag
@ -290,11 +311,21 @@ impl<'s> Change<'s> {
        self.new_index
    }

-    /// Returns the changed value.
-    pub fn value(&self) -> &'s str {
+    /// Returns the underlying changed value.
+    pub fn value(&self) -> &'s T {
        self.value
    }

+    /// Returns the value as string if it is utf-8.
+    pub fn as_str(&self) -> Option<&'s str> {
+        T::as_str(self.value)
+    }
+
+    /// Returns the value (lossy) decoded as utf-8 string.
+    pub fn as_str_lossy(&self) -> Cow<'s, str> {
+        T::as_str_lossy(self.value)
+    }
+
    /// Returns `true` if this change needs to be followed up by a
    /// missing newline.
    ///
@ -305,7 +336,7 @@ impl<'s> Change<'s> {
    }
 }

-impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
+impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs, str> {
    /// Configures a text differ before diffing.
    pub fn configure() -> TextDiffConfig {
        TextDiffConfig::default()
@ -314,15 +345,31 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
    /// Creates a diff of lines.
    ///
    /// Equivalent to `TextDiff::configure().diff_lines(old, new)`.
-    pub fn from_lines(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
-        Self::configure().diff_lines(old, new)
+    pub fn from_lines<T: DiffableStrRef + ?Sized>(
+        old: &'old T,
+        new: &'new T,
+    ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+        TextDiff::configure().diff_lines(old, new)
    }

    /// Creates a diff of words.
    ///
    /// Equivalent to `TextDiff::configure().diff_words(old, new)`.
-    pub fn from_words(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
-        Self::configure().diff_words(old, new)
+    pub fn from_words<T: DiffableStrRef + ?Sized>(
+        old: &'old T,
+        new: &'new T,
+    ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+        TextDiff::configure().diff_words(old, new)
+    }
+
+    /// Creates a diff of chars.
+    ///
+    /// Equivalent to `TextDiff::configure().diff_chars(old, new)`.
+    pub fn from_chars<T: DiffableStrRef + ?Sized>(
+        old: &'old T,
+        new: &'new T,
+    ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+        TextDiff::configure().diff_chars(old, new)
    }

    /// Creates a diff of unicode words.
@ -331,15 +378,11 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
    ///
    /// This requires the `unicode` feature.
    #[cfg(feature = "unicode")]
-    pub fn from_unicode_words(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
-        Self::configure().diff_unicode_words(old, new)
-    }
-
-    /// Creates a diff of chars.
-    ///
-    /// Equivalent to `TextDiff::configure().diff_chars(old, new)`.
-    pub fn from_chars(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
-        Self::configure().diff_chars(old, new)
+    pub fn from_unicode_words<T: DiffableStrRef + ?Sized>(
+        old: &'old T,
+        new: &'new T,
+    ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+        TextDiff::configure().diff_unicode_words(old, new)
    }

    /// Creates a diff of graphemes.
@ -348,18 +391,23 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
    ///
    /// This requires the `unicode` feature.
    #[cfg(feature = "unicode")]
-    pub fn from_graphemes(old: &'old str, new: &'new str) -> TextDiff<'old, 'new, 'bufs> {
-        Self::configure().diff_graphemes(old, new)
+    pub fn from_graphemes<T: DiffableStrRef + ?Sized>(
+        old: &'old T,
+        new: &'new T,
+    ) -> TextDiff<'old, 'new, 'bufs, T::Output> {
+        TextDiff::configure().diff_graphemes(old, new)
    }
+}

+impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'new, 'bufs, T> {
    /// Creates a diff of arbitrary slices.
    ///
    /// Equivalent to `TextDiff::configure().diff_slices(old, new)`.
    pub fn from_slices(
-        old: &'bufs [&'old str],
-        new: &'bufs [&'new str],
-    ) -> TextDiff<'old, 'new, 'bufs> {
-        Self::configure().diff_slices(old, new)
+        old: &'bufs [&'old T],
+        new: &'bufs [&'new T],
+    ) -> TextDiff<'old, 'new, 'bufs, T> {
+        TextDiff::configure().diff_slices(old, new)
    }

    /// The name of the algorithm that created the diff.
@ -376,12 +424,12 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
    }

    /// Returns all old slices.
-    pub fn old_slices(&self) -> &[&'old str] {
+    pub fn old_slices(&self) -> &[&'old T] {
        &self.old
    }

    /// Returns all new slices.
-    pub fn new_slices(&self) -> &[&'new str] {
+    pub fn new_slices(&self) -> &[&'new T] {
        &self.new
    }

@ -405,7 +453,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
    /// ways in which a change could be encoded (insert/delete vs replace), look
    /// up the value from the appropriate slice and also handle correct index
    /// handling.
-    pub fn iter_changes(&self, op: &DiffOp) -> impl Iterator<Item = Change> {
+    pub fn iter_changes(&self, op: &DiffOp) -> impl Iterator<Item = Change<'_, T>> {
        let newline_terminated = self.newline_terminated;
        let (tag, old_range, new_range) = op.as_tag_tuple();
        let mut old_index = old_range.start;
@ -426,7 +474,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
                        value: first,
                        missing_newline: newline_terminated
                            && rest.is_empty()
-                            && !first.ends_with(&['\r', '\n'][..]),
+                            && !first.ends_with_newline(),
                    })
                } else {
                    None
@ -443,7 +491,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
                        value: first,
                        missing_newline: newline_terminated
                            && rest.is_empty()
-                            && !first.ends_with(&['\r', '\n'][..]),
+                            && !first.ends_with_newline(),
                    })
                } else {
                    None
@ -460,7 +508,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
                        value: first,
                        missing_newline: newline_terminated
                            && rest.is_empty()
-                            && !first.ends_with(&['\r', '\n'][..]),
+                            && !first.ends_with_newline(),
                    })
                } else {
                    None
@ -477,7 +525,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
                        value: first,
                        missing_newline: newline_terminated
                            && rest.is_empty()
-                            && !first.ends_with(&['\r', '\n'][..]),
+                            && !first.ends_with_newline(),
                    })
                } else if let Some((&first, rest)) = new_slices.split_first() {
                    new_slices = rest;
@ -489,7 +537,7 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
                        value: first,
                        missing_newline: newline_terminated
                            && rest.is_empty()
-                            && !first.ends_with(&['\r', '\n'][..]),
+                            && !first.ends_with_newline(),
                    })
                } else {
                    None
@ -498,17 +546,6 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
        })
    }

-    /// Iterates over the changes the op expands to with inline emphasis.
-    ///
-    /// This is very similar to [`TextDiff::iter_changes`] but it performs a second
-    /// level diff on adjacent line replacements.  The exact behavior of
-    /// this function with regards to how it detects those inline changes
-    /// is currently not defined and will likely change over time.
-    #[cfg(feature = "inline")]
-    pub fn iter_inline_changes(&self, op: &DiffOp) -> impl Iterator<Item = InlineChange> {
-        iter_inline_changes(self, op)
-    }
-
    /// Returns the captured diff ops.
    pub fn ops(&self) -> &[DiffOp] {
        &self.ops
@ -522,85 +559,20 @@ impl<'old, 'new, 'bufs> TextDiff<'old, 'new, 'bufs> {
    }

    /// Utility to return a unified diff formatter.
-    pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
+    pub fn unified_diff<'diff>(&'diff self) -> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> {
        UnifiedDiff::from_text_diff(self)
    }
-}

-/// Given a string splits it into lines.
-///
-/// This operation will preserve the newline separation character at the end.
-/// It supports all common newline sequences (`\r\n`, `\n` as well as `\r`).
-fn split_lines(s: &str) -> impl Iterator<Item = &str> {
-    let mut iter = s.char_indices().peekable();
-    let mut last_pos = 0;
-
-    std::iter::from_fn(move || {
-        if let Some((idx, c)) = iter.next() {
-            let mut rv = None;
-            if c == '\r' {
-                if iter.peek().map_or(false, |x| x.1 == '\n') {
-                    rv = Some(&s[last_pos..=idx + 1]);
-                    iter.next();
-                    last_pos = idx + 2;
-                } else {
-                    rv = Some(&s[last_pos..=idx]);
-                    last_pos = idx + 1;
-                }
-            } else if c == '\n' {
-                rv = Some(&s[last_pos..=idx]);
-                last_pos = idx + 1;
-            }
-            Some(rv)
-        } else if last_pos < s.len() {
-            let tmp = &s[last_pos..];
-            last_pos = s.len();
-            Some(Some(tmp))
-        } else {
-            None
-        }
-    })
-    .flatten()
-}
-
-/// Partitions at whitespace.
-fn split_words(s: &str) -> impl Iterator<Item = &str> {
-    let mut iter = s.char_indices().peekable();
-
-    std::iter::from_fn(move || {
-        if let Some((idx, c)) = iter.next() {
-            let is_whitespace = c.is_whitespace();
-            let start = idx;
-            let mut end = idx + c.len_utf8();
-            while let Some(&(_, next_char)) = iter.peek() {
-                if next_char.is_whitespace() != is_whitespace {
-                    break;
-                }
-                iter.next();
-                end += next_char.len_utf8();
-            }
-            Some(&s[start..end])
-        } else {
-            None
-        }
-    })
-}
-
-/// Splits words according to unicode rules.
-#[cfg(feature = "unicode")]
-fn split_unicode_words(s: &str) -> impl Iterator<Item = &str> {
-    unicode_segmentation::UnicodeSegmentation::split_word_bounds(s)
-}
-
-/// Splits text into characters.
-fn split_chars(s: &str) -> impl Iterator<Item = &str> {
-    s.char_indices().map(move |(i, c)| &s[i..i + c.len_utf8()])
-}
-
-/// Splits text into graphemes.
-#[cfg(feature = "unicode")]
-fn split_graphemes(s: &str) -> impl Iterator<Item = &str> {
-    unicode_segmentation::UnicodeSegmentation::graphemes(s, true)
+    /// Iterates over the changes the op expands to with inline emphasis.
+    ///
+    /// This is very similar to [`TextDiff::iter_changes`] but it performs a second
+    /// level diff on adjacent line replacements.  The exact behavior of
+    /// this function with regards to how it detects those inline changes
+    /// is currently not defined and will likely change over time.
+    #[cfg(feature = "inline")]
+    pub fn iter_inline_changes(&self, op: &DiffOp) -> impl Iterator<Item = InlineChange<'_, T>> {
+        iter_inline_changes(self, op)
+    }
 }

 // quick and dirty way to get an upper sequence ratio.
@ -619,10 +591,10 @@ fn upper_seq_ratio<T: PartialEq>(seq1: &[T], seq2: &[T]) -> f32 {
 ///
 /// It counts the number of matches without regard to order, which is an
 /// obvious upper bound.
-struct QuickSeqRatio<'a>(HashMap<&'a str, i32>);
+struct QuickSeqRatio<'a, T: DiffableStrRef + ?Sized>(HashMap<&'a T, i32>);

-impl<'a> QuickSeqRatio<'a> {
-    pub fn new(seq: &[&'a str]) -> QuickSeqRatio<'a> {
+impl<'a, T: DiffableStrRef + Hash + Eq + ?Sized> QuickSeqRatio<'a, T> {
+    pub fn new(seq: &[&'a T]) -> QuickSeqRatio<'a, T> {
        let mut counts = HashMap::new();
        for &word in seq {
            *counts.entry(word).or_insert(0) += 1;
@ -630,7 +602,7 @@ impl<'a> QuickSeqRatio<'a> {
        QuickSeqRatio(counts)
    }

-    pub fn calc(&self, seq: &[&str]) -> f32 {
+    pub fn calc(&self, seq: &[&T]) -> f32 {
        let n = self.0.len() + seq.len();
        if n == 0 {
            return 1.0;
@ -669,18 +641,18 @@ impl<'a> QuickSeqRatio<'a> {
 /// );
 /// assert_eq!(matches, vec!["apple", "ape"]);
 /// ```
-pub fn get_close_matches<'a>(
-    word: &str,
-    possibilities: &[&'a str],
+pub fn get_close_matches<'a, T: DiffableStr + ?Sized>(
+    word: &T,
+    possibilities: &[&'a T],
    n: usize,
    cutoff: f32,
-) -> Vec<&'a str> {
+) -> Vec<&'a T> {
    let mut matches = BinaryHeap::new();
-    let seq1 = split_chars(word).collect::<Vec<_>>();
+    let seq1 = word.split_chars();
    let quick_ratio = QuickSeqRatio::new(&seq1);

    for &possibility in possibilities {
-        let seq2 = split_chars(possibility).collect::<Vec<_>>();
+        let seq2 = possibility.split_chars();

        if upper_seq_ratio(&seq1, &seq2) < cutoff || quick_ratio.calc(&seq2) < cutoff {
            continue;
@ -707,42 +679,6 @@ pub fn get_close_matches<'a>(
    rv
 }

-#[test]
-fn test_split_lines() {
-    assert_eq!(
-        split_lines("first\nsecond\rthird\r\nfourth\nlast").collect::<Vec<_>>(),
-        vec!["first\n", "second\r", "third\r\n", "fourth\n", "last"]
-    );
-    assert_eq!(split_lines("\n\n").collect::<Vec<_>>(), vec!["\n", "\n"]);
-    assert_eq!(split_lines("\n").collect::<Vec<_>>(), vec!["\n"]);
-    assert!(split_lines("").collect::<Vec<_>>().is_empty());
-}
-
-#[test]
-fn test_split_words() {
-    assert_eq!(
-        split_words("foo    bar baz\n\n  aha").collect::<Vec<_>>(),
-        ["foo", "    ", "bar", " ", "baz", "\n\n  ", "aha"]
-    );
-}
-
-#[test]
-fn test_split_chars() {
-    assert_eq!(
-        split_chars("abcfö❄️").collect::<Vec<_>>(),
-        vec!["a", "b", "c", "f", "ö", "❄", "\u{fe0f}"]
-    );
-}
-
-#[test]
-#[cfg(feature = "unicode")]
-fn test_split_graphemes() {
-    assert_eq!(
-        split_graphemes("abcfö❄️").collect::<Vec<_>>(),
-        vec!["a", "b", "c", "f", "ö", "❄️"]
-    );
-}
-
 #[test]
 fn test_captured_ops() {
    let diff = TextDiff::from_lines(
@ -782,10 +718,9 @@ fn test_unified_diff() {

 #[test]
 fn test_line_ops() {
-    let diff = TextDiff::from_lines(
-        "Hello World\nsome stuff here\nsome more stuff here\n",
-        "Hello World\nsome amazing stuff here\nsome more stuff here\n",
-    );
+    let a = "Hello World\nsome stuff here\nsome more stuff here\n";
+    let b = "Hello World\nsome amazing stuff here\nsome more stuff here\n";
+    let diff = TextDiff::from_lines(a, b);
    assert_eq!(diff.newline_terminated(), true);
    let changes = diff
        .ops()
@ -793,6 +728,19 @@ fn test_line_ops() {
        .flat_map(|op| diff.iter_changes(op))
        .collect::<Vec<_>>();
    insta::assert_debug_snapshot!(&changes);
+
+    #[cfg(feature = "bytes")]
+    {
+        let byte_diff = TextDiff::from_lines(a.as_bytes(), b.as_bytes());
+        let byte_changes = byte_diff
+            .ops()
+            .iter()
+            .flat_map(|op| byte_diff.iter_changes(op))
+            .collect::<Vec<_>>();
+        for (change, byte_change) in changes.iter().zip(byte_changes.iter()) {
+            assert_eq!(change.as_str_lossy(), byte_change.as_str_lossy());
+        }
+    }
 }

 #[test]
@ -811,6 +759,12 @@ fn test_virtual_newlines() {
 fn test_char_diff() {
    let diff = TextDiff::from_chars("Hello World", "Hallo Welt");
    insta::assert_debug_snapshot!(diff.ops());
+
+    #[cfg(feature = "bytes")]
+    {
+        let byte_diff = TextDiff::from_chars("Hello World".as_bytes(), "Hallo Welt".as_bytes());
+        assert_eq!(diff.ops(), byte_diff.ops());
+    }
 }

 #[test]
--- a/src/text/udiff.rs
+++ b/src/text/udiff.rs
@ -13,13 +13,23 @@
 //!     .context_radius(10)
 //!     .header("old_file", "new_file"));
 //! ```
+//!
+//! # Unicode vs Bytes
+//!
+//! The [`UnifiedDiff`] type supports both unicode and byte diffs for all
+//! types compatible with [`DiffableStr`].  You can pick between the two
+//! versions by using [`ToString::to_string`] or [`UnifiedDiff::to_writer`].
+//! The former uses [`DiffableStr::as_str_lossy`], the latter uses
+//! [`DiffableStr::as_bytes`] for each line.

-use std::fmt;
 use std::ops::Range;
+use std::{fmt, io};

 use crate::algorithms::{Algorithm, DiffOp};
 use crate::text::{Change, ChangeTag, TextDiff};

+use super::DiffableStr;
+
 #[derive(Copy, Clone, Debug)]
 struct UnifiedDiffHunkRange(usize, usize);

@ -77,17 +87,34 @@ impl fmt::Display for UnifiedHunkHeader {

 /// Unified diff formatter.
 ///
-/// The `Display` implementation renders a unified diff.
-pub struct UnifiedDiff<'diff, 'old, 'new, 'bufs> {
-    diff: &'diff TextDiff<'old, 'new, 'bufs>,
+/// ```rust
+/// use similar::text::TextDiff;
+/// # let old_text = "";
+/// # let new_text = "";
+/// let text_diff = TextDiff::from_lines(old_text, new_text);
+/// print!("{}", text_diff
+///     .unified_diff()
+///     .context_radius(10)
+///     .header("old_file", "new_file"));
+/// ```
+///
+/// ## Unicode vs Bytes
+///
+/// The [`UnifiedDiff`] type supports both unicode and byte diffs for all
+/// types compatible with [`DiffableStr`].  You can pick between the two
+/// versions by using [`ToString::to_string`] or [`UnifiedDiff::to_writer`].
+/// The former uses [`DiffableStr::as_str_lossy`], the latter uses
+/// [`DiffableStr::as_bytes`] for each line.
+pub struct UnifiedDiff<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
+    diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
    context_radius: usize,
    missing_newline_hint: bool,
    header: Option<(String, String)>,
 }

-impl<'diff, 'old, 'new, 'bufs> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
+impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> UnifiedDiff<'diff, 'old, 'new, 'bufs, T> {
    /// Creates a formatter from a text diff object.
-    pub fn from_text_diff(diff: &'diff TextDiff<'old, 'new, 'bufs>) -> Self {
+    pub fn from_text_diff(diff: &'diff TextDiff<'old, 'new, 'bufs, T>) -> Self {
        UnifiedDiff {
            diff,
            context_radius: 3,
@ -127,7 +154,7 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
    }

    /// Iterates over all hunks as configured.
-    pub fn iter_hunks(&self) -> impl Iterator<Item = UnifiedDiffHunk<'diff, 'old, 'new, 'bufs>> {
+    pub fn iter_hunks(&self) -> impl Iterator<Item = UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T>> {
        let diff = self.diff;
        let missing_newline_hint = self.missing_newline_hint;
        self.diff
@ -137,6 +164,19 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
            .map(move |ops| UnifiedDiffHunk::new(ops, diff, missing_newline_hint))
    }

+    /// Writes the unified diff to `w` as raw bytes; the first I/O error is returned.
+    pub fn to_writer<W: io::Write>(&self, mut w: W) -> Result<(), io::Error> {
+        let mut header = self.header.as_ref();
+        for hunk in self.iter_hunks() {
+            if let Some((old_file, new_file)) = header.take() { // only before first hunk
+                writeln!(w, "--- {}", old_file)?;
+                writeln!(w, "+++ {}", new_file)?;
+            }
+            hunk.to_writer(&mut w)?; // raw bytes; `Display` would decode lossily
+        }
+        Ok(())
+    }
+
    fn header_opt(&mut self, header: Option<(&str, &str)>) -> &mut Self {
        if let Some((a, b)) = header {
            self.header(a, b);
@ -148,19 +188,21 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiff<'diff, 'old, 'new, 'bufs> {
 /// Unified diff hunk formatter.
 ///
 /// The `Display` this renders out a single unified diff's hunk.
-pub struct UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
-    diff: &'diff TextDiff<'old, 'new, 'bufs>,
+pub struct UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
+    diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
    ops: Vec<DiffOp>,
    missing_newline_hint: bool,
 }

-impl<'diff, 'old, 'new, 'bufs> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
+impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized>
+    UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T>
+{
    /// Creates a new hunk for some operations.
    pub fn new(
        ops: Vec<DiffOp>,
-        diff: &'diff TextDiff<'old, 'new, 'bufs>,
+        diff: &'diff TextDiff<'old, 'new, 'bufs, T>,
        missing_newline_hint: bool,
-    ) -> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
+    ) -> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T> {
        UnifiedDiffHunk {
            diff,
            ops,
@ -184,7 +226,7 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
    }

    /// Iterates over all changes in a hunk.
-    pub fn iter_changes(&self) -> impl Iterator<Item = Change<'_>> + '_ {
+    pub fn iter_changes(&self) -> impl Iterator<Item = Change<'_, T>> + '_ {
        // unclear why this needs Box::new here.  It seems to infer some really
        // odd lifetimes I can't figure out how to work with.
        (Box::new(
@ -193,9 +235,43 @@ impl<'diff, 'old, 'new, 'bufs> UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
                .flat_map(move |op| self.diff.iter_changes(op)),
        )) as Box<dyn Iterator<Item = _>>
    }
+
+    /// Writes the hunk to `w` as raw bytes; the first I/O error is returned.
+    pub fn to_writer<W: io::Write>(&self, mut w: W) -> Result<(), io::Error> {
+        let mut wrote_header = false; // header is only written if a change exists
+        for change in self.iter_changes() {
+            if !wrote_header {
+                writeln!(w, "{}", self.header())?;
+                wrote_header = true;
+            }
+            write!(
+                w,
+                "{}",
+                match change.tag() {
+                    ChangeTag::Equal => ' ',
+                    ChangeTag::Delete => '-',
+                    ChangeTag::Insert => '+',
+                },
+            )?;
+            w.write_all(change.value().as_bytes())?;
+            if !self.diff.newline_terminated() { // mirrors the `Display` impl
+                writeln!(w)?;
+            }
+            if change.missing_newline() {
+                if self.missing_newline_hint {
+                    writeln!(w, "\n\\ No newline at end of file")?;
+                } else {
+                    writeln!(w)?;
+                }
+            }
+        }
+        Ok(())
+    }
 }

-impl<'diff, 'old, 'new, 'bufs> fmt::Display for UnifiedDiffHunk<'diff, 'old, 'new, 'bufs> {
+impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> fmt::Display
+    for UnifiedDiffHunk<'diff, 'old, 'new, 'bufs, T>
+{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let nl = if self.diff.newline_terminated() {
            ""
@ -216,7 +292,7 @@ impl<'diff, 'old, 'new, 'bufs> fmt::Display for UnifiedDiffHunk<'diff, 'old, 'ne
                    ChangeTag::Delete => '-',
                    ChangeTag::Insert => '+',
                },
-                change.value(),
+                change.as_str_lossy(),
                nl
            )?;
            if change.missing_newline() {
@ -231,7 +307,9 @@ impl<'diff, 'old, 'new, 'bufs> fmt::Display for UnifiedDiffHunk<'diff, 'old, 'ne
    }
 }

-impl<'diff, 'old, 'new, 'bufs> fmt::Display for UnifiedDiff<'diff, 'old, 'new, 'bufs> {
+impl<'diff, 'old, 'new, 'bufs, T: DiffableStr + ?Sized> fmt::Display
+    for UnifiedDiff<'diff, 'old, 'new, 'bufs, T>
+{
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut header = self.header.as_ref();
        for hunk in self.iter_hunks() {