Restructure crate layout

This commit is contained in:
Armin Ronacher 2021-02-02 22:44:52 +01:00
parent 1f73e01ff1
commit 1991955c52
10 changed files with 323 additions and 283 deletions

View file

@ -40,7 +40,9 @@ fn main() {
* Myers' diff
* Patience diff
* Diffing on arbitrary comparable sequences
* Line, word, character and grapheme level diffing
* Text and Byte diffing
* Unified diff generation
## License and Links

View file

@ -3,7 +3,8 @@ use std::fs::read;
use std::process::exit;
use console::{style, Style};
use similar::text::{ChangeTag, TextDiff};
use similar::text::TextDiff;
use similar::ChangeTag;
/// Newtype around an optional `usize`.
// NOTE(review): presumably a line number shown next to each diff row; the code
// that consumes it is outside this hunk, so confirm there.
struct Line(Option<usize>);

View file

@ -1,5 +1,6 @@
use console::Style;
use similar::text::{ChangeTag, TextDiff};
use similar::text::TextDiff;
use similar::ChangeTag;
fn main() {
let diff = TextDiff::from_lines(

View file

@ -1,159 +1,7 @@
use crate::algorithms::hook::DiffHook;
use std::convert::Infallible;
use std::ops::Range;
/// Utility enum describing one captured diff operation.
///
/// This is used by [`Capture`](crate::algorithms::Capture).  Every variant
/// carries start indexes into the old and new sequence together with the
/// length(s) of the segment it covers.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum DiffOp {
    /// A segment that is equal in both sequences (see [`DiffHook::equal`]).
    Equal {
        /// Where the segment starts in the old sequence.
        old_index: usize,
        /// Where the segment starts in the new sequence.
        new_index: usize,
        /// How many items the segment spans.
        len: usize,
    },
    /// A segment that was deleted (see [`DiffHook::delete`]).
    Delete {
        /// Where the segment starts in the old sequence.
        old_index: usize,
        /// How many items of the old sequence the segment spans.
        old_len: usize,
        /// The starting index in the new sequence.
        new_index: usize,
    },
    /// A segment that was inserted (see [`DiffHook::insert`]).
    Insert {
        /// The starting index in the old sequence.
        old_index: usize,
        /// Where the segment starts in the new sequence.
        new_index: usize,
        /// How many items of the new sequence the segment spans.
        new_len: usize,
    },
    /// A segment that was replaced (see [`DiffHook::replace`]).
    Replace {
        /// Where the replaced segment starts in the old sequence.
        old_index: usize,
        /// How many items of the old sequence are replaced.
        old_len: usize,
        /// Where the replacement starts in the new sequence.
        new_index: usize,
        /// How many items of the new sequence form the replacement.
        new_len: usize,
    },
}
/// The tag of a diff operation.
///
/// Variants are ordered as declared: `Equal < Delete < Insert < Replace`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum DiffTag {
    /// Marks a diff op that encodes an equal segment.
    Equal,
    /// Marks a diff op that encodes a deleted segment.
    Delete,
    /// Marks a diff op that encodes an inserted segment.
    Insert,
    /// Marks a diff op that encodes a replaced segment.
    Replace,
}
impl DiffOp {
    /// Returns the tag of the operation.
    pub fn tag(self) -> DiffTag {
        let (tag, _, _) = self.as_tag_tuple();
        tag
    }

    /// Returns the old range.
    pub fn old_range(&self) -> Range<usize> {
        let (_, old, _) = self.as_tag_tuple();
        old
    }

    /// Returns the new range.
    pub fn new_range(&self) -> Range<usize> {
        let (_, _, new) = self.as_tag_tuple();
        new
    }

    /// Transform the op into a tuple of diff tag and ranges.
    ///
    /// This is useful when operating on slices.  The returned format is
    /// `(tag, i1..i2, j1..j2)`:
    ///
    /// * `Replace`: `a[i1..i2]` should be replaced by `b[j1..j2]`
    /// * `Delete`: `a[i1..i2]` should be deleted (`j1 == j2` in this case).
    /// * `Insert`: `b[j1..j2]` should be inserted at `a[i1..i2]` (`i1 == i2` in this case).
    /// * `Equal`: `a[i1..i2]` is equal to `b[j1..j2]`.
    pub fn as_tag_tuple(&self) -> (DiffTag, Range<usize>, Range<usize>) {
        // Reduce every variant to start/length pairs first; a side that the
        // operation does not touch gets a length of zero (an empty range).
        let (tag, old_start, old_count, new_start, new_count) = match *self {
            DiffOp::Equal {
                old_index,
                new_index,
                len,
            } => (DiffTag::Equal, old_index, len, new_index, len),
            DiffOp::Delete {
                old_index,
                old_len,
                new_index,
            } => (DiffTag::Delete, old_index, old_len, new_index, 0),
            DiffOp::Insert {
                old_index,
                new_index,
                new_len,
            } => (DiffTag::Insert, old_index, 0, new_index, new_len),
            DiffOp::Replace {
                old_index,
                old_len,
                new_index,
                new_len,
            } => (DiffTag::Replace, old_index, old_len, new_index, new_len),
        };
        (
            tag,
            old_start..old_start + old_count,
            new_start..new_start + new_count,
        )
    }

    /// Apply this operation to a diff hook.
    ///
    /// Calls the hook method matching the variant (`equal`, `delete`,
    /// `insert` or `replace`) with the stored indexes and lengths and
    /// returns whatever the hook returns.
    pub fn apply_to_hook<D: DiffHook>(&self, d: &mut D) -> Result<(), D::Error> {
        match self {
            DiffOp::Equal {
                old_index,
                new_index,
                len,
            } => d.equal(*old_index, *new_index, *len),
            DiffOp::Delete {
                old_index,
                old_len,
                new_index,
            } => d.delete(*old_index, *old_len, *new_index),
            DiffOp::Insert {
                old_index,
                new_index,
                new_len,
            } => d.insert(*old_index, *new_index, *new_len),
            DiffOp::Replace {
                old_index,
                old_len,
                new_index,
                new_len,
            } => d.replace(*old_index, *old_len, *new_index, *new_len),
        }
    }
}
use crate::algorithms::hook::DiffHook;
use crate::DiffOp;
/// Isolate change clusters by eliminating ranges with no changes.
///

View file

@ -14,6 +14,20 @@
//!
//! Most of the crate operates on the [`Algorithm`] enum which abstracts over
//! the different algorithms.
//!
//! # Example
//!
//! This is a simple example that shows how you can calculate the difference
//! between two sequences and capture the [`DiffOp`]s into a vector.
//!
//! ```rust
//! use similar::Algorithm;
//! use similar::algorithms::capture_diff_slices;
//!
//! let a = vec![1, 2, 3, 4, 5];
//! let b = vec![1, 2, 3, 4, 7];
//! let ops = capture_diff_slices(Algorithm::Myers, &a, &b);
//! ```
// general traits and utilities
mod capture;
@ -23,7 +37,9 @@ mod replace;
use std::hash::Hash;
use std::ops::{Index, Range};
pub use capture::{get_diff_ratio, group_diff_ops, Capture, DiffOp, DiffTag};
use crate::types::{Algorithm, DiffOp};
pub use capture::{get_diff_ratio, group_diff_ops, Capture};
pub use hook::DiffHook;
pub use replace::Replace;
@ -31,22 +47,6 @@ pub use replace::Replace;
pub mod myers;
pub mod patience;
/// Selects which diffing algorithm is used.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Algorithm {
    /// Use the myers algorithm from [`myers`].
    Myers,
    /// Use the patience algorithm from [`patience`].
    Patience,
}

impl Default for Algorithm {
    /// The default algorithm is [`Algorithm::Myers`].
    fn default() -> Self {
        Self::Myers
    }
}
/// Creates a diff between old and new with the given algorithm.
///
/// Diffs `old`, between indices `old_range` and `new` between indices `new_range`.

View file

@ -4,7 +4,8 @@
//!
//! ```rust
//! # #[cfg(feature = "text")] {
//! use similar::text::{ChangeTag, TextDiff};
//! use similar::ChangeTag;
//! use similar::text::TextDiff;
//!
//! let diff = TextDiff::from_lines(
//! "Hello World\nThis is the second line.\nThis is the third.",
@ -42,12 +43,18 @@
//! If the crate is used without default features it's removed.
//! * `unicode`: when this feature is enabled the text diffing functionality
//! gains the ability to diff on a grapheme instead of character level. This
//! is particularly useful when working with text containing emojis.
//! * `bytes`: when this feature is enabled the text module gains support for
//! working with byte slices.
//! is particularly useful when working with text containing emojis. This
//! pulls in some relatively complex dependencies for working with the unicode
//! database.
//! * `bytes`: this feature adds support for working with byte slices in the
//! [`text`] module in addition to unicode strings. This pulls in the
//! [`bstr`] dependency.
//! * `inline`: this feature gives access to additional functionality of the
//! `text` module to provide inline information about which values changed
//! [`text`] module to provide inline information about which values changed
//! in a line diff. This currently also enables the `unicode` feature.
#![warn(missing_docs)]
pub mod algorithms;
pub mod text;
mod types;
pub use self::types::*;

View file

@ -2,8 +2,9 @@
use std::borrow::Cow;
use std::fmt;
use crate::algorithms::{capture_diff, get_diff_ratio, Algorithm, DiffOp, DiffTag};
use crate::text::{Change, ChangeTag, DiffableStr, TextDiff};
use crate::algorithms::{capture_diff, get_diff_ratio};
use crate::text::{DiffableStr, TextDiff};
use crate::types::{Algorithm, Change, ChangeTag, DiffOp, DiffTag};
use std::ops::Index;

View file

@ -43,7 +43,7 @@
//! Because very commonly two compared sequences will largely match this module
//! splits its functionality into two layers. The first is inherited from the
//! general [`algorithms`](crate::algorithms) module: changes are encoded as
//! [diff operations](crate::algorithms::DiffOp). These are ranges of the
//! [diff operations](crate::DiffOp). These are ranges of the
//! differences by index in the source sequence. Because this can be cumbersome
//! to work with a separate method [`TextDiff::iter_changes`] is provided which
//! expands all the changes on an item by item level encoded in an operation.
@ -88,8 +88,6 @@
use std::borrow::Cow;
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::fmt;
use std::hash::Hash;
mod abstraction;
#[cfg(feature = "inline")]
@ -103,9 +101,8 @@ pub use self::inline::InlineChange;
pub use self::udiff::{unified_diff, UnifiedDiff, UnifiedDiffHunk, UnifiedHunkHeader};
use self::utils::{upper_seq_ratio, QuickSeqRatio};
use crate::algorithms::{
capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, DiffOp, DiffTag,
};
use crate::algorithms::{capture_diff_slices, get_diff_ratio, group_diff_ops};
use crate::types::{Algorithm, Change, ChangeTag, DiffOp, DiffTag};
/// A builder type config for more complex uses of [`TextDiff`].
#[derive(Clone, Debug)]
@ -250,101 +247,6 @@ impl TextDiffConfig {
}
}
/// The tag of a change.
///
/// The [`fmt::Display`] implementation renders the tag as the one-character
/// marker `" "`, `"-"` or `"+"`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ChangeTag {
    /// Equality: the text is the same on both sides (not a change).
    Equal,
    /// The text was deleted.
    Delete,
    /// The text was inserted.
    Insert,
}

impl fmt::Display for ChangeTag {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let marker = match *self {
            ChangeTag::Equal => " ",
            ChangeTag::Delete => "-",
            ChangeTag::Insert => "+",
        };
        f.write_str(marker)
    }
}
/// Represents the expanded textual change.
///
/// This type is returned from the [`TextDiff::iter_changes`] method. It
/// exists so that it's more convenient to work with textual differences as
/// the underlying [`DiffOp`] does not know anything about strings.
///
/// `Clone` and `Copy` are implemented by hand instead of being derived: the
/// derive would add `T: Clone` / `T: Copy` bounds, which unsized values such
/// as `str` cannot satisfy, even though only a shared reference to `T` is
/// stored here.
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub struct Change<'s, T: DiffableStr + ?Sized> {
    tag: ChangeTag,
    old_index: Option<usize>,
    new_index: Option<usize>,
    value: &'s T,
    missing_newline: bool,
}

impl<'s, T: DiffableStr + ?Sized> Clone for Change<'s, T> {
    fn clone(&self) -> Self {
        *self
    }
}

// Every field is `Copy` (the value is only a shared reference), so the whole
// struct can be `Copy` regardless of `T`.
impl<'s, T: DiffableStr + ?Sized> Copy for Change<'s, T> {}
impl<'s, T: DiffableStr + ?Sized> fmt::Display for Change<'s, T> {
    /// Writes the lossily decoded value, followed by `"\n"` when the
    /// `missing_newline` flag is set.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let trailer = if self.missing_newline { "\n" } else { "" };
        write!(f, "{}{}", self.to_string_lossy(), trailer)
    }
}
impl<'s, T: DiffableStr + ?Sized> Change<'s, T> {
/// Returns the change tag.
pub fn tag(&self) -> ChangeTag {
self.tag
}
/// Returns the old index if available.
pub fn old_index(&self) -> Option<usize> {
self.old_index
}
/// Returns the new index if available.
pub fn new_index(&self) -> Option<usize> {
self.new_index
}
/// Returns the underlying changed value.
///
/// Depending on the type of the underlying [`DiffableStr`] this value is
/// more or less useful. If you always want to have a utf-8 string it's
/// best to use the [`Change::as_str`] and [`Change::to_string_lossy`] methods.
pub fn value(&self) -> &'s T {
self.value
}
/// Returns the value as string if it is utf-8.
pub fn as_str(&self) -> Option<&'s str> {
T::as_str(self.value)
}
/// Returns the value (lossy) decoded as utf-8 string.
pub fn to_string_lossy(&self) -> Cow<'s, str> {
T::to_string_lossy(self.value)
}
/// Returns `true` if this change needs to be followed up by a
/// missing newline.
///
/// The [`std::fmt::Display`] implementation of [`Change`] will automatically
/// insert a newline after the value if this is true.
pub fn missing_newline(&self) -> bool {
self.missing_newline
}
}
/// Captures diff op codes for textual diffs
pub struct TextDiff<'old, 'new, 'bufs, T: DiffableStr + ?Sized> {
old: Cow<'bufs, [&'old T]>,

View file

@ -25,8 +25,8 @@
use std::ops::Range;
use std::{fmt, io};
use crate::algorithms::{Algorithm, DiffOp};
use crate::text::{Change, TextDiff};
use crate::text::TextDiff;
use crate::types::{Algorithm, Change, DiffOp};
use super::DiffableStr;

278
src/types.rs Normal file
View file

@ -0,0 +1,278 @@
use std::fmt;
use std::ops::Range;
use crate::algorithms::DiffHook;
/// Selects which diffing algorithm is used.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Algorithm {
    /// Use the myers algorithm from [`crate::algorithms::myers`].
    Myers,
    /// Use the patience algorithm from [`crate::algorithms::patience`].
    Patience,
}

impl Default for Algorithm {
    /// The default algorithm is [`Algorithm::Myers`].
    fn default() -> Self {
        Self::Myers
    }
}
/// The tag of a change.
///
/// The [`fmt::Display`] implementation renders the tag as the one-character
/// marker `" "`, `"-"` or `"+"`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ChangeTag {
    /// Equality: the text is the same on both sides (not a change).
    Equal,
    /// The text was deleted.
    Delete,
    /// The text was inserted.
    Insert,
}

impl fmt::Display for ChangeTag {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let marker = match *self {
            ChangeTag::Equal => " ",
            ChangeTag::Delete => "-",
            ChangeTag::Insert => "+",
        };
        f.write_str(marker)
    }
}
/// Represents the expanded textual change.
///
/// This type is returned from the [`crate::text::TextDiff::iter_changes`] method.
/// It exists so that it's more convenient to work with textual differences as
/// the underlying [`DiffOp`] does not know anything about strings.
///
/// `Clone` and `Copy` are implemented by hand instead of being derived: the
/// derive would add `T: Clone` / `T: Copy` bounds, which unsized values such
/// as `str` cannot satisfy, even though only a shared reference to `T` is
/// stored here.
#[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd)]
pub struct Change<'s, T: ?Sized> {
    pub(crate) tag: ChangeTag,
    pub(crate) old_index: Option<usize>,
    pub(crate) new_index: Option<usize>,
    pub(crate) value: &'s T,
    pub(crate) missing_newline: bool,
}

impl<'s, T: ?Sized> Clone for Change<'s, T> {
    fn clone(&self) -> Self {
        *self
    }
}

// Every field is `Copy` (the value is only a shared reference), so the whole
// struct can be `Copy` regardless of `T`.
impl<'s, T: ?Sized> Copy for Change<'s, T> {}
impl<'s, T: ?Sized> Change<'s, T> {
/// Returns the change tag.
pub fn tag(&self) -> ChangeTag {
self.tag
}
/// Returns the old index if available.
pub fn old_index(&self) -> Option<usize> {
self.old_index
}
/// Returns the new index if available.
pub fn new_index(&self) -> Option<usize> {
self.new_index
}
/// Returns the underlying changed value.
///
/// Depending on the type of the underlying [`crate::text::DiffableStr`]
/// this value is more or less useful. If you always want to have a utf-8
/// string it's best to use the [`Change::as_str`] and
/// [`Change::to_string_lossy`] methods.
pub fn value(&self) -> &'s T {
self.value
}
}
/// Utility enum describing one captured diff operation.
///
/// This is used by [`Capture`](crate::algorithms::Capture).  Every variant
/// carries start indexes into the old and new sequence together with the
/// length(s) of the segment it covers.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum DiffOp {
    /// A segment that is equal in both sequences (see [`DiffHook::equal`]).
    Equal {
        /// Where the segment starts in the old sequence.
        old_index: usize,
        /// Where the segment starts in the new sequence.
        new_index: usize,
        /// How many items the segment spans.
        len: usize,
    },
    /// A segment that was deleted (see [`DiffHook::delete`]).
    Delete {
        /// Where the segment starts in the old sequence.
        old_index: usize,
        /// How many items of the old sequence the segment spans.
        old_len: usize,
        /// The starting index in the new sequence.
        new_index: usize,
    },
    /// A segment that was inserted (see [`DiffHook::insert`]).
    Insert {
        /// The starting index in the old sequence.
        old_index: usize,
        /// Where the segment starts in the new sequence.
        new_index: usize,
        /// How many items of the new sequence the segment spans.
        new_len: usize,
    },
    /// A segment that was replaced (see [`DiffHook::replace`]).
    Replace {
        /// Where the replaced segment starts in the old sequence.
        old_index: usize,
        /// How many items of the old sequence are replaced.
        old_len: usize,
        /// Where the replacement starts in the new sequence.
        new_index: usize,
        /// How many items of the new sequence form the replacement.
        new_len: usize,
    },
}
/// The tag of a diff operation.
///
/// Variants are ordered as declared: `Equal < Delete < Insert < Replace`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum DiffTag {
    /// Marks a diff op that encodes an equal segment.
    Equal,
    /// Marks a diff op that encodes a deleted segment.
    Delete,
    /// Marks a diff op that encodes an inserted segment.
    Insert,
    /// Marks a diff op that encodes a replaced segment.
    Replace,
}
impl DiffOp {
    /// Returns the tag of the operation.
    pub fn tag(self) -> DiffTag {
        let (tag, _, _) = self.as_tag_tuple();
        tag
    }

    /// Returns the old range.
    pub fn old_range(&self) -> Range<usize> {
        let (_, old, _) = self.as_tag_tuple();
        old
    }

    /// Returns the new range.
    pub fn new_range(&self) -> Range<usize> {
        let (_, _, new) = self.as_tag_tuple();
        new
    }

    /// Transform the op into a tuple of diff tag and ranges.
    ///
    /// This is useful when operating on slices.  The returned format is
    /// `(tag, i1..i2, j1..j2)`:
    ///
    /// * `Replace`: `a[i1..i2]` should be replaced by `b[j1..j2]`
    /// * `Delete`: `a[i1..i2]` should be deleted (`j1 == j2` in this case).
    /// * `Insert`: `b[j1..j2]` should be inserted at `a[i1..i2]` (`i1 == i2` in this case).
    /// * `Equal`: `a[i1..i2]` is equal to `b[j1..j2]`.
    pub fn as_tag_tuple(&self) -> (DiffTag, Range<usize>, Range<usize>) {
        // Reduce every variant to start/length pairs first; a side that the
        // operation does not touch gets a length of zero (an empty range).
        let (tag, old_start, old_count, new_start, new_count) = match *self {
            DiffOp::Equal {
                old_index,
                new_index,
                len,
            } => (DiffTag::Equal, old_index, len, new_index, len),
            DiffOp::Delete {
                old_index,
                old_len,
                new_index,
            } => (DiffTag::Delete, old_index, old_len, new_index, 0),
            DiffOp::Insert {
                old_index,
                new_index,
                new_len,
            } => (DiffTag::Insert, old_index, 0, new_index, new_len),
            DiffOp::Replace {
                old_index,
                old_len,
                new_index,
                new_len,
            } => (DiffTag::Replace, old_index, old_len, new_index, new_len),
        };
        (
            tag,
            old_start..old_start + old_count,
            new_start..new_start + new_count,
        )
    }

    /// Apply this operation to a diff hook.
    ///
    /// Calls the hook method matching the variant (`equal`, `delete`,
    /// `insert` or `replace`) with the stored indexes and lengths and
    /// returns whatever the hook returns.
    pub fn apply_to_hook<D: DiffHook>(&self, d: &mut D) -> Result<(), D::Error> {
        match self {
            DiffOp::Equal {
                old_index,
                new_index,
                len,
            } => d.equal(*old_index, *new_index, *len),
            DiffOp::Delete {
                old_index,
                old_len,
                new_index,
            } => d.delete(*old_index, *old_len, *new_index),
            DiffOp::Insert {
                old_index,
                new_index,
                new_len,
            } => d.insert(*old_index, *new_index, *new_len),
            DiffOp::Replace {
                old_index,
                old_len,
                new_index,
                new_len,
            } => d.replace(*old_index, *old_len, *new_index, *new_len),
        }
    }
}
// String-oriented extras for `Change`; only compiled when the `text` feature
// is enabled because they depend on `DiffableStr` from the text module.
#[cfg(feature = "text")]
mod text_additions {
    use super::*;
    use crate::text::DiffableStr;
    use std::borrow::Cow;

    impl<'s, T: DiffableStr + ?Sized> Change<'s, T> {
        /// Returns the value as string if it is utf-8.
        pub fn as_str(&self) -> Option<&'s str> {
            <T as DiffableStr>::as_str(self.value)
        }

        /// Returns the value (lossy) decoded as utf-8 string.
        pub fn to_string_lossy(&self) -> Cow<'s, str> {
            <T as DiffableStr>::to_string_lossy(self.value)
        }

        /// Returns `true` if this change needs to be followed up by a
        /// missing newline.
        ///
        /// The [`std::fmt::Display`] implementation of [`Change`] will
        /// automatically insert a newline after the value if this is true.
        pub fn missing_newline(&self) -> bool {
            let Change {
                missing_newline, ..
            } = *self;
            missing_newline
        }
    }

    impl<'s, T: DiffableStr + ?Sized> fmt::Display for Change<'s, T> {
        /// Writes the lossily decoded value, followed by `"\n"` when the
        /// `missing_newline` flag is set.
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            let trailer = if self.missing_newline { "\n" } else { "" };
            write!(f, "{}{}", self.to_string_lossy(), trailer)
        }
    }
}