diff --git a/examples/close-matches.rs b/examples/close-matches.rs index b88c9a9..ebe283a 100644 --- a/examples/close-matches.rs +++ b/examples/close-matches.rs @@ -1,4 +1,4 @@ -use similar::text::get_close_matches; +use similar::get_close_matches; fn main() { let words = vec![ diff --git a/examples/terminal-inline.rs b/examples/terminal-inline.rs index 997cae8..4c5d74f 100644 --- a/examples/terminal-inline.rs +++ b/examples/terminal-inline.rs @@ -3,8 +3,7 @@ use std::fs::read; use std::process::exit; use console::{style, Style}; -use similar::text::TextDiff; -use similar::ChangeTag; +use similar::{ChangeTag, TextDiff}; struct Line(Option); diff --git a/examples/terminal.rs b/examples/terminal.rs index 03ad4ef..1a90327 100644 --- a/examples/terminal.rs +++ b/examples/terminal.rs @@ -1,6 +1,5 @@ use console::Style; -use similar::text::TextDiff; -use similar::ChangeTag; +use similar::{ChangeTag, TextDiff}; fn main() { let diff = TextDiff::from_lines( diff --git a/examples/udiff.rs b/examples/udiff.rs index d46f46c..3d8eb9c 100644 --- a/examples/udiff.rs +++ b/examples/udiff.rs @@ -2,7 +2,7 @@ use std::fs::read; use std::io; use std::process::exit; -use similar::text::TextDiff; +use similar::TextDiff; fn main() { let args: Vec<_> = std::env::args_os().collect(); diff --git a/src/lib.rs b/src/lib.rs index 686277a..73671e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,8 +4,7 @@ //! //! ```rust //! # #[cfg(feature = "text")] { -//! use similar::ChangeTag; -//! use similar::text::TextDiff; +//! use similar::{ChangeTag, TextDiff}; //! //! let diff = TextDiff::from_lines( //! "Hello World\nThis is the second line.\nThis is the third.", @@ -25,38 +24,108 @@ //! # } //! ``` //! -//! ## Functionality +//! # API +//! +//! The API of the crate is split into high and low level functionality. Most +//! of what you probably want to use is available toplevel. Additionally the +//! following sub modules exist: //! //! 
* [`algorithms`]: This implements the different types of diffing algorithms. //! It provides both low level access to the algorithms with the minimal //! trait bounds necessary, as well as a generic interface. -//! * [`text`]: This extends the general diffing functionality to text (and more -//! specifically line) based diff operations. +//! * [`udiff`]: Unified diff functionality. //! -//! ## Features +//! # Sequence Diffing +//! +//! If you want to diff sequences or generally indexable things you can use the +//! [`capture_diff`] and [`capture_diff_slices`] functions. They will directly +//! diff an indexable object or slice and return a vector of [`DiffOp`] objects. +//! +//! # Text Diffing +//! +//! Similar provides helpful utilities for text (and more specifically line) diff +//! operations. The main type you want to work with is [`TextDiff`] which +//! uses the underlying diff algorithms to expose a convenient API to work with +//! texts. +//! +//! ## Trailing Newlines +//! +//! When working with line diffs (and unified diffs in general) there are two +//! "philosophies" to look at lines. One is to diff lines without their newline +//! character, the other is to diff with the newline character. Typically the +//! latter is done because text files do not _have_ to end in a newline character. +//! As a result there is a difference between `foo\n` and `foo` as far as diffs +//! are concerned. +//! +//! In similar this is handled on the [`Change`] or [`InlineChange`] level. If +//! a diff was created via [`TextDiff::from_lines`] the text diffing system is +//! instructed to check if there are missing newlines encountered. If that is +//! the case the [`Change`] object will return true from the +//! [`Change::missing_newline`] method so the caller knows to handle this by +//! either rendering a virtual newline at that position or to indicate it in +//! different ways. For instance the unified diff code will render the special +//! 
`\ No newline at end of file` marker. +//! +//! ## Bytes vs Unicode +//! +//! Similar concerns itself with a looser definition of "text" than you would +//! normally see in Rust. While by default it can only operate on [`str`] types +//! by enabling the `bytes` feature it gains support for byte slices with some +//! caveats. +//! +//! A lot of text diff functionality assumes that what is being diffed constitutes +//! text, but in the real world it can often be challenging to ensure that this is +//! all valid utf-8. Because of this the crate is built so that most functionality +//! also still works with bytes for as long as they are roughly ASCII compatible. +//! +//! This means you will be successful in creating a unified diff from latin1 +//! encoded bytes but if you try to do the same with EBCDIC encoded bytes you +//! will only get garbage. +//! +//! # Ops vs Changes +//! +//! Because very commonly two compared sequences will largely match this module +//! splits its functionality into two layers: +//! +//! Changes are encoded as [diff operations](crate::DiffOp). These are +//! ranges of the differences by index in the source sequence. Because this +//! can be cumbersome to work with a separate method [`DiffOp::iter_changes`] +//! (and [`TextDiff::iter_changes`] when working with text diffs) is provided +//! which expands all the changes on an item by item level encoded in an operation. +//! +//! As the [`TextDiff::grouped_ops`] method can isolate clusters of changes +//! this even works for very long files if paired with this method. +//! +//! # Feature Flags //! //! The crate by default does not have any dependencies however for some use //! cases it's useful to pull in extra functionality. Likewise you can turn //! off some functionality. //! -//! * `text`: this feature is enabled by default and enables the [`text`] module. +//! * `text`: this feature is enabled by default and enables the text based +//! diffing types such as [`TextDiff`]. //! 
If the crate is used without default features it's removed. //! * `unicode`: when this feature is enabled the text diffing functionality //! gains the ability to diff on a grapheme instead of character level. This //! is particularly useful when working with text containing emojis. This //! pulls in some relatively complex dependencies for working with the unicode //! database. -//! * `bytes`: this feature adds support for working with byte slices in the -//! [`text`] module in addition to unicode strings. This pulls in the +//! * `bytes`: this feature adds support for working with byte slices in text +//! APIs in addition to unicode strings. This pulls in the //! [`bstr`] dependency. //! * `inline`: this feature gives access to additional functionality of the -//! [`text`] module to provide inline information about which values changed +//! text diffing to provide inline information about which values changed //! in a line diff. This currently also enables the `unicode` feature. #![warn(missing_docs)] pub mod algorithms; -pub mod text; +pub mod udiff; mod common; +#[cfg(feature = "text")] +mod text; mod types; + pub use self::common::*; +#[cfg(feature = "text")] +pub use self::text::*; pub use self::types::*; diff --git a/src/text/snapshots/similar__text__udiff__unified_diff.snap b/src/snapshots/similar__udiff__unified_diff.snap similarity index 87% rename from src/text/snapshots/similar__text__udiff__unified_diff.snap rename to src/snapshots/similar__udiff__unified_diff.snap index 74d419d..799b38c 100644 --- a/src/text/snapshots/similar__text__udiff__unified_diff.snap +++ b/src/snapshots/similar__udiff__unified_diff.snap @@ -1,5 +1,5 @@ --- -source: src/text/udiff.rs +source: src/udiff.rs expression: "&diff.unified_diff().header(\"a.txt\", \"b.txt\").to_string()" --- --- a.txt diff --git a/src/text/snapshots/similar__text__udiff__unified_diff_newline_hint-2.snap b/src/snapshots/similar__udiff__unified_diff_newline_hint-2.snap similarity index 88% rename from 
src/text/snapshots/similar__text__udiff__unified_diff_newline_hint-2.snap rename to src/snapshots/similar__udiff__unified_diff_newline_hint-2.snap index 574e77a..7e8489e 100644 --- a/src/text/snapshots/similar__text__udiff__unified_diff_newline_hint-2.snap +++ b/src/snapshots/similar__udiff__unified_diff_newline_hint-2.snap @@ -1,5 +1,5 @@ --- -source: src/text/udiff.rs +source: src/udiff.rs expression: "&diff.unified_diff().missing_newline_hint(false).header(\"a.txt\",\n \"b.txt\").to_string()" --- --- a.txt diff --git a/src/text/snapshots/similar__text__udiff__unified_diff_newline_hint.snap b/src/snapshots/similar__udiff__unified_diff_newline_hint.snap similarity index 85% rename from src/text/snapshots/similar__text__udiff__unified_diff_newline_hint.snap rename to src/snapshots/similar__udiff__unified_diff_newline_hint.snap index 0502549..0d3b137 100644 --- a/src/text/snapshots/similar__text__udiff__unified_diff_newline_hint.snap +++ b/src/snapshots/similar__udiff__unified_diff_newline_hint.snap @@ -1,5 +1,5 @@ --- -source: src/text/udiff.rs +source: src/udiff.rs expression: "&diff.unified_diff().header(\"a.txt\", \"b.txt\").to_string()" --- --- a.txt diff --git a/src/text/mod.rs b/src/text/mod.rs index d6f4a40..260f28c 100644 --- a/src/text/mod.rs +++ b/src/text/mod.rs @@ -1,90 +1,4 @@ //! Text diffing utilities. -//! -//! This provides helpful utilities for text (and more specifically line) diff -//! operations. The main type you want to work with is [`TextDiff`] which -//! uses the underlying diff algorithms to expose a convenient API to work with -//! texts. -//! -//! It can produce a unified diff and also let you iterate over the changeset -//! directly if you want. -//! -//! Text diffing is available by default but can be disabled by turning off the -//! default features. The feature to enable to get it back is `text`. -//! -//! # Examples -//! -//! A super simple example for how to generate a unified diff with three lines -//! 
off context around the changes: -//! -//! ```rust -//! # use similar::text::TextDiff; -//! # let old_text = ""; -//! # let new_text = ""; -//! let diff = TextDiff::from_lines(old_text, new_text); -//! let unified_diff = diff.unified_diff().header("old_file", "new_file").to_string(); -//! ``` -//! -//! This is another example that iterates over the actual changes: -//! -//! ```rust -//! # use similar::text::TextDiff; -//! # let old_text = ""; -//! # let new_text = ""; -//! let diff = TextDiff::from_lines(old_text, new_text); -//! for op in diff.ops() { -//! for change in diff.iter_changes(op) { -//! println!("{:?}", change); -//! } -//! } -//! ``` -//! -//! # Ops vs Changes -//! -//! Because very commonly two compared sequences will largely match this module -//! splits it's functionality into two layers. The first is inherited from the -//! general [`algorithms`](crate::algorithms) module: changes are encoded as -//! [diff operations](crate::DiffOp). These are ranges of the -//! differences by index in the source sequence. Because this can be cumbersome -//! to work with a separate method [`TextDiff::iter_changes`] is provided which -//! expands all the changes on an item by item level encoded in an operation. -//! -//! Because the [`TextDiff::grouped_ops`] method can isolate clusters of changes -//! this even works for very long files if paired with this method. -//! -//! # Trailing Newlines -//! -//! When working with line diffs (and unified diffs in general) there are two -//! "philosophies" to look at lines. One is to diff lines without their newline -//! character, the other is to diff with the newline character. Typically the -//! latter is done because text files do not _have_ to end in a newline character. -//! As a result there is a difference between `foo\n` and `foo` as far as diffs -//! are concerned. -//! -//! In similar this is handled on the [`Change`] or [`InlineChange`] level. If -//! 
a diff was created via [`TextDiff::from_lines`] the text diffing system is -//! instructed to check if there are missing newlines encountered. If that is -//! the case the [`Change`] object will return true from the -//! [`Change::missing_newline`] method so the caller knows to handle this by -//! either rendering a virtual newline at that position or to indicate it in -//! different ways. For instance the unified diff code will render the special -//! `\ No newline at end of file` marker. -//! -//! # Bytes vs Unicode -//! -//! This module concerns itself with a loser definition of "text" than you would -//! normally see in Rust. While by default it can only operate on [`str`] types -//! by enabling the `bytes` feature it gains support for byte slices with some -//! caveats. -//! -//! A lot of text diff functionality assumes that what is being diffed constiutes -//! text, but in the real world it can often be challenging to ensure that this is -//! all valid utf-8. Because of this the crate is built so that most functinality -//! also still works with bytes for as long as they are roughtly ASCII compatible. -//! -//! This means you will be successful in creating a unified diff from latin1 -//! encoded bytes but if you try to do the same with EBCDIC encoded bytes you -//! will only get garbage. 
-#![cfg(feature = "text")] use std::borrow::Cow; use std::cmp::Reverse; use std::collections::BinaryHeap; @@ -92,15 +6,14 @@ use std::collections::BinaryHeap; mod abstraction; #[cfg(feature = "inline")] mod inline; -mod udiff; mod utils; pub use self::abstraction::{DiffableStr, DiffableStrRef}; #[cfg(feature = "inline")] pub use self::inline::InlineChange; -pub use self::udiff::{unified_diff, UnifiedDiff, UnifiedDiffHunk, UnifiedHunkHeader}; use self::utils::{upper_seq_ratio, QuickSeqRatio}; +use crate::udiff::UnifiedDiff; use crate::{capture_diff_slices, get_diff_ratio, group_diff_ops, Algorithm, Change, DiffOp}; /// A builder type config for more complex uses of [`TextDiff`]. @@ -358,7 +271,7 @@ impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'n /// ratio of `0.0` would indicate completely distinct sequences. /// /// ```rust - /// # use similar::text::TextDiff; + /// # use similar::TextDiff; /// let diff = TextDiff::from_chars("abcd", "bcde"); /// assert_eq!(diff.ratio(), 0.75); /// ``` @@ -411,7 +324,7 @@ impl<'old, 'new, 'bufs, T: DiffableStr + ?Sized + 'old + 'new> TextDiff<'old, 'n /// to be considered similar. See [`TextDiff::ratio`] for more information. /// /// ``` -/// # use similar::text::get_close_matches; +/// # use similar::get_close_matches; /// let matches = get_close_matches( /// "appel", /// &["ape", "apple", "peach", "puppy"][..], diff --git a/src/text/udiff.rs b/src/udiff.rs similarity index 98% rename from src/text/udiff.rs rename to src/udiff.rs index f1d0ff9..486ac6f 100644 --- a/src/text/udiff.rs +++ b/src/udiff.rs @@ -1,10 +1,10 @@ //! This module provides unified diff functionality. //! -//! This module is available for as long as the `text` feature is enabled which -//! is enabled by default. +//! It is available for as long as the `text` feature is enabled which +//! is enabled by default: //! //! ```rust -//! use similar::text::TextDiff; +//! use similar::TextDiff; //! # let old_text = ""; //! 
# let new_text = ""; //! let text_diff = TextDiff::from_lines(old_text, new_text); @@ -21,15 +21,13 @@ //! versions by using [`UnifiedDiff.to_string`] or [`UnifiedDiff.to_writer`]. //! The former uses [`DiffableStr::to_string_lossy`], the latter uses //! [`DiffableStr::as_bytes`] for each line. - +#[cfg(feature = "text")] use std::ops::Range; use std::{fmt, io}; -use crate::text::TextDiff; +use crate::text::{DiffableStr, TextDiff}; use crate::types::{Algorithm, Change, DiffOp}; -use super::DiffableStr; - struct MissingNewlineHint(bool); impl fmt::Display for MissingNewlineHint { @@ -99,7 +97,7 @@ impl fmt::Display for UnifiedHunkHeader { /// Unified diff formatter. /// /// ```rust -/// use similar::text::TextDiff; +/// use similar::TextDiff; /// # let old_text = ""; /// # let new_text = ""; /// let text_diff = TextDiff::from_lines(old_text, new_text);