Move bytes implementation into a submodule in abstractions

This commit is contained in:
Armin Ronacher 2021-02-03 14:29:24 +01:00
parent 8a6d1716ec
commit 3164c72062

View file

@ -1,6 +1,3 @@
#[cfg(feature = "bytes")]
use bstr::ByteSlice;
use std::borrow::Cow; use std::borrow::Cow;
use std::hash::Hash; use std::hash::Hash;
use std::ops::Range; use std::ops::Range;
@ -47,15 +44,6 @@ impl<'a, T: DiffableStr + ?Sized> DiffableStrRef for Cow<'a, T> {
} }
} }
/// Implements `DiffableStrRef` for `Vec<u8>`, using `[u8]` as the output type.
///
/// Only compiled when the `bytes` feature is enabled.
#[cfg(feature = "bytes")]
impl DiffableStrRef for Vec<u8> {
// The borrowed form handed out by `as_diffable_str` is a plain byte slice.
type Output = [u8];
/// Borrows the vector's contents as a `&[u8]` without copying.
fn as_diffable_str(&self) -> &[u8] {
self.as_slice()
}
}
/// All supported diffable strings. /// All supported diffable strings.
/// ///
/// The text module can work with different types of strings depending /// The text module can work with different types of strings depending
@ -221,115 +209,129 @@ impl DiffableStr for str {
} }
} }
/// Allows viewing ASCII-compatible byte slices as strings.
///
/// Requires the `bytes` feature.
#[cfg(feature = "bytes")] #[cfg(feature = "bytes")]
impl DiffableStr for [u8] { mod bytes_support {
fn tokenize_lines(&self) -> Vec<&Self> { use super::*;
let mut iter = self.char_indices().peekable();
let mut last_pos = 0;
let mut lines = vec![];
while let Some((_, end, c)) = iter.next() { use bstr::ByteSlice;
if c == '\r' {
if iter.peek().map_or(false, |x| x.2 == '\n') { impl DiffableStrRef for Vec<u8> {
lines.push(&self[last_pos..end + 1]); type Output = [u8];
iter.next();
last_pos = end + 1; fn as_diffable_str(&self) -> &[u8] {
} else { self.as_slice()
}
}
/// Allows viewing ASCII-compatible byte slices as strings.
///
/// Requires the `bytes` feature.
impl DiffableStr for [u8] {
fn tokenize_lines(&self) -> Vec<&Self> {
let mut iter = self.char_indices().peekable();
let mut last_pos = 0;
let mut lines = vec![];
while let Some((_, end, c)) = iter.next() {
if c == '\r' {
if iter.peek().map_or(false, |x| x.2 == '\n') {
lines.push(&self[last_pos..end + 1]);
iter.next();
last_pos = end + 1;
} else {
lines.push(&self[last_pos..end]);
last_pos = end;
}
} else if c == '\n' {
lines.push(&self[last_pos..end]); lines.push(&self[last_pos..end]);
last_pos = end; last_pos = end;
} }
} else if c == '\n' {
lines.push(&self[last_pos..end]);
last_pos = end;
} }
if last_pos < self.len() {
lines.push(&self[last_pos..]);
}
lines
} }
if last_pos < self.len() { fn tokenize_lines_and_newlines(&self) -> Vec<&Self> {
lines.push(&self[last_pos..]); let mut rv = vec![];
} let mut iter = self.char_indices().peekable();
lines while let Some((start, mut end, c)) = iter.next() {
} let is_newline = c == '\r' || c == '\n';
while let Some(&(_, new_end, next_char)) = iter.peek() {
fn tokenize_lines_and_newlines(&self) -> Vec<&Self> { if (next_char == '\r' || next_char == '\n') != is_newline {
let mut rv = vec![]; break;
let mut iter = self.char_indices().peekable(); }
iter.next();
while let Some((start, mut end, c)) = iter.next() { end = new_end;
let is_newline = c == '\r' || c == '\n';
while let Some(&(_, new_end, next_char)) = iter.peek() {
if (next_char == '\r' || next_char == '\n') != is_newline {
break;
} }
iter.next(); rv.push(&self[start..end]);
end = new_end;
} }
rv.push(&self[start..end]);
rv
} }
rv fn tokenize_words(&self) -> Vec<&Self> {
} let mut iter = self.char_indices().peekable();
let mut rv = vec![];
fn tokenize_words(&self) -> Vec<&Self> { while let Some((start, mut end, c)) = iter.next() {
let mut iter = self.char_indices().peekable(); let is_whitespace = c.is_whitespace();
let mut rv = vec![]; while let Some(&(_, new_end, next_char)) = iter.peek() {
if next_char.is_whitespace() != is_whitespace {
while let Some((start, mut end, c)) = iter.next() { break;
let is_whitespace = c.is_whitespace(); }
while let Some(&(_, new_end, next_char)) = iter.peek() { iter.next();
if next_char.is_whitespace() != is_whitespace { end = new_end;
break;
} }
iter.next(); rv.push(&self[start..end]);
end = new_end;
} }
rv.push(&self[start..end]);
rv
} }
rv #[cfg(feature = "unicode")]
} fn tokenize_unicode_words(&self) -> Vec<&Self> {
self.words_with_breaks().map(|x| x.as_bytes()).collect()
}
#[cfg(feature = "unicode")] #[cfg(feature = "unicode")]
fn tokenize_unicode_words(&self) -> Vec<&Self> { fn tokenize_graphemes(&self) -> Vec<&Self> {
self.words_with_breaks().map(|x| x.as_bytes()).collect() self.graphemes().map(|x| x.as_bytes()).collect()
} }
#[cfg(feature = "unicode")] fn tokenize_chars(&self) -> Vec<&Self> {
fn tokenize_graphemes(&self) -> Vec<&Self> { self.char_indices()
self.graphemes().map(|x| x.as_bytes()).collect() .map(move |(start, end, _)| &self[start..end])
} .collect()
}
fn tokenize_chars(&self) -> Vec<&Self> { fn as_str(&self) -> Option<&str> {
self.char_indices() std::str::from_utf8(self).ok()
.map(move |(start, end, _)| &self[start..end]) }
.collect()
}
fn as_str(&self) -> Option<&str> { fn to_string_lossy(&self) -> Cow<'_, str> {
std::str::from_utf8(self).ok() String::from_utf8_lossy(self)
} }
fn to_string_lossy(&self) -> Cow<'_, str> { fn ends_with_newline(&self) -> bool {
String::from_utf8_lossy(self) matches!(self.last_byte(), Some(b'\r') | Some(b'\n'))
} }
fn ends_with_newline(&self) -> bool { fn len(&self) -> usize {
matches!(self.last_byte(), Some(b'\r') | Some(b'\n')) <[u8]>::len(self)
} }
fn len(&self) -> usize { fn slice(&self, rng: Range<usize>) -> &Self {
<[u8]>::len(self) &self[rng]
} }
fn slice(&self, rng: Range<usize>) -> &Self { fn as_bytes(&self) -> &[u8] {
&self[rng] self
} }
fn as_bytes(&self) -> &[u8] {
self
} }
} }