deps: drop 'bstr'

It's now just a dev dependency. It wasn't really carrying its weight.
BurntSushi · Feb 14, 2023 · a0e8388 · a0e8388
1 parent 9e1126a
commit a0e8388
Show file tree

Hide file tree

Showing 4 changed files with 120 additions and 14 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -12,6 +12,7 @@ license = "Unlicense/MIT"
 categories = ["encoding", "parser-implementations"]
 exclude = ["/.github", "/ci/*", "/scripts/*"]
 edition = "2021"
+resolver = "2"
 
 [workspace]
 members = ["csv-core", "csv-index"]
@@ -20,13 +21,13 @@ members = ["csv-core", "csv-index"]
 bench = false
 
 [dependencies]
-bstr = { version = "0.2.1", features = ["serde1"] }
-csv-core = { path = "csv-core", version = "0.1.6" }
+csv-core = { path = "csv-core", version = "0.1.10" }
 itoa = "1"
 ryu = "1"
 serde = "1.0.55"
 
 [dev-dependencies]
+bstr = { version = "1.2.0", default-features = false, features = ["alloc", "serde"] }
 serde = { version = "1.0.55", features = ["derive"] }
 
 [profile.release]

diff --git a/src/byte_record.rs b/src/byte_record.rs
@@ -5,10 +5,7 @@ use std::{
  result,
 };
 
-use {
- bstr::{BString, ByteSlice},
- serde::de::Deserialize,
-};
+use serde::de::Deserialize;
 
 use crate::{
  deserializer::deserialize_byte_record,
@@ -73,11 +70,12 @@ impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a ByteRecord {
 
 impl fmt::Debug for ByteRecord {
  fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- let mut fields = vec![];
- for field in self {
- fields.push(BString::from(field.to_vec()));
- }
- write!(f, "ByteRecord({:?})", fields)
+ write!(f, "ByteRecord(")?;
+ f.debug_list()
+ .entries(self.iter().map(crate::debug::Bytes))
+ .finish()?;
+ write!(f, ")")?;
+ Ok(())
  }
 }
 
@@ -375,8 +373,8 @@ impl ByteRecord {
  let mut trimmed =
  ByteRecord::with_capacity(self.as_slice().len(), self.len());
  trimmed.set_position(self.position().cloned());
- for field in &*self {
- trimmed.push_field(field.trim());
+ for field in self.iter() {
+ trimmed.push_field(trim_ascii(field));
  }
  *self = trimmed;
  }
@@ -552,7 +550,7 @@ impl ByteRecord {
  // Otherwise, we must check each field individually to ensure that
  // it's valid UTF-8.
  for (i, field) in self.iter().enumerate() {
- if let Err(err) = field.to_str() {
+ if let Err(err) = std::str::from_utf8(field) {
  return Err(new_utf8_error(i, err.valid_up_to()));
  }
  }
@@ -857,6 +855,32 @@ impl<'r> DoubleEndedIterator for ByteRecordIter<'r> {
  }
 }
 
+/// Strips ASCII whitespace from both ends of `bytes`.
+///
+/// The trailing run is removed first and the leading run second. The
+/// returned slice borrows from `bytes`; nothing is copied.
+fn trim_ascii(bytes: &[u8]) -> &[u8] {
+    let without_tail = trim_ascii_end(bytes);
+    trim_ascii_start(without_tail)
+}
+
+/// Returns `bytes` with its leading run of ASCII whitespace removed.
+///
+/// "ASCII whitespace" is whatever `u8::is_ascii_whitespace` accepts. If
+/// every byte is whitespace (or `bytes` is empty), the result is the empty
+/// slice at the end of `bytes`.
+fn trim_ascii_start(bytes: &[u8]) -> &[u8] {
+    // Index of the first byte to keep, or `len` when there is none.
+    let start = bytes
+        .iter()
+        .position(|b| !b.is_ascii_whitespace())
+        .unwrap_or(bytes.len());
+    &bytes[start..]
+}
+
+/// Returns `bytes` with its trailing run of ASCII whitespace removed.
+///
+/// "ASCII whitespace" is whatever `u8::is_ascii_whitespace` accepts. If
+/// every byte is whitespace (or `bytes` is empty), the result is the empty
+/// slice at the start of `bytes`.
+fn trim_ascii_end(bytes: &[u8]) -> &[u8] {
+    // One past the last byte to keep, or 0 when there is none.
+    let end = bytes
+        .iter()
+        .rposition(|b| !b.is_ascii_whitespace())
+        .map_or(0, |last| last + 1);
+    &bytes[..end]
+}
+
 #[cfg(test)]
 mod tests {
  use crate::string_record::StringRecord;

diff --git a/src/debug.rs b/src/debug.rs
@@ -0,0 +1,80 @@
+/// A type that provides a human readable debug impl for arbitrary bytes.
+///
+/// This generally works best when the bytes are presumed to be mostly UTF-8,
+/// but will work for anything.
+///
+/// N.B. This is copied nearly verbatim from regex-automata. Sigh.
+pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]);
+
+impl<'a> core::fmt::Debug for Bytes<'a> {
+    /// Writes the wrapped bytes as a double-quoted, escaped string.
+    ///
+    /// Each valid UTF-8 codepoint is escaped on its own: NUL as `\0`, every
+    /// other ASCII control character except `\n`, `\r` and `\t` as `\xNN`,
+    /// and anything else via `char::escape_debug`. A byte that does not
+    /// begin a valid UTF-8 sequence is written as `\xNN`.
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "\"")?;
+        // This is a sad re-implementation of a similar impl found in bstr.
+        let mut bytes = self.0;
+        while let Some(result) = utf8_decode(bytes) {
+            let ch = match result {
+                Ok(ch) => ch,
+                Err(byte) => {
+                    // Invalid UTF-8: emit the offending byte and resume
+                    // decoding at the byte right after it.
+                    write!(f, r"\x{:02x}", byte)?;
+                    bytes = &bytes[1..];
+                    continue;
+                }
+            };
+            bytes = &bytes[ch.len_utf8()..];
+            match ch {
+                '\0' => write!(f, "\\0")?,
+                // ASCII control characters except \0, \n, \r, \t. The
+                // control range runs through 0x1f, so the upper bound must
+                // be 0x1f; stopping at 0x19 would print 0x1a..=0x1f as
+                // `\u{..}` instead of `\x..`.
+                '\x01'..='\x08'
+                | '\x0b'
+                | '\x0c'
+                | '\x0e'..='\x1f'
+                | '\x7f' => {
+                    write!(f, "\\x{:02x}", u32::from(ch))?;
+                }
+                // Everything else, including \n, \r and \t.
+                _ => write!(f, "{}", ch.escape_debug())?,
+            }
+        }
+        write!(f, "\"")
+    }
+}
+
+/// Decodes the UTF-8 encoded codepoint at the front of `bytes`.
+///
+/// Returns `Some(Ok(ch))` when `bytes` begins with a valid encoding of
+/// `ch`, and `Some(Err(b))`, where `b` is the first byte, when it does not
+/// (including when the sequence is cut short by the end of the slice).
+///
+/// Returns `None` if and only if `bytes` is empty.
+pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
+    let first = *bytes.first()?;
+    // Sequence length announced by the leading byte, or `None` when that
+    // byte cannot start a sequence at all.
+    let expected_len = match first {
+        // ASCII decodes to itself; no validation needed.
+        0x00..=0x7F => return Some(Ok(char::from(first))),
+        // A continuation byte can never lead.
+        0x80..=0xBF => None,
+        0xC0..=0xDF => Some(2),
+        0xE0..=0xEF => Some(3),
+        0xF0..=0xF7 => Some(4),
+        0xF8..=0xFF => None,
+    };
+    // `get` rejects sequences truncated by the end of the input, and
+    // `from_utf8` rejects everything else that is malformed.
+    let decoded = expected_len
+        .and_then(|len| bytes.get(..len))
+        .and_then(|seq| core::str::from_utf8(seq).ok())
+        .and_then(|s| s.chars().next());
+    Some(decoded.ok_or(first))
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -164,6 +164,7 @@ pub use crate::{
 
 mod byte_record;
 pub mod cookbook;
+mod debug;
 mod deserializer;
 mod error;
 mod reader;