Skip to content

Commit

Permalink
deps: drop 'bstr'
Browse files Browse the repository at this point in the history
It's now just a dev dependency. It wasn't really carrying its weight.
  • Loading branch information
BurntSushi committed Feb 14, 2023
1 parent 9e1126a commit a0e8388
Show file tree
Hide file tree
Showing 4 changed files with 120 additions and 14 deletions.
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ license = "Unlicense/MIT"
categories = ["encoding", "parser-implementations"]
exclude = ["/.github", "/ci/*", "/scripts/*"]
edition = "2021"
resolver = "2"

[workspace]
members = ["csv-core", "csv-index"]
Expand All @@ -20,13 +21,13 @@ members = ["csv-core", "csv-index"]
bench = false

[dependencies]
bstr = { version = "0.2.1", features = ["serde1"] }
csv-core = { path = "csv-core", version = "0.1.6" }
csv-core = { path = "csv-core", version = "0.1.10" }
itoa = "1"
ryu = "1"
serde = "1.0.55"

[dev-dependencies]
bstr = { version = "1.2.0", default-features = false, features = ["alloc", "serde"] }
serde = { version = "1.0.55", features = ["derive"] }

[profile.release]
Expand Down
48 changes: 36 additions & 12 deletions src/byte_record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@ use std::{
result,
};

use {
bstr::{BString, ByteSlice},
serde::de::Deserialize,
};
use serde::de::Deserialize;

use crate::{
deserializer::deserialize_byte_record,
Expand Down Expand Up @@ -73,11 +70,12 @@ impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a ByteRecord {

impl fmt::Debug for ByteRecord {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut fields = vec![];
for field in self {
fields.push(BString::from(field.to_vec()));
}
write!(f, "ByteRecord({:?})", fields)
write!(f, "ByteRecord(")?;
f.debug_list()
.entries(self.iter().map(crate::debug::Bytes))
.finish()?;
write!(f, ")")?;
Ok(())
}
}

Expand Down Expand Up @@ -375,8 +373,8 @@ impl ByteRecord {
let mut trimmed =
ByteRecord::with_capacity(self.as_slice().len(), self.len());
trimmed.set_position(self.position().cloned());
for field in &*self {
trimmed.push_field(field.trim());
for field in self.iter() {
trimmed.push_field(trim_ascii(field));
}
*self = trimmed;
}
Expand Down Expand Up @@ -552,7 +550,7 @@ impl ByteRecord {
// Otherwise, we must check each field individually to ensure that
// it's valid UTF-8.
for (i, field) in self.iter().enumerate() {
if let Err(err) = field.to_str() {
if let Err(err) = std::str::from_utf8(field) {
return Err(new_utf8_error(i, err.valid_up_to()));
}
}
Expand Down Expand Up @@ -857,6 +855,32 @@ impl<'r> DoubleEndedIterator for ByteRecordIter<'r> {
}
}

/// Returns `bytes` with ASCII whitespace removed from both ends.
///
/// Trailing whitespace is stripped first, then leading whitespace. The
/// returned slice borrows from `bytes`; nothing is copied.
fn trim_ascii(bytes: &[u8]) -> &[u8] {
    let without_trailing = trim_ascii_end(bytes);
    trim_ascii_start(without_trailing)
}

/// Returns `bytes` with leading ASCII whitespace removed.
///
/// Whitespace is whatever `u8::is_ascii_whitespace` accepts. If every byte
/// is whitespace (or the input is empty), an empty slice is returned.
fn trim_ascii_start(bytes: &[u8]) -> &[u8] {
    // Index of the first byte to keep; past-the-end when there is none.
    let start = bytes
        .iter()
        .position(|b| !b.is_ascii_whitespace())
        .unwrap_or(bytes.len());
    &bytes[start..]
}

/// Returns `bytes` with trailing ASCII whitespace removed.
///
/// Whitespace is whatever `u8::is_ascii_whitespace` accepts. If every byte
/// is whitespace (or the input is empty), an empty slice is returned.
fn trim_ascii_end(bytes: &[u8]) -> &[u8] {
    // One past the last byte to keep; zero when there is nothing to keep.
    let end = bytes
        .iter()
        .rposition(|b| !b.is_ascii_whitespace())
        .map_or(0, |last_kept| last_kept + 1);
    &bytes[..end]
}

#[cfg(test)]
mod tests {
use crate::string_record::StringRecord;
Expand Down
80 changes: 80 additions & 0 deletions src/debug.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/// A type that provides a human readable debug impl for arbitrary bytes.
///
/// This generally works best when the bytes are presumed to be mostly UTF-8,
/// but will work for anything.
///
/// The output is a double-quoted string in which:
///
/// * bytes that do not begin a valid UTF-8 sequence are written as `\xNN`,
/// * NUL is written as `\0`,
/// * every other ASCII control character except `\n`, `\r` and `\t` is
///   written as `\xNN`,
/// * everything else is written via `char::escape_debug`.
///
/// N.B. This is copied nearly verbatim from regex-automata.
pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]);

impl<'a> core::fmt::Debug for Bytes<'a> {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        write!(f, "\"")?;
        // This is a re-implementation of a similar impl found in bstr.
        let mut bytes = self.0;
        while let Some(result) = utf8_decode(bytes) {
            let ch = match result {
                Ok(ch) => ch,
                Err(byte) => {
                    // Not the start of a valid codepoint: emit the raw byte
                    // and resume decoding at the next one.
                    write!(f, r"\x{:02x}", byte)?;
                    bytes = &bytes[1..];
                    continue;
                }
            };
            bytes = &bytes[ch.len_utf8()..];
            match ch {
                '\0' => write!(f, "\\0")?,
                // ASCII control characters except \0, \n, \r, \t. The range
                // runs through \x1f so that the whole C0 block is covered;
                // stopping at \x19 would let \x1a..=\x1f fall through and
                // be printed in the `\u{..}` form instead.
                '\x01'..='\x08'
                | '\x0b'
                | '\x0c'
                | '\x0e'..='\x1f'
                | '\x7f' => {
                    write!(f, "\\x{:02x}", u32::from(ch))?;
                }
                // Everything else, including \n, \r and \t, uses the
                // standard debug escaping for `char`.
                _ => {
                    write!(f, "{}", ch.escape_debug())?;
                }
            }
        }
        write!(f, "\"")?;
        Ok(())
    }
}

/// Decodes the UTF-8 encoded codepoint at the start of `bytes`.
///
/// Returns `Some(Ok(ch))` when `bytes` begins with a valid encoding of `ch`.
/// Returns `Some(Err(b))`, where `b` is the first byte, when it does not:
/// the first byte cannot start a sequence, the sequence is cut short by the
/// end of the slice, or the sequence is not valid UTF-8.
///
/// Returns `None` if and only if `bytes` is empty.
pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    /// Sequence length implied by a leading byte, or `None` when the byte
    /// is a continuation byte or otherwise cannot begin a sequence.
    fn expected_len(lead: u8) -> Option<usize> {
        match lead {
            0x00..=0x7F => Some(1),
            // 0b10xx_xxxx: continuation byte, never a leader.
            0x80..=0xBF => None,
            0xC0..=0xDF => Some(2),
            0xE0..=0xEF => Some(3),
            0xF0..=0xF7 => Some(4),
            0xF8..=0xFF => None,
        }
    }

    let (&lead, _) = bytes.split_first()?;
    let width = match expected_len(lead) {
        // ASCII fast path: the byte is the codepoint.
        Some(1) => return Some(Ok(char::from(lead))),
        Some(width) if width <= bytes.len() => width,
        // Invalid leader, or the slice ends before the sequence does.
        _ => return Some(Err(lead)),
    };
    // The leading byte only suggests a length; full validation (overlong
    // forms, surrogates, bad continuation bytes) is left to `from_utf8`.
    let decoded = core::str::from_utf8(&bytes[..width])
        .map(|s| s.chars().next().unwrap())
        .map_err(|_| lead);
    Some(decoded)
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ pub use crate::{

mod byte_record;
pub mod cookbook;
mod debug;
mod deserializer;
mod error;
mod reader;
Expand Down

0 comments on commit a0e8388

Please sign in to comment.