Skip to content

Commit

Permalink
csv/error: box the error.
Browse files Browse the repository at this point in the history
The error type appears to have grown quite large and can result in lots of
byte copying. It's not clear exactly how much it costs, but boxing the
error results in less work.

Since we keep the box a secret, this adds a new ErrorKind that can be
extracted from a Error if one wants to do case analysis.

Since this rejiggers the Error type from a public enum to a private struct,
this is a breaking change.
  • Loading branch information
BurntSushi committed May 25, 2017
1 parent b8175c4 commit 34e957e
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 85 deletions.
10 changes: 5 additions & 5 deletions src/deserializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use serde::de::{
use serde::de::value::BorrowedBytesDeserializer;

use byte_record::{ByteRecord, ByteRecordIter};
use error::Error;
use error::{Error, ErrorKind, new_error};
use string_record::{StringRecord, StringRecordIter};

use self::DeserializeErrorKind as DEK;
Expand All @@ -27,10 +27,10 @@ pub fn deserialize_string_record<'de, D: Deserialize<'de>>(
field: 0,
});
D::deserialize(&mut deser).map_err(|err| {
Error::Deserialize {
new_error(ErrorKind::Deserialize {
pos: record.position().map(Clone::clone),
err: err,
}
})
})
}

Expand All @@ -44,10 +44,10 @@ pub fn deserialize_byte_record<'de, D: Deserialize<'de>>(
field: 0,
});
D::deserialize(&mut deser).map_err(|err| {
Error::Deserialize {
new_error(ErrorKind::Deserialize {
pos: record.position().map(Clone::clone),
err: err,
}
})
})
}

Expand Down
95 changes: 69 additions & 26 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ use std::str;
use byte_record::{ByteRecord, Position};
use deserializer::DeserializeError;

/// A crate private constructor for `Error`.
pub fn new_error(kind: ErrorKind) -> Error {
// TODO(burntsushi): Use `pub(crate)` when it stabilizes.
Error(Box::new(kind))
}

/// A type alias for `Result<T, csv::Error>`.
pub type Result<T> = result::Result<T, Error>;

Expand All @@ -19,7 +25,34 @@ pub type Result<T> = result::Result<T, Error>;
/// `flexible` option enabled and one is reading records as raw byte strings,
/// then no error can occur.
#[derive(Debug)]
pub enum Error {
pub struct Error(Box<ErrorKind>);

impl Error {
/// Return the specific type of this error.
pub fn kind(&self) -> &ErrorKind {
&self.0
}

/// Unwrap this error into its underlying type.
pub fn into_kind(self) -> ErrorKind {
*self.0
}

/// Returns true if this is an I/O error.
///
/// If this is true, the underlying `ErrorKind` is guaranteed to be
/// `ErrorKind::Io`.
pub fn is_io_error(&self) -> bool {
match *self.0 {
ErrorKind::Io(_) => true,
_ => false,
}
}
}

/// The specific type of an error.
#[derive(Debug)]
pub enum ErrorKind {
/// An I/O error that occurred while reading CSV data.
Io(io::Error),
/// A UTF-8 decoding error that occured while reading CSV data into Rust
Expand Down Expand Up @@ -59,11 +92,18 @@ pub enum Error {
/// The deserialization error.
err: DeserializeError,
},
/// Hints that destructuring should not be exhaustive.
///
/// This enum may grow additional variants, so this makes sure clients
/// don't count on exhaustive matching. (Otherwise, adding a new variant
/// could break existing code.)
#[doc(hidden)]
__Nonexhaustive,
}

impl From<io::Error> for Error {
fn from(err: io::Error) -> Error {
Error::Io(err)
new_error(ErrorKind::Io(err))
}
}

Expand All @@ -75,50 +115,52 @@ impl From<Error> for io::Error {

impl StdError for Error {
fn description(&self) -> &str {
match *self {
Error::Io(ref err) => err.description(),
Error::Utf8 { ref err, .. } => err.description(),
Error::UnequalLengths{..} => "record of different length found",
Error::Seek => "headers unavailable on seeked CSV reader",
Error::Serialize(ref err) => err,
Error::Deserialize { ref err, .. } => err.description(),
match *self.0 {
ErrorKind::Io(ref err) => err.description(),
ErrorKind::Utf8 { ref err, .. } => err.description(),
ErrorKind::UnequalLengths{..} => "record of different length found",
ErrorKind::Seek => "headers unavailable on seeked CSV reader",
ErrorKind::Serialize(ref err) => err,
ErrorKind::Deserialize { ref err, .. } => err.description(),
_ => unreachable!(),
}
}

fn cause(&self) -> Option<&StdError> {
match *self {
Error::Io(ref err) => Some(err),
Error::Utf8 { ref err, .. } => Some(err),
Error::UnequalLengths{..} => None,
Error::Seek => None,
Error::Serialize(_) => None,
Error::Deserialize { ref err, .. } => Some(err),
match *self.0 {
ErrorKind::Io(ref err) => Some(err),
ErrorKind::Utf8 { ref err, .. } => Some(err),
ErrorKind::UnequalLengths{..} => None,
ErrorKind::Seek => None,
ErrorKind::Serialize(_) => None,
ErrorKind::Deserialize { ref err, .. } => Some(err),
_ => unreachable!(),
}
}
}

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::Io(ref err) => err.fmt(f),
Error::Utf8 { pos: None, ref err } => {
match *self.0 {
ErrorKind::Io(ref err) => err.fmt(f),
ErrorKind::Utf8 { pos: None, ref err } => {
write!(f, "CSV parse error: field {}: {}", err.field(), err)
}
Error::Utf8 { pos: Some(ref pos), ref err } => {
ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
write!(
f,
"CSV parse error: record {} \
(line {}, field: {}, byte: {}): {}",
pos.record(), pos.line(), err.field(), pos.byte(), err)
}
Error::UnequalLengths { pos: None, expected_len, len } => {
ErrorKind::UnequalLengths { pos: None, expected_len, len } => {
write!(
f, "CSV error: \
found record with {} fields, but the previous record \
has {} fields",
len, expected_len)
}
Error::UnequalLengths {
ErrorKind::UnequalLengths {
pos: Some(ref pos), expected_len, len
} => {
write!(
Expand All @@ -127,24 +169,25 @@ impl fmt::Display for Error {
has {} fields",
pos.record(), pos.line(), pos.byte(), len, expected_len)
}
Error::Seek => {
ErrorKind::Seek => {
write!(f, "CSV error: cannot access headers of CSV data \
when the parser was seeked before the first record \
could be read")
}
Error::Serialize(ref err) => {
ErrorKind::Serialize(ref err) => {
write!(f, "CSV write error: {}", err)
}
Error::Deserialize { pos: None, ref err } => {
ErrorKind::Deserialize { pos: None, ref err } => {
write!(f, "CSV deserialize error: {}", err)
}
Error::Deserialize { pos: Some(ref pos), ref err } => {
ErrorKind::Deserialize { pos: Some(ref pos), ref err } => {
write!(
f,
"CSV deserialize error: record {} \
(line: {}, byte: {}): {}",
pos.record(), pos.line(), pos.byte(), err)
}
_ => unreachable!(),
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,9 @@ use serde::{Deserialize, Deserializer};

pub use byte_record::{ByteRecord, ByteRecordIter, Position};
pub use deserializer::{DeserializeError, DeserializeErrorKind};
pub use error::{Error, FromUtf8Error, IntoInnerError, Result, Utf8Error};
pub use error::{
Error, ErrorKind, FromUtf8Error, IntoInnerError, Result, Utf8Error,
};
pub use reader::{
Reader, ReaderBuilder,
DeserializeRecordsIntoIter, DeserializeRecordsIter,
Expand Down
62 changes: 36 additions & 26 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use csv_core::{
use serde::de::DeserializeOwned;

use byte_record::{self, ByteRecord, Position};
use error::{Error, Result, Utf8Error};
use error::{ErrorKind, Result, Utf8Error, new_error};
use string_record::{self, StringRecord};
use Terminator;

Expand Down Expand Up @@ -280,7 +280,7 @@ impl ReaderBuilder {
/// extern crate csv;
///
/// use std::error::Error;
/// use csv::ReaderBuilder;
/// use csv::{ErrorKind, ReaderBuilder};
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<Error>> {
Expand All @@ -294,15 +294,15 @@ impl ReaderBuilder {
/// .from_reader(data.as_bytes());
///
/// if let Some(Err(err)) = rdr.records().next() {
/// match err {
/// csv::Error::UnequalLengths { expected_len, len, .. } => {
/// match *err.kind() {
/// ErrorKind::UnequalLengths { expected_len, len, .. } => {
/// // The header row has 3 fields...
/// assert_eq!(expected_len, 3);
/// // ... but the first row has only 2 fields.
/// assert_eq!(len, 2);
/// Ok(())
/// }
/// wrong => {
/// ref wrong => {
/// Err(From::from(format!(
/// "expected UnequalLengths error but got {:?}",
/// wrong)))
Expand Down Expand Up @@ -1265,10 +1265,10 @@ impl<R: io::Read> Reader<R> {
let headers = self.state.headers.as_ref().unwrap();
match headers.string_record {
Ok(ref record) => Ok(record),
Err(ref err) => Err(Error::Utf8 {
Err(ref err) => Err(new_error(ErrorKind::Utf8 {
pos: headers.byte_record.position().map(Clone::clone),
err: err.clone(),
}),
})),
}
}

Expand Down Expand Up @@ -1811,11 +1811,11 @@ impl ReaderState {
None => self.first_field_count = Some(record.len() as u64),
Some(expected) => {
if record.len() as u64 != expected {
return Err(Error::UnequalLengths {
return Err(new_error(ErrorKind::UnequalLengths {
pos: record.position().map(Clone::clone),
expected_len: expected,
len: record.len() as u64,
});
}));
}
}
}
Expand Down Expand Up @@ -2092,7 +2092,7 @@ mod tests {
use std::io;

use byte_record::ByteRecord;
use error::Error;
use error::ErrorKind;
use string_record::StringRecord;

use super::{ReaderBuilder, Position};
Expand Down Expand Up @@ -2151,12 +2151,17 @@ mod tests {
assert_eq!("foo", s(&rec[0]));

match rdr.read_byte_record(&mut rec) {
Err(Error::UnequalLengths {
expected_len: 1,
pos,
len: 2,
}) => {
assert_eq!(pos, Some(newpos(4, 2, 1)));
Err(err) => {
match *err.kind() {
ErrorKind::UnequalLengths {
expected_len: 1,
ref pos,
len: 2,
} => {
assert_eq!(pos, &Some(newpos(4, 2, 1)));
}
ref wrong => panic!("match failed, got {:?}", wrong),
}
}
wrong => panic!("match failed, got {:?}", wrong),
}
Expand Down Expand Up @@ -2198,12 +2203,17 @@ mod tests {
assert_eq!("foo", s(&rec[0]));

match rdr.read_byte_record(&mut rec) {
Err(Error::UnequalLengths {
expected_len: 1,
pos,
len: 2,
}) => {
assert_eq!(pos, Some(newpos(4, 2, 1)));
Err(err) => {
match err.kind() {
&ErrorKind::UnequalLengths {
expected_len: 1,
ref pos,
len: 2,
} => {
assert_eq!(pos, &Some(newpos(4, 2, 1)));
}
wrong => panic!("match failed, got {:?}", wrong),
}
}
wrong => panic!("match failed, got {:?}", wrong),
}
Expand Down Expand Up @@ -2272,13 +2282,13 @@ mod tests {
assert_eq!(b"b\xFFar", &headers[1]);
assert_eq!(b"baz", &headers[2]);
}
match rdr.headers().unwrap_err() {
Error::Utf8 { pos: Some(pos), err } => {
assert_eq!(pos, newpos(0, 1, 0));
match *rdr.headers().unwrap_err().kind() {
ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
assert_eq!(pos, &newpos(0, 1, 0));
assert_eq!(err.field(), 1);
assert_eq!(err.valid_up_to(), 1);
}
err => panic!("match failed, got {:?}", err),
ref err => panic!("match failed, got {:?}", err),
}
}

Expand Down
Loading

0 comments on commit 34e957e

Please sign in to comment.