-
Notifications
You must be signed in to change notification settings - Fork 212
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactoring and performance improvements.
The major change here is that the zero-allocation reader has doubled its performance. This has resulted in a perf boost to the record/decoder iterators, but not as dramatic. In doing this, I've refactored pieces of the code, which includes some public-facing changes. 1. `ByteString` is no longer a newtype because it no longer provided any added benefit over `Vec<u8>`. Instead, it is a type alias. Since `ByteString` deref'd to `Vec<u8>`, it's possible your code will need no changes. If you used `ByteString`-specific things (like its constructor), then you'll need to replace it with standard `Vec` functions. 2. Parse errors have been tweaked. Notably, line/column numbers are no longer recorded. Instead, record/field numbers are saved. (This was done for performance reasons.) See the documentation for the error's new structure. 3. The `index` sub-module has received some documentation love and some small naming tweaks. Notably, the `csv` method was removed in favor of `Deref`/`DerefMut` impls on `Indexed`. No changes to the format were made. 4. The `quote` and `escape` methods have had their argument types tweaked. It is currently no longer possible to specify "no quoting" to the parser. [breaking-change]
- Loading branch information
1 parent
1c37d57
commit c05997d
Showing
16 changed files
with
728 additions
and
829 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,9 @@ | ||
.*.swp | ||
doc | ||
tags | ||
examples/data/ss10pusa.csv | ||
examples/ss10pusa.csv | ||
build | ||
target | ||
Cargo.lock | ||
scratch* | ||
bench_large/huge |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,16 @@ | ||
extern crate csv; | ||
|
||
use std::path::Path; | ||
|
||
fn main() { | ||
let huge = "../examples/data/ss10pusa.csv"; | ||
let mut rdr = csv::Reader::from_file(&Path::new(huge)); | ||
while !rdr.done() { | ||
loop { | ||
match rdr.next_field() { | ||
None => break, | ||
Some(f) => { f.unwrap(); } | ||
} | ||
let huge = ::std::env::args().nth(1).unwrap(); | ||
let mut rdr = csv::Reader::from_file(huge).unwrap(); | ||
let mut count = 0; | ||
loop { | ||
match rdr.next_bytes() { | ||
csv::NextField::Error(err) => panic!("{:?}", err), | ||
csv::NextField::EndOfCsv => break, | ||
csv::NextField::EndOfRecord => {} | ||
csv::NextField::Data(_) => { count += 1; } | ||
} | ||
} | ||
println!("{}", count); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
use std::borrow::{Cow, ToOwned}; | ||
use ByteString; | ||
|
||
/// A trait for types that can be viewed as a borrowed slice of bytes.
///
/// This is useful for providing an API that can abstract over Unicode
/// strings and byte strings.
pub trait BorrowBytes {
    /// Borrows `self` as a byte slice.
    ///
    /// The returned slice borrows from `self`, so it cannot outlive the
    /// value it was obtained from.
    fn borrow_bytes(&self) -> &[u8];
}
|
||
impl BorrowBytes for String { | ||
fn borrow_bytes(&self) -> &[u8] { self.as_bytes() } | ||
} | ||
|
||
impl BorrowBytes for str { | ||
fn borrow_bytes(&self) -> &[u8] { self.as_bytes() } | ||
} | ||
|
||
impl BorrowBytes for ByteString { | ||
fn borrow_bytes(&self) -> &[u8] { &**self } | ||
} | ||
|
||
impl BorrowBytes for [u8] { | ||
fn borrow_bytes(&self) -> &[u8] { self } | ||
} | ||
|
||
impl<'a, B: ?Sized> BorrowBytes for Cow<'a, B> | ||
where B: BorrowBytes + ToOwned, <B as ToOwned>::Owned: BorrowBytes { | ||
fn borrow_bytes(&self) -> &[u8] { | ||
match *self { | ||
Cow::Borrowed(v) => v.borrow_bytes(), | ||
Cow::Owned(ref v) => v.borrow_bytes(), | ||
} | ||
} | ||
} | ||
|
||
impl<'a, T: ?Sized + BorrowBytes> BorrowBytes for &'a T { | ||
fn borrow_bytes(&self) -> &[u8] { (*self).borrow_bytes() } | ||
} |
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.