Skip to content

Commit

Permalink
Move to new std::io.
Browse files Browse the repository at this point in the history
This is a large breaking change, but most of the fixes should be cosmetic.

First and foremost, the `IntoVector` and `StrAllocating` traits have been
removed. In all cases, they can be replaced with `IntoCow<'a, [u8]>` or
`IntoCow<'a, str>` bounds.

Secondly, `CsvResult` was renamed to `Result`, in keeping with the
conventions established in the standard library.

Thirdly, the `from_file` constructors no longer take a `Path`. They
now have an `AsPath + ?Sized` bound (which is the same one used in the
standard library).

Fourthly, the `seek` method on `csv::Reader` now takes only a starting
offset. The seek style is removed (because only `io::SeekFrom::Start`
is supported).

Fifthly, migrating to `std::io` changes many bounds (e.g., `io::Read`
instead of `io::Reader`) and some types (e.g., `io::Cursor<Vec<u8>>`
instead of `io::MemReader`).

Finally, note that the streaming example has been removed because there
are no `ChanReader` or `ChanWriter` types defined for the new `std::io`.

[breaking-change]
  • Loading branch information
BurntSushi committed Mar 4, 2015
1 parent a2b07a9 commit 7de753a
Show file tree
Hide file tree
Showing 16 changed files with 399 additions and 414 deletions.
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ license = "Unlicense"
name = "csv"

[dependencies]
rustc-serialize = "0.2.0"
byteorder = "*"
rustc-serialize = "*"

[dev-dependencies]
regex = "*"
Expand Down
8 changes: 2 additions & 6 deletions examples/nfl_plays.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
#![feature(old_path)]

extern crate csv;
extern crate "rustc-serialize" as rustc_serialize;

use std::old_path::Path;

#[allow(dead_code)]
#[derive(RustcDecodable)]
struct Play {
Expand All @@ -24,9 +20,9 @@ struct Play {
}

fn main() {
let fp = &Path::new("./data/2012_nfl_pbp_data.csv");
let fp = "./data/2012_nfl_pbp_data.csv";
let mut dec = csv::Reader::from_file(fp).unwrap();

let mut dec = csv::Reader::from_file(fp);
match dec.decode::<Play>().collect::<Result<Vec<_>, _>>() {
Err(err) => panic!("{}", err),
Ok(plays) => {
Expand Down
7 changes: 1 addition & 6 deletions examples/simple.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,7 @@
#![feature(old_path)]

extern crate csv;

use std::old_path::Path;

fn main() {
let fp = &Path::new("./data/simple.csv");
let mut rdr = csv::Reader::from_file(fp);
let mut rdr = csv::Reader::from_file("./data/simple.csv").unwrap();

for record in rdr.decode() {
let (s1, s2, dist): (String, String, usize) = record.unwrap();
Expand Down
8 changes: 2 additions & 6 deletions examples/simple_missing.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
#![feature(old_path)]

extern crate csv;
extern crate "rustc-serialize" as rustc_serialize;

use std::old_path::Path;

#[derive(RustcDecodable)]
struct Record {
s1: String,
Expand All @@ -13,8 +9,8 @@ struct Record {
}

fn main() {
let fp = &Path::new("./data/simple_missing.csv");
let mut rdr = csv::Reader::from_file(fp);
let fp = "./data/simple_missing.csv";
let mut rdr = csv::Reader::from_file(fp).unwrap();

for record in rdr.decode() {
let record: Record = record.unwrap();
Expand Down
8 changes: 2 additions & 6 deletions examples/simple_struct.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
#![feature(old_path)]

extern crate csv;
extern crate "rustc-serialize" as rustc_serialize;

use std::old_path::Path;

#[derive(RustcDecodable)]
struct Record {
s1: String,
Expand All @@ -13,8 +9,8 @@ struct Record {
}

fn main() {
let fp = &Path::new("./data/simple.csv");
let mut rdr = csv::Reader::from_file(fp);
let fp = "./data/simple.csv";
let mut rdr = csv::Reader::from_file(fp).unwrap();

for record in rdr.decode() {
let record: Record = record.unwrap();
Expand Down
28 changes: 0 additions & 28 deletions examples/stream.rs

This file was deleted.

22 changes: 10 additions & 12 deletions src/bench.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::fmt::{Debug, Display};
use std::old_io as io;
use std::old_io::ByRefReader;
use std::old_io::Reader as IoReader;
use std::fs;
use std::io::{self, Read, ReadExt, Seek};
use test::Bencher;

use Reader;
Expand All @@ -12,17 +11,16 @@ fn ordie<T, E: Debug+Display>(r: Result<T, E>) -> T {
r.or_else(|e: E| -> Result<T, E> panic!(format!("{:?}", e))).unwrap()
}

fn file_to_mem(fp: &str) -> io::MemReader {
use std::old_path::Path;

let mut f = ordie(io::File::open(&Path::new(fp)));
let bs = ordie(f.read_to_end());
io::MemReader::new(bs)
fn file_to_mem(fp: &str) -> io::Cursor<Vec<u8>> {
let mut f = ordie(fs::File::open(fp));
let mut bs = vec![];
ordie(f.read_to_end(&mut bs));
io::Cursor::new(bs)
}

fn reader<'a>(rdr: &'a mut io::MemReader)
-> Reader<io::RefReader<'a, io::MemReader>> {
let _ = ordie(rdr.seek(0, io::SeekSet));
fn reader<'a>(rdr: &'a mut io::Cursor<Vec<u8>>)
-> Reader<&'a mut io::Cursor<Vec<u8>>> {
let _ = ordie(rdr.seek(io::SeekFrom::Start(0)));
Reader::from_reader(rdr.by_ref())
}

Expand Down
144 changes: 95 additions & 49 deletions src/buffered.rs
Original file line number Diff line number Diff line change
@@ -1,79 +1,125 @@
// This is a copy of the `std::io::BufferedReader` with one additional
// This is a copy of the `std::io::BufReader` with one additional
// method: `clear`. It resets the buffer to be empty (thereby losing any
// unread data).
use std::cmp;
use std::old_io::{Reader, Buffer, IoResult};
use std::fmt;
use std::io::{self, BufRead};
use std::slice;

static DEFAULT_BUF_SIZE: usize = 1024 * 64;

pub struct BufferedReader<R> {
/// Wraps a `Read` and buffers input from it
///
/// It can be excessively inefficient to work directly with a `Read` instance.
/// For example, every call to `read` on `TcpStream` results in a system call.
/// A `BufReader` performs large, infrequent reads on the underlying `Read`
/// and maintains an in-memory buffer of the results.
pub struct BufReader<R> {
inner: R,
buf: Vec<u8>,
pos: usize,
cap: usize,
buf: io::Cursor<Vec<u8>>,
}

impl<R: Reader> BufferedReader<R> {
/// Creates a new `BufferedReader` with the specified buffer capacity
pub fn with_capacity(cap: usize, inner: R) -> BufferedReader<R> {
// It's *much* faster to create an uninitialized buffer than it is to
// fill everything in with 0. This buffer is entirely an implementation
// detail and is never exposed, so we're safe to not initialize
// everything up-front. This allows creation of BufferedReader
// instances to be very cheap (large mallocs are not nearly as
// expensive as large callocs).
let mut buf = Vec::with_capacity(cap);
unsafe { buf.set_len(cap); }
BufferedReader {
impl<R: io::Read> BufReader<R> {
/// Creates a new `BufReader` with a default buffer capacity
pub fn new(inner: R) -> BufReader<R> {
BufReader::with_capacity(DEFAULT_BUF_SIZE, inner)
}

/// Creates a new `BufReader` with the specified buffer capacity
pub fn with_capacity(cap: usize, inner: R) -> BufReader<R> {
BufReader {
inner: inner,
buf: buf,
pos: 0,
cap: 0,
buf: io::Cursor::new(Vec::with_capacity(cap)),
}
}

pub fn new(inner: R) -> BufferedReader<R> {
BufferedReader::with_capacity(DEFAULT_BUF_SIZE, inner)
}
/// Gets a reference to the underlying reader.
#[allow(dead_code)] pub fn get_ref(&self) -> &R { &self.inner }

/// Gets a mutable reference to the underlying reader.
///
/// # Warning
///
/// It is inadvisable to directly read from the underlying reader.
pub fn get_mut(&mut self) -> &mut R { &mut self.inner }

/// Unwraps this `BufReader`, returning the underlying reader.
///
/// Note that any leftover data in the internal buffer is lost.
#[allow(dead_code)] pub fn into_inner(self) -> R { self.inner }

pub fn clear(&mut self) {
let cap = self.buf.capacity();
unsafe { self.buf.set_len(cap); }
self.pos = 0;
self.cap = 0;
self.buf.set_position(0);
self.buf.get_mut().truncate(0);
}
}

impl<R: Reader> Buffer for BufferedReader<R> {
fn fill_buf<'a>(&'a mut self) -> IoResult<&'a [u8]> {
if self.pos == self.cap {
self.cap = try!(self.inner.read(self.buf.as_mut_slice()));
self.pos = 0;
impl<R: io::Read> io::Read for BufReader<R> {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
// If we don't have any buffered data and we're doing a massive read
// (larger than our internal buffer), bypass our internal buffer
// entirely.
if self.buf.get_ref().len() == self.buf.position() as usize &&
buf.len() >= self.buf.get_ref().capacity() {
return self.inner.read(buf);
}
Ok(&self.buf[self.pos..self.cap])
try!(self.fill_buf());
self.buf.read(buf)
}
}

impl<R: io::Read> io::BufRead for BufReader<R> {
fn fill_buf(&mut self) -> io::Result<&[u8]> {
// If we've reached the end of our internal buffer then we need to fetch
// some more data from the underlying reader.
if self.buf.position() as usize == self.buf.get_ref().len() {
self.buf.set_position(0);
let v = self.buf.get_mut();
v.truncate(0);
let inner = &mut self.inner;
try!(with_end_to_cap(v, |b| inner.read(b)));
}
self.buf.fill_buf()
}

fn consume(&mut self, amt: usize) {
self.pos += amt;
assert!(self.pos <= self.cap);
self.buf.consume(amt)
}
}

impl<R: Reader> Reader for BufferedReader<R> {
fn read(&mut self, buf: &mut [u8]) -> IoResult<usize> {
if self.pos == self.cap && buf.len() >= self.buf.capacity() {
return self.inner.read(buf);
}
let nread = {
let available = try!(self.fill_buf());
let nread = cmp::min(available.len(), buf.len());
slice::bytes::copy_memory(buf, &available[..nread]);
nread
};
self.pos += nread;
Ok(nread)
/// Debug output shows the wrapped reader followed by the internal buffer's
/// state as `<cursor position>/<bytes currently held>`.
impl<R: fmt::Debug> fmt::Debug for BufReader<R> {
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        // How far the cursor has advanced through the buffered bytes.
        let consumed = self.buf.position();
        // How many bytes the backing vector currently holds.
        let held = self.buf.get_ref().len();
        write!(
            fmt,
            "BufReader {{ reader: {:?}, buffer: {}/{} }}",
            self.inner, consumed, held
        )
    }
}

// Hands the spare capacity of `v` (the region between its length and its
// capacity) to `f`, then extends `v`'s length by the number of bytes `f`
// reports, clamped so the length never exceeds the capacity.
//
// This lets a `read` implementation deposit bytes directly into space that
// `v` already owns, without an intermediate buffer or an extra copy.
//
// The slice given to `f` covers memory that has not been initialized. It is
// deliberately not zeroed first, since zeroing the buffer before every read
// would be wasted work. NOTE(review): `f` should therefore only write to the
// slice and never read from it.
//
// Returns the count exactly as `f` reported it (even when that exceeds the
// space that was available). If `f` fails, its error is returned and the
// length of `v` is left unchanged.
fn with_end_to_cap<F>(v: &mut Vec<u8>, f: F) -> io::Result<usize>
        where F: FnOnce(&mut [u8]) -> io::Result<usize> {
    let len = v.len();
    let spare = v.capacity() - len;

    // SAFETY: `len <= capacity`, so `base` stays inside (or one past the end
    // of) the vector's allocation and `spare` bytes starting there all belong
    // to that allocation. Nothing else touches `v` while `tail` is alive.
    let tail = unsafe {
        let base = v.as_mut_ptr().offset(len as isize);
        slice::from_raw_parts_mut(base, spare)
    };

    let n = match f(tail) {
        Ok(n) => n,
        Err(err) => return Err(err),
    };

    // The closure (typically a `read` implementation) may report more bytes
    // than the slice could hold. Clamp against the spare room rather than
    // computing `len + n`, which could overflow for a bogus `n`.
    let grown = cmp::min(n, spare);

    // SAFETY: `len + grown <= len + spare == capacity`.
    unsafe { v.set_len(len + grown); }
    Ok(n)
}
Loading

0 comments on commit 7de753a

Please sign in to comment.