Move to new std::io.

This is a large breaking change, but most of the fixes should be cosmetic. First and foremost, the `IntoVector` and `StrAllocating` traits have been removed. In all cases, they can be replaced with `IntoCow<'a, [u8]>` or `IntoCow<'a, str>` bounds. Secondly, `CsvResult` was renamed to `Result`, in keeping with the conventions established in the standard library. Thirdly, the `from_file` constructors no longer take a `Path`. They now have an `AsPath + ?Sized` bound (which is the same used in the standard library). Fourthly, the `seek` method on `csv::Reader` now takes only a starting offset. The seek style is removed (because only `io::SeekFrom::Start` is supported). Fifthly, migrating to `std::io` changes many bounds (e.g., `io::Read` instead of `io::Reader`) and some types (e.g., `io::Cursor<Vec<u8>>` instead of `io::MemReader`). Finally, note that the streaming example has been removed because there are no `ChanReader` or `ChanWriter` types defined for the new `std::io`. [breaking-change]
BurntSushi · Mar 4, 2015 · 7de753a · 7de753a
1 parent a2b07a9
commit 7de753a
Show file tree

Hide file tree

Showing 16 changed files with 399 additions and 414 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -14,7 +14,8 @@ license = "Unlicense"
 name = "csv"
 
 [dependencies]
-rustc-serialize = "0.2.0"
+byteorder = "*"
+rustc-serialize = "*"
 
 [dev-dependencies]
 regex = "*"

diff --git a/examples/nfl_plays.rs b/examples/nfl_plays.rs
@@ -1,10 +1,6 @@
-#![feature(old_path)]
-
 extern crate csv;
 extern crate "rustc-serialize" as rustc_serialize;
 
-use std::old_path::Path;
-
 #[allow(dead_code)]
 #[derive(RustcDecodable)]
 struct Play {
@@ -24,9 +20,9 @@ struct Play {
 }
 
 fn main() {
- let fp = &Path::new("./data/2012_nfl_pbp_data.csv");
+ let fp = "./data/2012_nfl_pbp_data.csv";
+ let mut dec = csv::Reader::from_file(fp).unwrap();
 
- let mut dec = csv::Reader::from_file(fp);
  match dec.decode::<Play>().collect::<Result<Vec<_>, _>>() {
  Err(err) => panic!("{}", err),
  Ok(plays) => {

diff --git a/examples/simple.rs b/examples/simple.rs
@@ -1,12 +1,7 @@
-#![feature(old_path)]
-
 extern crate csv;
 
-use std::old_path::Path;
-
 fn main() {
- let fp = &Path::new("./data/simple.csv");
- let mut rdr = csv::Reader::from_file(fp);
+ let mut rdr = csv::Reader::from_file("./data/simple.csv").unwrap();
 
  for record in rdr.decode() {
  let (s1, s2, dist): (String, String, usize) = record.unwrap();

diff --git a/examples/simple_missing.rs b/examples/simple_missing.rs
@@ -1,10 +1,6 @@
-#![feature(old_path)]
-
 extern crate csv;
 extern crate "rustc-serialize" as rustc_serialize;
 
-use std::old_path::Path;
-
 #[derive(RustcDecodable)]
 struct Record {
  s1: String,
@@ -13,8 +9,8 @@ struct Record {
 }
 
 fn main() {
- let fp = &Path::new("./data/simple_missing.csv");
- let mut rdr = csv::Reader::from_file(fp);
+ let fp = "./data/simple_missing.csv";
+ let mut rdr = csv::Reader::from_file(fp).unwrap();
 
  for record in rdr.decode() {
  let record: Record = record.unwrap();

diff --git a/examples/simple_struct.rs b/examples/simple_struct.rs
@@ -1,10 +1,6 @@
-#![feature(old_path)]
-
 extern crate csv;
 extern crate "rustc-serialize" as rustc_serialize;
 
-use std::old_path::Path;
-
 #[derive(RustcDecodable)]
 struct Record {
  s1: String,
@@ -13,8 +9,8 @@ struct Record {
 }
 
 fn main() {
- let fp = &Path::new("./data/simple.csv");
- let mut rdr = csv::Reader::from_file(fp);
+ let fp = "./data/simple.csv";
+ let mut rdr = csv::Reader::from_file(fp).unwrap();
 
  for record in rdr.decode() {
  let record: Record = record.unwrap();

diff --git a/examples/stream.rs b/examples/stream.rs
diff --git a/src/bench.rs b/src/bench.rs
@@ -1,7 +1,6 @@
 use std::fmt::{Debug, Display};
-use std::old_io as io;
-use std::old_io::ByRefReader;
-use std::old_io::Reader as IoReader;
+use std::fs;
+use std::io::{self, Read, ReadExt, Seek};
 use test::Bencher;
 
 use Reader;
@@ -12,17 +11,16 @@ fn ordie<T, E: Debug+Display>(r: Result<T, E>) -> T {
  r.or_else(|e: E| -> Result<T, E> panic!(format!("{:?}", e))).unwrap()
 }
 
-fn file_to_mem(fp: &str) -> io::MemReader {
- use std::old_path::Path;
-
- let mut f = ordie(io::File::open(&Path::new(fp)));
- let bs = ordie(f.read_to_end());
- io::MemReader::new(bs)
+fn file_to_mem(fp: &str) -> io::Cursor<Vec<u8>> {
+ let mut f = ordie(fs::File::open(fp));
+ let mut bs = vec![];
+ ordie(f.read_to_end(&mut bs));
+ io::Cursor::new(bs)
 }
 
-fn reader<'a>(rdr: &'a mut io::MemReader)
- -> Reader<io::RefReader<'a, io::MemReader>> {
- let _ = ordie(rdr.seek(0, io::SeekSet));
+fn reader<'a>(rdr: &'a mut io::Cursor<Vec<u8>>)
+ -> Reader<&'a mut io::Cursor<Vec<u8>>> {
+ let _ = ordie(rdr.seek(io::SeekFrom::Start(0)));
  Reader::from_reader(rdr.by_ref())
 }
 

diff --git a/src/buffered.rs b/src/buffered.rs
@@ -1,79 +1,125 @@
-// This is a copy of the `std::io::BufferedReader` with one additional
+// This is a copy of the `std::io::BufReader` with one additional
 // method: `clear`. It resets the buffer to be empty (thereby losing any
 // unread data).
 use std::cmp;
-use std::old_io::{Reader, Buffer, IoResult};
+use std::fmt;
+use std::io::{self, BufRead};
 use std::slice;
 
 static DEFAULT_BUF_SIZE: usize = 1024 * 64;
 
-pub struct BufferedReader<R> {
+/// Wraps a `Read` and buffers input from it
+///
+/// It can be excessively inefficient to work directly with a `Read` instance.
+/// For example, every call to `read` on `TcpStream` results in a system call.
+/// A `BufReader` performs large, infrequent reads on the underlying `Read`
+/// and maintains an in-memory buffer of the results.
+pub struct BufReader<R> {
  inner: R,
- buf: Vec<u8>,
- pos: usize,
- cap: usize,
+ buf: io::Cursor<Vec<u8>>,
 }
 
-impl<R: Reader> BufferedReader<R> {
- /// Creates a new `BufferedReader` with the specified buffer capacity
- pub fn with_capacity(cap: usize, inner: R) -> BufferedReader<R> {
- // It's *much* faster to create an uninitialized buffer than it is to
- // fill everything in with 0. This buffer is entirely an implementation
- // detail and is never exposed, so we're safe to not initialize
- // everything up-front. This allows creation of BufferedReader
- // instances to be very cheap (large mallocs are not nearly as
- // expensive as large callocs).
- let mut buf = Vec::with_capacity(cap);
- unsafe { buf.set_len(cap); }
- BufferedReader {
+impl<R: io::Read> BufReader<R> {
+ /// Creates a new `BufReader` with a default buffer capacity
+ pub fn new(inner: R) -> BufReader<R> {
+ BufReader::with_capacity(DEFAULT_BUF_SIZE, inner)
+ }
+
+ /// Creates a new `BufReader` with the specified buffer capacity
+ pub fn with_capacity(cap: usize, inner: R) -> BufReader<R> {
+ BufReader {
  inner: inner,
- buf: buf,
- pos: 0,
- cap: 0,
+ buf: io::Cursor::new(Vec::with_capacity(cap)),
  }
  }
 
- pub fn new(inner: R) -> BufferedReader<R> {
- BufferedReader::with_capacity(DEFAULT_BUF_SIZE, inner)
- }
+ /// Gets a reference to the underlying reader.
+ #[allow(dead_code)] pub fn get_ref(&self) -> &R { &self.inner }
 
+ /// Gets a mutable reference to the underlying reader.
+ ///
+ /// # Warning
+ ///
+ /// It is inadvisable to directly read from the underlying reader.
  pub fn get_mut(&mut self) -> &mut R { &mut self.inner }
 
+ /// Unwraps this `BufReader`, returning the underlying reader.
+ ///
+ /// Note that any leftover data in the internal buffer is lost.
+ #[allow(dead_code)] pub fn into_inner(self) -> R { self.inner }
+
  pub fn clear(&mut self) {
- let cap = self.buf.capacity();
- unsafe { self.buf.set_len(cap); }
- self.pos = 0;
- self.cap = 0;
+ self.buf.set_position(0);
+ self.buf.get_mut().truncate(0);
  }
 }
 
-impl<R: Reader> Buffer for BufferedReader<R> {
- fn fill_buf<'a>(&'a mut self) -> IoResult<&'a [u8]> {
- if self.pos == self.cap {
- self.cap = try!(self.inner.read(self.buf.as_mut_slice()));
- self.pos = 0;
+impl<R: io::Read> io::Read for BufReader<R> {
+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+ // If we don't have any buffered data and we're doing a massive read
+ // (larger than our internal buffer), bypass our internal buffer
+ // entirely.
+ if self.buf.get_ref().len() == self.buf.position() as usize &&
+ buf.len() >= self.buf.get_ref().capacity() {
+ return self.inner.read(buf);
  }
- Ok(&self.buf[self.pos..self.cap])
+ try!(self.fill_buf());
+ self.buf.read(buf)
+ }
+}
+
+impl<R: io::Read> io::BufRead for BufReader<R> {
+ fn fill_buf(&mut self) -> io::Result<&[u8]> {
+ // If we've reached the end of our internal buffer then we need to fetch
+ // some more data from the underlying reader.
+ if self.buf.position() as usize == self.buf.get_ref().len() {
+ self.buf.set_position(0);
+ let v = self.buf.get_mut();
+ v.truncate(0);
+ let inner = &mut self.inner;
+ try!(with_end_to_cap(v, |b| inner.read(b)));
+ }
+ self.buf.fill_buf()
  }
 
  fn consume(&mut self, amt: usize) {
- self.pos += amt;
- assert!(self.pos <= self.cap);
+ self.buf.consume(amt)
  }
 }
 
-impl<R: Reader> Reader for BufferedReader<R> {
- fn read(&mut self, buf: &mut [u8]) -> IoResult<usize> {
- if self.pos == self.cap && buf.len() >= self.buf.capacity() {
- return self.inner.read(buf);
- }
- let nread = {
- let available = try!(self.fill_buf());
- let nread = cmp::min(available.len(), buf.len());
- slice::bytes::copy_memory(buf, &available[..nread]);
- nread
- };
- self.pos += nread;
- Ok(nread)
+impl<R> fmt::Debug for BufReader<R> where R: fmt::Debug {
+ fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+ write!(fmt, "BufReader {{ reader: {:?}, buffer: {}/{} }}",
+ self.inner, self.buf.position(), self.buf.get_ref().len())
+ }
+}
+
+// Acquires a slice of the vector `v` from its length to its capacity
+// (uninitialized data), reads into it, and then updates the length.
+//
+// This function is leveraged to efficiently read some bytes into a destination
+// vector without extra copying and taking advantage of the space that's already
+// in `v`.
+//
+// The buffer we're passing down, however, is pointing at uninitialized data
+// (the end of a `Vec`), and many operations will be *much* faster if we don't
+// have to zero it out. In order to prevent LLVM from generating an `undef`
+// value when reads happen from this uninitialized memory, we force LLVM to
+// think it's initialized by sending it through a black box. This should prevent
+// actual undefined behavior after optimizations.
+fn with_end_to_cap<F>(v: &mut Vec<u8>, f: F) -> io::Result<usize>
+ where F: FnOnce(&mut [u8]) -> io::Result<usize> {
+ unsafe {
+ let n = try!(f({
+ let base = v.as_mut_ptr().offset(v.len() as isize);
+ slice::from_raw_parts_mut(base, v.capacity() - v.len())
+ }));
+
+ // If the closure (typically a `read` implementation) reported that it
+ // read a larger number of bytes than the vector actually has, we need
+ // to be sure to clamp the vector to at most its capacity.
+ let new_len = cmp::min(v.capacity(), v.len() + n);
+ v.set_len(new_len);
+ return Ok(n);
  }
 }