src/reader.rs

use std::fs::File;
use std::io::{self, BufRead, Seek};
use std::marker::PhantomData;
use std::path::Path;
use std::result;

use csv_core::{Reader as CoreReader, ReaderBuilder as CoreReaderBuilder};
use serde::de::DeserializeOwned;

use crate::byte_record::{ByteRecord, Position};
use crate::error::{Error, ErrorKind, Result, Utf8Error};
use crate::string_record::StringRecord;
use crate::{Terminator, Trim};

/// Builds a CSV reader with various configuration knobs.
///
/// This builder can be used to tweak the field delimiter, record terminator
/// and more. Once a CSV `Reader` is built, its configuration cannot be
/// changed.
#[derive(Debug)]
pub struct ReaderBuilder {
    capacity: usize,
    flexible: bool,
    has_headers: bool,
    trim: Trim,
    /// The underlying CSV parser builder.
    ///
    /// We explicitly put this on the heap because CoreReaderBuilder embeds an
    /// entire DFA transition table, which along with other things, tallies up
    /// to almost 500 bytes on the stack.
    builder: Box<CoreReaderBuilder>,
}

impl Default for ReaderBuilder {
    fn default() -> ReaderBuilder {
        ReaderBuilder {
            capacity: 8 * (1 << 10),
            flexible: false,
            has_headers: true,
            trim: Trim::default(),
            builder: Box::new(CoreReaderBuilder::default()),
        }
    }
}

impl ReaderBuilder {
    /// Create a new builder for configuring CSV parsing.
    ///
    /// To convert a builder into a reader, call one of the methods starting
    /// with `from_`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ReaderBuilder, StringRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
    ///
    ///     let records = rdr
    ///         .records()
    ///         .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
    ///     assert_eq!(records, vec![
    ///         vec!["Boston", "United States", "4628910"],
    ///         vec!["Concord", "United States", "42695"],
    ///     ]);
    ///     Ok(())
    /// }
    /// ```
    pub fn new() -> ReaderBuilder {
        ReaderBuilder::default()
    }

    /// Build a CSV parser from this configuration that reads data from the
    /// given file path.
    ///
    /// If there was a problem opening the file at the given path, then this
    /// returns the corresponding error.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let mut rdr = ReaderBuilder::new().from_path("foo.csv")?;
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_path<P: AsRef<Path>>(&self, path: P) -> Result<Reader<File>> {
        Ok(Reader::new(self, File::open(path)?))
    }

    /// Build a CSV parser from this configuration that reads data from `rdr`.
    ///
    /// Note that the CSV reader is buffered automatically, so you should not
    /// wrap `rdr` in a buffered reader like `io::BufReader`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = ReaderBuilder::new().from_reader(data.as_bytes());
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_reader<R: io::Read>(&self, rdr: R) -> Reader<R> {
        Reader::new(self, rdr)
    }

    /// The field delimiter to use when parsing CSV.
    ///
    /// The default is `b','`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city;country;pop
    /// Boston;United States;4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .delimiter(b';')
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn delimiter(&mut self, delimiter: u8) -> &mut ReaderBuilder {
        self.builder.delimiter(delimiter);
        self
    }

    /// Whether to treat the first row as a special header row.
    ///
    /// By default, the first row is treated as a special header row, which
    /// means the header is never returned by any of the record reading methods
    /// or iterators. When this is disabled (`yes` set to `false`), the first
    /// row is not treated specially.
    ///
    /// Note that the `headers` and `byte_headers` methods are unaffected by
    /// whether this is set. Those methods always return the first record.
    ///
    /// # Example
    ///
    /// This example shows what happens when `has_headers` is disabled.
    /// Namely, the first row is treated just like any other row.
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .has_headers(false)
    ///         .from_reader(data.as_bytes());
    ///     let mut iter = rdr.records();
    ///
    ///     // Read the first record.
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["city", "country", "pop"]);
    ///     } else {
    ///         return Err(From::from(
    ///             "expected at least two records but got none"));
    ///     }
    ///
    ///     // Read the second record.
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///     } else {
    ///         return Err(From::from(
    ///             "expected at least two records but got one"))
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn has_headers(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.has_headers = yes;
        self
    }

    /// Whether the number of fields in records is allowed to change or not.
    ///
    /// When disabled (which is the default), parsing CSV data will return an
    /// error if a record is found with a number of fields different from the
    /// number of fields in a previous record.
    ///
    /// When enabled, this error checking is turned off.
    ///
    /// # Example: flexible records enabled
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     // Notice that the first row is missing the population count.
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .flexible(true)
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    ///
    /// # Example: flexible records disabled
    ///
    /// This shows the error that appears when records of unequal length
    /// are found and flexible records have been disabled (which is the
    /// default).
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ErrorKind, ReaderBuilder};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     // Notice that the first row is missing the population count.
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .flexible(false)
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(Err(err)) = rdr.records().next() {
    ///         match *err.kind() {
    ///             ErrorKind::UnequalLengths { expected_len, len, .. } => {
    ///                 // The header row has 3 fields...
    ///                 assert_eq!(expected_len, 3);
    ///                 // ... but the first row has only 2 fields.
    ///                 assert_eq!(len, 2);
    ///                 Ok(())
    ///             }
    ///             ref wrong => {
    ///                 Err(From::from(format!(
    ///                     "expected UnequalLengths error but got {:?}",
    ///                     wrong)))
    ///             }
    ///         }
    ///     } else {
    ///         Err(From::from(
    ///             "expected at least one errored record but got none"))
    ///     }
    /// }
    /// ```
    pub fn flexible(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.flexible = yes;
        self
    }

    /// Whether fields are trimmed of leading and trailing whitespace or not.
    ///
    /// By default, no trimming is performed. This method permits one to
    /// override that behavior and choose one of the following options:
    ///
    /// 1. `Trim::Headers` trims only header values.
    /// 2. `Trim::Fields` trims only non-header or "field" values.
    /// 3. `Trim::All` trims both header and non-header values.
    ///
    /// A value is only interpreted as a header value if this CSV reader is
    /// configured to read a header record (which is the default).
    ///
    /// When reading string records, characters meeting the definition of
    /// Unicode whitespace are trimmed. When reading byte records, characters
    /// meeting the definition of ASCII whitespace are trimmed. ASCII
    /// whitespace characters correspond to the set `[\t\n\v\f\r ]`.
    ///
    /// # Example
    ///
    /// This example shows what happens when all values are trimmed.
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ReaderBuilder, StringRecord, Trim};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city ,   country ,  pop
    /// Boston,\"
    ///    United States\",4628910
    /// Concord,   United States   ,42695
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .trim(Trim::All)
    ///         .from_reader(data.as_bytes());
    ///     let records = rdr
    ///         .records()
    ///         .collect::<Result<Vec<StringRecord>, csv::Error>>()?;
    ///     assert_eq!(records, vec![
    ///         vec!["Boston", "United States", "4628910"],
    ///         vec!["Concord", "United States", "42695"],
    ///     ]);
    ///     Ok(())
    /// }
    /// ```
    pub fn trim(&mut self, trim: Trim) -> &mut ReaderBuilder {
        self.trim = trim;
        self
    }

    /// The record terminator to use when parsing CSV.
    ///
    /// A record terminator can be any single byte. The default is a special
    /// value, `Terminator::CRLF`, which treats any occurrence of `\r`, `\n`
    /// or `\r\n` as a single record terminator.
    ///
    /// # Example: `$` as a record terminator
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ReaderBuilder, Terminator};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "city,country,pop$Boston,United States,4628910";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .terminator(Terminator::Any(b'$'))
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn terminator(&mut self, term: Terminator) -> &mut ReaderBuilder {
        self.builder.terminator(term.to_core());
        self
    }

    /// The quote character to use when parsing CSV.
    ///
    /// The default is `b'"'`.
    ///
    /// # Example: single quotes instead of double quotes
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,'United States',4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .quote(b'\'')
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn quote(&mut self, quote: u8) -> &mut ReaderBuilder {
        self.builder.quote(quote);
        self
    }

    /// The escape character to use when parsing CSV.
    ///
    /// In some variants of CSV, quotes are escaped using a special escape
    /// character like `\` (instead of escaping quotes by doubling them).
    ///
    /// By default, recognizing these idiosyncratic escapes is disabled.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,\"The \\\"United\\\" States\",4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .escape(Some(b'\\'))
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec![
    ///             "Boston", "The \"United\" States", "4628910",
    ///         ]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn escape(&mut self, escape: Option<u8>) -> &mut ReaderBuilder {
        self.builder.escape(escape);
        self
    }

    /// Enable double quote escapes.
    ///
    /// This is enabled by default, but it may be disabled. When disabled,
    /// doubled quotes are not interpreted as escapes.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,\"The \"\"United\"\" States\",4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .double_quote(false)
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec![
    ///             "Boston", "The \"United\"\" States\"", "4628910",
    ///         ]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn double_quote(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.builder.double_quote(yes);
        self
    }

    /// Enable or disable quoting.
    ///
    /// This is enabled by default, but it may be disabled. When disabled,
    /// quotes are not treated specially.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,\"The United States,4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .quoting(false)
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec![
    ///             "Boston", "\"The United States", "4628910",
    ///         ]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn quoting(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.builder.quoting(yes);
        self
    }

    /// The comment character to use when parsing CSV.
    ///
    /// If the start of a record begins with the byte given here, then that
    /// line is ignored by the CSV parser.
    ///
    /// This is disabled by default.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// #Concord,United States,42695
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .comment(Some(b'#'))
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn comment(&mut self, comment: Option<u8>) -> &mut ReaderBuilder {
        self.builder.comment(comment);
        self
    }

    /// A convenience method for specifying a configuration to read ASCII
    /// delimited text.
    ///
    /// This sets the delimiter and record terminator to the ASCII unit
    /// separator (`\x1F`) and record separator (`\x1E`), respectively.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::ReaderBuilder;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city\x1Fcountry\x1Fpop\x1EBoston\x1FUnited States\x1F4628910";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .ascii()
    ///         .from_reader(data.as_bytes());
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn ascii(&mut self) -> &mut ReaderBuilder {
        self.builder.ascii();
        self
    }

    /// Set the capacity (in bytes) of the buffer used in the CSV reader.
    /// This defaults to a reasonable setting.
    pub fn buffer_capacity(&mut self, capacity: usize) -> &mut ReaderBuilder {
        self.capacity = capacity;
        self
    }

    /// Enable or disable the NFA for parsing CSV.
    ///
    /// This is intended to be a debug option. The NFA is always slower than
    /// the DFA.
    #[doc(hidden)]
    pub fn nfa(&mut self, yes: bool) -> &mut ReaderBuilder {
        self.builder.nfa(yes);
        self
    }
}

/// A already configured CSV reader.
///
/// A CSV reader takes as input CSV data and transforms that into standard Rust
/// values. The most flexible way to read CSV data is as a sequence of records,
/// where a record is a sequence of fields and each field is a string. However,
/// a reader can also deserialize CSV data into Rust types like `i64` or
/// `(String, f64, f64, f64)` or even a custom struct automatically using
/// Serde.
///
/// # Configuration
///
/// A CSV reader has a couple convenient constructor methods like `from_path`
/// and `from_reader`. However, if you want to configure the CSV reader to use
/// a different delimiter or quote character (among many other things), then
/// you should use a [`ReaderBuilder`](struct.ReaderBuilder.html) to construct
/// a `Reader`. For example, to change the field delimiter:
///
/// ```
/// use std::error::Error;
/// use csv::ReaderBuilder;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     let data = "\
/// city;country;pop
/// Boston;United States;4628910
/// ";
///     let mut rdr = ReaderBuilder::new()
///         .delimiter(b';')
///         .from_reader(data.as_bytes());
///
///     if let Some(result) = rdr.records().next() {
///         let record = result?;
///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
///         Ok(())
///     } else {
///         Err(From::from("expected at least one record but got none"))
///     }
/// }
/// ```
///
/// # Error handling
///
/// In general, CSV *parsing* does not ever return an error. That is, there is
/// no such thing as malformed CSV data. Instead, this reader will prioritize
/// finding a parse over rejecting CSV data that it does not understand. This
/// choice was inspired by other popular CSV parsers, but also because it is
/// pragmatic. CSV data varies wildly, so even if the CSV data is malformed,
/// it might still be possible to work with the data. In the land of CSV, there
/// is no "right" or "wrong," only "right" and "less right."
///
/// With that said, a number of errors can occur while reading CSV data:
///
/// * By default, all records in CSV data must have the same number of fields.
///   If a record is found with a different number of fields than a prior
///   record, then an error is returned. This behavior can be disabled by
///   enabling flexible parsing via the `flexible` method on
///   [`ReaderBuilder`](struct.ReaderBuilder.html).
/// * When reading CSV data from a resource (like a file), it is possible for
///   reading from the underlying resource to fail. This will return an error.
///   For subsequent calls to the `Reader` after encountering a such error
///   (unless `seek` is used), it will behave as if end of file had been
///   reached, in order to avoid running into infinite loops when still
///   attempting to read the next record when one has errored.
/// * When reading CSV data into `String` or `&str` fields (e.g., via a
///   [`StringRecord`](struct.StringRecord.html)), UTF-8 is strictly
///   enforced. If CSV data is invalid UTF-8, then an error is returned. If
///   you want to read invalid UTF-8, then you should use the byte oriented
///   APIs such as [`ByteRecord`](struct.ByteRecord.html). If you need explicit
///   support for another encoding entirely, then you'll need to use another
///   crate to transcode your CSV data to UTF-8 before parsing it.
/// * When using Serde to deserialize CSV data into Rust types, it is possible
///   for a number of additional errors to occur. For example, deserializing
///   a field `xyz` into an `i32` field will result in an error.
///
/// For more details on the precise semantics of errors, see the
/// [`Error`](enum.Error.html) type.
#[derive(Debug)]
pub struct Reader<R> {
    /// The underlying CSV parser.
    ///
    /// We explicitly put this on the heap because CoreReader embeds an entire
    /// DFA transition table, which along with other things, tallies up to
    /// almost 500 bytes on the stack.
    core: Box<CoreReader>,
    /// The underlying reader.
    rdr: io::BufReader<R>,
    /// Various state tracking.
    ///
    /// There is more state embedded in the `CoreReader`.
    state: ReaderState,
}

#[derive(Debug)]
struct ReaderState {
    /// When set, this contains the first row of any parsed CSV data.
    ///
    /// This is always populated, regardless of whether `has_headers` is set.
    headers: Option<Headers>,
    /// When set, the first row of parsed CSV data is excluded from things
    /// that read records, like iterators and `read_record`.
    has_headers: bool,
    /// When set, there is no restriction on the length of records. When not
    /// set, every record must have the same number of fields, or else an error
    /// is reported.
    flexible: bool,
    trim: Trim,
    /// The number of fields in the first record parsed.
    first_field_count: Option<u64>,
    /// The current position of the parser.
    ///
    /// Note that this position is only observable by callers at the start
    /// of a record. More granular positions are not supported.
    cur_pos: Position,
    /// Whether the first record has been read or not.
    first: bool,
    /// Whether the reader has been seeked or not.
    seeked: bool,
    /// Whether EOF of the underlying reader has been reached or not.
    ///
    /// IO errors on the underlying reader will be considered as an EOF for
    /// subsequent read attempts, as it would be incorrect to keep on trying
    /// to read when the underlying reader has broken.
    ///
    /// For clarity, having the best `Debug` impl and in case they need to be
    /// treated differently at some point, we store whether the `EOF` is
    /// considered because an actual EOF happened, or because we encoundered
    /// an IO error.
    /// This has no additional runtime cost.
    eof: ReaderEofState,
}

/// Whether EOF of the underlying reader has been reached or not.
///
/// IO errors on the underlying reader will be considered as an EOF for
/// subsequent read attempts, as it would be incorrect to keep on trying
/// to read when the underlying reader has broken.
///
/// For clarity, having the best `Debug` impl and in case they need to be
/// treated differently at some point, we store whether the `EOF` is
/// considered because an actual EOF happened, or because we encoundered
/// an IO error
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ReaderEofState {
    NotEof,
    Eof,
    IOError,
}

/// Headers encapsulates any data associated with the headers of CSV data.
///
/// The headers always correspond to the first row.
#[derive(Debug)]
struct Headers {
    /// The header, as raw bytes.
    byte_record: ByteRecord,
    /// The header, as valid UTF-8 (or a UTF-8 error).
    string_record: result::Result<StringRecord, Utf8Error>,
}

impl Reader<Reader<File>> {
    /// Create a new CSV parser with a default configuration for the given
    /// file path.
    ///
    /// To customize CSV parsing, use a `ReaderBuilder`.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let mut rdr = Reader::from_path("foo.csv")?;
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Reader<File>> {
        ReaderBuilder::new().from_path(path)
    }
}

impl<R: io::Read> Reader<R> {
    /// Create a new CSV reader given a builder and a source of underlying
    /// bytes.
    fn new(builder: &ReaderBuilder, rdr: R) -> Reader<R> {
        Reader {
            core: Box::new(builder.builder.build()),
            rdr: io::BufReader::with_capacity(builder.capacity, rdr),
            state: ReaderState {
                headers: None,
                has_headers: builder.has_headers,
                flexible: builder.flexible,
                trim: builder.trim,
                first_field_count: None,
                cur_pos: Position::new(),
                first: false,
                seeked: false,
                eof: ReaderEofState::NotEof,
            },
        }
    }

    /// Create a new CSV parser with a default configuration for the given
    /// reader.
    ///
    /// To customize CSV parsing, use a `ReaderBuilder`.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     for result in rdr.records() {
    ///         let record = result?;
    ///         println!("{:?}", record);
    ///     }
    ///     Ok(())
    /// }
    /// ```
    pub fn from_reader(rdr: R) -> Reader<R> {
        ReaderBuilder::new().from_reader(rdr)
    }

    /// Returns a borrowed iterator over deserialized records.
    ///
    /// Each item yielded by this iterator is a `Result<D, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record. Additionally,
    /// if `has_headers` is enabled, then deserializing into a struct will
    /// automatically align the values in each row to the fields of a struct
    /// based on the header row.
    ///
    /// # Example
    ///
    /// This shows how to deserialize CSV data into normal Rust structs. The
    /// fields of the header row are used to match up the values in each row
    /// to the fields of the struct.
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// use csv::Reader;
    /// use serde::Deserialize;
    ///
    /// #[derive(Debug, Deserialize, Eq, PartialEq)]
    /// struct Row {
    ///     city: String,
    ///     country: String,
    ///     #[serde(rename = "popcount")]
    ///     population: u64,
    /// }
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.deserialize();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record: Row = result?;
    ///         assert_eq!(record, Row {
    ///             city: "Boston".to_string(),
    ///             country: "United States".to_string(),
    ///             population: 4628910,
    ///         });
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    ///
    /// # Rules
    ///
    /// For the most part, any Rust type that maps straight-forwardly to a CSV
    /// record is supported. This includes maps, structs, tuples and tuple
    /// structs. Other Rust types, such as `Vec`s, arrays, and enums have
    /// a more complicated story. In general, when working with CSV data, one
    /// should avoid *nested sequences* as much as possible.
    ///
    /// Maps, structs, tuples and tuple structs map to CSV records in a simple
    /// way. Tuples and tuple structs decode their fields in the order that
    /// they are defined. Structs will do the same only if `has_headers` has
    /// been disabled using [`ReaderBuilder`](struct.ReaderBuilder.html),
    /// otherwise, structs and maps are deserialized based on the fields
    /// defined in the header row. (If there is no header row, then
    /// deserializing into a map will result in an error.)
    ///
    /// Nested sequences are supported in a limited capacity. Namely, they
    /// are flattened. As a result, it's often useful to use a `Vec` to capture
    /// a "tail" of fields in a record:
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// use csv::ReaderBuilder;
    /// use serde::Deserialize;
    ///
    /// #[derive(Debug, Deserialize, Eq, PartialEq)]
    /// struct Row {
    ///     label: String,
    ///     values: Vec<i32>,
    /// }
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "foo,1,2,3";
    ///     let mut rdr = ReaderBuilder::new()
    ///         .has_headers(false)
    ///         .from_reader(data.as_bytes());
    ///     let mut iter = rdr.deserialize();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record: Row = result?;
    ///         assert_eq!(record, Row {
    ///             label: "foo".to_string(),
    ///             values: vec![1, 2, 3],
    ///         });
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    ///
    /// In the above example, adding another field to the `Row` struct after
    /// the `values` field will result in a deserialization error. This is
    /// because the deserializer doesn't know when to stop reading fields
    /// into the `values` vector, so it will consume the rest of the fields in
    /// the record leaving none left over for the additional field.
    ///
    /// Finally, simple enums in Rust can be deserialized as well. Namely,
    /// enums must either be variants with no arguments or variants with a
    /// single argument. Variants with no arguments are deserialized based on
    /// which variant name the field matches. Variants with a single argument
    /// are deserialized based on which variant can store the data. The latter
    /// is only supported when using "untagged" enum deserialization. The
    /// following example shows both forms in action:
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// use csv::Reader;
    /// use serde::Deserialize;
    ///
    /// #[derive(Debug, Deserialize, PartialEq)]
    /// struct Row {
    ///     label: Label,
    ///     value: Number,
    /// }
    ///
    /// #[derive(Debug, Deserialize, PartialEq)]
    /// #[serde(rename_all = "lowercase")]
    /// enum Label {
    ///     Celsius,
    ///     Fahrenheit,
    /// }
    ///
    /// #[derive(Debug, Deserialize, PartialEq)]
    /// #[serde(untagged)]
    /// enum Number {
    ///     Integer(i64),
    ///     Float(f64),
    /// }
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// label,value
    /// celsius,22.2222
    /// fahrenheit,72
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.deserialize();
    ///
    ///     // Read the first record.
    ///     if let Some(result) = iter.next() {
    ///         let record: Row = result?;
    ///         assert_eq!(record, Row {
    ///             label: Label::Celsius,
    ///             value: Number::Float(22.2222),
    ///         });
    ///     } else {
    ///         return Err(From::from(
    ///             "expected at least two records but got none"));
    ///     }
    ///
    ///     // Read the second record.
    ///     if let Some(result) = iter.next() {
    ///         let record: Row = result?;
    ///         assert_eq!(record, Row {
    ///             label: Label::Fahrenheit,
    ///             value: Number::Integer(72),
    ///         });
    ///         Ok(())
    ///     } else {
    ///         Err(From::from(
    ///             "expected at least two records but got only one"))
    ///     }
    /// }
    /// ```
    pub fn deserialize<D>(&mut self) -> DeserializeRecordsIter<R, D>
    where
        D: DeserializeOwned,
    {
        DeserializeRecordsIter::new(self)
    }

    /// Returns an owned iterator over deserialized records.
    ///
    /// Each item yielded by this iterator is a `Result<D, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// This is mostly useful when you want to return a CSV iterator or store
    /// it somewhere.
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record. Additionally,
    /// if `has_headers` is enabled, then deserializing into a struct will
    /// automatically align the values in each row to the fields of a struct
    /// based on the header row.
    ///
    /// For more detailed deserialization rules, see the documentation on the
    /// `deserialize` method.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    ///
    /// use csv::Reader;
    /// use serde::Deserialize;
    ///
    /// #[derive(Debug, Deserialize, Eq, PartialEq)]
    /// struct Row {
    ///     city: String,
    ///     country: String,
    ///     #[serde(rename = "popcount")]
    ///     population: u64,
    /// }
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// ";
    ///     let rdr = Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.into_deserialize();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record: Row = result?;
    ///         assert_eq!(record, Row {
    ///             city: "Boston".to_string(),
    ///             country: "United States".to_string(),
    ///             population: 4628910,
    ///         });
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn into_deserialize<D>(self) -> DeserializeRecordsIntoIter<R, D>
    where
        D: DeserializeOwned,
    {
        DeserializeRecordsIntoIter::new(self)
    }

    /// Returns a borrowed iterator over all records as strings.
    ///
    /// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.records();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn records(&mut self) -> StringRecordsIter<R> {
        StringRecordsIter::new(self)
    }

    /// Returns an owned iterator over all records as strings.
    ///
    /// Each item yielded by this iterator is a `Result<StringRecord, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// This is mostly useful when you want to return a CSV iterator or store
    /// it somewhere.
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let rdr = Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.into_records();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn into_records(self) -> StringRecordsIntoIter<R> {
        StringRecordsIntoIter::new(self)
    }

    /// Returns a borrowed iterator over all records as raw bytes.
    ///
    /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.byte_records();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn byte_records(&mut self) -> ByteRecordsIter<R> {
        ByteRecordsIter::new(self)
    }

    /// Returns an owned iterator over all records as raw bytes.
    ///
    /// Each item yielded by this iterator is a `Result<ByteRecord, Error>`.
    /// Therefore, in order to access the record, callers must handle the
    /// possibility of error (typically with `try!` or `?`).
    ///
    /// This is mostly useful when you want to return a CSV iterator or store
    /// it somewhere.
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this does not include the first record.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let rdr = Reader::from_reader(data.as_bytes());
    ///     let mut iter = rdr.into_byte_records();
    ///
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn into_byte_records(self) -> ByteRecordsIntoIter<R> {
        ByteRecordsIntoIter::new(self)
    }

    /// Returns a reference to the first row read by this parser.
    ///
    /// If no row has been read yet, then this will force parsing of the first
    /// row.
    ///
    /// If there was a problem parsing the row or if it wasn't valid UTF-8,
    /// then this returns an error.
    ///
    /// If the underlying reader emits EOF before any data, then this returns
    /// an empty record.
    ///
    /// Note that this method may be used regardless of whether `has_headers`
    /// was enabled (but it is enabled by default).
    ///
    /// # Example
    ///
    /// This example shows how to get the header row of CSV data. Notice that
    /// the header row does not appear as a record in the iterator!
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///
    ///     // We can read the headers before iterating.
    ///     {
    ///         // `headers` borrows from the reader, so we put this in its
    ///         // own scope. That way, the borrow ends before we try iterating
    ///         // below. Alternatively, we could clone the headers.
    ///         let headers = rdr.headers()?;
    ///         assert_eq!(headers, vec!["city", "country", "pop"]);
    ///     }
    ///
    ///     if let Some(result) = rdr.records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///     } else {
    ///         return Err(From::from(
    ///             "expected at least one record but got none"))
    ///     }
    ///
    ///     // We can also read the headers after iterating.
    ///     let headers = rdr.headers()?;
    ///     assert_eq!(headers, vec!["city", "country", "pop"]);
    ///     Ok(())
    /// }
    /// ```
    pub fn headers(&mut self) -> Result<&StringRecord> {
        if self.state.headers.is_none() {
            let mut record = ByteRecord::new();
            self.read_byte_record_impl(&mut record)?;
            self.set_headers_impl(Err(record));
        }
        let headers = self.state.headers.as_ref().unwrap();
        match headers.string_record {
            Ok(ref record) => Ok(record),
            Err(ref err) => Err(Error::new(ErrorKind::Utf8 {
                pos: headers.byte_record.position().map(Clone::clone),
                err: err.clone(),
            })),
        }
    }

    /// Returns a reference to the first row read by this parser as raw bytes.
    ///
    /// If no row has been read yet, then this will force parsing of the first
    /// row.
    ///
    /// If there was a problem parsing the row then this returns an error.
    ///
    /// If the underlying reader emits EOF before any data, then this returns
    /// an empty record.
    ///
    /// Note that this method may be used regardless of whether `has_headers`
    /// was enabled (but it is enabled by default).
    ///
    /// # Example
    ///
    /// This example shows how to get the header row of CSV data. Notice that
    /// the header row does not appear as a record in the iterator!
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::Reader;
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///
    ///     // We can read the headers before iterating.
    ///     {
    ///         // `headers` borrows from the reader, so we put this in its
    ///         // own scope. That way, the borrow ends before we try iterating
    ///         // below. Alternatively, we could clone the headers.
    ///         let headers = rdr.byte_headers()?;
    ///         assert_eq!(headers, vec!["city", "country", "pop"]);
    ///     }
    ///
    ///     if let Some(result) = rdr.byte_records().next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///     } else {
    ///         return Err(From::from(
    ///             "expected at least one record but got none"))
    ///     }
    ///
    ///     // We can also read the headers after iterating.
    ///     let headers = rdr.byte_headers()?;
    ///     assert_eq!(headers, vec!["city", "country", "pop"]);
    ///     Ok(())
    /// }
    /// ```
    pub fn byte_headers(&mut self) -> Result<&ByteRecord> {
        if self.state.headers.is_none() {
            let mut record = ByteRecord::new();
            self.read_byte_record_impl(&mut record)?;
            self.set_headers_impl(Err(record));
        }
        Ok(&self.state.headers.as_ref().unwrap().byte_record)
    }

    /// Set the headers of this CSV parser manually.
    ///
    /// This overrides any other setting (including `set_byte_headers`). Any
    /// automatic detection of headers is disabled. This may be called at any
    /// time.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{Reader, StringRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///
    ///     assert_eq!(rdr.headers()?, vec!["city", "country", "pop"]);
    ///     rdr.set_headers(StringRecord::from(vec!["a", "b", "c"]));
    ///     assert_eq!(rdr.headers()?, vec!["a", "b", "c"]);
    ///
    ///     Ok(())
    /// }
    /// ```
    pub fn set_headers(&mut self, headers: StringRecord) {
        self.set_headers_impl(Ok(headers));
    }

    /// Set the headers of this CSV parser manually as raw bytes.
    ///
    /// This overrides any other setting (including `set_headers`). Any
    /// automatic detection of headers is disabled. This may be called at any
    /// time.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{Reader, ByteRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///
    ///     assert_eq!(rdr.byte_headers()?, vec!["city", "country", "pop"]);
    ///     rdr.set_byte_headers(ByteRecord::from(vec!["a", "b", "c"]));
    ///     assert_eq!(rdr.byte_headers()?, vec!["a", "b", "c"]);
    ///
    ///     Ok(())
    /// }
    /// ```
    pub fn set_byte_headers(&mut self, headers: ByteRecord) {
        self.set_headers_impl(Err(headers));
    }

    fn set_headers_impl(
        &mut self,
        headers: result::Result<StringRecord, ByteRecord>,
    ) {
        // If we have string headers, then get byte headers. But if we have
        // byte headers, then get the string headers (or a UTF-8 error).
        let (mut str_headers, mut byte_headers) = match headers {
            Ok(string) => {
                let bytes = string.clone().into_byte_record();
                (Ok(string), bytes)
            }
            Err(bytes) => {
                match StringRecord::from_byte_record(bytes.clone()) {
                    Ok(str_headers) => (Ok(str_headers), bytes),
                    Err(err) => (Err(err.utf8_error().clone()), bytes),
                }
            }
        };
        if self.state.trim.should_trim_headers() {
            if let Ok(ref mut str_headers) = str_headers.as_mut() {
                str_headers.trim();
            }
            byte_headers.trim();
        }
        self.state.headers = Some(Headers {
            byte_record: byte_headers,
            string_record: str_headers,
        });
    }

    /// Read a single row into the given record. Returns false when no more
    /// records could be read.
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this will never read the first record.
    ///
    /// This method is useful when you want to read records as fast as
    /// as possible. It's less ergonomic than an iterator, but it permits the
    /// caller to reuse the `StringRecord` allocation, which usually results
    /// in higher throughput.
    ///
    /// Records read via this method are guaranteed to have a position set
    /// on them, even if the reader is at EOF or if an error is returned.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{Reader, StringRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut record = StringRecord::new();
    ///
    ///     if rdr.read_record(&mut record)? {
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn read_record(&mut self, record: &mut StringRecord) -> Result<bool> {
        let result = record.read(self);
        // We need to trim again because trimming string records includes
        // Unicode whitespace. (ByteRecord trimming only includes ASCII
        // whitespace.)
        if self.state.trim.should_trim_fields() {
            record.trim();
        }
        result
    }

    /// Read a single row into the given byte record. Returns false when no
    /// more records could be read.
    ///
    /// If `has_headers` was enabled via a `ReaderBuilder` (which is the
    /// default), then this will never read the first record.
    ///
    /// This method is useful when you want to read records as fast as
    /// as possible. It's less ergonomic than an iterator, but it permits the
    /// caller to reuse the `ByteRecord` allocation, which usually results
    /// in higher throughput.
    ///
    /// Records read via this method are guaranteed to have a position set
    /// on them, even if the reader is at EOF or if an error is returned.
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ByteRecord, Reader};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,pop
    /// Boston,United States,4628910
    /// ";
    ///     let mut rdr = Reader::from_reader(data.as_bytes());
    ///     let mut record = ByteRecord::new();
    ///
    ///     if rdr.read_byte_record(&mut record)? {
    ///         assert_eq!(record, vec!["Boston", "United States", "4628910"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn read_byte_record(
        &mut self,
        record: &mut ByteRecord,
    ) -> Result<bool> {
        if !self.state.seeked && !self.state.has_headers && !self.state.first {
            // If the caller indicated "no headers" and we haven't yielded the
            // first record yet, then we should yield our header row if we have
            // one.
            if let Some(ref headers) = self.state.headers {
                self.state.first = true;
                record.clone_from(&headers.byte_record);
                if self.state.trim.should_trim_fields() {
                    record.trim();
                }
                return Ok(!record.is_empty());
            }
        }
        let ok = self.read_byte_record_impl(record)?;
        self.state.first = true;
        if !self.state.seeked && self.state.headers.is_none() {
            self.set_headers_impl(Err(record.clone()));
            // If the end user indicated that we have headers, then we should
            // never return the first row. Instead, we should attempt to
            // read and return the next one.
            if self.state.has_headers {
                let result = self.read_byte_record_impl(record);
                if self.state.trim.should_trim_fields() {
                    record.trim();
                }
                return result;
            }
        } else if self.state.trim.should_trim_fields() {
            record.trim();
        }
        Ok(ok)
    }

    /// Read a byte record from the underlying CSV reader, without accounting
    /// for headers.
    #[inline(always)]
    fn read_byte_record_impl(
        &mut self,
        record: &mut ByteRecord,
    ) -> Result<bool> {
        use csv_core::ReadRecordResult::*;

        record.clear();
        record.set_position(Some(self.state.cur_pos.clone()));
        if self.state.eof != ReaderEofState::NotEof {
            return Ok(false);
        }
        let (mut outlen, mut endlen) = (0, 0);
        loop {
            let (res, nin, nout, nend) = {
                let input_res = self.rdr.fill_buf();
                if input_res.is_err() {
                    self.state.eof = ReaderEofState::IOError;
                }
                let input = input_res?;
                let (fields, ends) = record.as_parts();
                self.core.read_record(
                    input,
                    &mut fields[outlen..],
                    &mut ends[endlen..],
                )
            };
            self.rdr.consume(nin);
            let byte = self.state.cur_pos.byte();
            self.state
                .cur_pos
                .set_byte(byte + nin as u64)
                .set_line(self.core.line());
            outlen += nout;
            endlen += nend;
            match res {
                InputEmpty => continue,
                OutputFull => {
                    record.expand_fields();
                    continue;
                }
                OutputEndsFull => {
                    record.expand_ends();
                    continue;
                }
                Record => {
                    record.set_len(endlen);
                    self.state.add_record(record)?;
                    return Ok(true);
                }
                End => {
                    self.state.eof = ReaderEofState::Eof;
                    return Ok(false);
                }
            }
        }
    }

    /// Return the current position of this CSV reader.
    ///
    /// The byte offset in the position returned can be used to `seek` this
    /// reader. In particular, seeking to a position returned here on the same
    /// data will result in parsing the same subsequent record.
    ///
    /// # Example: reading the position
    ///
    /// ```
    /// use std::error::Error;
    /// use std::io;
    /// use csv::{Reader, Position};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let rdr = Reader::from_reader(io::Cursor::new(data));
    ///     let mut iter = rdr.into_records();
    ///     let mut pos = Position::new();
    ///     loop {
    ///         // Read the position immediately before each record.
    ///         let next_pos = iter.reader().position().clone();
    ///         if iter.next().is_none() {
    ///             break;
    ///         }
    ///         pos = next_pos;
    ///     }
    ///
    ///     // `pos` should now be the position immediately before the last
    ///     // record.
    ///     assert_eq!(pos.byte(), 51);
    ///     assert_eq!(pos.line(), 3);
    ///     assert_eq!(pos.record(), 2);
    ///     Ok(())
    /// }
    /// ```
    pub fn position(&self) -> &Position {
        &self.state.cur_pos
    }

    /// Returns true if and only if this reader has been exhausted.
    ///
    /// When this returns true, no more records can be read from this reader
    /// (unless it has been seeked to another position).
    ///
    /// # Example
    ///
    /// ```
    /// use std::error::Error;
    /// use std::io;
    /// use csv::{Reader, Position};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let mut rdr = Reader::from_reader(io::Cursor::new(data));
    ///     assert!(!rdr.is_done());
    ///     for result in rdr.records() {
    ///         let _ = result?;
    ///     }
    ///     assert!(rdr.is_done());
    ///     Ok(())
    /// }
    /// ```
    pub fn is_done(&self) -> bool {
        self.state.eof != ReaderEofState::NotEof
    }

    /// Returns true if and only if this reader has been configured to
    /// interpret the first record as a header record.
    pub fn has_headers(&self) -> bool {
        self.state.has_headers
    }

    /// Returns a reference to the underlying reader.
    pub fn get_ref(&self) -> &R {
        self.rdr.get_ref()
    }

    /// Returns a mutable reference to the underlying reader.
    pub fn get_mut(&mut self) -> &mut R {
        self.rdr.get_mut()
    }

    /// Unwraps this CSV reader, returning the underlying reader.
    ///
    /// Note that any leftover data inside this reader's internal buffer is
    /// lost.
    pub fn into_inner(self) -> R {
        self.rdr.into_inner()
    }
}

impl<R: io::Read + io::Seek> Reader<R> {
    /// Seeks the underlying reader to the position given.
    ///
    /// This comes with a few caveats:
    ///
    /// * Any internal buffer associated with this reader is cleared.
    /// * If the given position does not correspond to a position immediately
    ///   before the start of a record, then the behavior of this reader is
    ///   unspecified.
    /// * Any special logic that skips the first record in the CSV reader
    ///   when reading or iterating over records is disabled.
    ///
    /// If the given position has a byte offset equivalent to the current
    /// position, then no seeking is performed.
    ///
    /// If the header row has not already been read, then this will attempt
    /// to read the header row before seeking. Therefore, it is possible that
    /// this returns an error associated with reading CSV data.
    ///
    /// Note that seeking is performed based only on the byte offset in the
    /// given position. Namely, the record or line numbers in the position may
    /// be incorrect, but this will cause any future position generated by
    /// this CSV reader to be similarly incorrect.
    ///
    /// # Example: seek to parse a record twice
    ///
    /// ```
    /// use std::error::Error;
    /// use std::io;
    /// use csv::{Reader, Position};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let data = "\
    /// city,country,popcount
    /// Boston,United States,4628910
    /// Concord,United States,42695
    /// ";
    ///     let rdr = Reader::from_reader(io::Cursor::new(data));
    ///     let mut iter = rdr.into_records();
    ///     let mut pos = Position::new();
    ///     loop {
    ///         // Read the position immediately before each record.
    ///         let next_pos = iter.reader().position().clone();
    ///         if iter.next().is_none() {
    ///             break;
    ///         }
    ///         pos = next_pos;
    ///     }
    ///
    ///     // Now seek the reader back to `pos`. This will let us read the
    ///     // last record again.
    ///     iter.reader_mut().seek(pos)?;
    ///     let mut iter = iter.into_reader().into_records();
    ///     if let Some(result) = iter.next() {
    ///         let record = result?;
    ///         assert_eq!(record, vec!["Concord", "United States", "42695"]);
    ///         Ok(())
    ///     } else {
    ///         Err(From::from("expected at least one record but got none"))
    ///     }
    /// }
    /// ```
    pub fn seek(&mut self, pos: Position) -> Result<()> {
        self.byte_headers()?;
        self.state.seeked = true;
        if pos.byte() == self.state.cur_pos.byte() {
            return Ok(());
        }
        self.rdr.seek(io::SeekFrom::Start(pos.byte()))?;
        self.core.reset();
        self.core.set_line(pos.line());
        self.state.cur_pos = pos;
        self.state.eof = ReaderEofState::NotEof;
        Ok(())
    }

    /// This is like `seek`, but provides direct control over how the seeking
    /// operation is performed via `io::SeekFrom`.
    ///
    /// The `pos` position given *should* correspond the position indicated
    /// by `seek_from`, but there is no requirement. If the `pos` position
    /// given is incorrect, then the position information returned by this
    /// reader will be similarly incorrect.
    ///
    /// If the header row has not already been read, then this will attempt
    /// to read the header row before seeking. Therefore, it is possible that
    /// this returns an error associated with reading CSV data.
    ///
    /// Unlike `seek`, this will always cause an actual seek to be performed.
    pub fn seek_raw(
        &mut self,
        seek_from: io::SeekFrom,
        pos: Position,
    ) -> Result<()> {
        self.byte_headers()?;
        self.state.seeked = true;
        self.rdr.seek(seek_from)?;
        self.core.reset();
        self.core.set_line(pos.line());
        self.state.cur_pos = pos;
        self.state.eof = ReaderEofState::NotEof;
        Ok(())
    }
}

impl ReaderState {
    #[inline(always)]
    fn add_record(&mut self, record: &ByteRecord) -> Result<()> {
        let i = self.cur_pos.record();
        self.cur_pos.set_record(i.checked_add(1).unwrap());
        if !self.flexible {
            match self.first_field_count {
                None => self.first_field_count = Some(record.len() as u64),
                Some(expected) => {
                    if record.len() as u64 != expected {
                        return Err(Error::new(ErrorKind::UnequalLengths {
                            pos: record.position().map(Clone::clone),
                            expected_len: expected,
                            len: record.len() as u64,
                        }));
                    }
                }
            }
        }
        Ok(())
    }
}

/// An owned iterator over deserialized records.
///
/// The type parameter `R` refers to the underlying `io::Read` type, and `D`
/// refers to the type that this iterator will deserialize a record into.
pub struct DeserializeRecordsIntoIter<R, D> {
    rdr: Reader<R>,
    rec: StringRecord,
    headers: Option<StringRecord>,
    _priv: PhantomData<D>,
}

impl<R: io::Read, D: DeserializeOwned> DeserializeRecordsIntoIter<R, D> {
    fn new(mut rdr: Reader<R>) -> DeserializeRecordsIntoIter<R, D> {
        let headers = if !rdr.state.has_headers {
            None
        } else {
            rdr.headers().ok().map(Clone::clone)
        };
        DeserializeRecordsIntoIter {
            rdr: rdr,
            rec: StringRecord::new(),
            headers: headers,
            _priv: PhantomData,
        }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }

    /// Drop this iterator and return the underlying CSV reader.
    pub fn into_reader(self) -> Reader<R> {
        self.rdr
    }
}

impl<R: io::Read, D: DeserializeOwned> Iterator
    for DeserializeRecordsIntoIter<R, D>
{
    type Item = Result<D>;

    fn next(&mut self) -> Option<Result<D>> {
        match self.rdr.read_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(false) => None,
            Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
        }
    }
}

/// A borrowed iterator over deserialized records.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`. The type parameter `R` refers to the underlying `io::Read`
/// type, and `D` refers to the type that this iterator will deserialize a
/// record into.
pub struct DeserializeRecordsIter<'r, R: 'r, D> {
    rdr: &'r mut Reader<R>,
    rec: StringRecord,
    headers: Option<StringRecord>,
    _priv: PhantomData<D>,
}

impl<'r, R: io::Read, D: DeserializeOwned> DeserializeRecordsIter<'r, R, D> {
    fn new(rdr: &'r mut Reader<R>) -> DeserializeRecordsIter<'r, R, D> {
        let headers = if !rdr.state.has_headers {
            None
        } else {
            rdr.headers().ok().map(Clone::clone)
        };
        DeserializeRecordsIter {
            rdr: rdr,
            rec: StringRecord::new(),
            headers: headers,
            _priv: PhantomData,
        }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }
}

impl<'r, R: io::Read, D: DeserializeOwned> Iterator
    for DeserializeRecordsIter<'r, R, D>
{
    type Item = Result<D>;

    fn next(&mut self) -> Option<Result<D>> {
        match self.rdr.read_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(false) => None,
            Ok(true) => Some(self.rec.deserialize(self.headers.as_ref())),
        }
    }
}

/// An owned iterator over records as strings.
pub struct StringRecordsIntoIter<R> {
    rdr: Reader<R>,
    rec: StringRecord,
}

impl<R: io::Read> StringRecordsIntoIter<R> {
    fn new(rdr: Reader<R>) -> StringRecordsIntoIter<R> {
        StringRecordsIntoIter { rdr: rdr, rec: StringRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }

    /// Drop this iterator and return the underlying CSV reader.
    pub fn into_reader(self) -> Reader<R> {
        self.rdr
    }
}

impl<R: io::Read> Iterator for StringRecordsIntoIter<R> {
    type Item = Result<StringRecord>;

    fn next(&mut self) -> Option<Result<StringRecord>> {
        match self.rdr.read_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(true) => Some(Ok(self.rec.clone())),
            Ok(false) => None,
        }
    }
}

/// A borrowed iterator over records as strings.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`.
pub struct StringRecordsIter<'r, R: 'r> {
    rdr: &'r mut Reader<R>,
    rec: StringRecord,
}

impl<'r, R: io::Read> StringRecordsIter<'r, R> {
    fn new(rdr: &'r mut Reader<R>) -> StringRecordsIter<'r, R> {
        StringRecordsIter { rdr: rdr, rec: StringRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }
}

impl<'r, R: io::Read> Iterator for StringRecordsIter<'r, R> {
    type Item = Result<StringRecord>;

    fn next(&mut self) -> Option<Result<StringRecord>> {
        match self.rdr.read_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(true) => Some(Ok(self.rec.clone())),
            Ok(false) => None,
        }
    }
}

/// An owned iterator over records as raw bytes.
pub struct ByteRecordsIntoIter<R> {
    rdr: Reader<R>,
    rec: ByteRecord,
}

impl<R: io::Read> ByteRecordsIntoIter<R> {
    fn new(rdr: Reader<R>) -> ByteRecordsIntoIter<R> {
        ByteRecordsIntoIter { rdr: rdr, rec: ByteRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }

    /// Drop this iterator and return the underlying CSV reader.
    pub fn into_reader(self) -> Reader<R> {
        self.rdr
    }
}

impl<R: io::Read> Iterator for ByteRecordsIntoIter<R> {
    type Item = Result<ByteRecord>;

    fn next(&mut self) -> Option<Result<ByteRecord>> {
        match self.rdr.read_byte_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(true) => Some(Ok(self.rec.clone())),
            Ok(false) => None,
        }
    }
}

/// A borrowed iterator over records as raw bytes.
///
/// The lifetime parameter `'r` refers to the lifetime of the underlying
/// CSV `Reader`.
pub struct ByteRecordsIter<'r, R: 'r> {
    rdr: &'r mut Reader<R>,
    rec: ByteRecord,
}

impl<'r, R: io::Read> ByteRecordsIter<'r, R> {
    fn new(rdr: &'r mut Reader<R>) -> ByteRecordsIter<'r, R> {
        ByteRecordsIter { rdr: rdr, rec: ByteRecord::new() }
    }

    /// Return a reference to the underlying CSV reader.
    pub fn reader(&self) -> &Reader<R> {
        &self.rdr
    }

    /// Return a mutable reference to the underlying CSV reader.
    pub fn reader_mut(&mut self) -> &mut Reader<R> {
        &mut self.rdr
    }
}

impl<'r, R: io::Read> Iterator for ByteRecordsIter<'r, R> {
    type Item = Result<ByteRecord>;

    fn next(&mut self) -> Option<Result<ByteRecord>> {
        match self.rdr.read_byte_record(&mut self.rec) {
            Err(err) => Some(Err(err)),
            Ok(true) => Some(Ok(self.rec.clone())),
            Ok(false) => None,
        }
    }
}

#[cfg(test)]
mod tests {
    use std::io;

    use crate::byte_record::ByteRecord;
    use crate::error::ErrorKind;
    use crate::string_record::StringRecord;

    use super::{Position, ReaderBuilder, Trim};

    fn b(s: &str) -> &[u8] {
        s.as_bytes()
    }
    fn s(b: &[u8]) -> &str {
        ::std::str::from_utf8(b).unwrap()
    }

    fn newpos(byte: u64, line: u64, record: u64) -> Position {
        let mut p = Position::new();
        p.set_byte(byte).set_line(line).set_record(record);
        p
    }

    #[test]
    fn read_byte_record() {
        let data = b("foo,\"b,ar\",baz\nabc,mno,xyz");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = ByteRecord::new();

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("foo", s(&rec[0]));
        assert_eq!("b,ar", s(&rec[1]));
        assert_eq!("baz", s(&rec[2]));

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("abc", s(&rec[0]));
        assert_eq!("mno", s(&rec[1]));
        assert_eq!("xyz", s(&rec[2]));

        assert!(!rdr.read_byte_record(&mut rec).unwrap());
    }

    #[test]
    fn read_trimmed_records_and_headers() {
        let data = b("foo,  bar,\tbaz\n  1,  2,  3\n1\t,\t,3\t\t");
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .trim(Trim::All)
            .from_reader(data);
        let mut rec = ByteRecord::new();
        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!("1", s(&rec[0]));
        assert_eq!("2", s(&rec[1]));
        assert_eq!("3", s(&rec[2]));
        let mut rec = StringRecord::new();
        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!("1", &rec[0]);
        assert_eq!("", &rec[1]);
        assert_eq!("3", &rec[2]);
        {
            let headers = rdr.headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!("foo", &headers[0]);
            assert_eq!("bar", &headers[1]);
            assert_eq!("baz", &headers[2]);
        }
    }

    #[test]
    fn read_trimmed_header() {
        let data = b("foo,  bar,\tbaz\n  1,  2,  3\n1\t,\t,3\t\t");
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .trim(Trim::Headers)
            .from_reader(data);
        let mut rec = ByteRecord::new();
        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!("  1", s(&rec[0]));
        assert_eq!("  2", s(&rec[1]));
        assert_eq!("  3", s(&rec[2]));
        {
            let headers = rdr.headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!("foo", &headers[0]);
            assert_eq!("bar", &headers[1]);
            assert_eq!("baz", &headers[2]);
        }
    }

    #[test]
    fn read_trimed_header_invalid_utf8() {
        let data = &b"foo,  b\xFFar,\tbaz\na,b,c\nd,e,f"[..];
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .trim(Trim::Headers)
            .from_reader(data);
        let mut rec = StringRecord::new();

        // force the headers to be read
        let _ = rdr.read_record(&mut rec);
        // Check the byte headers are trimmed
        {
            let headers = rdr.byte_headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!(b"foo", &headers[0]);
            assert_eq!(b"b\xFFar", &headers[1]);
            assert_eq!(b"baz", &headers[2]);
        }
        match *rdr.headers().unwrap_err().kind() {
            ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
                assert_eq!(pos, &newpos(0, 1, 0));
                assert_eq!(err.field(), 1);
                assert_eq!(err.valid_up_to(), 3);
            }
            ref err => panic!("match failed, got {:?}", err),
        }
    }

    #[test]
    fn read_trimmed_records() {
        let data = b("foo,  bar,\tbaz\n  1,  2,  3\n1\t,\t,3\t\t");
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .trim(Trim::Fields)
            .from_reader(data);
        let mut rec = ByteRecord::new();
        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!("1", s(&rec[0]));
        assert_eq!("2", s(&rec[1]));
        assert_eq!("3", s(&rec[2]));
        {
            let headers = rdr.headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!("foo", &headers[0]);
            assert_eq!("  bar", &headers[1]);
            assert_eq!("\tbaz", &headers[2]);
        }
    }

    #[test]
    fn read_record_unequal_fails() {
        let data = b("foo\nbar,baz");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = ByteRecord::new();

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(1, rec.len());
        assert_eq!("foo", s(&rec[0]));

        match rdr.read_byte_record(&mut rec) {
            Err(err) => match *err.kind() {
                ErrorKind::UnequalLengths {
                    expected_len: 1,
                    ref pos,
                    len: 2,
                } => {
                    assert_eq!(pos, &Some(newpos(4, 2, 1)));
                }
                ref wrong => panic!("match failed, got {:?}", wrong),
            },
            wrong => panic!("match failed, got {:?}", wrong),
        }
    }

    #[test]
    fn read_record_unequal_ok() {
        let data = b("foo\nbar,baz");
        let mut rdr = ReaderBuilder::new()
            .has_headers(false)
            .flexible(true)
            .from_reader(data);
        let mut rec = ByteRecord::new();

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(1, rec.len());
        assert_eq!("foo", s(&rec[0]));

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(2, rec.len());
        assert_eq!("bar", s(&rec[0]));
        assert_eq!("baz", s(&rec[1]));

        assert!(!rdr.read_byte_record(&mut rec).unwrap());
    }

    // This tests that even if we get a CSV error, we can continue reading
    // if we want.
    #[test]
    fn read_record_unequal_continue() {
        let data = b("foo\nbar,baz\nquux");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = ByteRecord::new();

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(1, rec.len());
        assert_eq!("foo", s(&rec[0]));

        match rdr.read_byte_record(&mut rec) {
            Err(err) => match err.kind() {
                &ErrorKind::UnequalLengths {
                    expected_len: 1,
                    ref pos,
                    len: 2,
                } => {
                    assert_eq!(pos, &Some(newpos(4, 2, 1)));
                }
                wrong => panic!("match failed, got {:?}", wrong),
            },
            wrong => panic!("match failed, got {:?}", wrong),
        }

        assert!(rdr.read_byte_record(&mut rec).unwrap());
        assert_eq!(1, rec.len());
        assert_eq!("quux", s(&rec[0]));

        assert!(!rdr.read_byte_record(&mut rec).unwrap());
    }

    #[test]
    fn read_record_headers() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f");
        let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data);
        let mut rec = StringRecord::new();

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("a", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());

        {
            let headers = rdr.byte_headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!(b"foo", &headers[0]);
            assert_eq!(b"bar", &headers[1]);
            assert_eq!(b"baz", &headers[2]);
        }
        {
            let headers = rdr.headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!("foo", &headers[0]);
            assert_eq!("bar", &headers[1]);
            assert_eq!("baz", &headers[2]);
        }
    }

    #[test]
    fn read_record_headers_invalid_utf8() {
        let data = &b"foo,b\xFFar,baz\na,b,c\nd,e,f"[..];
        let mut rdr = ReaderBuilder::new().has_headers(true).from_reader(data);
        let mut rec = StringRecord::new();

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("a", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());

        // Check that we can read the headers as raw bytes, but that
        // if we read them as strings, we get an appropriate UTF-8 error.
        {
            let headers = rdr.byte_headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!(b"foo", &headers[0]);
            assert_eq!(b"b\xFFar", &headers[1]);
            assert_eq!(b"baz", &headers[2]);
        }
        match *rdr.headers().unwrap_err().kind() {
            ErrorKind::Utf8 { pos: Some(ref pos), ref err } => {
                assert_eq!(pos, &newpos(0, 1, 0));
                assert_eq!(err.field(), 1);
                assert_eq!(err.valid_up_to(), 1);
            }
            ref err => panic!("match failed, got {:?}", err),
        }
    }

    #[test]
    fn read_record_no_headers_before() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = StringRecord::new();

        {
            let headers = rdr.headers().unwrap();
            assert_eq!(3, headers.len());
            assert_eq!("foo", &headers[0]);
            assert_eq!("bar", &headers[1]);
            assert_eq!("baz", &headers[2]);
        }

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("foo", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("a", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());
    }

    #[test]
    fn read_record_no_headers_after() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f");
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader(data);
        let mut rec = StringRecord::new();

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("foo", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("a", &rec[0]);

        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());

        let headers = rdr.headers().unwrap();
        assert_eq!(3, headers.len());
        assert_eq!("foo", &headers[0]);
        assert_eq!("bar", &headers[1]);
        assert_eq!("baz", &headers[2]);
    }

    #[test]
    fn seek() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        rdr.seek(newpos(18, 3, 2)).unwrap();

        let mut rec = StringRecord::new();

        assert_eq!(18, rdr.position().byte());
        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("d", &rec[0]);

        assert_eq!(24, rdr.position().byte());
        assert_eq!(4, rdr.position().line());
        assert_eq!(3, rdr.position().record());
        assert!(rdr.read_record(&mut rec).unwrap());
        assert_eq!(3, rec.len());
        assert_eq!("g", &rec[0]);

        assert!(!rdr.read_record(&mut rec).unwrap());
    }

    // Test that we can read headers after seeking even if the headers weren't
    // explicit read before seeking.
    #[test]
    fn seek_headers_after() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        rdr.seek(newpos(18, 3, 2)).unwrap();
        assert_eq!(rdr.headers().unwrap(), vec!["foo", "bar", "baz"]);
    }

    // Test that we can read headers after seeking if the headers were read
    // before seeking.
    #[test]
    fn seek_headers_before_after() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        let headers = rdr.headers().unwrap().clone();
        rdr.seek(newpos(18, 3, 2)).unwrap();
        assert_eq!(&headers, rdr.headers().unwrap());
    }

    // Test that even if we didn't read headers before seeking, if we seek to
    // the current byte offset, then no seeking is done and therefore we can
    // still read headers after seeking.
    #[test]
    fn seek_headers_no_actual_seek() {
        let data = b("foo,bar,baz\na,b,c\nd,e,f\ng,h,i");
        let mut rdr = ReaderBuilder::new().from_reader(io::Cursor::new(data));
        rdr.seek(Position::new()).unwrap();
        assert_eq!("foo", &rdr.headers().unwrap()[0]);
    }

    // Test that position info is reported correctly in absence of headers.
    #[test]
    fn positions_no_headers() {
        let mut rdr = ReaderBuilder::new()
            .has_headers(false)
            .from_reader("a,b,c\nx,y,z".as_bytes())
            .into_records();

        let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
        assert_eq!(pos.byte(), 0);
        assert_eq!(pos.line(), 1);
        assert_eq!(pos.record(), 0);

        let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
        assert_eq!(pos.byte(), 6);
        assert_eq!(pos.line(), 2);
        assert_eq!(pos.record(), 1);
    }

    // Test that position info is reported correctly with headers.
    #[test]
    fn positions_headers() {
        let mut rdr = ReaderBuilder::new()
            .has_headers(true)
            .from_reader("a,b,c\nx,y,z".as_bytes())
            .into_records();

        let pos = rdr.next().unwrap().unwrap().position().unwrap().clone();
        assert_eq!(pos.byte(), 6);
        assert_eq!(pos.line(), 2);
        assert_eq!(pos.record(), 1);
    }

    // Test that reading headers on empty data yields an empty record.
    #[test]
    fn headers_on_empty_data() {
        let mut rdr = ReaderBuilder::new().from_reader("".as_bytes());
        let r = rdr.byte_headers().unwrap();
        assert_eq!(r.len(), 0);
    }

    // Test that reading the first record on empty data works.
    #[test]
    fn no_headers_on_empty_data() {
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
        assert_eq!(rdr.records().count(), 0);
    }

    // Test that reading the first record on empty data works, even if
    // we've tried to read headers before hand.
    #[test]
    fn no_headers_on_empty_data_after_headers() {
        let mut rdr =
            ReaderBuilder::new().has_headers(false).from_reader("".as_bytes());
        assert_eq!(rdr.headers().unwrap().len(), 0);
        assert_eq!(rdr.records().count(), 0);
    }
}