Skip to content

Commit

Permalink
api: automatically escape fields that contain the comment character
Browse files Browse the repository at this point in the history
Previously, if data was written with QuoteStyle::Necessary and the
first field of a row happened to start with a comment character, the row
would be ignored as a comment when later reading it back in.

This change adds a `comment` property to Writer, and automatically
quotes fields that have the provided comment character in them, so they
round-trip correctly.

Closes #283
  • Loading branch information
dae authored and BurntSushi committed Oct 3, 2023
1 parent 574ae1f commit 0f64d3f
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 0 deletions.
38 changes: 38 additions & 0 deletions csv-core/src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ impl WriterBuilder {
quote: b'"',
escape: b'\\',
double_quote: true,
comment: None,
};
WriterBuilder { wtr: wtr }
}
Expand Down Expand Up @@ -56,6 +57,13 @@ impl WriterBuilder {
}
_ => unreachable!(),
}
// If the first field of a row starts with a comment character,
// it needs to be quoted, or the row will not be readable later.
// As requires_quotes is calculated in advance, we force quotes
// when a comment character is encountered anywhere in the field.
if let Some(comment) = self.wtr.comment {
wtr.requires_quotes[comment as usize] = true;
}
wtr
}

Expand Down Expand Up @@ -119,6 +127,17 @@ impl WriterBuilder {
self.wtr.double_quote = yes;
self
}

/// The comment character that will be used when later reading the file.
///
/// When this is `Some(byte)`, the writer produced by `build` marks that
/// byte as one that requires quoting. If `quote_style` is set to
/// `QuoteStyle::Necessary`, a field will therefore be quoted if the
/// comment character is detected anywhere in the field (not only at the
/// start of a row), so that the row is not mistaken for a comment when
/// it is read back.
///
/// The default value is `None`, in which case no byte is forced to be
/// quoted on account of being a comment character.
pub fn comment(&mut self, comment: Option<u8>) -> &mut WriterBuilder {
self.wtr.comment = comment;
self
}
}

impl Default for WriterBuilder {
Expand Down Expand Up @@ -166,6 +185,7 @@ pub struct Writer {
quote: u8,
escape: u8,
double_quote: bool,
comment: Option<u8>,
}

impl Clone for Writer {
Expand All @@ -183,6 +203,7 @@ impl Clone for Writer {
quote: self.quote,
escape: self.escape,
double_quote: self.double_quote,
comment: self.comment,
}
}
}
Expand Down Expand Up @@ -1044,4 +1065,21 @@ mod tests {
inp = &inp[1..];
assert_quote!(inp, out, 1, 2, InputEmpty, r#""""#);
}

// Checks that a field beginning with the configured comment byte is written
// with surrounding quotes under the default builder settings.
#[test]
fn comment_char_is_automatically_quoted() {
let mut wtr = WriterBuilder::new().comment(Some(b'#')).build();
let out = &mut [0; 1024];

// The input "# abc" is 5 bytes; the expected data at this point is the
// opening quote followed by those 5 bytes (6 bytes in total).
// NOTE(review): the `5` and `6` arguments are presumably the expected
// bytes-read and bytes-written counts -- confirm against `assert_field!`.
assert_field!(
wtr,
b("# abc"),
&mut out[..],
5,
6,
InputEmpty,
"\"# abc"
);
// The closing quote is expected only once `finish` is called (1 byte).
assert_write!(wtr, finish, &mut out[..], 1, InputEmpty, "\"");
}
}
37 changes: 37 additions & 0 deletions src/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,34 @@ impl WriterBuilder {
self
}

/// The comment character that will be used when later reading the file.
///
/// The value is forwarded to the underlying core writer builder.
///
/// If `quote_style` is set to `QuoteStyle::Necessary`, a field will
/// be quoted if the comment character is detected anywhere in the field,
/// so that a row whose first field starts with the comment character is
/// not skipped as a comment when the data is read back.
///
/// The default value is `None`.
///
/// # Example
///
/// ```
/// use std::error::Error;
/// use csv::WriterBuilder;
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
///     let mut wtr =
///         WriterBuilder::new().comment(Some(b'#')).from_writer(Vec::new());
///     wtr.write_record(&["# comment", "another"]).unwrap();
///     let buf = wtr.into_inner().unwrap();
///     assert_eq!(String::from_utf8(buf).unwrap(), "\"# comment\",another\n");
///     Ok(())
/// }
/// ```
pub fn comment(&mut self, comment: Option<u8>) -> &mut WriterBuilder {
self.builder.comment(comment);
self
}

/// Set the capacity (in bytes) of the internal buffer used in the CSV
/// writer. This defaults to a reasonable setting.
pub fn buffer_capacity(&mut self, capacity: usize) -> &mut WriterBuilder {
Expand Down Expand Up @@ -1414,4 +1442,13 @@ mod tests {
wtr.serialize((true, 1.3, "hi")).unwrap();
assert_eq!(wtr_as_string(wtr), "true,1.3,hi\n");
}

#[test]
fn comment_char_is_automatically_quoted() {
    // Configure '#' as the comment byte before creating the writer.
    let mut builder = WriterBuilder::new();
    builder.comment(Some(b'#'));
    let mut wtr = builder.from_writer(Vec::new());

    // The first field starts with the comment byte; the second does not.
    wtr.write_record(&["# comment", "another"]).unwrap();

    // Only the field containing the comment byte should come out quoted.
    let written = String::from_utf8(wtr.into_inner().unwrap()).unwrap();
    assert_eq!(written, "\"# comment\",another\n");
}
}

0 comments on commit 0f64d3f

Please sign in to comment.