From 0f64d3f3322b30af7a38e222bd7dad18eac38b2b Mon Sep 17 00:00:00 2001 From: Damien Elmes Date: Wed, 8 Jun 2022 18:01:27 +1000 Subject: [PATCH] api: automatically escape fields that contain the comment character Previously, if data is written with QuoteStyle::Necessary, and the first field of a row happens to contain a comment character, the row will be ignored as a comment when later reading it back in. This change adds a `comment` property to Writer, and automatically quotes fields that have the provided comment character in them, so they round-trip correctly. Closes #283 --- csv-core/src/writer.rs | 38 ++++++++++++++++++++++++++++++++++++++ src/writer.rs | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/csv-core/src/writer.rs b/csv-core/src/writer.rs index 4f94301..ba196b6 100644 --- a/csv-core/src/writer.rs +++ b/csv-core/src/writer.rs @@ -26,6 +26,7 @@ impl WriterBuilder { quote: b'"', escape: b'\\', double_quote: true, + comment: None, }; WriterBuilder { wtr: wtr } } @@ -56,6 +57,13 @@ impl WriterBuilder { } _ => unreachable!(), } + // If the first field of a row starts with a comment character, + // it needs to be quoted, or the row will not be readable later. + // As requires_quotes is calculated in advance, we force quotes + // when a comment character is encountered anywhere in the field. + if let Some(comment) = self.wtr.comment { + wtr.requires_quotes[comment as usize] = true; + } wtr } @@ -119,6 +127,17 @@ impl WriterBuilder { self.wtr.double_quote = yes; self } + + /// The comment character that will be used when later reading the file. + /// + /// If `quote_style` is set to `QuoteStyle::Necessary`, a field will + /// be quoted if the comment character is detected anywhere in the field. + /// + /// The default value is None. 
+ pub fn comment(&mut self, comment: Option<u8>) -> &mut WriterBuilder { + self.wtr.comment = comment; + self + } } impl Default for WriterBuilder { @@ -166,6 +185,7 @@ pub struct Writer { quote: u8, escape: u8, double_quote: bool, + comment: Option<u8>, } impl Clone for Writer { @@ -183,6 +203,7 @@ impl Clone for Writer { quote: self.quote, escape: self.escape, double_quote: self.double_quote, + comment: self.comment, } } } @@ -1044,4 +1065,21 @@ mod tests { inp = &inp[1..]; assert_quote!(inp, out, 1, 2, InputEmpty, r#""""#); } + + #[test] + fn comment_char_is_automatically_quoted() { + let mut wtr = WriterBuilder::new().comment(Some(b'#')).build(); + let out = &mut [0; 1024]; + + assert_field!( + wtr, + b("# abc"), + &mut out[..], + 5, + 6, + InputEmpty, + "\"# abc" + ); + assert_write!(wtr, finish, &mut out[..], 1, InputEmpty, "\""); + } } diff --git a/src/writer.rs b/src/writer.rs index 392c207..9fef1f1 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -470,6 +470,34 @@ impl WriterBuilder { self } + /// The comment character that will be used when later reading the file. + /// + /// If `quote_style` is set to `QuoteStyle::Necessary`, a field will + /// be quoted if the comment character is detected anywhere in the field. + /// + /// The default value is None. + /// + /// # Example + /// + /// ``` + /// use std::error::Error; + /// use csv::WriterBuilder; + /// + /// # fn main() { example().unwrap(); } + /// fn example() -> Result<(), Box<dyn Error>> { + /// let mut wtr = + /// WriterBuilder::new().comment(Some(b'#')).from_writer(Vec::new()); + /// wtr.write_record(&["# comment", "another"]).unwrap(); + /// let buf = wtr.into_inner().unwrap(); + /// assert_eq!(String::from_utf8(buf).unwrap(), "\"# comment\",another\n"); + /// Ok(()) + /// } + /// ``` + pub fn comment(&mut self, comment: Option<u8>) -> &mut WriterBuilder { + self.builder.comment(comment); + self + } + /// Set the capacity (in bytes) of the internal buffer used in the CSV /// writer. 
This defaults to a reasonable setting. pub fn buffer_capacity(&mut self, capacity: usize) -> &mut WriterBuilder { @@ -1414,4 +1442,13 @@ mod tests { wtr.serialize((true, 1.3, "hi")).unwrap(); assert_eq!(wtr_as_string(wtr), "true,1.3,hi\n"); } + + #[test] + fn comment_char_is_automatically_quoted() { + let mut wtr = + WriterBuilder::new().comment(Some(b'#')).from_writer(Vec::new()); + wtr.write_record(&["# comment", "another"]).unwrap(); + let buf = wtr.into_inner().unwrap(); + assert_eq!(String::from_utf8(buf).unwrap(), "\"# comment\",another\n"); + } }