Skip to content

Commit

Permalink
feat: add invalid_result helper function
Browse files Browse the repository at this point in the history
Closes #345
  • Loading branch information
lucatrv committed Feb 27, 2024
1 parent 533d37b commit 17faf8c
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ csv-core = { path = "csv-core", version = "0.1.11" }
itoa = "1"
ryu = "1"
serde = "1.0.55"
serde-value = "0.7.0"

[dev-dependencies]
bstr = { version = "1.7.0", default-features = false, features = ["alloc", "serde"] }
Expand Down
30 changes: 30 additions & 0 deletions examples/tutorial-read-serde-invalid-03.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#![allow(dead_code)]
use std::{error::Error, io, process};

use serde::Deserialize;
/// One CSV row as deserialized by this example.
///
/// `rename_all = "PascalCase"` makes serde match each field against the
/// PascalCase form of its name (e.g. `latitude` is read from `Latitude`).
#[derive(Debug, Deserialize)]
#[serde(rename_all = "PascalCase")]
struct Record {
latitude: f64,
longitude: f64,
// `csv::invalid_result` keeps a value that cannot be parsed as `u64`: the
// field becomes `Err` holding the raw text (an empty string for an empty
// field) instead of making deserialization of the whole record fail.
#[serde(deserialize_with = "csv::invalid_result")]
population: Result<u64, String>,
city: String,
state: String,
}

fn run() -> Result<(), Box<dyn Error>> {
let mut rdr = csv::Reader::from_reader(io::stdin());
for result in rdr.deserialize() {
let record: Record = result?;
println!("{:?}", record);
}
Ok(())
}

/// Entry point: runs the example and, on failure, prints the error and exits
/// with status 1.
fn main() {
    match run() {
        Ok(()) => {}
        Err(failure) => {
            println!("{}", failure);
            process::exit(1);
        }
    }
}
98 changes: 98 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,9 @@ impl Default for Trim {
/// `Option<T>` is deserialized with non-empty but invalid data, then the value
/// will be `None` and the error will be ignored.
///
/// Use the [`invalid_result`](./fn.invalid_result.html) function if you want to
/// return the invalid values as `Err<String>` instead of discarding them.
///
/// # Example
///
/// This example shows how to parse CSV records with numerical data, even if
Expand Down Expand Up @@ -343,3 +346,98 @@ where
{
Option::<T>::deserialize(de).or_else(|_| Ok(None))
}

/// A custom Serde deserializer for possibly invalid `Result<T, String>` fields.
///
/// When deserializing CSV data, it is sometimes desirable to return fields with
/// invalid data separately. For example, there might be a field that is
/// usually a number, but will occasionally contain garbage data that causes
/// number parsing to fail.
///
/// You might be inclined to use, say, `Result<i32, String>` for fields such as
/// this. However, this will not compile out of the box, because Serde does not
/// know when to return `Ok<i32>` and when to return `Err<String>`.
///
/// This function allows you to define the following behavior: if `Result<T,
/// String>` is deserialized with valid data, then the valid value will be
/// returned as `Ok<T>`, while if it is deserialized with empty or invalid data,
/// then the invalid value will be converted to `String` and returned as
/// `Err<String>`. Note that any invalid UTF-8 bytes are lossily converted to
/// `String`, therefore this function will never fail.
///
/// Use the [`invalid_option`](./fn.invalid_option.html) function if you want to
/// discard the invalid values instead of returning them as `Err<String>`.
///
/// # Example
///
/// This example shows how to parse CSV records with numerical data, even if
/// some numerical data is absent or invalid. Without the
/// `serde(deserialize_with = "...")` annotations, this example would not
/// compile.
///
/// ```
/// use std::error::Error;
///
/// #[derive(Debug, serde::Deserialize, Eq, PartialEq)]
/// struct Row {
/// #[serde(deserialize_with = "csv::invalid_result")]
/// a: Result<i32, String>,
/// #[serde(deserialize_with = "csv::invalid_result")]
/// b: Result<i32, String>,
/// #[serde(deserialize_with = "csv::invalid_result")]
/// c: Result<i32, String>,
/// }
///
/// # fn main() { example().unwrap(); }
/// fn example() -> Result<(), Box<dyn Error>> {
/// let data = "\
/// a,b,c
/// 5,\"\",xyz
/// ";
/// let mut rdr = csv::Reader::from_reader(data.as_bytes());
/// if let Some(result) = rdr.deserialize().next() {
/// let record: Row = result?;
/// assert_eq!(record, Row { a: Ok(5), b: Err(String::new()), c: Err(String::from("xyz")) });
/// Ok(())
/// } else {
/// Err(From::from("expected at least one record but got none"))
/// }
/// }
/// ```
pub fn invalid_result<'de, D, T>(
de: D,
) -> result::Result<result::Result<T, String>, D::Error>
where
D: Deserializer<'de>,
T: Deserialize<'de>,
{
let value = serde_value::Value::deserialize(de)?;
let result = T::deserialize(value.clone()).map_err(|_| match value {
serde_value::Value::Bool(b) => b.to_string(),
serde_value::Value::U8(u) => u.to_string(),
serde_value::Value::U16(u) => u.to_string(),
serde_value::Value::U32(u) => u.to_string(),
serde_value::Value::U64(u) => u.to_string(),
serde_value::Value::I8(i) => i.to_string(),
serde_value::Value::I16(i) => i.to_string(),
serde_value::Value::I32(i) => i.to_string(),
serde_value::Value::I64(i) => i.to_string(),
serde_value::Value::F32(f) => f.to_string(),
serde_value::Value::F64(f) => f.to_string(),
serde_value::Value::Char(c) => c.to_string(),
serde_value::Value::String(s) => s,
serde_value::Value::Unit => String::new(),
serde_value::Value::Option(option) => {
format!("{:?}", option)
}
serde_value::Value::Newtype(newtype) => {
format!("{:?}", newtype)
}
serde_value::Value::Seq(seq) => format!("{:?}", seq),
serde_value::Value::Map(map) => format!("{:?}", map),
serde_value::Value::Bytes(bytes) => {
String::from_utf8_lossy(&bytes).into_owned()
}
});
Ok(result)
}
56 changes: 56 additions & 0 deletions src/tutorial.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1136,6 +1136,62 @@ function is a generic helper function that does one very simple thing: when
applied to `Option` fields, it will convert any deserialization error into a
`None` value. This is useful when you need to work with messy CSV data.
Sometimes you might need to return invalid fields instead of discarding them.
For this you can use the similar
[`invalid_result`](../fn.invalid_result.html)
function, which works as follows: when applied to `Result<T, String>` fields,
it will convert any invalid field to a `String` and return it as `Err(string)`.
Note that any invalid UTF-8 bytes are lossily converted to `String`, therefore
this function will never fail.
This behavior can be achieved with very minor changes to the previous example:
```no_run
//tutorial-read-serde-invalid-03.rs
# #![allow(dead_code)]
# use std::{error::Error, io, process};
#
# use serde::Deserialize;
#[derive(Debug, Deserialize)]
#[serde(rename_all = "PascalCase")]
struct Record {
latitude: f64,
longitude: f64,
// A population that cannot be parsed as `u64` is kept as `Err` holding the
// raw field text instead of failing the whole record.
#[serde(deserialize_with = "csv::invalid_result")]
population: Result<u64, String>,
city: String,
state: String,
}
// Deserialize each record read from stdin and print it; the first record that
// fails to deserialize ends the run with that error.
fn run() -> Result<(), Box<dyn Error>> {
let mut rdr = csv::Reader::from_reader(io::stdin());
for result in rdr.deserialize() {
let record: Record = result?;
println!("{:?}", record);
}
Ok(())
}
#
# fn main() {
# if let Err(err) = run() {
# println!("{}", err);
# process::exit(1);
# }
# }
```
If you compile and run this last example, then it should run to completion just
like the previous one but with the following output:
```text
$ cargo build
$ ./target/debug/csvtutor < uspop-null.csv
Record { latitude: 65.2419444, longitude: -165.2716667, population: Err(""), city: "Davidsons Landing", state: "AK" }
Record { latitude: 60.5544444, longitude: -151.2583333, population: Ok(7610), city: "Kenai", state: "AK" }
Record { latitude: 33.7133333, longitude: -87.3886111, population: Err(""), city: "Oakman", state: "AL" }
# ... and more
```
# Writing CSV
In this section we'll show a few examples that write CSV data. Writing CSV data
Expand Down

0 comments on commit 17faf8c

Please sign in to comment.