Skip to content

Commit

Permalink
sniff: also return file kind along with mime type
Browse files Browse the repository at this point in the history
  • Loading branch information
jqnatividad committed Jul 15, 2023
1 parent a22a719 commit fc25754
Showing 1 changed file with 19 additions and 4 deletions.
23 changes: 19 additions & 4 deletions src/cmd/sniff.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ struct SniffStruct {
flexible: bool,
is_utf8: bool,
detected_mime: String,
detected_kind: String,
retrieved_size: usize,
file_size: usize,
sampled_records: usize,
Expand Down Expand Up @@ -203,6 +204,7 @@ impl fmt::Display for SniffStruct {
writeln!(f, "Flexible: {}", self.flexible)?;
writeln!(f, "Is UTF8: {}", self.is_utf8)?;
writeln!(f, "Detected Mime Type: {}", self.detected_mime)?;
writeln!(f, "Detected Kind: {}", self.detected_kind)?;
writeln!(
f,
"Retrieved Size (bytes): {}",
Expand Down Expand Up @@ -267,6 +269,7 @@ struct SniffFileStruct {
display_path: String,
file_to_sniff: String,
detected_mime: String,
detected_kind: String,
tempfile_flag: bool,
retrieved_size: usize,
file_size: usize,
Expand Down Expand Up @@ -478,9 +481,12 @@ async fn get_file_to_sniff(args: &Args, tmpdir: &tempfile::TempDir) -> CliResult
let wtr_file_path;
let mut csv_candidate = true;
let mut detected_mime = String::new();
let mut detected_kind: String = String::new();

if !args.flag_quick {
detected_mime = FileFormat::from_file(file.path())?.media_type().to_string();
let file_format = FileFormat::from_file(file.path())?;
detected_mime = file_format.media_type().to_string();
detected_kind = format!("{:?}", file_format.kind());
csv_candidate =
detected_mime.starts_with("text/") || detected_mime == "application/csv";
}
Expand Down Expand Up @@ -558,6 +564,7 @@ async fn get_file_to_sniff(args: &Args, tmpdir: &tempfile::TempDir) -> CliResult
display_path: url,
file_to_sniff: wtr_file_path,
detected_mime,
detected_kind,
tempfile_flag: true,
retrieved_size: downloaded,
file_size: if total_size == usize::MAX {
Expand Down Expand Up @@ -626,6 +633,7 @@ async fn get_file_to_sniff(args: &Args, tmpdir: &tempfile::TempDir) -> CliResult
display_path: canonical_path,
file_to_sniff: path,
detected_mime: String::new(),
detected_kind: String::new(),
tempfile_flag: false,
retrieved_size: file_size,
file_size,
Expand Down Expand Up @@ -665,6 +673,7 @@ async fn get_file_to_sniff(args: &Args, tmpdir: &tempfile::TempDir) -> CliResult
display_path: "stdin".to_string(),
file_to_sniff: path_string,
detected_mime: String::new(),
detected_kind: String::new(),
tempfile_flag: true,
retrieved_size: file_size,
file_size,
Expand Down Expand Up @@ -727,13 +736,15 @@ async fn sniff_main(mut args: Args) -> CliResult<()> {

// if we don't have a mime type, or it's a snappy file and --no-infer is disabled,
// let's try to infer the mime type
let file_kind: String;
let file_type = if sfile_info.detected_mime.is_empty()
|| sfile_info.detected_mime == "application/x-snappy-framed" && !args.flag_no_infer
{
FileFormat::from_file(&sfile_info.file_to_sniff)?
.media_type()
.to_string()
let file_format = FileFormat::from_file(&sfile_info.file_to_sniff)?;
file_kind = format!("{:?}", file_format.kind());
file_format.media_type().to_string()
} else {
file_kind = sfile_info.detected_kind.clone();
sfile_info.detected_mime.clone()
};

Expand All @@ -755,6 +766,7 @@ async fn sniff_main(mut args: Args) -> CliResult<()> {
"title": "sniff mime type",
"meta": {
"detected_mime_type": file_type,
"detected_kind": file_kind,
"size": size,
"last_modified": last_modified,
}
Expand All @@ -772,6 +784,7 @@ async fn sniff_main(mut args: Args) -> CliResult<()> {
"detail": format!("File is not a CSV file. Detected mime type: {file_type}"),
"meta": {
"detected_mime_type": file_type,
"detected_kind": file_kind,
"size": size,
"last_modified": last_modified,
}
Expand Down Expand Up @@ -953,6 +966,7 @@ async fn sniff_main(mut args: Args) -> CliResult<()> {
} else {
file_type.clone()
},
detected_kind: file_kind.clone(),
retrieved_size: sfile_info.retrieved_size,
file_size: sfile_info.file_size,
sampled_records: if sampled_records > num_records {
Expand Down Expand Up @@ -994,6 +1008,7 @@ async fn sniff_main(mut args: Args) -> CliResult<()> {
"detail": format!("{}", sniff_error.unwrap()),
"meta": {
"detected_mime_type": file_type,
"detected_kind": file_kind,
"size": sfile_info.file_size,
"last_modified": sfile_info.last_modified,
}
Expand Down

0 comments on commit fc25754

Please sign in to comment.