Skip to content

Commit

Permalink
Merge pull request #1792 from jqnatividad/fix-search-json
Browse files Browse the repository at this point in the history
`search`: fix `--json` output
  • Loading branch information
jqnatividad committed May 4, 2024
2 parents 541baf4 + 0b0b4e3 commit 3b4a2ca
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 26 deletions.
69 changes: 57 additions & 12 deletions src/cmd/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ search options:
the row number of the first match to stderr.
Return exit code 1 if no match is found.
No output is produced.
--preview-match <arg> Preview the first <arg> matches or all the matches found in
<arg> milliseconds, whichever occurs first. Returns the preview
--preview-match <arg> Preview the first N matches or all the matches found in
N milliseconds, whichever occurs first. Returns the preview
to stderr. Output is still written to stdout or --output as usual.
-c, --count Return number of matches to stderr.
--size-limit <mb> Set the approximate size limit (MB) of the compiled
Expand Down Expand Up @@ -97,7 +97,7 @@ struct Args {
flag_dfa_size_limit: usize,
flag_json: bool,
flag_quick: bool,
flag_preview_match: Option<usize>,
flag_preview_match: Option<u64>,
flag_count: bool,
flag_progressbar: bool,
flag_quiet: bool,
Expand Down Expand Up @@ -125,9 +125,12 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
.no_headers(args.flag_no_headers)
.select(args.flag_select);

// args struct booleans in hot loop assigned to local variables
// to help the compiler optimize the code
let flag_quick = args.flag_quick;
let flag_quiet = args.flag_quiet || args.flag_json;
let flag_json = args.flag_json;
let flag_no_headers = args.flag_no_headers;

let mut rdr = rconfig.reader()?;
let mut wtr = Config::new(&args.flag_output).writer()?;
Expand Down Expand Up @@ -174,6 +177,9 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
#[allow(unused_assignments)]
let mut matched_rows = String::with_capacity(20); // to save on allocs

let mut json_started = false;
let mut is_first = true;

// if preview_match is set, we do an initial loop for the
// first N matches or all the matches found in N milliseconds
if let Some(preview_match) = args.flag_preview_match {
Expand All @@ -190,11 +196,20 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
stderr_wtr.write_record(&headers)?;
}

// write the JSON array start
if flag_json {
json_wtr.write_all(b"[")?;
stderr_jsonwtr.write_all(b"[")?;
json_started = true;
}

let mut preview_match_ctr = 0;
let preview_timeout = std::time::Duration::from_millis(preview_match as u64);
let mut is_first_stderr = true;
let preview_timeout = std::time::Duration::from_millis(preview_match);
let start_time = std::time::Instant::now();
while rdr.read_byte_record(&mut record)? {
row_ctr += 1;

m = sel.select(&record).any(|f| pattern.is_match(f));
if invert_match {
m = !m;
Expand All @@ -208,15 +223,17 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
if flag_json {
util::write_json_record(
&mut stderr_jsonwtr,
args.flag_no_headers,
flag_no_headers,
&headers,
&record,
&mut is_first_stderr,
)?;
util::write_json_record(
&mut json_wtr,
args.flag_no_headers,
flag_no_headers,
&headers,
&record,
&mut is_first,
)?;
} else {
stderr_wtr.write_record(&record)?;
Expand All @@ -239,9 +256,10 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
if flag_json {
util::write_json_record(
&mut json_wtr,
args.flag_no_headers,
flag_no_headers,
&headers,
&record,
&mut is_first,
)?;
} else {
wtr.write_byte_record(&record)?;
Expand All @@ -250,16 +268,17 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
if flag_json {
util::write_json_record(
&mut json_wtr,
args.flag_no_headers,
flag_no_headers,
&headers,
&record,
&mut is_first,
)?;
} else {
wtr.write_byte_record(&record)?;
}
}
}
match_ctr = preview_match_ctr as u64;
match_ctr = preview_match_ctr;
#[cfg(any(feature = "feature_capable", feature = "lite"))]
if show_progress {
progress.inc(row_ctr);
Expand All @@ -273,10 +292,19 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
start_time.elapsed().as_millis()
);
}
if flag_json {
stderr_jsonwtr.write_all(b"]")?;
stderr_jsonwtr.flush()?;
}
}

if flag_json && !json_started {
json_wtr.write_all(b"[")?;
}

while rdr.read_byte_record(&mut record)? {
row_ctr += 1;

#[cfg(any(feature = "feature_capable", feature = "lite"))]
if show_progress {
progress.inc(1);
Expand All @@ -301,19 +329,36 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
b"0"
});
if flag_json {
util::write_json_record(&mut json_wtr, args.flag_no_headers, &headers, &record)?;
util::write_json_record(
&mut json_wtr,
flag_no_headers,
&headers,
&record,
&mut is_first,
)?;
} else {
wtr.write_byte_record(&record)?;
}
} else if m {
if flag_json {
util::write_json_record(&mut json_wtr, args.flag_no_headers, &headers, &record)?;
util::write_json_record(
&mut json_wtr,
flag_no_headers,
&headers,
&record,
&mut is_first,
)?;
} else {
wtr.write_byte_record(&record)?;
}
}
}
wtr.flush()?;
if flag_json {
json_wtr.write_all(b"]")?;
json_wtr.flush()?;
} else {
wtr.flush()?;
}

#[cfg(any(feature = "feature_capable", feature = "lite"))]
if show_progress {
Expand Down
37 changes: 28 additions & 9 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1690,6 +1690,7 @@ pub fn create_json_writer(
Ok(writer)
}

/// iterate over the CSV ByteRecords and write them to the JSON file
pub fn write_json(
output: &Option<String>,
no_headers: bool,
Expand All @@ -1716,11 +1717,13 @@ pub fn write_json(

let rec_len = header_vec.len().saturating_sub(1);
let mut temp_val;
let mut json_string_val: serde_json::Value;
let null_val = "null".to_string();
let mut json_string_val: serde_json::Value;

for record in records {
if !is_first {
if is_first {
is_first = false;
} else {
// Write a comma before each record except the first one
write!(json_wtr, ",")?;
}
Expand All @@ -1734,7 +1737,7 @@ pub fn write_json(
if temp_val.is_empty() {
temp_val.clone_from(&null_val);
} else {
// we round-trip the value to serde_json::Value
// we round-trip the value to serde_json
// to escape the string properly per JSON spec
json_string_val = serde_json::Value::String(temp_val);
temp_val = json_string_val.to_string();
Expand All @@ -1745,34 +1748,45 @@ pub fn write_json(
unsafe {
write!(
&mut json_wtr,
"\"{key}\":{value},",
r#""{key}":{value},"#,
key = header_vec.get_unchecked(idx),
value = temp_val
)?;
}
} else {
// last column in the JSON record, no comma
unsafe {
write!(
&mut json_wtr,
"\"{key}\":{value}",
r#""{key}":{value}"#,
key = header_vec.get_unchecked(idx),
value = temp_val
)?;
}
}
}
write!(json_wtr, "}}")?;
is_first = false;
}
// Write the closing bracket for the JSON array
writeln!(json_wtr, "]")?;

Ok(json_wtr.flush()?)
}

/// Write a single csv::ByteRecord to a JSON record writer.
/// If no_headers is true, the column index (0-based) is used as the key;
/// if no_headers is false, the header is used as the key.
/// If is_first is true, no comma is written before the record and
/// is_first is then set to false; otherwise a comma is written first.
/// is_first is passed as a mutable reference so that this helper
/// can update it in place across successive calls.
/// In this way, callers can stream JSON records to a writer.
pub fn write_json_record<W: std::io::Write>(
json_wtr: &mut W,
no_headers: bool,
headers: &csv::ByteRecord,
record: &csv::ByteRecord,
is_first: &mut bool,
) -> std::io::Result<()> {
let header_vec: Vec<String> = headers
.iter()
Expand All @@ -1791,7 +1805,12 @@ pub fn write_json_record<W: std::io::Write>(
let mut json_string_val: serde_json::Value;
let null_val = "null".to_string();

write!(json_wtr, "{{")?;
if *is_first {
write!(json_wtr, "{{")?;
*is_first = false;
} else {
write!(json_wtr, ",{{")?;
}
for (idx, b) in record.iter().enumerate() {
if let Ok(val) = simdutf8::basic::from_utf8(b) {
temp_val = val.to_owned();
Expand All @@ -1808,7 +1827,7 @@ pub fn write_json_record<W: std::io::Write>(
unsafe {
write!(
json_wtr,
"\"{key}\":{value},",
r#""{key}":{value},"#,
key = header_vec.get_unchecked(idx),
value = temp_val
)?;
Expand All @@ -1817,7 +1836,7 @@ pub fn write_json_record<W: std::io::Write>(
unsafe {
write!(
json_wtr,
"\"{key}\":{value}",
r#""{key}":{value}"#,
key = header_vec.get_unchecked(idx),
value = temp_val
)?;
Expand Down
Loading

0 comments on commit 3b4a2ca

Please sign in to comment.