Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

search: fix --json output #1792

Merged
merged 3 commits into from
May 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 57 additions & 12 deletions src/cmd/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ search options:
the row number of the first match to stderr.
Return exit code 1 if no match is found.
No output is produced.
--preview-match <arg> Preview the first <arg> matches or all the matches found in
<arg> milliseconds, whichever occurs first. Returns the preview
--preview-match <arg> Preview the first N matches or all the matches found in
N milliseconds, whichever occurs first. Returns the preview
to stderr. Output is still written to stdout or --output as usual.
-c, --count Return number of matches to stderr.
--size-limit <mb> Set the approximate size limit (MB) of the compiled
Expand Down Expand Up @@ -97,7 +97,7 @@ struct Args {
flag_dfa_size_limit: usize,
flag_json: bool,
flag_quick: bool,
flag_preview_match: Option<usize>,
flag_preview_match: Option<u64>,
flag_count: bool,
flag_progressbar: bool,
flag_quiet: bool,
Expand Down Expand Up @@ -125,9 +125,12 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
.no_headers(args.flag_no_headers)
.select(args.flag_select);

// Copy the boolean flags of the args struct that are used in the hot loop
// into local variables to help the compiler optimize the code.
let flag_quick = args.flag_quick;
let flag_quiet = args.flag_quiet || args.flag_json;
let flag_json = args.flag_json;
let flag_no_headers = args.flag_no_headers;

let mut rdr = rconfig.reader()?;
let mut wtr = Config::new(&args.flag_output).writer()?;
Expand Down Expand Up @@ -174,6 +177,9 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
#[allow(unused_assignments)]
let mut matched_rows = String::with_capacity(20); // to save on allocs

let mut json_started = false;
let mut is_first = true;

// if preview_match is set, we do an initial loop for the
// first N matches or all the matches found in N milliseconds
if let Some(preview_match) = args.flag_preview_match {
Expand All @@ -190,11 +196,20 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
stderr_wtr.write_record(&headers)?;
}

// write the JSON array start
if flag_json {
json_wtr.write_all(b"[")?;
stderr_jsonwtr.write_all(b"[")?;
json_started = true;
}

let mut preview_match_ctr = 0;
let preview_timeout = std::time::Duration::from_millis(preview_match as u64);
let mut is_first_stderr = true;
let preview_timeout = std::time::Duration::from_millis(preview_match);
let start_time = std::time::Instant::now();
while rdr.read_byte_record(&mut record)? {
row_ctr += 1;

m = sel.select(&record).any(|f| pattern.is_match(f));
if invert_match {
m = !m;
Expand All @@ -208,15 +223,17 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
if flag_json {
util::write_json_record(
&mut stderr_jsonwtr,
args.flag_no_headers,
flag_no_headers,
&headers,
&record,
&mut is_first_stderr,
)?;
util::write_json_record(
&mut json_wtr,
args.flag_no_headers,
flag_no_headers,
&headers,
&record,
&mut is_first,
)?;
} else {
stderr_wtr.write_record(&record)?;
Expand All @@ -239,9 +256,10 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
if flag_json {
util::write_json_record(
&mut json_wtr,
args.flag_no_headers,
flag_no_headers,
&headers,
&record,
&mut is_first,
)?;
} else {
wtr.write_byte_record(&record)?;
Expand All @@ -250,16 +268,17 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
if flag_json {
util::write_json_record(
&mut json_wtr,
args.flag_no_headers,
flag_no_headers,
&headers,
&record,
&mut is_first,
)?;
} else {
wtr.write_byte_record(&record)?;
}
}
}
match_ctr = preview_match_ctr as u64;
match_ctr = preview_match_ctr;
#[cfg(any(feature = "feature_capable", feature = "lite"))]
if show_progress {
progress.inc(row_ctr);
Expand All @@ -273,10 +292,19 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
start_time.elapsed().as_millis()
);
}
if flag_json {
stderr_jsonwtr.write_all(b"]")?;
stderr_jsonwtr.flush()?;
}
}

if flag_json && !json_started {
json_wtr.write_all(b"[")?;
}

while rdr.read_byte_record(&mut record)? {
row_ctr += 1;

#[cfg(any(feature = "feature_capable", feature = "lite"))]
if show_progress {
progress.inc(1);
Expand All @@ -301,19 +329,36 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
b"0"
});
if flag_json {
util::write_json_record(&mut json_wtr, args.flag_no_headers, &headers, &record)?;
util::write_json_record(
&mut json_wtr,
flag_no_headers,
&headers,
&record,
&mut is_first,
)?;
} else {
wtr.write_byte_record(&record)?;
}
} else if m {
if flag_json {
util::write_json_record(&mut json_wtr, args.flag_no_headers, &headers, &record)?;
util::write_json_record(
&mut json_wtr,
flag_no_headers,
&headers,
&record,
&mut is_first,
)?;
} else {
wtr.write_byte_record(&record)?;
}
}
}
wtr.flush()?;
if flag_json {
json_wtr.write_all(b"]")?;
json_wtr.flush()?;
} else {
wtr.flush()?;
}

#[cfg(any(feature = "feature_capable", feature = "lite"))]
if show_progress {
Expand Down
37 changes: 28 additions & 9 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1690,6 +1690,7 @@ pub fn create_json_writer(
Ok(writer)
}

/// iterate over the CSV ByteRecords and write them to the JSON file
pub fn write_json(
output: &Option<String>,
no_headers: bool,
Expand All @@ -1716,11 +1717,13 @@ pub fn write_json(

let rec_len = header_vec.len().saturating_sub(1);
let mut temp_val;
let mut json_string_val: serde_json::Value;
let null_val = "null".to_string();
let mut json_string_val: serde_json::Value;

for record in records {
if !is_first {
if is_first {
is_first = false;
} else {
// Write a comma before each record except the first one
write!(json_wtr, ",")?;
}
Expand All @@ -1734,7 +1737,7 @@ pub fn write_json(
if temp_val.is_empty() {
temp_val.clone_from(&null_val);
} else {
// we round-trip the value to serde_json::Value
// we round-trip the value to serde_json
// to escape the string properly per JSON spec
json_string_val = serde_json::Value::String(temp_val);
temp_val = json_string_val.to_string();
Expand All @@ -1745,34 +1748,45 @@ pub fn write_json(
unsafe {
write!(
&mut json_wtr,
"\"{key}\":{value},",
r#""{key}":{value},"#,
key = header_vec.get_unchecked(idx),
value = temp_val
)?;
}
} else {
// last column in the JSON record, no comma
unsafe {
write!(
&mut json_wtr,
"\"{key}\":{value}",
r#""{key}":{value}"#,
key = header_vec.get_unchecked(idx),
value = temp_val
)?;
}
}
}
write!(json_wtr, "}}")?;
is_first = false;
}
// Write the closing bracket for the JSON array
writeln!(json_wtr, "]")?;

Ok(json_wtr.flush()?)
}

/// Write a single csv::ByteRecord to a JSON record writer.
/// If no_headers is true, the column index (0-based) is used as the key.
/// If no_headers is false, the header is used as the key.
/// If is_first is true, no comma is written before the record, and
/// is_first is then set to false.
/// If is_first is false, a comma is written before the record.
/// is_first is passed as a mutable reference so that it can be updated
/// efficiently inside this helper function.
/// In this way, callers can stream JSON records to a writer one at a time.
pub fn write_json_record<W: std::io::Write>(
json_wtr: &mut W,
no_headers: bool,
headers: &csv::ByteRecord,
record: &csv::ByteRecord,
is_first: &mut bool,
) -> std::io::Result<()> {
let header_vec: Vec<String> = headers
.iter()
Expand All @@ -1791,7 +1805,12 @@ pub fn write_json_record<W: std::io::Write>(
let mut json_string_val: serde_json::Value;
let null_val = "null".to_string();

write!(json_wtr, "{{")?;
if *is_first {
write!(json_wtr, "{{")?;
*is_first = false;
} else {
write!(json_wtr, ",{{")?;
}
for (idx, b) in record.iter().enumerate() {
if let Ok(val) = simdutf8::basic::from_utf8(b) {
temp_val = val.to_owned();
Expand All @@ -1808,7 +1827,7 @@ pub fn write_json_record<W: std::io::Write>(
unsafe {
write!(
json_wtr,
"\"{key}\":{value},",
r#""{key}":{value},"#,
key = header_vec.get_unchecked(idx),
value = temp_val
)?;
Expand All @@ -1817,7 +1836,7 @@ pub fn write_json_record<W: std::io::Write>(
unsafe {
write!(
json_wtr,
"\"{key}\":{value}",
r#""{key}":{value}"#,
key = header_vec.get_unchecked(idx),
value = temp_val
)?;
Expand Down
Loading
Loading