Skip to content

Commit

Permalink
split: add --no-headers logic for --kb-size option
Browse files Browse the repository at this point in the history
  • Loading branch information
jqnatividad committed Feb 22, 2024
1 parent a6f3f33 commit be74040
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 8 deletions.
12 changes: 8 additions & 4 deletions src/cmd/split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,10 +166,14 @@ impl Args {
let mut headerbuf_wtr = csv::WriterBuilder::new().from_writer(vec![]);

headerbuf_wtr.write_byte_record(&headers)?;
// safety: we know the inner vec is valid
let header_string =
simdutf8::basic::from_utf8(&headerbuf_wtr.into_inner().unwrap())?.to_string();
let header_byte_size = header_string.len();
let header_byte_size = if self.flag_no_headers {
0
} else {
// safety: we know the inner vec is valid
let header_string =
simdutf8::basic::from_utf8(&headerbuf_wtr.into_inner().unwrap())?.to_string();
header_string.len()
};

let mut wtr = self.new_writer(&headers, 0, self.flag_pad)?;
let mut i = 0;
Expand Down
33 changes: 29 additions & 4 deletions tests/test_split.rs
Original file line number Diff line number Diff line change
Expand Up @@ -780,8 +780,8 @@ fn split_nooutdir() {
}

#[test]
fn split_kbsize_boston_2k() {
let wrk = Workdir::new("split_kbsize_boston_2k");
fn split_kbsize_boston_5k() {
let wrk = Workdir::new("split_kbsize_boston_5k");
let test_file = wrk.load_test_file("boston311-100.csv");

let mut cmd = wrk.command("split");
Expand All @@ -805,8 +805,8 @@ fn split_kbsize_boston_2k() {
}

#[test]
fn split_kbsize_boston_2k_padded() {
let wrk = Workdir::new("split_kbsize_boston_2k_padded");
fn split_kbsize_boston_5k_padded() {
let wrk = Workdir::new("split_kbsize_boston_5k_padded");
let test_file = wrk.load_test_file("boston311-100.csv");

let mut cmd = wrk.command("split");
Expand All @@ -830,3 +830,28 @@ fn split_kbsize_boston_2k_padded() {
assert!(wrk.path("testme-086.csv").exists());
assert!(wrk.path("testme-095.csv").exists());
}

/// Runs `split --kb-size 5 --no-headers` on `boston311-100.csv`, writing the
/// chunks into the working directory, and checks that every expected chunk
/// file was created.
#[test]
fn split_kbsize_boston_5k_no_headers() {
    // Chunk files that the split is expected to leave in the working directory.
    const EXPECTED_CHUNKS: [&str; 11] = [
        "0.csv", "12.csv", "21.csv", "29.csv", "39.csv", "48.csv", "56.csv", "66.csv", "76.csv",
        "84.csv", "93.csv",
    ];

    let wrk = Workdir::new("split_kbsize_boston_5k_no_headers");
    let input_csv = wrk.load_test_file("boston311-100.csv");

    // Argument order matters: size option, output directory, flag, then input.
    let mut cmd = wrk.command("split");
    cmd.arg("--kb-size");
    cmd.arg("5");
    cmd.arg(&wrk.path("."));
    cmd.arg("--no-headers");
    cmd.arg(input_csv);
    wrk.run(&mut cmd);

    for chunk in EXPECTED_CHUNKS {
        assert!(
            wrk.path(chunk).exists(),
            "expected split output file {} to exist",
            chunk
        );
    }
}

0 comments on commit be74040

Please sign in to comment.