Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stats: add --cache-threshold autoindex creation/deletion logic #1809

Merged
merged 4 commits into from
May 10, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
tests: added stats --cache-threshold negative arg tests
  • Loading branch information
jqnatividad committed May 9, 2024
commit 917794d7a1fc6bfa3cf6d7e300dc0e05a1f2dbc7
77 changes: 76 additions & 1 deletion tests/test_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -896,7 +896,7 @@ fn stats_cache() {
cmd.arg("--infer-dates")
.arg("--dates-whitelist")
.arg("all")
// set cache threshold to 1 byte to force cache creation
// set cache threshold to 1 to force cache creation
.args(["--cache-threshold", "1"])
.arg(test_file);

Expand All @@ -918,6 +918,81 @@ fn stats_cache() {
assert!(Path::new(&wrk.path("boston311-100.stats.csv.json")).exists());
}

// Exercises `stats` with a negative cache threshold whose magnitude (10 kb) is
// smaller than the input file: the index should be auto-created and the stats
// cache files should be written.
#[test]
fn stats_cache_negative_threshold() {
    use std::path::Path;

    let wrk = Workdir::new("stats_cache_negative_threshold");
    let test_file = wrk.load_test_file("boston311-100.csv");

    let mut cmd = wrk.command("stats");
    cmd.arg("--infer-dates")
        .arg("--dates-whitelist")
        .arg("all")
        // set cache threshold to -10240 to set autoindex_size to 10 kb
        // and to force cache creation
        .args(["-c", "-10240"])
        // borrow instead of cloning; the path is reused for the index check below
        .arg(&test_file);

    let got: Vec<Vec<String>> = wrk.read_stdout(&mut cmd);

    // the index file SHOULD have been created as the input file size > 10 kb
    assert!(Path::new(&format!("{test_file}.idx")).exists());

    wrk.create("in2.csv", got);

    // variance & stddev columns are excluded because their float values
    // were causing flaky CI tests
    let mut cmd = wrk.command("select");
    cmd.arg("1-9,12-").arg("in2.csv");

    let got2: String = wrk.stdout(&mut cmd);
    let expected2 = wrk.load_test_resource("boston311-100-stats.csv");

    assert_eq!(dos2unix(&got2), dos2unix(&expected2).trim_end());

    // check that the stats cache files were created
    assert!(Path::new(&wrk.path("boston311-100.stats.csv")).exists());
    assert!(Path::new(&wrk.path("boston311-100.stats.csv.json")).exists());
}

// Exercises `stats` with a negative cache threshold whose magnitude (50 kb) is
// larger than the input file: no index should be auto-created, but the stats
// cache files should still be written.
#[test]
fn stats_cache_negative_threshold_unmet() {
    use std::path::Path;

    let wrk = Workdir::new("stats_cache_negative_threshold_unmet");
    let test_file = wrk.load_test_file("boston311-100.csv");
    // path where an auto-created index would live, next to the input file
    let index_file = format!("{test_file}.idx");

    let mut stats_cmd = wrk.command("stats");
    stats_cmd
        .args(["--infer-dates", "--dates-whitelist", "all"])
        // -51200 => autoindex_size of 50 kb, with cache creation forced
        .args(["--cache-threshold", "-51200"])
        .arg(test_file);

    let stats_rows: Vec<Vec<String>> = wrk.read_stdout(&mut stats_cmd);

    // the input file is < 50 kb, so the index file must NOT have been created
    assert!(!Path::new(&index_file).exists());

    wrk.create("in2.csv", stats_rows);

    // compare everything except the variance & stddev columns, whose float
    // values were causing flaky CI tests
    let mut select_cmd = wrk.command("select");
    select_cmd.arg("1-9,12-").arg("in2.csv");

    let actual: String = wrk.stdout(&mut select_cmd);
    let expected = wrk.load_test_resource("boston311-100-stats.csv");

    assert_eq!(dos2unix(&actual), dos2unix(&expected).trim_end());

    // both stats cache artifacts should exist in the working directory
    let stats_csv = wrk.path("boston311-100.stats.csv");
    let stats_json = wrk.path("boston311-100.stats.csv.json");
    assert!(Path::new(&stats_csv).exists());
    assert!(Path::new(&stats_json).exists());
}

#[test]
fn stats_infer_boolean_1_0() {
let wrk = Workdir::new("stats_infer_boolean_1_0");
Expand Down