Skip to content

Commit

Permalink
Merge pull request #1837 from jqnatividad/polars-countstar-groupby-fix
Browse files Browse the repository at this point in the history
apply Polars SQL `count(*) group` by fix
  • Loading branch information
jqnatividad committed May 24, 2024
2 parents 563f5c8 + 66c51a6 commit d427014
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 80 deletions.
55 changes: 19 additions & 36 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,9 @@ dynfmt = { git = "https://github.com/jqnatividad/dynfmt", branch = "2021-clippy_
grex = { git = "https://github.com/pemistahl/grex", rev = "0c8ab87" }
# use unreleased version of calamine with fixes
calamine = { git = "https://github.com/tafia/calamine", rev = "58c8ba2" }
# polars 0.40.0 with unreleased fixes
polars = { git = "https://github.com/pola-rs/polars", rev = "d4c3aba" }
polars-ops = { git = "https://github.com/pola-rs/polars", rev = "d4c3aba" }

[features]
default = ["mimalloc"]
Expand Down
25 changes: 13 additions & 12 deletions src/cmd/count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,18 +237,19 @@ pub fn polars_count_input(
},
};
let optimization_state = polars::lazy::frame::OptState {
projection_pushdown: true,
predicate_pushdown: true,
type_coercion: true,
simplify_expr: true,
file_caching: true,
slice_pushdown: true,
comm_subplan_elim: false,
comm_subexpr_elim: true,
streaming: true,
fast_projection: true,
eager: false,
row_estimate: true,
projection_pushdown: true,
predicate_pushdown: true,
cluster_with_columns: true,
type_coercion: true,
simplify_expr: true,
file_caching: true,
slice_pushdown: true,
comm_subplan_elim: false,
comm_subexpr_elim: true,
streaming: true,
fast_projection: true,
eager: false,
row_estimate: true,
};
ctx.register("sql_lf", lazy_df.with_optimizations(optimization_state));
"SELECT COUNT(*) FROM sql_lf".to_string()
Expand Down
27 changes: 14 additions & 13 deletions src/cmd/joinp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
(true, false, false, false, false, false) => join.run(JoinType::Left, validation, false),
(false, true, false, false, false, false) => join.run(JoinType::Anti, validation, false),
(false, false, true, false, false, false) => join.run(JoinType::Semi, validation, false),
(false, false, false, true, false, false) => join.run(JoinType::Outer, validation, false),
(false, false, false, true, false, false) => join.run(JoinType::Full, validation, false),
(false, false, false, false, true, false) => join.run(JoinType::Cross, validation, false),
(false, false, false, false, false, true) => {
// safety: flag_strategy is always is_some() as it has a default value
Expand Down Expand Up @@ -397,18 +397,19 @@ impl JoinStruct {
}
} else {
polars::lazy::frame::OptState {
projection_pushdown: true,
predicate_pushdown: true,
type_coercion: true,
simplify_expr: true,
file_caching: true,
slice_pushdown: true,
comm_subplan_elim: true,
comm_subexpr_elim: true,
streaming: self.streaming,
fast_projection: true,
eager: false,
row_estimate: true,
projection_pushdown: true,
predicate_pushdown: true,
cluster_with_columns: true,
type_coercion: true,
simplify_expr: true,
file_caching: true,
slice_pushdown: true,
comm_subplan_elim: true,
comm_subexpr_elim: true,
streaming: self.streaming,
fast_projection: true,
eager: false,
row_estimate: true,
}
};
log::debug!("Optimization state: {optimization_state:?}");
Expand Down
25 changes: 13 additions & 12 deletions src/cmd/sqlp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -559,18 +559,19 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
}
} else {
polars::lazy::frame::OptState {
projection_pushdown: true,
predicate_pushdown: true,
type_coercion: true,
simplify_expr: true,
file_caching: !args.flag_low_memory,
slice_pushdown: true,
comm_subplan_elim: !args.flag_low_memory,
comm_subexpr_elim: true,
streaming: args.flag_low_memory,
fast_projection: true,
eager: false,
row_estimate: true,
projection_pushdown: true,
predicate_pushdown: true,
cluster_with_columns: true,
type_coercion: true,
simplify_expr: true,
file_caching: !args.flag_low_memory,
slice_pushdown: true,
comm_subplan_elim: !args.flag_low_memory,
comm_subexpr_elim: true,
streaming: args.flag_low_memory,
fast_projection: true,
eager: false,
row_estimate: true,
}
};
// gated by log::log_enabled!(log::Level::Debug) to avoid the
Expand Down
14 changes: 7 additions & 7 deletions tests/test_sqlp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ fn sqlp_join_cross() {
}

#[test]
#[ignore = "temporarily disable due to a bug in polars aliasing"]
// #[ignore = "temporarily disable due to a bug in polars aliasing"]
fn sqlp_boston311_groupby_orderby() {
let wrk = Workdir::new("sqlp_boston311_groupby_orderby");
let test_file = wrk.load_test_file("boston311-100.csv");
Expand Down Expand Up @@ -206,7 +206,7 @@ fn sqlp_boston311_groupby_orderby() {
}

#[test]
#[ignore = "temporarily disable due to a bug in polars aliasing"]
// #[ignore = "temporarily disable due to a bug in polars aliasing"]
fn sqlp_boston311_groupby_orderby_with_table_alias() {
let wrk = Workdir::new("sqlp_boston311_groupby_orderby");
let test_file = wrk.load_test_file("boston311-100.csv");
Expand Down Expand Up @@ -852,7 +852,7 @@ fn sqlp_boston311_explain() {
}

#[test]
#[ignore = "temporarily disable due to a bug in polars aliasing"]
// #[ignore = "temporarily disable due to a bug in polars aliasing"]
fn sqlp_boston311_sql_script() {
let wrk = Workdir::new("sqlp_boston311_sql_script");
let test_file = wrk.load_test_file("boston311-100.csv");
Expand Down Expand Up @@ -889,7 +889,7 @@ select ward,count(*) as cnt from temp_table2 group by ward order by cnt desc, wa
}

#[test]
#[ignore = "temporarily disable due to a bug in polars aliasing"]
// #[ignore = "temporarily disable due to a bug in polars aliasing"]
fn sqlp_boston311_sql_script_json() {
let wrk = Workdir::new("sqlp_boston311_sql_script_json");
let test_file = wrk.load_test_file("boston311-100.csv");
Expand All @@ -913,7 +913,7 @@ select ward,count(*) as cnt from temp_table2 group by ward order by cnt desc, wa
}

#[test]
#[ignore = "temporarily disable due to a bug in polars aliasing"]
// #[ignore = "temporarily disable due to a bug in polars aliasing"]
fn sqlp_boston311_sql_script_jsonl() {
let wrk = Workdir::new("sqlp_boston311_sql_script_jsonl");
let test_file = wrk.load_test_file("boston311-100.csv");
Expand Down Expand Up @@ -945,7 +945,7 @@ select ward,count(*) as cnt from temp_table2 group by ward order by cnt desc, wa
}

#[test]
#[ignore = "temporarily disable due to a bug in polars aliasing"]
// #[ignore = "temporarily disable due to a bug in polars aliasing"]
fn sqlp_boston311_cte_script() {
let wrk = Workdir::new("sqlp_boston311_cte");
let test_file = wrk.load_test_file("boston311-100.csv");
Expand Down Expand Up @@ -973,7 +973,7 @@ select ward,count(*) as cnt from boston311_roxbury group by ward order by cnt de
}

#[test]
#[ignore = "temporarily disable due to a bug in polars aliasing"]
// #[ignore = "temporarily disable due to a bug in polars aliasing"]
fn sqlp_boston311_cte() {
let wrk = Workdir::new("sqlp_boston311_cte");
let test_file = wrk.load_test_file("boston311-100.csv");
Expand Down

0 comments on commit d427014

Please sign in to comment.