Skip to content

Commit

Permalink
benchmarks: v4.2.0
Browse files Browse the repository at this point in the history
additional benchmarks for `excel` & `frequency`
renamed sqlp_vs_duckdb benchmarks so they're next to each other for comparison

[skip ci]
  • Loading branch information
jqnatividad committed Apr 25, 2024
1 parent 7bcd59e commit b83ad3a
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions scripts/benchmarks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@
arg_pat="$1"

# the version of this script
bm_version=4.1.0
bm_version=4.2.0

# CONFIGURABLE VARIABLES ---------------------------------------
# change as needed to reflect your environment/workloads

# the path to the qsv binary, change this if you're not using the prebuilt binaries
# e.g. you compiled a tuned version of qsv with different features and/or CPU optimizations enabled
# qsv_bin=../target/release/qsv
# qsv_bin=../target/release/qsvlite
# qsv_bin=../target/debug/qsv
qsv_bin=qsv
# the path to the qsv binary that we dogfood to run the benchmarks
Expand Down Expand Up @@ -471,6 +471,7 @@ run enum_uuid "$qsv_bin" enum --uuid "$data"
run enum_constant "$qsv_bin" enum --constant "NYC" "$data"
run enum_copy "$qsv_bin" enum --copy Agency "$data"
run excel "$qsv_bin" excel benchmark_data.xlsx
# NOTE(review): the benchmark name says "formula" but the flag passed is
# `--error-format both` — confirm which one is intended.
run excel_error_format_formula "$qsv_bin" excel --error-format both benchmark_data.xlsx
run excel_metadata "$qsv_bin" excel --metadata c benchmark_data.xlsx
run excel_metadata_short "$qsv_bin" excel --metadata s benchmark_data.xlsx
run exclude "$qsv_bin" exclude \'Incident Zip\' "$data" \'Incident Zip\' data_to_exclude.csv
Expand All @@ -494,6 +495,10 @@ run fmt_no_final_newline "$qsv_bin" fmt --no-final-newline "$data"
run foreach "$qsv_bin" foreach City "echo {}" "$data"
run frequency "$qsv_bin" frequency "$data"
run --index frequency_index "$qsv_bin" frequency "$data"
run frequency_no_limit "$qsv_bin" frequency --limit 0 "$data"
run --index frequency_no_limit_index "$qsv_bin" frequency --limit 0 "$data"
run frequency_other_sorted "$qsv_bin" frequency --other-sorted "$data"
run --index frequency_other_sorted_index "$qsv_bin" frequency --other-sorted "$data"
run frequency_selregex "$qsv_bin" frequency -s /^R/ "$data"
run frequency_j1 "$qsv_bin" frequency -j 1 "$data"
run frequency_ignorecase "$qsv_bin" frequency -i "$data"
Expand Down Expand Up @@ -579,9 +584,9 @@ run --index split_chunks_index "$qsv_bin" split --chunks 20 split_tempdir_chunks
run --index split_chunks_index_j1 "$qsv_bin" split --chunks 20 -j 1 split_tempdir_chunks_idx_j1
run sqlp "$qsv_bin" sqlp "$data" -Q '"select * from _t_1 where \"Complaint Type\"='\''Noise'\'' and Borough='\''BROOKLYN'\''"'
run sqlp_aggregations "$qsv_bin" sqlp "$data" -Q '"select Borough, count(*) from _t_1 where \"Complaint Type\"='\''Noise'\'' group by Borough"'
run sqlp_vs_duckdb_aggregations duckdb :memory: '"select Borough, count(*) from read_csv_auto('\'''$data''\'') where \"Complaint Type\"='\''Noise'\'' group by Borough"'
run sqlp_aggregations_vs_duckdb duckdb :memory: '"select Borough, count(*) from read_csv_auto('\'''$data''\'') where \"Complaint Type\"='\''Noise'\'' group by Borough"'
run sqlp_aggregations_expensive "$qsv_bin" sqlp SKIP_INPUT -Q expensive.sql
run sqlp_vs_duckdb_aggregations_expensive duckdb :memory: -c \".read expensiveduckdb.sql\"
run sqlp_aggregations_expensive_vs_duckdb duckdb :memory: -c \".read expensiveduckdb.sql\"
run sqlp_format_arrow "$qsv_bin" sqlp --format arrow "$data" -Q '"select * from _t_1 where \"Complaint Type\"='\''Noise'\'' and Borough='\''BROOKLYN'\''"'
run sqlp_format_avro "$qsv_bin" sqlp --format avro "$data" -Q '"select * from _t_1 where \"Complaint Type\"='\''Noise'\'' and Borough='\''BROOKLYN'\''"'
run sqlp_format_json "$qsv_bin" sqlp --format json "$data" -Q '"select * from _t_1 where \"Complaint Type\"='\''Noise'\'' and Borough='\''BROOKLYN'\''"'
Expand Down

0 comments on commit b83ad3a

Please sign in to comment.