Skip to content

Commit

Permalink
Updates
Browse files Browse the repository at this point in the history
  • Loading branch information
AdrianAntico committed May 30, 2024
1 parent 1e34795 commit 02312dc
Show file tree
Hide file tree
Showing 12 changed files with 101 additions and 108 deletions.
122 changes: 41 additions & 81 deletions Collapse/Lags_collapse.R

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions CombineResults_Lags.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ Path <- "C:/Users/Bizon/Documents/GitHub/rappwd/"
datatable <- data.table::fread(paste0(Path, "BenchmarkResults_Lags.csv"))
polars <- data.table::fread(paste0(Path, "BenchmarkResultsPolars_Lags.csv"))
polars <- polars[, .SD, .SDcols = c("TimeInSeconds")]
# duckdb <- data.table::fread(paste0(Path, "BenchmarkResultsDuckDB_Lags.csv"))
# duckdb <- duckdb[, .SD, .SDcols = c("TimeInSeconds")]
duckdb <- data.table::fread(paste0(Path, "BenchmarkResultsDuckDB_Lags.csv"))
duckdb <- duckdb[, .SD, .SDcols = c("TimeInSeconds")]
pandas <- data.table::fread(paste0(Path, "BenchmarkResultsPandas_Lags.csv"))
pandas <- pandas[, .SD, .SDcols = c("TimeInSeconds")]
collapse <- data.table::fread(paste0(Path, "BenchmarkResultsCollapse_Lags.csv"))
Expand All @@ -14,7 +14,7 @@ collapse <- collapse[, .SD, .SDcols = c("TimeInSeconds")]
# Modify Column Names for Joining
data.table::setnames(datatable, "TimeInSeconds", "2_Datatable")
data.table::setnames(polars, "TimeInSeconds", "4_Polars")
# data.table::setnames(duckdb, "TimeInSeconds", "5_DuckDB")
data.table::setnames(duckdb, "TimeInSeconds", "5_DuckDB")
data.table::setnames(pandas, "TimeInSeconds", "3_Pandas")
data.table::setnames(collapse, "TimeInSeconds", "1_Collapse")

Expand All @@ -25,7 +25,7 @@ datatable <- datatable[, .SD, .SDcols = c("Method", "Experiment", "2_Datatable")
dt <- cbind(
datatable,
polars,
# duckdb,
duckdb,
pandas,
collapse)

Expand All @@ -36,7 +36,7 @@ dt <- data.table::melt.data.table(
measure.vars = c(
"2_Datatable",
"4_Polars",
# "5_DuckDB",
"5_DuckDB",
"3_Pandas",
"1_Collapse"),
value.name = "Time In Seconds")
Expand All @@ -47,7 +47,7 @@ data.table::setorderv(dt, cols = "variable", -1)

# Plot 1M Case
AutoPlots::Plot.Bar(
dt = dt[c(1:15, 47:61, 93:107, 139:153)], # 185:199)],
dt = dt[c(1:15, 47:61, 93:107, 139:153, 185:199)],
PreAgg = TRUE,
XVar = "Experiment",
YVar = "Time In Seconds",
Expand Down Expand Up @@ -85,7 +85,7 @@ AutoPlots::Plot.Bar(

# Plot 10M Case
AutoPlots::Plot.Bar(
dt = dt[c(16:30, 62:76, 108:122, 154:168)], # 200:214)],
dt = dt[c(16:30, 62:76, 108:122, 154:168, 200:214)],
PreAgg = TRUE,
XVar = "Experiment",
YVar = "Time In Seconds",
Expand Down Expand Up @@ -115,15 +115,15 @@ AutoPlots::Plot.Bar(
title.textShadowOffsetX = -1,
xaxis.fontSize = 14,
yaxis.fontSize = 35,
xaxis.rotate = 0,
xaxis.rotate = 35,
yaxis.rotate = 0,
ContainLabel = TRUE,
Debug = FALSE
)

# Plot 100M Case
AutoPlots::Plot.Bar(
dt = dt[c(31:45, 77:91, 123:137, 169:183)], # 215:229)],
dt = dt[c(31:45, 77:91, 123:137, 169:183, 215:229)],
PreAgg = TRUE,
XVar = "Experiment",
YVar = "Time In Seconds",
Expand Down
3 changes: 3 additions & 0 deletions Datatable/Lags_datatable.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ library(data.table)
## 1M 1N 1D 0G
data <- fread(paste0(Path, "FakeBevData1M.csv"))
BenchmarkResults <- data.table::fread(paste0(Path, "BenchmarkResults_Lags.csv"))
setorderv(x = data, cols = c("Customer","Brand","Category","Beverage Flavor", "Date"), order = c(1,1,1,1,1))
start <- Sys.time()
data[, paste0("Lag Daily Liters ", 1L:5L) := shift(x = `Daily Liters`, n = 1L:5L)]
end <- Sys.time()
Expand Down Expand Up @@ -248,6 +249,7 @@ gc()

## 10M 1N 1D 0G
data <- fread(paste0(Path, "FakeBevData10M.csv"))
setorderv(x = data, cols = c("Customer","Brand","Category","Beverage Flavor", "Date"), order = c(1,1,1,1,1))
BenchmarkResults <- data.table::fread(paste0(Path, "BenchmarkResults_Lags.csv"))
start <- Sys.time()
data[, paste0("Lag Daily Liters ", 1L:5L) := shift(x = `Daily Liters`, n = 1L:5L)]
Expand Down Expand Up @@ -425,6 +427,7 @@ gc()

## 100M 1N 1D 0G
data <- fread(paste0(Path, "FakeBevData100M.csv"))
setorderv(x = data, cols = c("Customer","Brand","Category","Beverage Flavor", "Date"), order = c(1,1,1,1,1))
BenchmarkResults <- data.table::fread(paste0(Path, "BenchmarkResults_Lags.csv"))
start <- Sys.time()
data[, paste0("Lag Daily Liters ", 1L:5L) := shift(x = `Daily Liters`, n = 1L:5L)]
Expand Down
15 changes: 0 additions & 15 deletions DuckDB/Lags_DuckDB.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,21 +54,6 @@ BenchmarkResults <- data.table::data.table(
"100M 3N 1D 3G 5L",
"100M 3N 1D 4G 5L",

"1B 1N 1D 0G 5L",
"1B 1N 1D 1G 5L",
"1B 1N 1D 2G 5L",
"1B 1N 1D 3G 5L",
"1B 1N 1D 4G 5L",
"1B 2N 1D 0G 5L",
"1B 2N 1D 1G 5L",
"1B 2N 1D 2G 5L",
"1B 2N 1D 3G 5L",
"1B 2N 1D 4G 5L",
"1B 3N 1D 0G 5L",
"1B 3N 1D 1G 5L",
"1B 3N 1D 2G 5L",
"1B 3N 1D 3G 5L",
"1B 3N 1D 4G 5L",
"Total Runtime"),

TimeInSeconds = c(rep(-0.1, 46))
Expand Down
Binary file modified Images/100MResults.PNG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/100MResults_Lags_WithDuckDB.PNG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/100MResults_Lags_WithoutDuckDB.PNG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Images/10MResults.PNG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/10MResults_Lags.PNG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified Images/1MResults.PNG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Images/1MResults_Lags.PNG
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
51 changes: 48 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ The datasets utilized replicates a real world example of a beverage company's da
* Group-By with Sum Aggregation
* Melt
* Cast
* Lag (coming soon)
* Lags

<br>

Expand Down Expand Up @@ -74,6 +74,22 @@ The datasets utilized replicates a real world example of a beverage company's da

</details>

### Lags
<details><summary> Click here to see steps </summary>

* Fork the repo and clone it to your local machine
* Modify the Path variable at the top of each script to reflect your file location
* Run FakeBevDataBuilds.R to generate the benchmarking datasets
* Run Lags_datatable.R
* Run Lags_DuckDB.R
* Run Lags_Polars.py
* Run Lags_Pandas.py
* Run Lags_collapse.R
* Run CombineResults_Lags.R
* Done!

</details>

<br>

## Machine Specs
Expand Down Expand Up @@ -123,7 +139,7 @@ In the plots below the x-axis "Experiments" shows four letters with numbers in f

<br>

### Melt Data
### Melt
<details><summary> Click here to see results </summary>

<br>
Expand All @@ -150,7 +166,7 @@ In the plots below the x-axis "Experiments" shows four letters with numbers in f

<br>

### Cast Data
### Cast
<details><summary> Click here to see results </summary>

<br>
Expand All @@ -166,3 +182,32 @@ In the plots below the x-axis "Experiments" shows four letters with numbers in f
![](https://github.com/AdrianAntico/Benchmarks/raw/main/Images/100MResults_Cast.PNG)

</details>

<br>

### Lags
<details><summary> Click here to see results </summary>

<br>

![](https://github.com/AdrianAntico/Benchmarks/raw/main/Images/1MResults_Lags.PNG)

<br>

![](https://github.com/AdrianAntico/Benchmarks/raw/main/Images/10MResults_Lags.PNG)

<br>

##### With DuckDB: Note - DuckDB timed out after a few successful runs

![](https://github.com/AdrianAntico/Benchmarks/raw/main/Images/100MResults_Lags_WithDuckDB.PNG)

<br>

##### Without DuckDB

![](https://github.com/AdrianAntico/Benchmarks/raw/main/Images/100MResults_Lags_WithoutDuckDB.PNG)

</details>

<br>

0 comments on commit 02312dc

Please sign in to comment.