Skip to content

Commit

Permalink
[SPARK-33417][SQL][TEST] Correct the behaviour of query filters in TP…
Browse files Browse the repository at this point in the history
…CDSQueryBenchmark

### What changes were proposed in this pull request?

This PR fixes the behaviour of query filters in `TPCDSQueryBenchmark`. The `--query-filter` option selects which TPCDS queries to run, e.g., `--query-filter q6,q8,q13`. However, on the current master the option behaves unexpectedly: if we pass `--query-filter q6` to run only the TPCDS q6 query, `TPCDSQueryBenchmark` runs both `q6` and `q6-v2.7`, because the `filterQueries` method does not take the name suffix into account. As a result, there is currently no way to run only the TPCDS q6 query. With this change, the v2.7 queries are matched against the filter by their suffixed names (e.g., `q6-v2.7`), so `q6` and `q6-v2.7` can be selected independently.

### Why are the changes needed?

Bugfix.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Manually checked.

Closes apache#30324 from maropu/FilterBugInTPCDSQueryBenchmark.

Authored-by: Takeshi Yamamuro <[email protected]>
Signed-off-by: Takeshi Yamamuro <[email protected]>
  • Loading branch information
maropu committed Nov 11, 2020
1 parent 6d5d030 commit 4b36797
Showing 1 changed file with 14 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,16 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark {
}
}

def filterQueries(
private def filterQueries(
origQueries: Seq[String],
args: TPCDSQueryBenchmarkArguments): Seq[String] = {
if (args.queryFilter.nonEmpty) {
origQueries.filter(args.queryFilter.contains)
queryFilter: Set[String],
nameSuffix: String = ""): Seq[String] = {
if (queryFilter.nonEmpty) {
if (nameSuffix.nonEmpty) {
origQueries.filter { name => queryFilter.contains(s"$name$nameSuffix") }
} else {
origQueries.filter(queryFilter.contains)
}
} else {
origQueries
}
Expand All @@ -125,15 +130,17 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark {
"q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99")

// This list only includes TPC-DS v2.7 queries that are different from v1.4 ones
val nameSuffixForQueriesV2_7 = "-v2.7"
val tpcdsQueriesV2_7 = Seq(
"q5a", "q6", "q10a", "q11", "q12", "q14", "q14a", "q18a",
"q20", "q22", "q22a", "q24", "q27a", "q34", "q35", "q35a", "q36a", "q47", "q49",
"q51a", "q57", "q64", "q67a", "q70a", "q72", "q74", "q75", "q77a", "q78",
"q80a", "q86a", "q98")

// If `--query-filter` is defined, runs only the queries that this option selects
val queriesV1_4ToRun = filterQueries(tpcdsQueries, benchmarkArgs)
val queriesV2_7ToRun = filterQueries(tpcdsQueriesV2_7, benchmarkArgs)
val queriesV1_4ToRun = filterQueries(tpcdsQueries, benchmarkArgs.queryFilter)
val queriesV2_7ToRun = filterQueries(tpcdsQueriesV2_7, benchmarkArgs.queryFilter,
nameSuffix = nameSuffixForQueriesV2_7)

if ((queriesV1_4ToRun ++ queriesV2_7ToRun).isEmpty) {
throw new RuntimeException(
Expand All @@ -143,6 +150,6 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark {
val tableSizes = setupTables(benchmarkArgs.dataLocation)
runTpcdsQueries(queryLocation = "tpcds", queries = queriesV1_4ToRun, tableSizes)
runTpcdsQueries(queryLocation = "tpcds-v2.7.0", queries = queriesV2_7ToRun, tableSizes,
nameSuffix = "-v2.7")
nameSuffix = nameSuffixForQueriesV2_7)
}
}

0 comments on commit 4b36797

Please sign in to comment.