diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh index 29957f25e370d..2162c16a8417e 100755 --- a/benchmarks/bench.sh +++ b/benchmarks/bench.sh @@ -102,6 +102,10 @@ topk_tpch: Benchmark of top-k (sorting with limit) queries on TPC-H external_aggr: External aggregation benchmark on TPC-H dataset (SF=1) wide_schema: Small-projection queries on a wide synthetic dataset (1024 cols × 256 files) — measures per-file metadata overhead (runs both 'wide' and 'narrow' subgroups: narrow is an internal baseline; the wide-vs-narrow ratio is the signal) +predicate_eval: Conjunctive (AND) filter-evaluation micro-benchmarks; each subgroup is a different predicate pattern, to test how an + adaptive predicate-ordering system behaves across them (see https://github.com/apache/datafusion/issues/11262) + (subgroups via BENCH_SUBGROUP: costsel, cost, selectivity, cardinality, width, scale, neutral, correlation, drift, nulls) + (toggle a system under test with its native DATAFUSION_* env var; size data with PRED_ROWS, string width with PRED_FILL) # ClickBench Benchmarks clickbench_1: ClickBench queries against a single parquet file @@ -245,6 +249,10 @@ main() { wide_schema) data_wide_schema ;; + predicate_eval) + # Data is generated inline by the suite's load SQL. + echo "predicate_eval: no external data to generate" + ;; tpcds) data_tpcds ;; @@ -458,6 +466,9 @@ main() { wide_schema) run_wide_schema ;; + predicate_eval) + run_predicate_eval + ;; tpcds) run_tpcds ;; @@ -778,6 +789,33 @@ run_wide_schema() { bash -c "$SQL_CARGO_COMMAND" } +# Runs the predicate_eval benchmark suite: conjunctive (AND) filter-evaluation +# micro-benchmarks where each subgroup is a different predicate pattern, used to +# test how an adaptive predicate-ordering system behaves across them (see +# https://github.com/apache/datafusion/issues/11262). Data is generated inline +# by the suite's load SQL, so there is no data step. 
+#
+# By default the suite measures DataFusion's built-in left-deep AND short-circuit
+# and sets no engine config of its own. To evaluate a system under test, export
+# its native DATAFUSION_* config before invoking bench.sh -- the harness reads
+# SessionConfig::from_env, and that environment is inherited here, e.g.
+#   DATAFUSION_EXECUTION_ADAPTIVE_FILTER_REORDERING=true ./bench.sh run predicate_eval
+# Suite-specific knobs (string-substituted into the load SQL, not engine config):
+#   BENCH_SUBGROUP  run one subgroup (costsel, cost, selectivity, cardinality,
+#                   width, scale, neutral, correlation, drift, nulls)
+#   PRED_ROWS       synthetic row count (default 1_000_000; scale subgroup overrides per query)
+#   PRED_FILL       filler chars per marker = string-column width knob (load SQL default 30)
+# PRED_ROWS is always forwarded; BENCH_SUBGROUP, PRED_FILL, QUERY (as BENCH_QUERY) only if set.
+run_predicate_eval() {
+    echo "Running predicate_eval benchmark (subgroup=${BENCH_SUBGROUP:-all}, rows=${PRED_ROWS:-1000000})..."
+    debug_run env BENCH_NAME=predicate_eval \
+        ${BENCH_SUBGROUP:+BENCH_SUBGROUP="${BENCH_SUBGROUP}"} \
+        PRED_ROWS="${PRED_ROWS:-1000000}" \
+        ${PRED_FILL:+PRED_FILL="${PRED_FILL}"} \
+        ${QUERY:+BENCH_QUERY="${QUERY}"} \
+        bash -c "$SQL_CARGO_COMMAND"
+}
+
 # Runs the tpch in memory (needs tpch parquet data)
 run_tpch_mem() {
     SCALE_FACTOR=$1
diff --git a/benchmarks/sql_benchmarks/README.md b/benchmarks/sql_benchmarks/README.md
index 1705cf0d2f58b..be4427174db36 100644
--- a/benchmarks/sql_benchmarks/README.md
+++ b/benchmarks/sql_benchmarks/README.md
@@ -42,6 +42,7 @@ in the community:
 | `tpcds` | TPC‑DS queries |
 | `tpch` | TPC‑H queries |
 | `wide_schema` | Small-projection queries on a wide (1024-col, 256-file) synthetic dataset; runs `wide` + `narrow` subgroups for comparison |
+| `predicate_eval` | Conjunctive (AND) filter-evaluation micro-benchmarks; each subgroup is a different predicate pattern, to test how an adaptive predicate-ordering system behaves across them ([#11262](https://github.com/apache/datafusion/issues/11262)).
Subgroups (`BENCH_SUBGROUP`): `costsel`, `cost`, `selectivity`, `cardinality`, `width`, `scale`, `neutral`, `correlation`, `drift`, `nulls`. Toggle a system under test with its native `DATAFUSION_*` env var | # Running Benchmarks @@ -94,6 +95,8 @@ Some benchmarks use custom environment variables as outlined below: | BENCH_SORTED | Used in the sort_tpch benchmark to indicate whether the lineitem table should be sorted. | false | | SORTED_BY | Used in the clickbench_sorted benchmark to indicate the column to sort by. | `EventTime` | | SORTED_ORDER | Used in the clickbench_sorted benchmark to indicate the sort order of the column. | `ASC` | +| PRED_ROWS | Used in the predicate_eval benchmark to size the synthetic table (the `scale` subgroup overrides this per query). | `1000000` | +| PRED_FILL | Used in the predicate_eval benchmark as the string-column width knob (filler chars per marker). | `30` | ## How it works diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cardinality/q30.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cardinality/q30.benchmark new file mode 100644 index 0000000000000..760ea2ca902a4 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cardinality/q30.benchmark @@ -0,0 +1,7 @@ +subgroup cardinality + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=cardinality +QPAD=30 +DATASET=ints +NAME=cardinality_q30_k2 diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cardinality/q31.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cardinality/q31.benchmark new file mode 100644 index 0000000000000..74f22715d1eb6 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cardinality/q31.benchmark @@ -0,0 +1,7 @@ +subgroup cardinality + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=cardinality +QPAD=31 +DATASET=ints +NAME=cardinality_q31_k4 diff --git 
a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cardinality/q32.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cardinality/q32.benchmark new file mode 100644 index 0000000000000..b6b69c3852361 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cardinality/q32.benchmark @@ -0,0 +1,7 @@ +subgroup cardinality + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=cardinality +QPAD=32 +DATASET=ints +NAME=cardinality_q32_k8 diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cardinality/q33.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cardinality/q33.benchmark new file mode 100644 index 0000000000000..1260e68137860 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cardinality/q33.benchmark @@ -0,0 +1,7 @@ +subgroup cardinality + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=cardinality +QPAD=33 +DATASET=ints +NAME=cardinality_q33_k16 diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/correlation/q70.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/correlation/q70.benchmark new file mode 100644 index 0000000000000..ef20f7dc495b8 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/correlation/q70.benchmark @@ -0,0 +1,7 @@ +subgroup correlation + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=correlation +QPAD=70 +DATASET=corr +NAME=correlation_q70_independent diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/correlation/q71.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/correlation/q71.benchmark new file mode 100644 index 0000000000000..8875f6c44e359 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/correlation/q71.benchmark @@ -0,0 +1,7 @@ +subgroup correlation + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=correlation +QPAD=71 
+DATASET=corr +NAME=correlation_q71_positive diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/correlation/q72.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/correlation/q72.benchmark new file mode 100644 index 0000000000000..8109f1439aedb --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/correlation/q72.benchmark @@ -0,0 +1,7 @@ +subgroup correlation + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=correlation +QPAD=72 +DATASET=corr +NAME=correlation_q72_anti diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cost/q10.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cost/q10.benchmark new file mode 100644 index 0000000000000..9b864b859457d --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cost/q10.benchmark @@ -0,0 +1,7 @@ +subgroup cost + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=cost +QPAD=10 +DATASET=mixed +NAME=cost_q10_expensive_first diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cost/q11.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cost/q11.benchmark new file mode 100644 index 0000000000000..296ea443b3fec --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/cost/q11.benchmark @@ -0,0 +1,7 @@ +subgroup cost + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=cost +QPAD=11 +DATASET=mixed +NAME=cost_q11_cheap_first diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/costsel/q01.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/costsel/q01.benchmark new file mode 100644 index 0000000000000..abedd1d580831 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/costsel/q01.benchmark @@ -0,0 +1,7 @@ +subgroup costsel + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=costsel +QPAD=01 +DATASET=markers 
+NAME=costsel_q01_regexp_selective_last diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/costsel/q02.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/costsel/q02.benchmark new file mode 100644 index 0000000000000..f50aab66427ec --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/costsel/q02.benchmark @@ -0,0 +1,7 @@ +subgroup costsel + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=costsel +QPAD=02 +DATASET=markers +NAME=costsel_q02_regexp_selective_first diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/costsel/q03.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/costsel/q03.benchmark new file mode 100644 index 0000000000000..10c4ce184eb34 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/costsel/q03.benchmark @@ -0,0 +1,7 @@ +subgroup costsel + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=costsel +QPAD=03 +DATASET=mixed +NAME=costsel_q03_cheap_unselective_then_expensive_selective diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/drift/q80.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/drift/q80.benchmark new file mode 100644 index 0000000000000..970adc53f8017 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/drift/q80.benchmark @@ -0,0 +1,7 @@ +subgroup drift + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=drift +QPAD=80 +DATASET=drift +NAME=drift_q80_a_then_b diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/drift/q81.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/drift/q81.benchmark new file mode 100644 index 0000000000000..93cde75ffef87 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/drift/q81.benchmark @@ -0,0 +1,7 @@ +subgroup drift + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=drift +QPAD=81 
+DATASET=drift +NAME=drift_q81_b_then_a diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/neutral/q60.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/neutral/q60.benchmark new file mode 100644 index 0000000000000..039fee622b48b --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/neutral/q60.benchmark @@ -0,0 +1,7 @@ +subgroup neutral + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=neutral +QPAD=60 +DATASET=ints +NAME=neutral_q60_cheap_uniform diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/neutral/q61.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/neutral/q61.benchmark new file mode 100644 index 0000000000000..edaf89b471c5f --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/neutral/q61.benchmark @@ -0,0 +1,7 @@ +subgroup neutral + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=neutral +QPAD=61 +DATASET=markers +NAME=neutral_q61_expensive_uniform diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/nulls/q90.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/nulls/q90.benchmark new file mode 100644 index 0000000000000..4835ef5a879aa --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/nulls/q90.benchmark @@ -0,0 +1,7 @@ +subgroup nulls + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=nulls +QPAD=90 +DATASET=ints +NAME=nulls_q90_no_nulls_control diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/nulls/q91.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/nulls/q91.benchmark new file mode 100644 index 0000000000000..c4a90a37f3e3f --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/nulls/q91.benchmark @@ -0,0 +1,7 @@ +subgroup nulls + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=nulls +QPAD=91 +DATASET=nulls 
+NAME=nulls_q91_half_null diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/scale/q50.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/scale/q50.benchmark new file mode 100644 index 0000000000000..0bef31e14f402 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/scale/q50.benchmark @@ -0,0 +1,8 @@ +subgroup scale + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=scale +QPAD=50 +DATASET=mixed +PRED_ROWS=5000 +NAME=scale_q50_5k diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/scale/q51.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/scale/q51.benchmark new file mode 100644 index 0000000000000..8f1315fb113b1 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/scale/q51.benchmark @@ -0,0 +1,8 @@ +subgroup scale + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=scale +QPAD=51 +DATASET=mixed +PRED_ROWS=100000 +NAME=scale_q51_100k diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/scale/q52.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/scale/q52.benchmark new file mode 100644 index 0000000000000..7ddbfc19b443d --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/scale/q52.benchmark @@ -0,0 +1,8 @@ +subgroup scale + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=scale +QPAD=52 +DATASET=mixed +PRED_ROWS=5000000 +NAME=scale_q52_5m diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/scale/q53.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/scale/q53.benchmark new file mode 100644 index 0000000000000..6cea5c44a108b --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/scale/q53.benchmark @@ -0,0 +1,8 @@ +subgroup scale + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=scale +QPAD=53 +DATASET=mixed +PRED_ROWS=50000000 +NAME=scale_q53_50m 
diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/selectivity/q20.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/selectivity/q20.benchmark new file mode 100644 index 0000000000000..077a62650d2f0 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/selectivity/q20.benchmark @@ -0,0 +1,7 @@ +subgroup selectivity + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=selectivity +QPAD=20 +DATASET=ints +NAME=selectivity_q20_unselective_first diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/selectivity/q21.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/selectivity/q21.benchmark new file mode 100644 index 0000000000000..24fc6ef4cd62f --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/selectivity/q21.benchmark @@ -0,0 +1,7 @@ +subgroup selectivity + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=selectivity +QPAD=21 +DATASET=ints +NAME=selectivity_q21_selective_first diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/width/q40.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/width/q40.benchmark new file mode 100644 index 0000000000000..df66cf16a37ec --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/width/q40.benchmark @@ -0,0 +1,8 @@ +subgroup width + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=width +QPAD=40 +DATASET=markers +PRED_FILL=2 +NAME=width_q40_narrow diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/width/q41.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/width/q41.benchmark new file mode 100644 index 0000000000000..c260dc9985a0c --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/width/q41.benchmark @@ -0,0 +1,8 @@ +subgroup width + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=width +QPAD=41 +DATASET=markers 
+PRED_FILL=30 +NAME=width_q41_wide diff --git a/benchmarks/sql_benchmarks/predicate_eval/benchmarks/width/q42.benchmark b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/width/q42.benchmark new file mode 100644 index 0000000000000..988ff59c70fe5 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/benchmarks/width/q42.benchmark @@ -0,0 +1,8 @@ +subgroup width + +template sql_benchmarks/predicate_eval/predicate_eval.benchmark.template +SUBGROUP=width +QPAD=42 +DATASET=markers +PRED_FILL=170 +NAME=width_q42_xwide diff --git a/benchmarks/sql_benchmarks/predicate_eval/init/cleanup.sql b/benchmarks/sql_benchmarks/predicate_eval/init/cleanup.sql new file mode 100644 index 0000000000000..48f076a9fa652 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/init/cleanup.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS t; diff --git a/benchmarks/sql_benchmarks/predicate_eval/load/corr.sql b/benchmarks/sql_benchmarks/predicate_eval/load/corr.sql new file mode 100644 index 0000000000000..2d7ceb73e608d --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/load/corr.sql @@ -0,0 +1,19 @@ +-- Correlation dataset: a base column plus derived columns that control the +-- *conditional* selectivity of one predicate given another (its selectivity +-- among the rows that already passed the other). +-- +-- x uniform [0,100) +-- x_pos = x (perfectly positively correlated: `x < 20 AND x_pos < 20` still matches ~20%) [... text lost in extraction: the remainder of load/corr.sql, the load/drift.sql and load/ints.sql diffs, and the head of the load/markers.sql diff ...] +-- 'aaa' present in ~90% of rows (value % 10 <> 0) +-- 'bbb' present in ~86% of rows (value % 7 <> 0) +-- 'ccc' present in ~80% of rows (value % 5 <> 0) +-- 'ddd' present in ~75% of rows (value % 4 <> 0) +-- 'rare' present in ~0.1% of rows (value % 1009 = 5) <- the selective one +-- +-- PRED_FILL sets the filler width per marker (the string-column width knob: ~6*PRED_FILL +-- chars per row), and PRED_ROWS sizes the table. 
+CREATE TABLE t AS +SELECT + repeat('q', ${PRED_FILL:-30}) + || CASE WHEN value % 10 <> 0 THEN 'aaa' ELSE 'zzz' END + || repeat('q', ${PRED_FILL:-30}) + || CASE WHEN value % 7 <> 0 THEN 'bbb' ELSE 'zzz' END + || repeat('q', ${PRED_FILL:-30}) + || CASE WHEN value % 5 <> 0 THEN 'ccc' ELSE 'zzz' END + || repeat('q', ${PRED_FILL:-30}) + || CASE WHEN value % 4 <> 0 THEN 'ddd' ELSE 'zzz' END + || repeat('q', ${PRED_FILL:-30}) + || CASE WHEN value % 1009 = 5 THEN 'rare' ELSE 'zzzz' END + || repeat('q', ${PRED_FILL:-30}) AS s +FROM generate_series(1, ${PRED_ROWS:-1000000}); diff --git a/benchmarks/sql_benchmarks/predicate_eval/load/mixed.sql b/benchmarks/sql_benchmarks/predicate_eval/load/mixed.sql new file mode 100644 index 0000000000000..a51c1040daca6 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/load/mixed.sql @@ -0,0 +1,26 @@ +-- Mixed-cost dataset: cheap integer columns (`cN < k` ~ k% selectivity) +-- alongside one wide string column carrying three markers matched by expensive +-- `regexp_like`: +-- +-- 'rare' present in ~0.1% of rows (value % 1009 = 5) +-- 'ten' present in ~10% of rows (value % 10 = 0) +-- 'aaa' present in ~90% of rows (value % 10 <> 0) +-- +-- This lets a single table mix cheap integer compares with expensive regexp +-- scans at independently chosen selectivities (e.g. a cheap, unselective compare +-- next to an expensive, selective regexp). PRED_FILL is the string-width knob; +-- PRED_ROWS sizes the table. 
+CREATE TABLE t AS
+SELECT
+    (value * 1) % 100 AS c0,
+    (value * 3) % 100 AS c1,
+    (value * 7) % 100 AS c2,
+    (value * 9) % 100 AS c3,
+    repeat('q', ${PRED_FILL:-30})
+        || CASE WHEN value % 1009 = 5 THEN 'rare' ELSE 'zzzz' END
+        || repeat('q', ${PRED_FILL:-30})
+        || CASE WHEN value % 10 = 0 THEN 'ten' ELSE 'zzz' END
+        || repeat('q', ${PRED_FILL:-30})
+        || CASE WHEN value % 10 <> 0 THEN 'aaa' ELSE 'zzz' END
+        || repeat('q', ${PRED_FILL:-30}) AS s
+FROM generate_series(1, ${PRED_ROWS:-1000000});
diff --git a/benchmarks/sql_benchmarks/predicate_eval/load/nulls.sql b/benchmarks/sql_benchmarks/predicate_eval/load/nulls.sql
new file mode 100644
index 0000000000000..e5365eae7bd3c
--- /dev/null
+++ b/benchmarks/sql_benchmarks/predicate_eval/load/nulls.sql
@@ -0,0 +1,17 @@
+-- Null-density dataset: integer columns that are NULL on a controlled fraction
+-- of rows, so predicate results are three-valued (the no-null `ints` dataset
+-- has none). Among the non-null rows, `cN < k` still has ~k% selectivity.
+--
+--   c0  NULL on ~50% of rows  (odd values)
+--   c1  NULL on ~67% of rows  (value % 3 <> 0)
+--   c2  NULL on ~50% of rows  (even values; so never non-NULL together with c0)
+--   c3  NULL on ~80% of rows  (value % 5 <> 0)
+--
+-- PRED_ROWS sizes the table.
+CREATE TABLE t AS
+SELECT
+    CASE WHEN value % 2 = 0 THEN (value * 1) % 100 END AS c0,
+    CASE WHEN value % 3 = 0 THEN (value * 7) % 100 END AS c1,
+    CASE WHEN value % 2 = 1 THEN (value * 9) % 100 END AS c2,
+    CASE WHEN value % 5 = 0 THEN (value * 11) % 100 END AS c3
+FROM generate_series(1, ${PRED_ROWS:-1000000});
diff --git a/benchmarks/sql_benchmarks/predicate_eval/predicate_eval.benchmark.template b/benchmarks/sql_benchmarks/predicate_eval/predicate_eval.benchmark.template
new file mode 100644
index 0000000000000..0030a7e946eca
--- /dev/null
+++ b/benchmarks/sql_benchmarks/predicate_eval/predicate_eval.benchmark.template
@@ -0,0 +1,34 @@
+# Shared template for every predicate_eval benchmark.
Each qNN.benchmark sets +# its `subgroup` directive and then includes this template with parameters: +# SUBGROUP subgroup name, also the query sub-directory (e.g. costsel) +# QPAD zero-padded query id, also the query file stem (e.g. 01) +# DATASET load script stem under load/ (e.g. markers) +# NAME criterion display name (e.g. costsel_q01_regexp_selective_last) +# Optional (consumed by the load scripts via ${...:-default}): +# PRED_ROWS synthetic row count (default 1_000_000) +# PRED_FILL filler chars per marker = string-column width knob (default 30) +# +# The run SQL lives in queries/${SUBGROUP}/q${QPAD}.sql so the WHERE clause is +# readable on its own. The table is always named `t`, so the assert and cleanup +# are uniform across datasets. +# +# The suite is implementation-agnostic and sets no engine config of its own: it +# measures DataFusion's built-in left-deep `AND` short-circuit by default. To +# evaluate a predicate-ordering system under test, set its native config via the +# environment (the bench harness builds its SessionContext with +# SessionConfig::from_env), e.g. +# DATAFUSION_EXECUTION_ADAPTIVE_FILTER_REORDERING=true + +load sql_benchmarks/predicate_eval/load/${DATASET}.sql + +name ${NAME} +group predicate_eval + +assert I +SELECT count(*) > 0 FROM t; +---- +true + +run sql_benchmarks/predicate_eval/queries/${SUBGROUP}/q${QPAD}.sql + +cleanup sql_benchmarks/predicate_eval/init/cleanup.sql diff --git a/benchmarks/sql_benchmarks/predicate_eval/predicate_eval.suite b/benchmarks/sql_benchmarks/predicate_eval/predicate_eval.suite new file mode 100644 index 0000000000000..c5680463b1ea4 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/predicate_eval.suite @@ -0,0 +1,2 @@ +name = "predicate_eval" +description = "Micro-benchmarks for conjunctive (AND) filter evaluation. 
Each subgroup exercises a different predicate pattern (per-predicate cost, selectivity, conjunct count, string-column width, row count, correlation, selectivity drift, nulls, plus an order-neutral control) so the suite can show how an adaptive predicate-ordering system behaves across them -- the kind of change these benchmarks are meant to help drive, e.g. https://github.com/apache/datafusion/issues/11262. By default it measures DataFusion's built-in left-deep AND short-circuit and sets no engine config of its own; toggle a system under test with its native DATAFUSION_* env var (the harness reads SessionConfig::from_env), e.g. DATAFUSION_EXECUTION_ADAPTIVE_FILTER_REORDERING=true. Subgroups (BENCH_SUBGROUP): costsel, cost, selectivity, cardinality, width, scale, neutral, correlation, drift, nulls. Size synthetic data with PRED_ROWS and string-column width with PRED_FILL." diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/cardinality/q30.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/cardinality/q30.sql new file mode 100644 index 0000000000000..3be840e917383 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/cardinality/q30.sql @@ -0,0 +1,6 @@ +-- Hidden: cheap integer compares; `c1 < 5` matches ~5%, the `c0 < 90` family +-- ~90%. k = 2 here. q30..q33 sweep k = 2/4/8/16 with one ~5% predicate written +-- last among ~90% ones. +SELECT count(*) FROM t +WHERE c0 < 90 + AND c1 < 5; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/cardinality/q31.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/cardinality/q31.sql new file mode 100644 index 0000000000000..4ba84f8124be9 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/cardinality/q31.sql @@ -0,0 +1,6 @@ +-- k = 4: three ~90% compares followed by one ~5% compare. See q30. 
+SELECT count(*) FROM t +WHERE c0 < 90 + AND c1 < 90 + AND c2 < 90 + AND c3 < 5; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/cardinality/q32.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/cardinality/q32.sql new file mode 100644 index 0000000000000..d9e920cc62574 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/cardinality/q32.sql @@ -0,0 +1,10 @@ +-- k = 8: seven ~90% compares followed by one ~5% compare. See q30. +SELECT count(*) FROM t +WHERE c0 < 90 + AND c1 < 90 + AND c2 < 90 + AND c3 < 90 + AND c4 < 90 + AND c5 < 90 + AND c6 < 90 + AND c7 < 5; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/cardinality/q33.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/cardinality/q33.sql new file mode 100644 index 0000000000000..2408427ab7632 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/cardinality/q33.sql @@ -0,0 +1,18 @@ +-- k = 16: fifteen ~90% compares followed by one ~5% compare. See q30. +SELECT count(*) FROM t +WHERE c0 < 90 + AND c1 < 90 + AND c2 < 90 + AND c3 < 90 + AND c4 < 90 + AND c5 < 90 + AND c6 < 90 + AND c7 < 90 + AND c8 < 90 + AND c9 < 90 + AND c10 < 90 + AND c11 < 90 + AND c12 < 90 + AND c13 < 90 + AND c14 < 90 + AND c15 < 5; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/correlation/q70.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/correlation/q70.sql new file mode 100644 index 0000000000000..86e33534c705d --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/correlation/q70.sql @@ -0,0 +1,6 @@ +-- Hidden: `x` and `ind` are independent, each ~20%, so the conjunction matches +-- ~4% and the second predicate is just as selective among the first's survivors +-- as on its own. Baseline for the correlation sweep. cf. q71, q72. 
+SELECT count(*) FROM t +WHERE x < 20 + AND ind < 20; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/correlation/q71.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/correlation/q71.sql new file mode 100644 index 0000000000000..eda61cc289e92 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/correlation/q71.sql @@ -0,0 +1,6 @@ +-- Hidden: `x_pos` is a copy of `x`, so `x < 20 AND x_pos < 20` still matches +-- ~20% (not the ~4% independence would imply) -- the second predicate removes +-- none of the first's survivors. cf. q70. +SELECT count(*) FROM t +WHERE x < 20 + AND x_pos < 20; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/correlation/q72.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/correlation/q72.sql new file mode 100644 index 0000000000000..ff987524da6ed --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/correlation/q72.sql @@ -0,0 +1,6 @@ +-- Hidden: `x_anti` is `99 - x`, so `x < 50 AND x_anti < 50` is empty -- the +-- second predicate removes all of the first's survivors, though each matches +-- ~50% alone. cf. q70. +SELECT count(*) FROM t +WHERE x < 50 + AND x_anti < 50; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/cost/q10.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/cost/q10.sql new file mode 100644 index 0000000000000..b089ebc7a192a --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/cost/q10.sql @@ -0,0 +1,6 @@ +-- Hidden: both predicates match ~10%, but `regexp_like(s, 'ten')` scans the +-- string (expensive) while `c0 < 10` is a cheap compare. Equal selectivity, +-- unequal cost; expensive one written first. cf. q11 (opposite order). 
+SELECT count(*) FROM t +WHERE regexp_like(s, 'ten') + AND c0 < 10; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/cost/q11.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/cost/q11.sql new file mode 100644 index 0000000000000..82d748c93b3b2 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/cost/q11.sql @@ -0,0 +1,5 @@ +-- Same two predicates as q10 (both ~10%; regexp expensive, compare cheap), +-- opposite written order. cf. q10. +SELECT count(*) FROM t +WHERE c0 < 10 + AND regexp_like(s, 'ten'); diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/costsel/q01.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/costsel/q01.sql new file mode 100644 index 0000000000000..bc029ed5d8297 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/costsel/q01.sql @@ -0,0 +1,10 @@ +-- Hidden in the data: the five markers have very different selectivities -- +-- 'aaa' ~90%, 'bbb' ~86%, 'ccc' ~80%, 'ddd' ~75%, 'rare' ~0.1% -- while every +-- regexp_like costs about the same. 'rare' (most selective) is written last. +-- cf. q02 (most selective written first). +SELECT count(*) FROM t +WHERE regexp_like(s, 'aaa') + AND regexp_like(s, 'bbb') + AND regexp_like(s, 'ccc') + AND regexp_like(s, 'ddd') + AND regexp_like(s, 'rare'); diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/costsel/q02.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/costsel/q02.sql new file mode 100644 index 0000000000000..7f7fc61831ff0 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/costsel/q02.sql @@ -0,0 +1,8 @@ +-- Same predicates and hidden selectivities as q01 ('rare' ~0.1% is the +-- selective one, the rest 75-90%), but with 'rare' written first. cf. q01. 
+SELECT count(*) FROM t +WHERE regexp_like(s, 'rare') + AND regexp_like(s, 'aaa') + AND regexp_like(s, 'bbb') + AND regexp_like(s, 'ccc') + AND regexp_like(s, 'ddd'); diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/costsel/q03.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/costsel/q03.sql new file mode 100644 index 0000000000000..a583a498b211c --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/costsel/q03.sql @@ -0,0 +1,6 @@ +-- Hidden: `c0 < 90` matches ~90% (cheap integer compare); `regexp_like(s, +-- 'rare')` matches ~0.1% (scans the wide string). The cheaper predicate is the +-- less selective one. +SELECT count(*) FROM t +WHERE c0 < 90 + AND regexp_like(s, 'rare'); diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/drift/q80.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/drift/q80.sql new file mode 100644 index 0000000000000..b8cb61e85a478 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/drift/q80.sql @@ -0,0 +1,7 @@ +-- The non-obvious property: selectivity changes across the scan. Rows arrive in +-- `seq` order; `a_sel = 0` matches ~0.1% in the first 10% of rows and ~50% +-- after, `b_sel = 0` is the mirror -- so which predicate is more selective flips +-- partway through. cf. q81 (opposite order). +SELECT count(*) FROM t +WHERE a_sel = 0 + AND b_sel = 0; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/drift/q81.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/drift/q81.sql new file mode 100644 index 0000000000000..d65ef475cc0e0 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/drift/q81.sql @@ -0,0 +1,5 @@ +-- Same drifting predicates as q80 (a_sel/b_sel flip which is more selective +-- partway through the scan), opposite written order. cf. q80. 
+SELECT count(*) FROM t +WHERE b_sel = 0 + AND a_sel = 0; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/neutral/q60.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/neutral/q60.sql new file mode 100644 index 0000000000000..b217f56953272 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/neutral/q60.sql @@ -0,0 +1,7 @@ +-- Hidden: four integer compares of equal cost, each matching ~50% of rows. None +-- stands out in selectivity or cost, so the predicates are interchangeable. +SELECT count(*) FROM t +WHERE c0 < 50 + AND c1 < 50 + AND c2 < 50 + AND c3 < 50; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/neutral/q61.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/neutral/q61.sql new file mode 100644 index 0000000000000..7029a3d9f8f7d --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/neutral/q61.sql @@ -0,0 +1,8 @@ +-- Hidden: four regexp scans of about equal cost, all unselective ('aaa' ~90%, +-- 'bbb' ~86%, 'ccc' ~80%, 'ddd' ~75%). Like q60 the predicates are +-- interchangeable, but here each one is expensive. +SELECT count(*) FROM t +WHERE regexp_like(s, 'aaa') + AND regexp_like(s, 'bbb') + AND regexp_like(s, 'ccc') + AND regexp_like(s, 'ddd'); diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/nulls/q90.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/nulls/q90.sql new file mode 100644 index 0000000000000..1efece4fbcd04 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/nulls/q90.sql @@ -0,0 +1,5 @@ +-- No-null baseline, from the `ints` dataset: two cheap compares (~50% each) over +-- columns with no NULLs. cf. q91 (same query, null-heavy columns). 
+SELECT count(*) FROM t +WHERE c0 < 50 + AND c1 < 50; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/nulls/q91.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/nulls/q91.sql new file mode 100644 index 0000000000000..c7f004b24b225 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/nulls/q91.sql @@ -0,0 +1,5 @@ +-- From the `nulls` dataset: same two compares as q90, but `c0`/`c1` are NULL on +-- ~50%/~67% of rows, so the predicate results are three-valued. cf. q90. +SELECT count(*) FROM t +WHERE c0 < 50 + AND c1 < 50; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/scale/q50.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/scale/q50.sql new file mode 100644 index 0000000000000..03a0f1c0db285 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/scale/q50.sql @@ -0,0 +1,6 @@ +-- Same predicates as costsel/q03 (`c0 < 90` ~90% cheap, `regexp_like(s, 'rare')` +-- ~0.1% expensive). q50..q53 sweep table size; here PRED_ROWS=5_000, roughly a +-- single batch (batch counts in q50..q53 assume ~8192-row batches; TODO confirm). +SELECT count(*) FROM t +WHERE c0 < 90 + AND regexp_like(s, 'rare'); diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/scale/q51.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/scale/q51.sql new file mode 100644 index 0000000000000..28174a5df4f44 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/scale/q51.sql @@ -0,0 +1,4 @@ +-- q50 at PRED_ROWS=100_000 (~12 batches). See q50. +SELECT count(*) FROM t +WHERE c0 < 90 + AND regexp_like(s, 'rare'); diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/scale/q52.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/scale/q52.sql new file mode 100644 index 0000000000000..74938c4634f78 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/scale/q52.sql @@ -0,0 +1,4 @@ +-- q50 at PRED_ROWS=5_000_000 (~610 batches). See q50. 
+SELECT count(*) FROM t +WHERE c0 < 90 + AND regexp_like(s, 'rare'); diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/scale/q53.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/scale/q53.sql new file mode 100644 index 0000000000000..8edb4d4a057d2 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/scale/q53.sql @@ -0,0 +1,4 @@ +-- q50 at PRED_ROWS=50_000_000 (~6100 batches); builds a ~9 GB table. See q50. +SELECT count(*) FROM t +WHERE c0 < 90 + AND regexp_like(s, 'rare'); diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/selectivity/q20.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/selectivity/q20.sql new file mode 100644 index 0000000000000..3638f757a720d --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/selectivity/q20.sql @@ -0,0 +1,6 @@ +-- Hidden: two equally cheap integer compares of unequal selectivity -- `c4 < 95` +-- matches ~95%, `c0 < 5` matches ~5%. The less selective one is written first. +-- cf. q21 (opposite order). +SELECT count(*) FROM t +WHERE c4 < 95 + AND c0 < 5; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/selectivity/q21.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/selectivity/q21.sql new file mode 100644 index 0000000000000..5181faf38784f --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/selectivity/q21.sql @@ -0,0 +1,5 @@ +-- Same two equally-cheap compares as q20 (`c4 < 95` ~95%, `c0 < 5` ~5%), +-- opposite written order. cf. q20. +SELECT count(*) FROM t +WHERE c0 < 5 + AND c4 < 95; diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/width/q40.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/width/q40.sql new file mode 100644 index 0000000000000..1b3df3e937eb3 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/width/q40.sql @@ -0,0 +1,9 @@ +-- Same predicate set and hidden selectivities as costsel/q01 ('rare' ~0.1%, the +-- rest 75-90%); only the string-column width differs across q40/q41/q42. 
Narrow: +-- PRED_FILL=2, ~12 chars/row. +SELECT count(*) FROM t +WHERE regexp_like(s, 'aaa') + AND regexp_like(s, 'bbb') + AND regexp_like(s, 'ccc') + AND regexp_like(s, 'ddd') + AND regexp_like(s, 'rare'); diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/width/q41.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/width/q41.sql new file mode 100644 index 0000000000000..a03b576d9c959 --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/width/q41.sql @@ -0,0 +1,7 @@ +-- q40 with wide strings: PRED_FILL=30, ~186 chars/row; query text identical to q40. +SELECT count(*) FROM t +WHERE regexp_like(s, 'aaa') + AND regexp_like(s, 'bbb') + AND regexp_like(s, 'ccc') + AND regexp_like(s, 'ddd') + AND regexp_like(s, 'rare'); diff --git a/benchmarks/sql_benchmarks/predicate_eval/queries/width/q42.sql b/benchmarks/sql_benchmarks/predicate_eval/queries/width/q42.sql new file mode 100644 index 0000000000000..cb55d828e32ab --- /dev/null +++ b/benchmarks/sql_benchmarks/predicate_eval/queries/width/q42.sql @@ -0,0 +1,7 @@ +-- q40 with extra-wide strings: PRED_FILL=170, ~1 KB/row; query text identical to q40. +SELECT count(*) FROM t +WHERE regexp_like(s, 'aaa') + AND regexp_like(s, 'bbb') + AND regexp_like(s, 'ccc') + AND regexp_like(s, 'ddd') + AND regexp_like(s, 'rare');