Skip to content

Commit a0e05f6

Browse files
authored
Add smj SQL benchmark (#22803)
## Which issue does this PR close?

Part of #21706

## Rationale for this change

Continue work on the SQL benchmark migration.

## What changes are included in this PR?

Adds the smj (sort-merge join) SQL benchmark.

## Are these changes tested?

Yes: `BENCH_NAME=smj cargo bench --bench sql`

## Are there any user-facing changes?

No
1 parent ea5d448 commit a0e05f6

24 files changed

Lines changed: 658 additions & 0 deletions
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
name Q01
2+
group smj
3+
4+
init
5+
set datafusion.optimizer.prefer_hash_join=false;
6+
7+
expect_plan SortMergeJoinExec
8+
9+
run
10+
-- Q1: INNER 1M x 1M | 1:1 (both sides are range(1000000) with key = value, so keys are unique and each left row matches exactly one right row)
11+
WITH t1_sorted AS (
12+
SELECT value as key FROM range(1000000) ORDER BY value
13+
),
14+
t2_sorted AS (
15+
SELECT value as key FROM range(1000000) ORDER BY value
16+
)
17+
SELECT t1_sorted.key as k1, t2_sorted.key as k2
18+
FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
19+
20+
cleanup
21+
reset datafusion.optimizer.prefer_hash_join;
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
name Q02
2+
group smj
3+
4+
init
5+
set datafusion.optimizer.prefer_hash_join=false;
6+
7+
expect_plan SortMergeJoinExec
8+
9+
run
10+
-- Q2: INNER 1M x 10M | 1:10 (key = value % 100000 on both sides: 100000 distinct keys, 10 rows per key on the left and 100 rows per key on the right; no filter)
11+
WITH t1_sorted AS (
12+
SELECT value % 100000 as key, value as data
13+
FROM range(1000000)
14+
ORDER BY key, data
15+
),
16+
t2_sorted AS (
17+
SELECT value % 100000 as key, value as data
18+
FROM range(10000000)
19+
ORDER BY key, data
20+
)
21+
SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
22+
FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
23+
24+
cleanup
25+
reset datafusion.optimizer.prefer_hash_join;
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
name Q03
2+
group smj
3+
4+
init
5+
set datafusion.optimizer.prefer_hash_join=false;
6+
7+
expect_plan SortMergeJoinExec
8+
9+
run
10+
-- Q3: INNER 1M x 1M | 1:100 (key = value % 10000 on both sides: 10000 distinct keys with 100 rows per key on each side; no filter)
11+
WITH t1_sorted AS (
12+
SELECT value % 10000 as key, value as data
13+
FROM range(1000000)
14+
ORDER BY key, data
15+
),
16+
t2_sorted AS (
17+
SELECT value % 10000 as key, value as data
18+
FROM range(1000000)
19+
ORDER BY key, data
20+
)
21+
SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
22+
FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
23+
24+
cleanup
25+
reset datafusion.optimizer.prefer_hash_join;
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
name Q04
2+
group smj
3+
4+
init
5+
set datafusion.optimizer.prefer_hash_join=false;
6+
7+
expect_plan SortMergeJoinExec
8+
9+
run
10+
-- Q4: INNER 1M x 10M | 1:10 | 1% (same inputs as an unfiltered 1M x 10M join on value % 100000; the 1% is the WHERE filter t2_sorted.data % 100 = 0, which keeps 1 in 100 right-side rows)
11+
WITH t1_sorted AS (
12+
SELECT value % 100000 as key, value as data
13+
FROM range(1000000)
14+
ORDER BY key, data
15+
),
16+
t2_sorted AS (
17+
SELECT value % 100000 as key, value as data
18+
FROM range(10000000)
19+
ORDER BY key, data
20+
)
21+
SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
22+
FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
23+
WHERE t2_sorted.data % 100 = 0
24+
25+
cleanup
26+
reset datafusion.optimizer.prefer_hash_join;
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
name Q05
2+
group smj
3+
4+
init
5+
set datafusion.optimizer.prefer_hash_join=false;
6+
7+
expect_plan SortMergeJoinExec
8+
9+
run
10+
-- Q5: INNER 1M x 1M | 1:100 | 10% (key = value % 10000 on both sides, 100 rows per key each; WHERE combines a cross-side predicate t1_sorted.data <> t2_sorted.data with t2_sorted.data % 10 = 0, which keeps 1 in 10 right-side rows)
11+
WITH t1_sorted AS (
12+
SELECT value % 10000 as key, value as data
13+
FROM range(1000000)
14+
ORDER BY key, data
15+
),
16+
t2_sorted AS (
17+
SELECT value % 10000 as key, value as data
18+
FROM range(1000000)
19+
ORDER BY key, data
20+
)
21+
SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
22+
FROM t1_sorted JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
23+
WHERE t1_sorted.data <> t2_sorted.data AND t2_sorted.data % 10 = 0
24+
25+
cleanup
26+
reset datafusion.optimizer.prefer_hash_join;
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
name Q06
2+
group smj
3+
4+
init
5+
set datafusion.optimizer.prefer_hash_join=false;
6+
7+
expect_plan SortMergeJoinExec
8+
9+
run
10+
-- Q6: LEFT 1M x 10M | 1:10 (left key = value % 105000, right key = value % 100000, so left keys 100000..104999 have no right-side match and are emitted with NULL right columns; no filter)
11+
WITH t1_sorted AS (
12+
SELECT value % 105000 as key, value as data
13+
FROM range(1000000)
14+
ORDER BY key, data
15+
),
16+
t2_sorted AS (
17+
SELECT value % 100000 as key, value as data
18+
FROM range(10000000)
19+
ORDER BY key, data
20+
)
21+
SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
22+
FROM t1_sorted LEFT JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
23+
24+
cleanup
25+
reset datafusion.optimizer.prefer_hash_join;
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
name Q07
2+
group smj
3+
4+
init
5+
set datafusion.optimizer.prefer_hash_join=false;
6+
7+
expect_plan SortMergeJoinExec
8+
9+
run
10+
-- Q7: LEFT 1M x 10M | 1:10 | 50% (key = value % 100000 on both sides, so every left key has right-side matches; the 50% is t2_sorted.data % 2 = 0, and the IS NULL branch only keeps unmatched left rows, of which these inputs produce none)
11+
WITH t1_sorted AS (
12+
SELECT value % 100000 as key, value as data
13+
FROM range(1000000)
14+
ORDER BY key, data
15+
),
16+
t2_sorted AS (
17+
SELECT value % 100000 as key, value as data
18+
FROM range(10000000)
19+
ORDER BY key, data
20+
)
21+
SELECT t1_sorted.key, t1_sorted.data as d1, t2_sorted.data as d2
22+
FROM t1_sorted LEFT JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
23+
WHERE t2_sorted.data IS NULL OR t2_sorted.data % 2 = 0
24+
25+
cleanup
26+
reset datafusion.optimizer.prefer_hash_join;
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
name Q08
2+
group smj
3+
4+
init
5+
set datafusion.optimizer.prefer_hash_join=false;
6+
7+
expect_plan SortMergeJoinExec
8+
9+
run
10+
-- Q8: FULL 1M x 1M | 1:10 (left key = value % 100000 with 10 rows per key, right key = value % 125000 with 8 rows per key; right keys 100000..124999 have no left-side match and are emitted with NULL left columns; no filter)
11+
WITH t1_sorted AS (
12+
SELECT value % 100000 as key, value as data
13+
FROM range(1000000)
14+
ORDER BY key, data
15+
),
16+
t2_sorted AS (
17+
SELECT value % 125000 as key, value as data
18+
FROM range(1000000)
19+
ORDER BY key, data
20+
)
21+
SELECT t1_sorted.key as k1, t1_sorted.data as d1,
22+
t2_sorted.key as k2, t2_sorted.data as d2
23+
FROM t1_sorted FULL JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
24+
25+
cleanup
26+
reset datafusion.optimizer.prefer_hash_join;
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
name Q09
2+
group smj
3+
4+
init
5+
set datafusion.optimizer.prefer_hash_join=false;
6+
7+
expect_plan SortMergeJoinExec
8+
9+
run
10+
-- Q9: FULL 1M x 10M | 1:10 | 10% (key = value % 100000 on both sides; WHERE keeps rows that are NULL-extended on either side or have t1_sorted.data <> t2_sorted.data, and additionally requires t1_sorted.data to be NULL or a multiple of 10, i.e. 1 in 10 left rows)
11+
WITH t1_sorted AS (
12+
SELECT value % 100000 as key, value as data
13+
FROM range(1000000)
14+
ORDER BY key, data
15+
),
16+
t2_sorted AS (
17+
SELECT value % 100000 as key, value as data
18+
FROM range(10000000)
19+
ORDER BY key, data
20+
)
21+
SELECT t1_sorted.key as k1, t1_sorted.data as d1,
22+
t2_sorted.key as k2, t2_sorted.data as d2
23+
FROM t1_sorted FULL JOIN t2_sorted ON t1_sorted.key = t2_sorted.key
24+
WHERE (t1_sorted.data IS NULL OR t2_sorted.data IS NULL
25+
OR t1_sorted.data <> t2_sorted.data)
26+
AND (t1_sorted.data IS NULL OR t1_sorted.data % 10 = 0)
27+
28+
cleanup
29+
reset datafusion.optimizer.prefer_hash_join;
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
name Q10
2+
group smj
3+
4+
init
5+
set datafusion.optimizer.prefer_hash_join=false;
6+
7+
expect_plan SortMergeJoinExec
8+
9+
run
10+
-- Q10: LEFT SEMI 1M x 10M | 1:10 (semi join written as a correlated EXISTS; key = value % 100000 on both sides, the right side projects only key, and the output contains left-side columns only)
11+
WITH t1_sorted AS (
12+
SELECT value % 100000 as key, value as data
13+
FROM range(1000000)
14+
ORDER BY key, data
15+
),
16+
t2_sorted AS (
17+
SELECT value % 100000 as key
18+
FROM range(10000000)
19+
ORDER BY key
20+
)
21+
SELECT t1_sorted.key, t1_sorted.data
22+
FROM t1_sorted
23+
WHERE EXISTS (
24+
SELECT 1 FROM t2_sorted
25+
WHERE t2_sorted.key = t1_sorted.key
26+
)
27+
28+
cleanup
29+
reset datafusion.optimizer.prefer_hash_join;

0 commit comments

Comments
 (0)