|
5 | 5 | ISSUE: 6520
|
6 | 6 | TITLE: Efficient table scans for DBKEY-based range conditions
|
7 | 7 | DESCRIPTION:
|
8 |
| - We create table with very wide column and add there about 300 rows from rdb$types, with random data |
9 |
| - (in order to prevent RLE-compression which eventually can reduce number of data pages). |
10 |
| - Then we extract all values of rdb$db_key from this table and take into processing two of them. |
11 |
| - First value has 'distance' from starting db_key = 1/3 of total numbers of rows, second has similar |
12 |
| - distance from final db_key. |
13 |
| - Finally we launch trace and start query with SCOPED expression for RDB$DB_KEY: |
14 |
| - select count(*) from tmp_test_6278 where rdb$db_key between ? and ? |
15 |
| -
|
16 |
| - Trace must contain after this explained plan with "lower bound, upper bound" phrase and table statistics |
17 |
| - which shows number of reads = count of rows plus 1. |
18 |
| -
|
19 |
| - Before fix trace table statistics did not reflect scoped WHERE-expression on RDB$DB_KEY column. |
20 | 8 | JIRA: CORE-6278
|
21 | 9 | FBTEST: bugs.core_6278
|
| 10 | +NOTES: |
| 11 | + [07.05.2024] pzotov |
| 12 | + Test has been fully re-implemented. |
| 13 | + We can NOT assume that rdb$db_key values will be increased (in ASCII representation) while adding data |
| 14 | + into a table: smaller values of RDB$DB_KEY can appear *after* bigger ones (i.e. smaller RDB$DB_KEY will |
| 15 | + be physically closer to the end of table than bigger). |
| 16 | + Because of that, we now check only the EXPLAINED PLAN, without the runtime statistics from the trace log that were checked before.
| 17 | +
|
| 18 | + On build 4.0.0.1865 (07-apr-2020) explained plan for scoped query (like 'rdb$db_key between ? and ?') |
| 19 | + returned "Table ... Full Scan" - WITHOUT "(lower bound, upper bound)". |
| 20 | +
|
| 21 | + Since build 4.0.0.1869 (08-apr-2020) this operation is: "Table "TEST" Full Scan (lower bound, upper bound)".
| 22 | + See commit: |
| 23 | + https://github.com/FirebirdSQL/firebird/commit/3ce4605e3cc9960afcf0224ea40e04f508669eca |
| 24 | +
|
| 25 | + Checked on 5.0.1.1394, 6.0.0.345. |
22 | 26 | """
|
23 | 27 |
|
24 | 28 | import pytest
|
25 | 29 | import re
|
26 | 30 | from firebird.qa import *
|
27 | 31 |
|
28 |
| -db = db_factory() |
# Initial schema and data for the test database: table TEST gets one row per
# row of rdb$types, each holding a 256-character string built from a fresh UUID.
# A plain (non-f) string is used because nothing is interpolated into the script.
init_sql = """
    create table test (s varchar(256));
    commit;
    insert into test select lpad('', 256, uuid_to_char(gen_uuid())) from rdb$types a;
    commit;
"""

db = db_factory(init = init_sql)
act = python_act('db')
|
31 | 41 |
|
32 |
| -expected_stdout = """ |
33 |
| - -> Table "TMP_TEST_6278" Full Scan (lower bound, upper bound) |
34 |
| - Reads difference: EXPECTED. |
35 |
| -""" |
36 |
| - |
37 |
| -test_script = """ |
38 |
| - recreate table tmp_test_6278 (s varchar(32700)) ; |
39 |
| - insert into tmp_test_6278 select lpad('', 32700, uuid_to_char(gen_uuid())) from rdb$types ; |
40 |
| - commit ; |
41 |
| - set heading off ; |
42 |
| - set term ^ ; |
43 |
| - execute block returns( |
44 |
| - count_intermediate_rows int |
45 |
| - ) as |
46 |
| - declare dbkey_1 char(8) character set octets ; |
47 |
| - declare dbkey_2 char(8) character set octets ; |
48 |
| - declare sttm varchar(255) ; |
49 |
| - begin |
50 |
| - select max(iif( ri=1, dbkey, null)), max(iif( ri=2, dbkey, null)) |
51 |
| - from ( |
52 |
| - select dbkey, row_number()over(order by dbkey) ri |
53 |
| - from ( |
54 |
| - select |
55 |
| - dbkey |
56 |
| - ,row_number()over(order by dbkey) ra |
57 |
| - ,row_number()over(order by dbkey desc) rd |
58 |
| - from (select rdb$db_key as dbkey from tmp_test_6278) |
59 |
| - ) |
60 |
| - where |
61 |
| - ra = (ra+rd)/3 |
62 |
| - or rd = (ra+rd)/3 |
63 |
| - ) x |
64 |
| - into dbkey_1, dbkey_2 ; |
65 |
| -
|
66 |
| - sttm = q'{select count(*) from tmp_test_6278 where rdb$db_key between ? and ?}' ; |
67 |
| - execute statement (sttm) (dbkey_1, dbkey_2) into count_intermediate_rows ; |
68 |
| - suspend ; |
69 |
| - end ^ |
70 |
| - set term ; ^ |
71 |
| - commit ; |
72 |
| -""" |
| 42 | +#--------------------------------------------------------- |
73 | 43 |
|
74 |
| -trace = ['log_statement_finish = true', |
75 |
| - 'print_plan = true', |
76 |
| - 'print_perf = true', |
77 |
| - 'explain_plan = true', |
78 |
| - 'time_threshold = 0', |
79 |
| - 'log_initfini = false', |
80 |
| - 'exclude_filter = "%(execute block)%"', |
81 |
| - 'include_filter = "%(select count)%"', |
82 |
| - ] |
def replace_leading(source: str, char: str = ".") -> str:
    """Return *source* with each leading whitespace character replaced by *char*.

    Only the leading run of whitespace is affected; whitespace inside or at
    the end of the string is returned unchanged.

    Args:
        source: Text whose leading whitespace should be made visible.
        char: Replacement emitted once per removed leading character.

    Returns:
        ``char`` repeated for every leading whitespace character, followed by
        the remainder of ``source``.
    """
    stripped = source.lstrip()
    # The length difference is the number of leading whitespace characters.
    return char * (len(source) - len(stripped)) + stripped
83 | 47 |
|
| 48 | +#--------------------------------------------------------- |
84 | 49 |
|
85 |
| -@pytest.mark.version('>=4.0') |
@pytest.mark.version('>=4.0.0')
def test_1(act: Action, capsys):
    """Check that RDB$DB_KEY range predicates are reported in the explained plan.

    Every predicate is only prepared, never executed. The detailed plan of each
    prepared statement is printed line by line after passing through
    ``replace_leading``, and the captured output is compared with the expected
    text.
    """
    # Order matters: the expected output below lists the plans in this order
    # (three two-sided ranges, then two lower-bound-only, then two upper-bound-only).
    scoped_expr_lst = (
        'rdb$db_key > ? and rdb$db_key < ?',
        'rdb$db_key >= ? and rdb$db_key <= ?',
        'rdb$db_key between ? and ?',
        'rdb$db_key > ?',
        'rdb$db_key >= ?',
        'rdb$db_key < ?',
        'rdb$db_key <= ?',
    )
    with act.db.connect() as con:
        cur = con.cursor()
        for x in scoped_expr_lst:
            with cur.prepare(f'select count(s) from test where {x}') as ps:
                # Leading whitespace is turned into dots -- presumably so the plan
                # indentation survives output clean-up; confirm against the framework.
                print('\n'.join(replace_leading(s) for s in ps.detailed_plan.split('\n')))

    act.expected_stdout = """
        Select Expression
        ....-> Aggregate
        ........-> Filter
        ............-> Table "TEST" Full Scan (lower bound, upper bound)

        Select Expression
        ....-> Aggregate
        ........-> Filter
        ............-> Table "TEST" Full Scan (lower bound, upper bound)

        Select Expression
        ....-> Aggregate
        ........-> Filter
        ............-> Table "TEST" Full Scan (lower bound, upper bound)

        Select Expression
        ....-> Aggregate
        ........-> Filter
        ............-> Table "TEST" Full Scan (lower bound)

        Select Expression
        ....-> Aggregate
        ........-> Filter
        ............-> Table "TEST" Full Scan (lower bound)

        Select Expression
        ....-> Aggregate
        ........-> Filter
        ............-> Table "TEST" Full Scan (upper bound)

        Select Expression
        ....-> Aggregate
        ........-> Filter
        ............-> Table "TEST" Full Scan (upper bound)
    """
    act.stdout = capsys.readouterr().out
    assert act.clean_stdout == act.clean_expected_stdout
|
0 commit comments