From a1bc3987f71935926239711c3b4b9e27ce352191 Mon Sep 17 00:00:00 2001 From: Karthik Ramanathan Date: Mon, 11 Nov 2024 20:29:02 +0530 Subject: [PATCH] [#24834] YSQL: Support INSERT ... ON CONFLICT batching for NULLS NOT DISTINCT unique indexes [Sample, DNR] Summary: D38354 and D39023 introduced batching support for INSERT ... ON CONFLICT queries. D39058 introduced NULLS NOT DISTINCT support for unique indexes. This revision adds support for INSERT ... ON CONFLICT batching for arbiter indexes that are marked with NULLS NOT DISTINCT. In particular, this revision handles a specific subtle case with batching NULL values. Consider the following example: ``` CREATE TABLE ab_tab (a int, b int); CREATE UNIQUE INDEX NONCONCURRENTLY ah_idx ON ab_tab (a HASH) NULLS NOT DISTINCT; -- Query 1 INSERT INTO ab_tab VALUES (1, 1), (1, 2) ON CONFLICT (a) DO UPDATE SET b = EXCLUDED.b; -- Query 2 INSERT INTO ab_tab VALUES (null, 1), (null, 2) ON CONFLICT (a) DO UPDATE SET b = EXCLUDED.b; ``` Batching for Query 1 is executed in the following manner: ``` Index scan on ah_idx WHERE a IN (1, 1) -- corresponding to (1, 1) and (1, 2) The expression is deduplicated in DocDB and any row corresponding to a IN (1) is returned. Store key, slot corresponding to a = 1 in map Load key, slot corresponding to a = 1 in map -- corresponding to (1, 1) Store intent corresponding to a = 1 in map Load key, slot corresponding to a = 1 in map -- corresponding to (1, 2) Error "row cannot be modified a second time" ``` NULL values cannot be a part of the IN clause. Thus NULL values have to be looked up one at a time. Batching for Query 2 would be executed in the following manner without the special handling in this revision: ``` Index scan on ah_idx WHERE a IS NULL -- corresponding to (null, 1) Store key, slot corresponding to a is NULL in map Index scan on ah_idx WHERE a is NULL -- corresponding to (null, 2) Store key, slot corresponding to a is NULL in map Error "key already exists" ``` Thus NULL values have to be deduplicated before they are looked up. **Credits** Most of the code has been borrowed from @aagrawal's revision (D39058). Test Plan: ./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressPgConstraints' ./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressPgMisc' ./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressInsertOnConflict' ./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressForeignKey' Subscribers: yugaware, aagrawal, yql, ybase, smishra Differential Revision: https://phorge.dev.yugabyte.com/D39852 --- .../src/backend/executor/execIndexing.c | 79 ++++++++++++++++--- .../src/backend/executor/nodeModifyTable.c | 2 +- .../src/backend/executor/ybcModifyTable.c | 16 +--- .../src/include/executor/ybcModifyTable.h | 2 +- .../expected/yb_insert_on_conflict.out | 14 ++-- .../expected/yb_insert_on_conflict_1.out | 20 +++-- .../regress/sql/yb_insert_on_conflict.sql | 1 - 7 files changed, 94 insertions(+), 40 deletions(-) diff --git a/src/postgres/src/backend/executor/execIndexing.c b/src/postgres/src/backend/executor/execIndexing.c index 8ddd160b63e..3019341b119 100644 --- a/src/postgres/src/backend/executor/execIndexing.c +++ b/src/postgres/src/backend/executor/execIndexing.c @@ -138,7 +138,8 @@ static bool check_exclusion_or_unique_constraint(Relation heap, Relation index, CEOUC_WAIT_MODE waitMode, bool errorOK, ItemPointer conflictTid, - TupleTableSlot **ybConflictSlot); + TupleTableSlot **ybConflictSlot, + bool update_map); static bool index_recheck_constraint(Relation index, Oid *constr_procs, Datum *existing_values, bool *existing_isnull, @@ -415,7 +416,8 @@ YbExecDoInsertIndexTuple(ResultRelInfo *resultRelInfo, tupleid, values, isnull, estate, false, waitMode, violationOK, NULL, - NULL /* ybConflictSlot */); + NULL /* ybConflictSlot */, + false /* update_map */); } if ((checkUnique == UNIQUE_CHECK_PARTIAL || @@ -1135,7 +1137,8 @@ ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, values, isnull, estate, false, CEOUC_WAIT, true, conflictTid, - ybConflictSlot); + ybConflictSlot, + false /* update_map */); } if (!satisfiesConstraint) return false; @@ -1198,7 +1201,8 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, CEOUC_WAIT_MODE waitMode, bool violationOK, ItemPointer conflictTid, - TupleTableSlot **ybConflictSlot) + TupleTableSlot **ybConflictSlot, + bool update_map) { Oid *constr_procs; uint16 *constr_strats; @@ -1213,6 +1217,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, ExprContext *econtext; TupleTableSlot *existing_slot; TupleTableSlot *save_scantuple; + bool ybDropSlot = true; if (indexInfo->ii_ExclusionOps) { @@ -1369,6 +1374,28 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, * We have a definite conflict (or a potential one, but the caller * didn't want to wait). Return it to caller, or report it. */ + + if (update_map) + { + YBCPgInsertOnConflictKeyState keyState; + YBCPgYBTupleIdDescriptor *descr = + YBCBuildNonNullUniqueIndexYBTupleId(index, existing_values, existing_isnull); + + /* Handle duplicate keys having NULL values */ + HandleYBStatus(YBCPgInsertOnConflictKeyExists(descr, &keyState)); + if (keyState != KEY_READ) + { + YBCPgInsertOnConflictKeyInfo info = {existing_slot}; + HandleYBStatus(YBCPgAddInsertOnConflictKey(descr, &info)); + ybDropSlot = false; + } + /* + * TODO(arpan): There can be multiple conflicting rows once EXCLUDE + * constraint is supported. + */ + break; + } + if (violationOK) { conflict = true; @@ -1428,7 +1455,8 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index, * TODO(jason): this is not necessary for DO NOTHING, so it could be freed * here as a minor optimization in that case. */ - if (!*ybConflictSlot) + ybDropSlot &= (!ybConflictSlot || !*ybConflictSlot); + if (ybDropSlot) ExecDropSingleTupleTableSlot(existing_slot); return !conflict; } @@ -1450,7 +1478,8 @@ check_exclusion_constraint(Relation heap, Relation index, values, isnull, estate, newIndex, CEOUC_WAIT, false, NULL, - NULL /* ybConflictSlot */); + NULL /* ybConflictSlot */, + false /* update_map */); } /* @@ -1708,6 +1737,7 @@ yb_batch_fetch_conflicting_rows(int idx, ResultRelInfo *resultRelInfo, */ Datum dvalues[num_slots]; bool dnulls[num_slots]; + bool has_nulls = false; int array_len = 0; for (i = 0; i < num_slots; ++i) { @@ -1738,8 +1768,6 @@ yb_batch_fetch_conflicting_rows(int idx, ResultRelInfo *resultRelInfo, values, isnull); - Assert(!indexInfo->ii_NullsNotDistinct); - bool found_null = false; for (int j = 0; j < indnkeyatts; j++) { @@ -1749,8 +1777,39 @@ yb_batch_fetch_conflicting_rows(int idx, ResultRelInfo *resultRelInfo, break; } } + if (found_null) + { + /* + * If any of the input values are NULL, and the index uses the + * - nulls-not-distinct mode: Since NULLs cannot be looked up in + * batched fashion, do a one-off loopup. + * - nulls-are-distinct mode: the constraint check is assumed + * to pass (i.e., we assume the operators are strict). + */ + if (indexInfo->ii_NullsNotDistinct) + { + /* + * Re-use check_exclusion_or_unique_constraint to populate + * batching map to avoid code duplication. + */ + check_exclusion_or_unique_constraint(heap, + index, + indexInfo, + NULL /* tupleid */, + values, + isnull, + estate, + false /* newIndex */, + CEOUC_WAIT, + true /* violationOK */, + NULL /* conflictTid */, + NULL /* ybConflictSlot */, + true /* update_map */); + has_nulls = true; + } continue; + } if (indnkeyatts == 1) { @@ -1786,7 +1845,7 @@ yb_batch_fetch_conflicting_rows(int idx, ResultRelInfo *resultRelInfo, * Optimization to bail out early in case there is no batch read RPC to * send. An ON CONFLICT batching map will not be created for this index. */ - if (array_len == 0) + if (array_len == 0 && !has_nulls) { econtext->ecxt_scantuple = save_scantuple; return; @@ -1913,7 +1972,7 @@ yb_batch_fetch_conflicting_rows(int idx, ResultRelInfo *resultRelInfo, oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); YBCPgInsertOnConflictKeyInfo info = {existing_slot}; YBCPgYBTupleIdDescriptor *descr = - YBCBuildNonNullUniqueIndexYBTupleId(index, existing_values); + YBCBuildNonNullUniqueIndexYBTupleId(index, existing_values, NULL); HandleYBStatus(YBCPgAddInsertOnConflictKey(descr, &info)); MemoryContextSwitchTo(oldcontext); diff --git a/src/postgres/src/backend/executor/nodeModifyTable.c b/src/postgres/src/backend/executor/nodeModifyTable.c index c6a38efc3f2..50f3ebdf4fb 100644 --- a/src/postgres/src/backend/executor/nodeModifyTable.c +++ b/src/postgres/src/backend/executor/nodeModifyTable.c @@ -5564,7 +5564,7 @@ YbExecCheckIndexConstraints(EState *estate, * TODO(jason): revisit when exclusion constraint is supported. */ YBCPgYBTupleIdDescriptor *descr = - YBCBuildNonNullUniqueIndexYBTupleId(index, values); + YBCBuildNonNullUniqueIndexYBTupleId(index, values, isnull); /* * If this function is invoked from YbExecUpdateAct, we can skip the diff --git a/src/postgres/src/backend/executor/ybcModifyTable.c b/src/postgres/src/backend/executor/ybcModifyTable.c index 816fb7c6d65..f55161de483 100644 --- a/src/postgres/src/backend/executor/ybcModifyTable.c +++ b/src/postgres/src/backend/executor/ybcModifyTable.c @@ -518,15 +518,6 @@ YbIsInsertOnConflictReadBatchingEnabled(ResultRelInfo *resultRelInfo) /* * TODO(jason): figure out how to enable triggers. */ - /* - * TODO(GH#24834): Enable INSERT ON CONFLICT batching for null-not-distinct - * indexes. - */ - for (int i = 0; i < resultRelInfo->ri_NumIndices; i++) - { - if (resultRelInfo->ri_IndexRelationInfo[i]->ii_NullsNotDistinct) - return false; - } return (IsYBRelation(resultRelInfo->ri_RelationDesc) && !IsCatalogRelation(resultRelInfo->ri_RelationDesc) && resultRelInfo->ri_BatchSize > 1 && @@ -552,7 +543,7 @@ YbIsInsertOnConflictReadBatchingEnabled(ResultRelInfo *resultRelInfo) * secondary index. */ YBCPgYBTupleIdDescriptor * -YBCBuildNonNullUniqueIndexYBTupleId(Relation unique_index, Datum *values) +YBCBuildNonNullUniqueIndexYBTupleId(Relation unique_index, Datum *values, bool *nulls) { Assert(IsYBRelation(unique_index)); @@ -601,7 +592,7 @@ YBCBuildNonNullUniqueIndexYBTupleId(Relation unique_index, Datum *values) next_attr->collation_id = ybc_get_attcollation(tupdesc, attnum); next_attr->attr_num = attnum; next_attr->datum = values[i]; - next_attr->is_null = false; + next_attr->is_null = nulls == NULL ? false : nulls[i]; YBCPgColumnInfo column_info = {0}; HandleYBTableDescStatus(YBCPgGetColumnInfo(ybc_table_desc, attnum, @@ -633,7 +624,8 @@ YBCForeignKeyReferenceCacheDeleteIndex(Relation index, Datum *values, bool *isnu if (isnulls[i]) return; - YBCPgYBTupleIdDescriptor* descr = YBCBuildNonNullUniqueIndexYBTupleId(index, values); + YBCPgYBTupleIdDescriptor *descr = + YBCBuildNonNullUniqueIndexYBTupleId(index, values, NULL); HandleYBStatus(YBCPgForeignKeyReferenceCacheDelete(descr)); pfree(descr); } diff --git a/src/postgres/src/include/executor/ybcModifyTable.h b/src/postgres/src/include/executor/ybcModifyTable.h index df213316f32..48b4351c6f8 100644 --- a/src/postgres/src/include/executor/ybcModifyTable.h +++ b/src/postgres/src/include/executor/ybcModifyTable.h @@ -245,7 +245,7 @@ extern Datum YBCGetYBTupleIdFromSlot(TupleTableSlot *slot); extern Datum YBCComputeYBTupleIdFromSlot(Relation rel, TupleTableSlot *slot); extern YBCPgYBTupleIdDescriptor* -YBCBuildNonNullUniqueIndexYBTupleId(Relation unique_index, Datum *values); +YBCBuildNonNullUniqueIndexYBTupleId(Relation unique_index, Datum *values, bool *nulls); /* * Returns if a table has secondary indices. diff --git a/src/postgres/src/test/regress/expected/yb_insert_on_conflict.out b/src/postgres/src/test/regress/expected/yb_insert_on_conflict.out index f3decfd862f..75ce82ce675 100644 --- a/src/postgres/src/test/regress/expected/yb_insert_on_conflict.out +++ b/src/postgres/src/test/regress/expected/yb_insert_on_conflict.out @@ -437,7 +437,7 @@ INSERT INTO ab_tab VALUES (null, null) ON CONFLICT DO NOTHING; INSERT INTO ab_tab VALUES (null, 1), (null, 2) ON CONFLICT DO NOTHING; INSERT INTO ab_tab VALUES (null, 1), (null, 2) ON CONFLICT (a) DO UPDATE SET b = EXCLUDED.b; SELECT * FROM ab_tab WHERE a IS NULL ORDER BY b; - a | b + a | b ---+--- | 2 (1 row) @@ -448,9 +448,9 @@ CREATE UNIQUE INDEX NONCONCURRENTLY ioc_defaults_bc_idx ON ioc_defaults (b, c) N INSERT INTO ioc_defaults VALUES (1); INSERT INTO ioc_defaults VALUES (2), (3) ON CONFLICT (b, c) DO UPDATE SET a = EXCLUDED.a; SELECT * FROM ioc_defaults ORDER BY b, c; - a | b | c + a | b | c ---+----+--- - 3 | 42 | + 3 | 42 | (1 row) -- Reset. @@ -463,12 +463,12 @@ INSERT INTO ab_tab2 VALUES (123, null), (456, null) ON CONFLICT DO NOTHING; INSERT INTO ab_tab2 VALUES (null, null); INSERT INTO ab_tab2 VALUES (123, null), (456, null), (null, 5), (null, null) ON CONFLICT DO NOTHING; SELECT * FROM ab_tab2 ORDER BY a, b; - a | b + a | b -----+--- - 123 | - 456 | + 123 | + 456 | | 5 - | + | (4 rows) DELETE FROM ab_tab2; diff --git a/src/postgres/src/test/regress/expected/yb_insert_on_conflict_1.out b/src/postgres/src/test/regress/expected/yb_insert_on_conflict_1.out index 9c7c38b564a..51421dc7bee 100644 --- a/src/postgres/src/test/regress/expected/yb_insert_on_conflict_1.out +++ b/src/postgres/src/test/regress/expected/yb_insert_on_conflict_1.out @@ -432,10 +432,12 @@ INSERT INTO ab_tab VALUES (null, null) ON CONFLICT DO NOTHING; -- Multiple rows with NULL values should semantically be treated as the same logical row. INSERT INTO ab_tab VALUES (null, 1), (null, 2) ON CONFLICT DO NOTHING; INSERT INTO ab_tab VALUES (null, 1), (null, 2) ON CONFLICT (a) DO UPDATE SET b = EXCLUDED.b; +ERROR: ON CONFLICT DO UPDATE command cannot affect row a second time +HINT: Ensure that no rows proposed for insertion within the same command have duplicate constrained values. SELECT * FROM ab_tab WHERE a IS NULL ORDER BY b; - a | b + a | b ---+--- - | 2 + | (1 row) DELETE FROM ab_tab; @@ -443,10 +445,12 @@ DELETE FROM ab_tab; CREATE UNIQUE INDEX NONCONCURRENTLY ioc_defaults_bc_idx ON ioc_defaults (b, c) NULLS NOT DISTINCT; INSERT INTO ioc_defaults VALUES (1); INSERT INTO ioc_defaults VALUES (2), (3) ON CONFLICT (b, c) DO UPDATE SET a = EXCLUDED.a; +ERROR: ON CONFLICT DO UPDATE command cannot affect row a second time +HINT: Ensure that no rows proposed for insertion within the same command have duplicate constrained values. SELECT * FROM ioc_defaults ORDER BY b, c; - a | b | c + a | b | c ---+----+--- - 3 | 42 | + 1 | 42 | (1 row) -- Reset. @@ -459,12 +463,12 @@ INSERT INTO ab_tab2 VALUES (123, null), (456, null) ON CONFLICT DO NOTHING; INSERT INTO ab_tab2 VALUES (null, null); INSERT INTO ab_tab2 VALUES (123, null), (456, null), (null, 5), (null, null) ON CONFLICT DO NOTHING; SELECT * FROM ab_tab2 ORDER BY a, b; - a | b + a | b -----+--- - 123 | - 456 | + 123 | + 456 | | 5 - | + | (4 rows) DELETE FROM ab_tab2; diff --git a/src/postgres/src/test/regress/sql/yb_insert_on_conflict.sql b/src/postgres/src/test/regress/sql/yb_insert_on_conflict.sql index 51ac3028af9..afb3e2672d5 100644 --- a/src/postgres/src/test/regress/sql/yb_insert_on_conflict.sql +++ b/src/postgres/src/test/regress/sql/yb_insert_on_conflict.sql @@ -173,7 +173,6 @@ SELECT * FROM ioc_defaults ORDER BY b, c; DELETE FROM ab_tab WHERE a IS null; DROP INDEX ioc_defaults_bc_idx; TRUNCATE ioc_defaults; - -- NULLS NOT DISTINCT DROP INDEX ah_idx; CREATE UNIQUE INDEX NONCONCURRENTLY ah_idx ON ab_tab (a HASH) NULLS NOT DISTINCT;