Skip to content

Commit

Permalink
[yugabyte#24834] YSQL: Support INSERT ... ON CONFLICT batching for NU…
Browse files Browse the repository at this point in the history
…LLS NOT DISTINCT unique indexes [Sample, DNR]

Summary:
D38354 and D39023 introduced batching support for INSERT ... ON CONFLICT queries.
D39058 introduced NULLS NOT DISTINCT support for unique indexes.

This revision adds support for INSERT ... ON CONFLICT batching for arbiter indexes that are marked with NULLS NOT DISTINCT.
In particular, this revision handles a specific subtle case with batching NULL values. Consider the following example:
```
CREATE TABLE ab_tab (a int, b int);
CREATE UNIQUE INDEX NONCONCURRENTLY ah_idx ON ab_tab (a HASH) NULLS NOT DISTINCT;

-- Query 1
INSERT INTO ab_tab VALUES (1, 1), (1, 2) ON CONFLICT (a) DO UPDATE SET b = EXCLUDED.b;

-- Query 2
INSERT INTO ab_tab VALUES (null, 1), (null, 2) ON CONFLICT (a) DO UPDATE SET b = EXCLUDED.b;
```

Batching for Query 1 is executed in the following manner:
```
Index scan on ah_idx WHERE a IN (1, 1) -- corresponding to (1, 1) and (1, 2)
The expression is deduplicated in DocDB and any row corresponding to a IN (1) is returned.
Store key, slot corresponding to a = 1 in map
Load key, slot corresponding to a = 1 in map -- corresponding to (1, 1)
Store intent corresponding to a = 1 in map
Load key, slot corresponding to a = 1 in map -- corresponding to (1, 2)
Error "row cannot be modified a second time"
```

NULL values cannot be a part of the IN clause. Thus NULL values have to be looked up one at a time.
Batching for Query 2 would be executed in the following manner without the special handling in this revision:
```
Index scan on ah_idx WHERE a IS NULL -- corresponding to (null, 1)
Store key, slot corresponding to a is NULL in map
Index scan on ah_idx WHERE a is NULL -- corresponding to (null, 2)
Store key, slot corresponding to a is NULL in map
Error "key already exists"
```
Thus NULL values have to be deduplicated before they are looked up.

**Credits**
Most of the code has been borrowed from @aagrawal's revision (D39058).

Test Plan:
   ./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressPgConstraints'
   ./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressPgMisc'
   ./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressInsertOnConflict'
   ./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressForeignKey'

Subscribers: yugaware, aagrawal, yql, ybase, smishra

Differential Revision: https://phorge.dev.yugabyte.com/D39852
  • Loading branch information
karthik-ramanathan-3006 authored and arpang committed Nov 11, 2024
1 parent 5c011c8 commit a1bc398
Show file tree
Hide file tree
Showing 7 changed files with 94 additions and 40 deletions.
79 changes: 69 additions & 10 deletions src/postgres/src/backend/executor/execIndexing.c
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ static bool check_exclusion_or_unique_constraint(Relation heap, Relation index,
CEOUC_WAIT_MODE waitMode,
bool errorOK,
ItemPointer conflictTid,
TupleTableSlot **ybConflictSlot);
TupleTableSlot **ybConflictSlot,
bool update_map);

static bool index_recheck_constraint(Relation index, Oid *constr_procs,
Datum *existing_values, bool *existing_isnull,
Expand Down Expand Up @@ -415,7 +416,8 @@ YbExecDoInsertIndexTuple(ResultRelInfo *resultRelInfo,
tupleid, values, isnull,
estate, false,
waitMode, violationOK, NULL,
NULL /* ybConflictSlot */);
NULL /* ybConflictSlot */,
false /* update_map */);
}

if ((checkUnique == UNIQUE_CHECK_PARTIAL ||
Expand Down Expand Up @@ -1135,7 +1137,8 @@ ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
values, isnull, estate, false,
CEOUC_WAIT, true,
conflictTid,
ybConflictSlot);
ybConflictSlot,
false /* update_map */);
}
if (!satisfiesConstraint)
return false;
Expand Down Expand Up @@ -1198,7 +1201,8 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index,
CEOUC_WAIT_MODE waitMode,
bool violationOK,
ItemPointer conflictTid,
TupleTableSlot **ybConflictSlot)
TupleTableSlot **ybConflictSlot,
bool update_map)
{
Oid *constr_procs;
uint16 *constr_strats;
Expand All @@ -1213,6 +1217,7 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index,
ExprContext *econtext;
TupleTableSlot *existing_slot;
TupleTableSlot *save_scantuple;
bool ybDropSlot = true;

if (indexInfo->ii_ExclusionOps)
{
Expand Down Expand Up @@ -1369,6 +1374,28 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index,
* We have a definite conflict (or a potential one, but the caller
* didn't want to wait). Return it to caller, or report it.
*/

if (update_map)
{
YBCPgInsertOnConflictKeyState keyState;
YBCPgYBTupleIdDescriptor *descr =
YBCBuildNonNullUniqueIndexYBTupleId(index, existing_values, existing_isnull);

/* Handle duplicate keys having NULL values */
HandleYBStatus(YBCPgInsertOnConflictKeyExists(descr, &keyState));
if (keyState != KEY_READ)
{
YBCPgInsertOnConflictKeyInfo info = {existing_slot};
HandleYBStatus(YBCPgAddInsertOnConflictKey(descr, &info));
ybDropSlot = false;
}
/*
* TODO(arpan): There can be multiple conflicting rows once EXCLUDE
* constraint is supported.
*/
break;
}

if (violationOK)
{
conflict = true;
Expand Down Expand Up @@ -1428,7 +1455,8 @@ check_exclusion_or_unique_constraint(Relation heap, Relation index,
* TODO(jason): this is not necessary for DO NOTHING, so it could be freed
* here as a minor optimization in that case.
*/
if (!*ybConflictSlot)
ybDropSlot &= (!ybConflictSlot || !*ybConflictSlot);
if (ybDropSlot)
ExecDropSingleTupleTableSlot(existing_slot);
return !conflict;
}
Expand All @@ -1450,7 +1478,8 @@ check_exclusion_constraint(Relation heap, Relation index,
values, isnull,
estate, newIndex,
CEOUC_WAIT, false, NULL,
NULL /* ybConflictSlot */);
NULL /* ybConflictSlot */,
false /* update_map */);
}

/*
Expand Down Expand Up @@ -1708,6 +1737,7 @@ yb_batch_fetch_conflicting_rows(int idx, ResultRelInfo *resultRelInfo,
*/
Datum dvalues[num_slots];
bool dnulls[num_slots];
bool has_nulls = false;
int array_len = 0;
for (i = 0; i < num_slots; ++i)
{
Expand Down Expand Up @@ -1738,8 +1768,6 @@ yb_batch_fetch_conflicting_rows(int idx, ResultRelInfo *resultRelInfo,
values,
isnull);

Assert(!indexInfo->ii_NullsNotDistinct);

bool found_null = false;
for (int j = 0; j < indnkeyatts; j++)
{
Expand All @@ -1749,8 +1777,39 @@ yb_batch_fetch_conflicting_rows(int idx, ResultRelInfo *resultRelInfo,
break;
}
}

if (found_null)
{
/*
* If any of the input values are NULL, and the index uses the
* - nulls-not-distinct mode: Since NULLs cannot be looked up in
* batched fashion, do a one-off loopup.
* - nulls-are-distinct mode: the constraint check is assumed
* to pass (i.e., we assume the operators are strict).
*/
if (indexInfo->ii_NullsNotDistinct)
{
/*
* Re-use check_exclusion_or_unique_constraint to populate
* batching map to avoid code duplication.
*/
check_exclusion_or_unique_constraint(heap,
index,
indexInfo,
NULL /* tupleid */,
values,
isnull,
estate,
false /* newIndex */,
CEOUC_WAIT,
true /* violationOK */,
NULL /* conflictTid */,
NULL /* ybConflictSlot */,
true /* update_map */);
has_nulls = true;
}
continue;
}

if (indnkeyatts == 1)
{
Expand Down Expand Up @@ -1786,7 +1845,7 @@ yb_batch_fetch_conflicting_rows(int idx, ResultRelInfo *resultRelInfo,
* Optimization to bail out early in case there is no batch read RPC to
* send. An ON CONFLICT batching map will not be created for this index.
*/
if (array_len == 0)
if (array_len == 0 && !has_nulls)
{
econtext->ecxt_scantuple = save_scantuple;
return;
Expand Down Expand Up @@ -1913,7 +1972,7 @@ yb_batch_fetch_conflicting_rows(int idx, ResultRelInfo *resultRelInfo,
oldcontext = MemoryContextSwitchTo(estate->es_query_cxt);
YBCPgInsertOnConflictKeyInfo info = {existing_slot};
YBCPgYBTupleIdDescriptor *descr =
YBCBuildNonNullUniqueIndexYBTupleId(index, existing_values);
YBCBuildNonNullUniqueIndexYBTupleId(index, existing_values, NULL);
HandleYBStatus(YBCPgAddInsertOnConflictKey(descr, &info));
MemoryContextSwitchTo(oldcontext);

Expand Down
2 changes: 1 addition & 1 deletion src/postgres/src/backend/executor/nodeModifyTable.c
Original file line number Diff line number Diff line change
Expand Up @@ -5564,7 +5564,7 @@ YbExecCheckIndexConstraints(EState *estate,
* TODO(jason): revisit when exclusion constraint is supported.
*/
YBCPgYBTupleIdDescriptor *descr =
YBCBuildNonNullUniqueIndexYBTupleId(index, values);
YBCBuildNonNullUniqueIndexYBTupleId(index, values, isnull);

/*
* If this function is invoked from YbExecUpdateAct, we can skip the
Expand Down
16 changes: 4 additions & 12 deletions src/postgres/src/backend/executor/ybcModifyTable.c
Original file line number Diff line number Diff line change
Expand Up @@ -518,15 +518,6 @@ YbIsInsertOnConflictReadBatchingEnabled(ResultRelInfo *resultRelInfo)
/*
* TODO(jason): figure out how to enable triggers.
*/
/*
* TODO(GH#24834): Enable INSERT ON CONFLICT batching for null-not-distinct
* indexes.
*/
for (int i = 0; i < resultRelInfo->ri_NumIndices; i++)
{
if (resultRelInfo->ri_IndexRelationInfo[i]->ii_NullsNotDistinct)
return false;
}
return (IsYBRelation(resultRelInfo->ri_RelationDesc) &&
!IsCatalogRelation(resultRelInfo->ri_RelationDesc) &&
resultRelInfo->ri_BatchSize > 1 &&
Expand All @@ -552,7 +543,7 @@ YbIsInsertOnConflictReadBatchingEnabled(ResultRelInfo *resultRelInfo)
* secondary index.
*/
YBCPgYBTupleIdDescriptor *
YBCBuildNonNullUniqueIndexYBTupleId(Relation unique_index, Datum *values)
YBCBuildNonNullUniqueIndexYBTupleId(Relation unique_index, Datum *values, bool *nulls)
{
Assert(IsYBRelation(unique_index));

Expand Down Expand Up @@ -601,7 +592,7 @@ YBCBuildNonNullUniqueIndexYBTupleId(Relation unique_index, Datum *values)
next_attr->collation_id = ybc_get_attcollation(tupdesc, attnum);
next_attr->attr_num = attnum;
next_attr->datum = values[i];
next_attr->is_null = false;
next_attr->is_null = nulls == NULL ? false : nulls[i];
YBCPgColumnInfo column_info = {0};
HandleYBTableDescStatus(YBCPgGetColumnInfo(ybc_table_desc,
attnum,
Expand Down Expand Up @@ -633,7 +624,8 @@ YBCForeignKeyReferenceCacheDeleteIndex(Relation index, Datum *values, bool *isnu
if (isnulls[i])
return;

YBCPgYBTupleIdDescriptor* descr = YBCBuildNonNullUniqueIndexYBTupleId(index, values);
YBCPgYBTupleIdDescriptor *descr =
YBCBuildNonNullUniqueIndexYBTupleId(index, values, NULL);
HandleYBStatus(YBCPgForeignKeyReferenceCacheDelete(descr));
pfree(descr);
}
Expand Down
2 changes: 1 addition & 1 deletion src/postgres/src/include/executor/ybcModifyTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ extern Datum YBCGetYBTupleIdFromSlot(TupleTableSlot *slot);
extern Datum YBCComputeYBTupleIdFromSlot(Relation rel, TupleTableSlot *slot);

extern YBCPgYBTupleIdDescriptor*
YBCBuildNonNullUniqueIndexYBTupleId(Relation unique_index, Datum *values);
YBCBuildNonNullUniqueIndexYBTupleId(Relation unique_index, Datum *values, bool *nulls);

/*
* Returns if a table has secondary indices.
Expand Down
14 changes: 7 additions & 7 deletions src/postgres/src/test/regress/expected/yb_insert_on_conflict.out
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ INSERT INTO ab_tab VALUES (null, null) ON CONFLICT DO NOTHING;
INSERT INTO ab_tab VALUES (null, 1), (null, 2) ON CONFLICT DO NOTHING;
INSERT INTO ab_tab VALUES (null, 1), (null, 2) ON CONFLICT (a) DO UPDATE SET b = EXCLUDED.b;
SELECT * FROM ab_tab WHERE a IS NULL ORDER BY b;
a | b
a | b
---+---
| 2
(1 row)
Expand All @@ -448,9 +448,9 @@ CREATE UNIQUE INDEX NONCONCURRENTLY ioc_defaults_bc_idx ON ioc_defaults (b, c) N
INSERT INTO ioc_defaults VALUES (1);
INSERT INTO ioc_defaults VALUES (2), (3) ON CONFLICT (b, c) DO UPDATE SET a = EXCLUDED.a;
SELECT * FROM ioc_defaults ORDER BY b, c;
a | b | c
a | b | c
---+----+---
3 | 42 |
3 | 42 |
(1 row)

-- Reset.
Expand All @@ -463,12 +463,12 @@ INSERT INTO ab_tab2 VALUES (123, null), (456, null) ON CONFLICT DO NOTHING;
INSERT INTO ab_tab2 VALUES (null, null);
INSERT INTO ab_tab2 VALUES (123, null), (456, null), (null, 5), (null, null) ON CONFLICT DO NOTHING;
SELECT * FROM ab_tab2 ORDER BY a, b;
a | b
a | b
-----+---
123 |
456 |
123 |
456 |
| 5
|
|
(4 rows)

DELETE FROM ab_tab2;
Expand Down
20 changes: 12 additions & 8 deletions src/postgres/src/test/regress/expected/yb_insert_on_conflict_1.out
Original file line number Diff line number Diff line change
Expand Up @@ -432,21 +432,25 @@ INSERT INTO ab_tab VALUES (null, null) ON CONFLICT DO NOTHING;
-- Multiple rows with NULL values should semantically be treated as the same logical row.
INSERT INTO ab_tab VALUES (null, 1), (null, 2) ON CONFLICT DO NOTHING;
INSERT INTO ab_tab VALUES (null, 1), (null, 2) ON CONFLICT (a) DO UPDATE SET b = EXCLUDED.b;
ERROR: ON CONFLICT DO UPDATE command cannot affect row a second time
HINT: Ensure that no rows proposed for insertion within the same command have duplicate constrained values.
SELECT * FROM ab_tab WHERE a IS NULL ORDER BY b;
a | b
a | b
---+---
| 2
|
(1 row)

DELETE FROM ab_tab;
-- Similarly, columns with default NULL values should be treated as the same logical row.
CREATE UNIQUE INDEX NONCONCURRENTLY ioc_defaults_bc_idx ON ioc_defaults (b, c) NULLS NOT DISTINCT;
INSERT INTO ioc_defaults VALUES (1);
INSERT INTO ioc_defaults VALUES (2), (3) ON CONFLICT (b, c) DO UPDATE SET a = EXCLUDED.a;
ERROR: ON CONFLICT DO UPDATE command cannot affect row a second time
HINT: Ensure that no rows proposed for insertion within the same command have duplicate constrained values.
SELECT * FROM ioc_defaults ORDER BY b, c;
a | b | c
a | b | c
---+----+---
3 | 42 |
1 | 42 |
(1 row)

-- Reset.
Expand All @@ -459,12 +463,12 @@ INSERT INTO ab_tab2 VALUES (123, null), (456, null) ON CONFLICT DO NOTHING;
INSERT INTO ab_tab2 VALUES (null, null);
INSERT INTO ab_tab2 VALUES (123, null), (456, null), (null, 5), (null, null) ON CONFLICT DO NOTHING;
SELECT * FROM ab_tab2 ORDER BY a, b;
a | b
a | b
-----+---
123 |
456 |
123 |
456 |
| 5
|
|
(4 rows)

DELETE FROM ab_tab2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,6 @@ SELECT * FROM ioc_defaults ORDER BY b, c;
DELETE FROM ab_tab WHERE a IS null;
DROP INDEX ioc_defaults_bc_idx;
TRUNCATE ioc_defaults;

-- NULLS NOT DISTINCT
DROP INDEX ah_idx;
CREATE UNIQUE INDEX NONCONCURRENTLY ah_idx ON ab_tab (a HASH) NULLS NOT DISTINCT;
Expand Down

0 comments on commit a1bc398

Please sign in to comment.