From c1cce06a972e8dab1a1b735fd3e676ddaefa97ee Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Wed, 22 Jan 2025 15:38:36 -0500 Subject: [PATCH] metamorphic: ignore rangedels for deduplicating point prefixes When constructing an external object, only the first point key with each unique prefix is added to the sstable. The key manager strives to maintain consistency with this behavior to ensure that the it maintains correct key histories after an ingestion of an external object. The key manager however also will discretize range deletions, sometimes creating keyMetas for keys that do not exist as point keys on the associated writer. The presence of these was confusing the key manager's calculation of which keys would end up in the external object. This commit updates the external object calculation to account for the existence of the range deletions. Future work may refactor the tracking of range deletions to track range deletions separately from point keys and as continuous spans. Fix #4267. --- metamorphic/key_manager.go | 33 +++++++++++++++++++++----------- metamorphic/ops.go | 6 +++--- metamorphic/testdata/key_manager | 28 +++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 14 deletions(-) diff --git a/metamorphic/key_manager.go b/metamorphic/key_manager.go index 5cdf944f1f..e0c3f6b8bd 100644 --- a/metamorphic/key_manager.go +++ b/metamorphic/key_manager.go @@ -25,6 +25,16 @@ type keyMeta struct { history keyHistory } +// all returns true if all the operations in the history are of the provided op +// types. +func (m *keyMeta) all(typs ...OpType) bool { + anyNotOfProvidedType := slices.ContainsFunc(m.history, func(i keyHistoryItem) bool { + notOfProvidedType := !slices.Contains(typs, i.opType) + return notOfProvidedType + }) + return !anyNotOfProvidedType +} + func (m *keyMeta) clear() { m.history = m.history[:0] } @@ -170,14 +180,9 @@ func (b *bounds) Expand(cmp base.Compare, other bounds) { // infallible, because a runtime failure would cause the key manager's state to // diverge from the runtime object state. Ingestion operations pose an obstacle, // because the generator may generate ingestions that fail due to overlapping -// sstables. Today, this complication is sidestepped by avoiding ingestion of -// multiple batches containing deletes or single deletes since loss of those -// specific operations on a key are what we cannot tolerate (doing SingleDelete -// on a key that has not been written to because the Set was lost is harmless). -// -// TODO(jackson): Instead, compute smallest and largest bounds of batches so -// that we know at generation-time whether or not an ingestion operation will -// fail and can avoid updating key state. +// sstables. The key manager tracks the bounds of keys written to each object, +// and uses this to infer at generation time which ingestions will fail, +// maintaining key histories accordingly. type keyManager struct { kf KeyFormat @@ -307,12 +312,18 @@ func (k *keyManager) KeysForExternalIngest(obj externalObjWithBounds) []keyMeta n := k.kf.Comparer.Split(km.key) km.key = append(km.key[:n:n], obj.syntheticSuffix...) } + // We only keep the first of every unique prefix for external ingests. + // See the use of uniquePrefixes in newExternalObjOp. However, we must + // ignore keys that only exist as a discretized range deletion for this + // purpose, so we only update the lastPrefix if the key contains any + // non-range delete keys. + // TODO(jackson): Track range deletions separately. if lastPrefix != nil && k.kf.Comparer.Equal(lastPrefix, km.key[:k.kf.Comparer.Split(km.key)]) { - // We only keep the first of every unique prefix for external ingests. - // See the use of uniquePrefixes in newExternalObjOp. continue } - lastPrefix = append(lastPrefix[:0], km.key[:k.kf.Comparer.Split(km.key)]...) + if !km.all(OpWriterDeleteRange) { + lastPrefix = append(lastPrefix[:0], km.key[:k.kf.Comparer.Split(km.key)]...) + } if k.kf.Comparer.Compare(km.key, obj.bounds.Start) >= 0 && k.kf.Comparer.Compare(km.key, obj.bounds.End) < 0 { res = append(res, km) diff --git a/metamorphic/ops.go b/metamorphic/ops.go index 9b133a2ac8..bb2bafad82 100644 --- a/metamorphic/ops.go +++ b/metamorphic/ops.go @@ -1922,9 +1922,9 @@ func (o *dbRatchetFormatMajorVersionOp) run(t *Test, h historyRecorder) { // versions, making the presence of an error and the error message itself // non-deterministic if we attempt to upgrade to an older version. // - //Regardless, subsequent operations should behave identically, which is what - //we're really aiming to test by including this format major version ratchet - //operation. + // Regardless, subsequent operations should behave identically, which is + // what we're really aiming to test by including this format major version + // ratchet operation. if t.getDB(o.dbID).FormatMajorVersion() < o.vers { err = t.getDB(o.dbID).RatchetFormatMajorVersion(o.vers) } diff --git a/metamorphic/testdata/key_manager b/metamorphic/testdata/key_manager index 3c0abda124..1775cbf17d 100644 --- a/metamorphic/testdata/key_manager +++ b/metamorphic/testdata/key_manager @@ -475,3 +475,31 @@ conflicts merging batch1 into db1: "foo" conflicts merging batch1 (collapsed) into db1: (none) [db1.Merge("foo", "foo")] can singledel on db1: (none) + +# Regression test for #4267. In the below example, foo@3 must not be eligible +# for single delete on db1 at the end of the test. The external object will end +# up containing the point key foo@3 (NOT foo@5). + +reset +---- + +run +add-new-key foo@5 +op db1.Set("foo@5", "foo5") +add-new-key foo@3 +op db1.Set("foo@3", "foo3") +op batch1.DeleteRange("a", "foo@3") +op batch1.Set("foo@3", "foo3batch1") +op external1 = batch1.NewExternalObj() +op db1.IngestExternalFiles(external1, "a", "z", "", "") +singledel-keys db1 +---- +"foo@5" is new +[db1.Set("foo@5", "foo5")] +"foo@3" is new +[db1.Set("foo@3", "foo3")] +[batch1.DeleteRange("a", "foo@3")] +[batch1.Set("foo@3", "foo3batch1")] +[external1 = batch1.NewExternalObj()] +[db1.IngestExternalFiles(external1, "a" /* start */, "z" /* end */, "" /* syntheticSuffix */, "" /* syntheticPrefix */)] +can singledel on db1: "foo@5"