Skip to content

Commit 4628f5d

Browse files
committed
Keep track of last 10k seen keys.
1 parent e8029e8 commit 4628f5d

File tree

2 files changed

+20
-35
lines changed

2 files changed

+20
-35
lines changed
Lines changed: 15 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { logger } from '@powersync/lib-services-framework';
22
import { bson, InternalOpId } from '@powersync/service-core';
3+
import { LRUCache } from 'lru-cache';
34
import { PowerSyncMongo } from './db.js';
45

56
export class MongoParameterCompactor {
@@ -9,24 +10,6 @@ export class MongoParameterCompactor {
910
private checkpoint: InternalOpId
1011
) {}
1112

12-
/**
13-
* This is the oldest checkpoint that we consider safe to still use. We clean up old parameter data,
14-
* but no data that would be used by this checkpoint.
15-
*
16-
* Specifically, we return a checkpoint that has been available for at least 5 minutes, then
17-
* we can delete data only used for checkpoints older than that.
18-
*
19-
* @returns null if there is no safe checkpoint available.
20-
*/
21-
async getActiveCheckpoint(): Promise<InternalOpId | null> {
22-
const syncRules = await this.db.sync_rules.findOne({ _id: this.group_id });
23-
if (syncRules == null) {
24-
return null;
25-
}
26-
27-
return syncRules.last_checkpoint;
28-
}
29-
3013
async compact() {
3114
logger.info(`Compacting parameters for group ${this.group_id} up to checkpoint ${this.checkpoint}`);
3215
// This is the currently-active checkpoint.
@@ -53,7 +36,10 @@ export class MongoParameterCompactor {
5336
}
5437
);
5538

56-
let lastDoc: RawParameterData | null = null;
39+
// The index doesn't cover sorting by key, so we keep our own cache of the last seen _id for each key.
40+
let lastByKey = new LRUCache<string, InternalOpId>({
41+
max: 10_000
42+
});
5743
let removeIds: InternalOpId[] = [];
5844

5945
while (await cursor.hasNext()) {
@@ -62,19 +48,18 @@ export class MongoParameterCompactor {
6248
if (doc._id >= checkpoint) {
6349
continue;
6450
}
65-
const rawDoc: RawParameterData = {
66-
_id: doc._id,
67-
// Serializing to a Buffer is an easy way to check for exact equality of arbitrary BSON values.
68-
data: bson.serialize({
69-
key: doc.key,
70-
lookup: doc.lookup
51+
const uniqueKey = (
52+
bson.serialize({
53+
k: doc.key,
54+
l: doc.lookup
7155
}) as Buffer
72-
};
73-
if (lastDoc != null && lastDoc.data.equals(rawDoc.data) && lastDoc._id < doc._id) {
74-
removeIds.push(lastDoc._id);
56+
).toString('base64');
57+
const previous = lastByKey.get(uniqueKey);
58+
if (previous != null && previous < doc._id) {
59+
// We have a newer entry for the same key, so we can remove the old one.
60+
removeIds.push(previous);
7561
}
76-
77-
lastDoc = rawDoc;
62+
lastByKey.set(uniqueKey, doc._id);
7863
}
7964

8065
if (removeIds.length >= 1000) {
@@ -91,8 +76,3 @@ export class MongoParameterCompactor {
9176
logger.info('Parameter compaction completed');
9277
}
9378
}
94-
95-
interface RawParameterData {
96-
_id: InternalOpId;
97-
data: Buffer;
98-
}

modules/module-mongodb-storage/src/storage/implementation/MongoSyncBucketStorage.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,11 @@ export class MongoSyncBucketStorage
290290
const lookupFilter = lookups.map((lookup) => {
291291
return storage.serializeLookup(lookup);
292292
});
293+
// This query does not use indexes super efficiently, apart from the lookup filter.
294+
// From some experimentation I could make individual lookups more efficient using an index
295+
// on {'key.g': 1, lookup: 1, 'key.t': 1, 'key.k': 1, _id: -1},
296+
// but could not do the same using $group.
297+
// For now, just rely on compacting to remove extraneous data.
293298
const rows = await this.db.bucket_parameters
294299
.aggregate(
295300
[

0 commit comments

Comments
 (0)