Skip to content

Commit c101104

Browse files
committed
Avoid multiple small read requests on delta checkpoints
1 parent 1ceaf88 commit c101104

File tree

2 files changed: 1 addition and 35 deletions (+1 −35 lines changed).

plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -230,7 +230,7 @@ public CheckpointEntryIterator(
230230
parquetReaderOptions,
231231
Optional.empty(),
232232
domainCompactionThreshold,
233-
OptionalLong.empty());
233+
OptionalLong.of(fileSize));
234234

235235
this.pageSource = (ParquetPageSource) pageSource.get();
236236
try {

plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAlluxioCacheFileOperations.java

Lines changed: 0 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -200,8 +200,6 @@ public void testCacheCheckpointAndExtendedStatsFileOperations()
200200
"SELECT * FROM checkpoint_and_extended_stats",
201201
ImmutableMultiset.<CacheOperation>builder()
202202
.addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000003.checkpoint.parquet", 0, 7077), 2)
203-
.addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000003.checkpoint.parquet", 706, 972), 1)
204-
.addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000003.checkpoint.parquet", 4, 624), 1)
205203
.addCopies(new CacheOperation("InputFile.length", "00000000000000000003.checkpoint.parquet"), 2)
206204
.add(new CacheOperation("InputFile.length", "00000000000000000004.json"))
207205
.addAll(Stream.of("int_part=10/string_part=part1/", "int_part=20/string_part=part2/", "int_part=__HIVE_DEFAULT_PARTITION__/string_part=__HIVE_DEFAULT_PARTITION__/")
@@ -217,8 +215,6 @@ public void testCacheCheckpointAndExtendedStatsFileOperations()
217215
"EXPLAIN ANALYZE SELECT * FROM checkpoint_and_extended_stats",
218216
ImmutableMultiset.<CacheOperation>builder()
219217
.addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000003.checkpoint.parquet", 0, 7077), 3)
220-
.addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000003.checkpoint.parquet", 706, 972), 2)
221-
.addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000003.checkpoint.parquet", 4, 624), 1)
222218
.addCopies(new CacheOperation("InputFile.length", "00000000000000000003.checkpoint.parquet"), 3)
223219
.add(new CacheOperation("InputFile.length", "00000000000000000004.json"))
224220
.addAll(Stream.of("int_part=10/string_part=part1/", "int_part=20/string_part=part2/", "int_part=__HIVE_DEFAULT_PARTITION__/string_part=__HIVE_DEFAULT_PARTITION__/")
@@ -340,8 +336,6 @@ public void testTimeTravelWithLastCheckpoint()
340336
assertFileSystemAccesses(
341337
"SELECT * FROM time_travel_with_last_checkpoint FOR VERSION AS OF 2",
342338
ImmutableMultiset.<CacheOperation>builder()
343-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000002.checkpoint.parquet", 4, 561))
344-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000002.checkpoint.parquet", 643, 767))
345339
.addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000002.checkpoint.parquet", 0, 5884), 2)
346340
.addCopies(new CacheOperation("InputFile.length", "00000000000000000002.checkpoint.parquet"), 2)
347341
.add(new CacheOperation("InputFile.exists", "00000000000000000002.json"))
@@ -353,8 +347,6 @@ public void testTimeTravelWithLastCheckpoint()
353347
assertFileSystemAccesses(
354348
"SELECT * FROM time_travel_with_last_checkpoint FOR VERSION AS OF 2",
355349
ImmutableMultiset.<CacheOperation>builder()
356-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000002.checkpoint.parquet", 4, 561))
357-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000002.checkpoint.parquet", 643, 767))
358350
.addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000002.checkpoint.parquet", 0, 5884), 2)
359351
.addCopies(new CacheOperation("InputFile.length", "00000000000000000002.checkpoint.parquet"), 2)
360352
.addCopies(new CacheOperation("Alluxio.readCached", "data", 0, 199), 3)
@@ -396,8 +388,6 @@ public void testTimeTravelWithoutLastCheckpoint()
396388
.add(new CacheOperation("Input.readFully", "00000000000000000002.checkpoint.parquet", 0, 5884))
397389
.add(new CacheOperation("Alluxio.writeCache", "00000000000000000002.checkpoint.parquet", 0, 5884))
398390
.addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000002.checkpoint.parquet", 0, 5884), 2)
399-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000002.checkpoint.parquet", 4, 561))
400-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000002.checkpoint.parquet", 643, 767))
401391
.addCopies(new CacheOperation("InputFile.length", "00000000000000000002.checkpoint.parquet"), 2)
402392
.add(new CacheOperation("InputFile.exists", "00000000000000000002.json"))
403393
.add(new CacheOperation("Input.readFully", "data", 0, 199))
@@ -409,8 +399,6 @@ public void testTimeTravelWithoutLastCheckpoint()
409399
"SELECT * FROM time_travel_without_last_checkpoint FOR VERSION AS OF 2",
410400
ImmutableMultiset.<CacheOperation>builder()
411401
.addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000002.checkpoint.parquet", 0, 5884), 2)
412-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000002.checkpoint.parquet", 4, 561))
413-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000002.checkpoint.parquet", 643, 767))
414402
.addCopies(new CacheOperation("InputFile.length", "00000000000000000002.checkpoint.parquet"), 2)
415403
.add(new CacheOperation("InputFile.exists", "00000000000000000002.json"))
416404
.addCopies(new CacheOperation("Alluxio.readCached", "data", 0, 199), 3)
@@ -444,9 +432,6 @@ public void testReadV2CheckpointJson()
444432
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 0, 9176))
445433
.add(new CacheOperation("Input.readFully", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 0, 9176))
446434
.add(new CacheOperation("Alluxio.writeCache", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 0, 9176))
447-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 1224, 143))
448-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 4, 727))
449-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 1994, 188))
450435
.add(new CacheOperation("InputFile.length", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet"))
451436
.add(new CacheOperation("InputFile.length", "00000000000000000002.json"))
452437
.add(new CacheOperation("InputFile.newStream", "_last_checkpoint"))
@@ -460,9 +445,6 @@ public void testReadV2CheckpointJson()
460445
.addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json", 0, 765), 2)
461446
.addCopies(new CacheOperation("InputFile.length", "00000000000000000001.checkpoint.73a4ddb8-2bfc-40d8-b09f-1b6a0abdfb04.json"), 2)
462447
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 0, 9176))
463-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 1224, 143))
464-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 4, 727))
465-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet", 1994, 188))
466448
.add(new CacheOperation("InputFile.length", "00000000000000000001.checkpoint.0000000001.0000000001.90cf4e21-dbaa-41d6-8ae5-6709cfbfbfe0.parquet"))
467449
.add(new CacheOperation("InputFile.length", "00000000000000000002.json"))
468450
.add(new CacheOperation("InputFile.newStream", "_last_checkpoint"))
@@ -483,24 +465,15 @@ public void testReadV2CheckpointParquet()
483465
.add(new CacheOperation("Input.readFully", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 0, 19019))
484466
.addCopies(new CacheOperation("InputFile.length", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet"), 2)
485467
.add(new CacheOperation("Alluxio.writeCache", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 0, 19019))
486-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 1304, 1266))
487-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 3155, 87))
488468
.build());
489469

490470
assertUpdate("CALL system.flush_metadata_cache(schema_name => CURRENT_SCHEMA, table_name => 'v2_checkpoint_parquet')");
491471
assertFileSystemAccesses(
492472
"SELECT * FROM v2_checkpoint_parquet",
493473
ImmutableMultiset.<CacheOperation>builder()
494474
.addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 0, 19019), 2)
495-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 100, 2470))
496-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 1304, 1266))
497-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 3155, 87))
498-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 3829, 143))
499475
.addCopies(new CacheOperation("InputFile.length", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet"), 4)
500476
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 0, 9415))
501-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 4, 758))
502-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 1255, 143))
503-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 2040, 199))
504477
.add(new CacheOperation("InputFile.length", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet"))
505478
.add(new CacheOperation("Input.readFully", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 0, 9415))
506479
.add(new CacheOperation("Alluxio.writeCache", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 0, 9415))
@@ -514,15 +487,8 @@ public void testReadV2CheckpointParquet()
514487
"SELECT * FROM v2_checkpoint_parquet",
515488
ImmutableMultiset.<CacheOperation>builder()
516489
.addCopies(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 0, 19019), 2)
517-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 100, 2470))
518-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 1304, 1266))
519-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 3155, 87))
520-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet", 3829, 143))
521490
.addCopies(new CacheOperation("InputFile.length", "00000000000000000001.checkpoint.156b3304-76b2-49c3-a9a1-626f07df27c9.parquet"), 4)
522491
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 0, 9415))
523-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 4, 758))
524-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 1255, 143))
525-
.add(new CacheOperation("Alluxio.readCached", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet", 2040, 199))
526492
.add(new CacheOperation("InputFile.length", "00000000000000000001.checkpoint.0000000001.0000000001.03288d7e-af16-44ed-829c-196064a71812.parquet"))
527493
.add(new CacheOperation("InputFile.length", "00000000000000000002.json"))
528494
.add(new CacheOperation("InputFile.newStream", "_last_checkpoint"))

0 commit comments

Comments (0)