|
17 | 17 | import io.delta.kernel.internal.deletionvectors.Base85Codec; |
18 | 18 | import io.trino.filesystem.Location; |
19 | 19 | import io.trino.filesystem.TrinoFileSystem; |
| 20 | +import io.trino.filesystem.TrinoInput; |
20 | 21 | import io.trino.filesystem.TrinoInputFile; |
21 | 22 | import io.trino.plugin.deltalake.transactionlog.DeletionVectorEntry; |
22 | 23 | import io.trino.spi.TrinoException; |
23 | 24 | import org.roaringbitmap.RoaringBitmap; |
24 | 25 |
|
25 | | -import java.io.DataInputStream; |
26 | 26 | import java.io.DataOutputStream; |
27 | 27 | import java.io.IOException; |
28 | 28 | import java.nio.ByteBuffer; |
|
32 | 32 | import java.util.zip.Checksum; |
33 | 33 |
|
34 | 34 | import static com.google.common.base.Preconditions.checkArgument; |
35 | | -import static com.google.common.base.Preconditions.checkState; |
| 35 | +import static io.airlift.slice.SizeOf.SIZE_OF_INT; |
36 | 36 | import static io.delta.kernel.internal.deletionvectors.Base85Codec.decodeUUID; |
37 | 37 | import static io.delta.kernel.internal.deletionvectors.Base85Codec.encodeUUID; |
38 | 38 | import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA; |
@@ -63,7 +63,7 @@ public static RoaringBitmapArray readDeletionVectors(TrinoFileSystem fileSystem, |
63 | 63 | { |
64 | 64 | if (deletionVector.storageType().equals(UUID_MARKER)) { |
65 | 65 | TrinoInputFile inputFile = fileSystem.newInputFile(location.appendPath(toFileName(deletionVector.pathOrInlineDv()))); |
66 | | - byte[] buffer = readDeletionVector(inputFile, deletionVector.offset().orElseThrow(), deletionVector.sizeInBytes()); |
| 66 | + ByteBuffer buffer = readDeletionVector(inputFile, deletionVector.offset().orElseThrow(), deletionVector.sizeInBytes()); |
67 | 67 | return deserializeDeletionVectors(buffer); |
68 | 68 | } |
69 | 69 | if (deletionVector.storageType().equals(INLINE_MARKER) || deletionVector.storageType().equals(PATH_MARKER)) { |
@@ -125,38 +125,40 @@ public static String toFileName(String pathOrInlineDv) |
125 | 125 | return "%sdeletion_vector_%s.bin".formatted(prefix, uuid); |
126 | 126 | } |
127 | 127 |
|
128 | | - public static byte[] readDeletionVector(TrinoInputFile inputFile, int offset, int expectedSize) |
| 128 | + public static ByteBuffer readDeletionVector(TrinoInputFile inputFile, int offset, int expectedSize) |
129 | 129 | throws IOException |
130 | 130 | { |
131 | | - byte[] bytes = new byte[expectedSize]; |
132 | | - try (DataInputStream inputStream = new DataInputStream(inputFile.newStream())) { |
133 | | - checkState(inputStream.skip(offset) == offset); |
134 | | - int actualSize = inputStream.readInt(); |
| 131 | + try (TrinoInput input = inputFile.newInput()) { |
| 132 | + ByteBuffer buffer = input.readFully(offset, SIZE_OF_INT + expectedSize + SIZE_OF_INT).toByteBuffer(); |
| 133 | + int actualSize = buffer.getInt(0); |
135 | 134 | if (actualSize != expectedSize) { |
136 | 135 | throw new TrinoException(DELTA_LAKE_INVALID_SCHEMA, "The size of deletion vector %s expects %s but got %s".formatted(inputFile.location(), expectedSize, actualSize)); |
137 | 136 | } |
138 | | - inputStream.readFully(bytes); |
139 | | - int checksum = inputStream.readInt(); |
140 | | - if (calculateChecksum(bytes) != checksum) { |
| 137 | + int checksum = buffer.getInt(SIZE_OF_INT + expectedSize); |
| 138 | + if (calculateChecksum(buffer.array(), buffer.arrayOffset() + SIZE_OF_INT, expectedSize) != checksum) { |
141 | 139 | throw new TrinoException(DELTA_LAKE_INVALID_SCHEMA, "Checksum mismatch for deletion vector: " + inputFile.location()); |
142 | 140 | } |
| 141 | + return buffer.slice(SIZE_OF_INT, expectedSize).order(LITTLE_ENDIAN); |
143 | 142 | } |
144 | | - return bytes; |
145 | 143 | } |
146 | 144 |
|
147 | 145 | private static int calculateChecksum(byte[] data) |
| 146 | + { |
| 147 | + return calculateChecksum(data, 0, data.length); |
| 148 | + } |
| 149 | + |
| 150 | + private static int calculateChecksum(byte[] data, int offset, int length) |
148 | 151 | { |
149 | 152 | // Delta Lake allows integer overflow intentionally because it's fine from checksum perspective |
150 | 153 | // https://github.com/delta-io/delta/blob/039a29abb4abc72ac5912651679233dc983398d6/spark/src/main/scala/org/apache/spark/sql/delta/storage/dv/DeletionVectorStore.scala#L115 |
151 | 154 | Checksum crc = new CRC32(); |
152 | | - crc.update(data); |
| 155 | + crc.update(data, offset, length); |
153 | 156 | return (int) crc.getValue(); |
154 | 157 | } |
155 | 158 |
|
156 | | - private static RoaringBitmapArray deserializeDeletionVectors(byte[] bytes) |
| 159 | + private static RoaringBitmapArray deserializeDeletionVectors(ByteBuffer buffer) |
157 | 160 | throws IOException |
158 | 161 | { |
159 | | - ByteBuffer buffer = ByteBuffer.wrap(bytes).order(LITTLE_ENDIAN); |
160 | 162 | checkArgument(buffer.order() == LITTLE_ENDIAN, "Byte order must be little endian: %s", buffer.order()); |
161 | 163 | int magicNumber = buffer.getInt(); |
162 | 164 | if (magicNumber == PORTABLE_ROARING_BITMAP_MAGIC_NUMBER) { |
|
0 commit comments