diff --git a/docs/source/delta-kernel-java.md b/docs/source/delta-kernel-java.md index a5b3494129f..eb1bf84d442 100644 --- a/docs/source/delta-kernel-java.md +++ b/docs/source/delta-kernel-java.md @@ -99,14 +99,14 @@ Snapshot mySnapshot = myTable.getLatestSnapshot(myEngine); Now that we have a consistent snapshot view of the table, we can query more details about the table. For example, you can get the version and schema of this snapshot. ```java -long version = mySnapshot.getVersion(myEngine); -StructType tableSchema = mySnapshot.getSchema(myEngine); +long version = mySnapshot.getVersion(); +StructType tableSchema = mySnapshot.getSchema(); ``` Next, to read the table data, we have to *build* a [`Scan`](https://delta-io.github.io/delta/snapshot/kernel-api/java/io/delta/kernel/Scan.html) object. In order to build a `Scan` object, create a [`ScanBuilder`](https://delta-io.github.io/delta/snapshot/kernel-api/java/io/delta/kernel/ScanBuilder.html) object which optionally allows selecting a subset of columns to read or setting a query filter. For now, ignore these optional settings. ```java -Scan myScan = mySnapshot.getScanBuilder(myEngine).build() +Scan myScan = mySnapshot.getScanBuilder().build() // Common information about scanning for all data files to read. Row scanState = myScan.getScanState(myEngine) @@ -224,9 +224,7 @@ Predicate filter = new Predicate( "=", Arrays.asList(new Column("columnX"), Literal.ofInt(1))); -Scan myFilteredScan = mySnapshot.buildScan(engine) - .withFilter(myEngine, filter) - .build() +Scan myFilteredScan = mySnapshot.getScanBuilder().withFilter(filter).build() // Subset of the given filter that is not guaranteed to be satisfied by // Delta Kernel when it returns data. This filter is used by Delta Kernel @@ -845,10 +843,7 @@ import io.delta.kernel.types.*; StructType readSchema = ... ; // convert engine schema Predicate filterExpr = ... ; // convert engine filter expression -Scan myScan = mySnapshot.buildScan(engine) - .withFilter(myEngine, filterExpr) - .withReadSchema(myEngine, readSchema) - .build() +Scan myScan = mySnapshot.getScanBuilder().withFilter(filterExpr).withReadSchema(readSchema).build(); ``` diff --git a/docs/source/delta-kernel.md b/docs/source/delta-kernel.md index 25738edc3ec..cdbca5807bf 100644 --- a/docs/source/delta-kernel.md +++ b/docs/source/delta-kernel.md @@ -22,8 +22,8 @@ Here is an example of a simple table scan with a filter: Engine myEngine = DefaultEngine.create() ; // define a engine (more details below) Table myTable = Table.forPath("/delta/table/path"); // define what table to scan Snapshot mySnapshot = myTable.getLatestSnapshot(myEngine); // define which version of table to scan -Scan myScan = mySnapshot.getScanBuilder(myEngine) // specify the scan details - .withFilters(myEngine, scanFilter) +Scan myScan = mySnapshot.getScanBuilder() // specify the scan details + .withFilters(scanFilter) .build(); CloseableIterator physicalData = // read the Parquet data files .. read from Parquet data files ... diff --git a/kernel/USER_GUIDE.md b/kernel/USER_GUIDE.md index 41231ab7a14..f3a635af41b 100644 --- a/kernel/USER_GUIDE.md +++ b/kernel/USER_GUIDE.md @@ -95,8 +95,8 @@ Snapshot mySnapshot = myTable.getLatestSnapshot(myEngine); Now that we have a consistent snapshot view of the table, we can query more details about the table. For example, you can get the version and schema of this snapshot. ```java -long version = mySnapshot.getVersion(myEngine); -StructType tableSchema = mySnapshot.getSchema(myEngine); +long version = mySnapshot.getVersion(); +StructType tableSchema = mySnapshot.getSchema(); ``` Next, to read the table data, we have to *build* a [`Scan`](https://delta-io.github.io/delta/snapshot/kernel-api/java/io/delta/kernel/Scan.html) object. In order to build a `Scan` object, create a [`ScanBuilder`](https://delta-io.github.io/delta/snapshot/kernel-api/java/io/delta/kernel/ScanBuilder.html) object which optionally allows selecting a subset of columns to read or setting a query filter. For now, ignore these optional settings. @@ -220,9 +220,7 @@ Predicate filter = new Predicate( "=", Arrays.asList(new Column("columnX"), Literal.ofInt(1))); -Scan myFilteredScan = mySnapshot.buildScan(engine) - .withFilter(myEngine, filter) - .build() +Scan myFilteredScan = mySnapshot.getScanBuilder().withFilter(filter).build() // Subset of the given filter that is not guaranteed to be satisfied by // Delta Kernel when it returns data. This filter is used by Delta Kernel @@ -865,10 +863,7 @@ import io.delta.kernel.types.*; StructType readSchema = ... ; // convert engine schema Predicate filterExpr = ... ; // convert engine filter expression -Scan myScan = mySnapshot.buildScan(engine) - .withFilter(myEngine, filterExpr) - .withReadSchema(myEngine, readSchema) - .build() +Scan myScan = mySnapshot.getScanBuilder().withFilter(filterExpr).withReadSchema(readSchema).build(); ``` diff --git a/kernel/examples/kernel-examples/src/main/java/io/delta/kernel/examples/MultiThreadedTableReader.java b/kernel/examples/kernel-examples/src/main/java/io/delta/kernel/examples/MultiThreadedTableReader.java index e5eefd668b0..7d450bab83e 100644 --- a/kernel/examples/kernel-examples/src/main/java/io/delta/kernel/examples/MultiThreadedTableReader.java +++ b/kernel/examples/kernel-examples/src/main/java/io/delta/kernel/examples/MultiThreadedTableReader.java @@ -85,10 +85,10 @@ public int show(int limit, Optional> columnsOpt, Optional> columnsOpt, OptionalreadSchema. If the builder already has a projection applied, calling * this again replaces the existing projection. * - * @param engine {@link Engine} instance to use in Delta Kernel. * @param readSchema Subset of columns to read from the Delta table. * @return A {@link ScanBuilder} with projection pruning. */ - ScanBuilder withReadSchema(Engine engine, StructType readSchema); + ScanBuilder withReadSchema(StructType readSchema); /** @return Build the {@link Scan instance} */ Scan build(); diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/ScanBuilderImpl.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/ScanBuilderImpl.java index 4aa5b1c1e2b..d725f8dbf4c 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/internal/ScanBuilderImpl.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/ScanBuilderImpl.java @@ -18,7 +18,6 @@ import io.delta.kernel.Scan; import io.delta.kernel.ScanBuilder; -import io.delta.kernel.engine.Engine; import io.delta.kernel.expressions.Predicate; import io.delta.kernel.internal.actions.Metadata; import io.delta.kernel.internal.actions.Protocol; @@ -55,7 +54,7 @@ public ScanBuilderImpl( } @Override - public ScanBuilder withFilter(Engine engine, Predicate predicate) { + public ScanBuilder withFilter(Predicate predicate) { if (this.predicate.isPresent()) { throw new IllegalArgumentException("There already exists a filter in current builder"); } @@ -64,7 +63,7 @@ public ScanBuilder withFilter(Engine engine, Predicate predicate) { } @Override - public ScanBuilder withReadSchema(Engine engine, StructType readSchema) { + public ScanBuilder withReadSchema(StructType readSchema) { // TODO: validate the readSchema is a subset of the table schema this.readSchema = readSchema; return this; diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/PartitionUtils.java b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/PartitionUtils.java index 33fa2af6d68..1877100dc19 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/PartitionUtils.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/PartitionUtils.java @@ -55,7 +55,7 @@ public static boolean partitionExists( io.delta.kernel.internal.util.PartitionUtils.validatePredicateOnlyOnPartitionColumns( partitionPredicate, snapshotPartColNames); - final Scan scan = snapshot.getScanBuilder().withFilter(engine, partitionPredicate).build(); + final Scan scan = snapshot.getScanBuilder().withFilter(partitionPredicate).build(); try (CloseableIterator columnarBatchIter = scan.getScanFiles(engine)) { while (columnarBatchIter.hasNext()) { diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/ScanSuite.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/ScanSuite.scala index 5e01230fcf7..49992ce6ecc 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/ScanSuite.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/ScanSuite.scala @@ -97,15 +97,13 @@ class ScanSuite extends AnyFunSuite with TestUtils with ExpressionTestUtils with val snapshot = latestSnapshot(tablePath) hits.foreach { predicate => val scanFiles = collectScanFileRows( - snapshot.getScanBuilder() - .withFilter(defaultEngine, predicate) - .build()) + snapshot.getScanBuilder().withFilter(predicate).build()) assert(scanFiles.nonEmpty, s"Expected hit but got miss for $predicate") } misses.foreach { predicate => val scanFiles = collectScanFileRows( snapshot.getScanBuilder() - .withFilter(defaultEngine, predicate) + .withFilter(predicate) .build()) assert(scanFiles.isEmpty, s"Expected miss but got hit for $predicate\n" + s"Returned scan files have stats: ${getScanFileStats(scanFiles)}" @@ -121,9 +119,7 @@ class ScanSuite extends AnyFunSuite with TestUtils with ExpressionTestUtils with val snapshot = latestSnapshot(tablePath) filterToNumExpFiles.foreach { case (filter, numExpFiles) => val scanFiles = collectScanFileRows( - snapshot.getScanBuilder() - .withFilter(defaultEngine, filter) - .build()) + snapshot.getScanBuilder().withFilter(filter).build()) assert(scanFiles.length == numExpFiles, s"Expected $numExpFiles but found ${scanFiles.length} for $filter") } @@ -1010,7 +1006,7 @@ class ScanSuite extends AnyFunSuite with TestUtils with ExpressionTestUtils with predicate: Predicate, expNumPartitions: Int, expNumFiles: Long): Unit = { val snapshot = latestSnapshot(tableDir.getCanonicalPath) val scanFiles = collectScanFileRows( - snapshot.getScanBuilder().withFilter(defaultEngine, predicate).build()) + snapshot.getScanBuilder().withFilter(predicate).build()) assert(scanFiles.length == expNumFiles, s"Expected $expNumFiles but found ${scanFiles.length} for $predicate") @@ -1496,7 +1492,7 @@ class ScanSuite extends AnyFunSuite with TestUtils with ExpressionTestUtils with val partFilter = equals(new Column("part"), ofInt(1)) verifyNoStatsColumn( snapshot(engineDisallowedStatsReads) - .getScanBuilder().withFilter(engine, partFilter).build() + .getScanBuilder().withFilter(partFilter).build() .getScanFiles(engine)) // no eligible data skipping filter --> don't read stats @@ -1505,7 +1501,7 @@ class ScanSuite extends AnyFunSuite with TestUtils with ExpressionTestUtils with ofInt(1)) verifyNoStatsColumn( snapshot(engineDisallowedStatsReads) - .getScanBuilder().withFilter(engine, nonEligibleFilter).build() + .getScanBuilder().withFilter(nonEligibleFilter).build() .getScanFiles(engine)) } @@ -1543,9 +1539,7 @@ class ScanSuite extends AnyFunSuite with TestUtils with ExpressionTestUtils with val engine = engineVerifyJsonParseSchema(verifySchema(expectedCols)) collectScanFileRows( Table.forPath(engine, path).getLatestSnapshot(engine) - .getScanBuilder() - .withFilter(engine, predicate) - .build(), + .getScanBuilder().withFilter(predicate).build(), engine = engine) } } @@ -1573,7 +1567,6 @@ class ScanSuite extends AnyFunSuite with TestUtils with ExpressionTestUtils with latestSnapshot(tempDir.getCanonicalPath) .getScanBuilder() .withFilter( - defaultEngine, greaterThan( new ScalarExpression("+", Seq(col("id"), ofInt(10)).asJava), ofInt(100) @@ -1584,13 +1577,8 @@ class ScanSuite extends AnyFunSuite with TestUtils with ExpressionTestUtils with checkStatsPresent( latestSnapshot(tempDir.getCanonicalPath) .getScanBuilder() - .withFilter( - defaultEngine, - greaterThan( - col("id"), - ofInt(0) - ) - ).build() + .withFilter(greaterThan(col("id"), ofInt(0))) + .build() ) } } @@ -1634,7 +1622,7 @@ class ScanSuite extends AnyFunSuite with TestUtils with ExpressionTestUtils with val scanBuilder = snapshot.getScanBuilder() val scan = predicate match { - case Some(pred) => scanBuilder.withFilter(defaultEngine, pred).build() + case Some(pred) => scanBuilder.withFilter(pred).build() case None => scanBuilder.build() } val scanFiles = scan.asInstanceOf[ScanImpl].getScanFiles(defaultEngine, true) diff --git a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/utils/TestUtils.scala b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/utils/TestUtils.scala index ee73a0ce38f..8b32c392cd7 100644 --- a/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/utils/TestUtils.scala +++ b/kernel/kernel-defaults/src/test/scala/io/delta/kernel/defaults/utils/TestUtils.scala @@ -197,11 +197,11 @@ trait TestUtils extends Assertions with SQLHelper { var scanBuilder = snapshot.getScanBuilder() if (readSchema != null) { - scanBuilder = scanBuilder.withReadSchema(engine, readSchema) + scanBuilder = scanBuilder.withReadSchema(readSchema) } if (filter != null) { - scanBuilder = scanBuilder.withFilter(engine, filter) + scanBuilder = scanBuilder.withFilter(filter) } val scan = scanBuilder.build() @@ -265,7 +265,7 @@ trait TestUtils extends Assertions with SQLHelper { val scan = Table.forPath(engine, tablePath) .getLatestSnapshot(engine) .getScanBuilder() - .withReadSchema(engine, readSchema) + .withReadSchema(readSchema) .build() val scanState = scan.getScanState(engine)