diff --git a/CHANGELOG.md b/CHANGELOG.md index e5bbc88685..d8f798f880 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,7 @@ request adding CHANGELOG notes for breaking (!) changes and possibly other secti - Added `--no-sts` flag to CLI to support S3-compatible storage systems that do not have Security Token Service available. - Support credential vending for federated catalogs. `ALLOW_FEDERATED_CATALOGS_CREDENTIAL_VENDING` (default: true) was added to toggle this feature. +- Create a Spark 4.0 client. - Enhanced catalog federation with SigV4 authentication support, additional authentication types for credential vending, and location-based access restrictions to block credential vending for remote tables outside allowed location lists. ### Changes diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index bb5c163dc7..84fee3615e 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -40,7 +40,7 @@ swagger = "1.6.16" # (aka mention of the dependency removed). # agrona = { module = "org.agrona:agrona", version = "2.3.2" } -antlr4-runtime = { module = "org.antlr:antlr4-runtime", version.strictly = "4.9.3" } # spark integration tests +antlr4-runtime = { module = "org.antlr:antlr4-runtime", version.strictly = "4.9.3" } # runtime/spark-tests (Spark 3.5) apache-httpclient5 = { module = "org.apache.httpcomponents.client5:httpclient5", version = "5.5.1" } assertj-core = { module = "org.assertj:assertj-core", version = "3.27.6" } auth0-jwt = { module = "com.auth0:java-jwt", version = "4.5.0" } diff --git a/plugins/pluginlibs.versions.toml b/plugins/pluginlibs.versions.toml index 37b7696b41..90c7793a77 100644 --- a/plugins/pluginlibs.versions.toml +++ b/plugins/pluginlibs.versions.toml @@ -18,7 +18,14 @@ # [versions] +antlr4-spark35 = "4.9.3" +antlr4-spark40 = "4.13.1" iceberg = "1.10.0" spark35 = "3.5.6" +spark40 = "4.0.1" scala212 = "2.12.19" scala213 = "2.13.15" + +[libraries] +antlr4-runtime-spark35 = { module = "org.antlr:antlr4-runtime", version.strictly = "4.9.3" } +antlr4-runtime-spark40 = { module = "org.antlr:antlr4-runtime", version.strictly = "4.13.1" } diff --git a/plugins/spark/README.md b/plugins/spark/README.md index a43c9c376f..4fbafcb7bd 100644 --- a/plugins/spark/README.md +++ b/plugins/spark/README.md @@ -17,7 +17,11 @@ under the License. --> -# Polaris Spark Plugin +# Polaris Spark Plugins + +This directory contains the Polaris Spark plugins. The plugins are built for specific versions of Spark: +- [Spark 3.5](./v3.5/README.md) +- [Spark 4.0](./v4.0/README.md) The Polaris Spark plugin provides a SparkCatalog class, which communicates with the Polaris REST endpoints, and provides implementations for Apache Spark's @@ -45,7 +49,7 @@ option with the Polaris Spark package, or the `--jars` option with the Polaris S The following sections explain how to build and run Spark with both the Polaris package and the bundle JAR. # Build and run with Polaris spark package locally -The Polaris Spark client source code is located in plugins/spark/v3.5/spark. To use the Polaris Spark package +The Polaris Spark client source code is located in plugins/spark/v3.5/spark. To use the Polaris Spark package with Spark, you first need to publish the source JAR to your local Maven repository. 
Run the following command to build the Polaris Spark project and publish the source JAR to your local Maven repository: @@ -96,7 +100,7 @@ polaris-spark-_--bundle.jar For example: polaris-spark-bundle-3.5_2.12-1.2.0-incubating-SNAPSHOT-bundle.jar -Run `./gradlew assemble` to build the entire Polaris project without running tests. After the build completes, +Run `./gradlew assemble` to build the entire Polaris project without running tests. After the build completes, the bundle JAR can be found under: plugins/spark/v3.5/spark/build//libs/. To start Spark using the bundle JAR, specify it with the `--jars` option as shown below: diff --git a/plugins/spark/spark-scala.properties b/plugins/spark/spark-scala.properties index 2ed71b574f..e74eb44743 100644 --- a/plugins/spark/spark-scala.properties +++ b/plugins/spark/spark-scala.properties @@ -17,6 +17,9 @@ # under the License. # -sparkVersions=3.5 +sparkVersions=3.5,4.0 -scalaVersions=2.12,2.13 +scalaVersions.3.5=2.12,2.13 + +# Spark 4.0 only supports Scala 2.13 +scalaVersions.4.0=2.13 diff --git a/plugins/spark/v3.5/README.md b/plugins/spark/v3.5/README.md new file mode 100644 index 0000000000..88187ddea8 --- /dev/null +++ b/plugins/spark/v3.5/README.md @@ -0,0 +1,131 @@ + + +# Polaris Spark 3.5 Plugin + +The Polaris Spark plugin provides a SparkCatalog class, which communicates with the Polaris +REST endpoints, and provides implementations for Apache Spark's +- [TableCatalog](https://github.com/apache/spark/blob/v3.5.6/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java) +- [ViewCatalog](https://github.com/apache/spark/blob/v3.5.6/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java) +- [SupportsNamespaces](https://github.com/apache/spark/blob/v3.5.6/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsNamespaces.java) + +Right now, the plugin only provides support for Spark 3.5, Scala version 2.12 and 2.13, and depends on iceberg-spark-runtime 1.9.1. + +The Polaris Spark client supports catalog management for both Iceberg and Delta tables. It routes all Iceberg table +requests to the Iceberg REST endpoints and routes all Delta table requests to the Generic Table REST endpoints. + +The Spark Client requires at least delta 3.2.1 to work with Delta tables, which requires at least Apache Spark 3.5.3. + +# Start Spark with local Polaris service using the Polaris Spark plugin +The following command starts a Polaris server for local testing, it runs on localhost:8181 with default +realm `POLARIS` and root credentials `root:s3cr3t`: +```shell +./gradlew run +``` + +Once the local server is running, you can start Spark with the Polaris Spark plugin using either the `--packages` +option with the Polaris Spark package, or the `--jars` option with the Polaris Spark bundle JAR. + +The following sections explain how to build and run Spark with both the Polaris package and the bundle JAR. + +# Build and run with Polaris spark package locally +The Polaris Spark client source code is located in plugins/spark/v3.5/spark. To use the Polaris Spark package +with Spark, you first need to publish the source JAR to your local Maven repository. 
+ +Run the following command to build the Polaris Spark project and publish the source JAR to your local Maven repository: +- `./gradlew assemble` -- build the whole Polaris project without running tests +- `./gradlew publishToMavenLocal` -- publish Polaris project source JAR to local Maven repository + +```shell +bin/spark-shell \ +--packages org.apache.polaris:polaris-spark-_:,org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.3.1 \ +--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \ +--conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \ +--conf spark.sql.catalog..warehouse= \ +--conf spark.sql.catalog..header.X-Iceberg-Access-Delegation=vended-credentials \ +--conf spark.sql.catalog.=org.apache.polaris.spark.SparkCatalog \ +--conf spark.sql.catalog..uri=http://localhost:8181/api/catalog \ +--conf spark.sql.catalog..credential="root:secret" \ +--conf spark.sql.catalog..scope='PRINCIPAL_ROLE:ALL' \ +--conf spark.sql.catalog..token-refresh-enabled=true \ +--conf spark.sql.sources.useV1SourceList='' +``` + +The Polaris version is defined in the `versions.txt` file located in the root directory of the Polaris project. +Assume the following values: +- `spark_version`: 3.5 +- `scala_version`: 2.12 +- `polaris_version`: 1.2.0-incubating-SNAPSHOT +- `catalog-name`: `polaris` + The Spark command would look like following: + +```shell +bin/spark-shell \ +--packages org.apache.polaris:polaris-spark-3.5_2.12:1.2.0-incubating-SNAPSHOT,org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.3.1 \ +--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \ +--conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \ +--conf spark.sql.catalog.polaris.warehouse=polaris \ +--conf spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation=vended-credentials \ +--conf spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog \ +--conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \ +--conf spark.sql.catalog.polaris.credential="root:secret" \ +--conf spark.sql.catalog.polaris.scope='PRINCIPAL_ROLE:ALL' \ +--conf spark.sql.catalog.polaris.token-refresh-enabled=true \ +--conf spark.sql.sources.useV1SourceList='' +``` + +# Build and run with Polaris spark bundle JAR +The polaris-spark project also provides a Spark bundle JAR for the `--jars` use case. The resulting JAR will follow this naming format: +polaris-spark-_--bundle.jar +For example: +polaris-spark-bundle-3.5_2.12-1.2.0-incubating-SNAPSHOT-bundle.jar + +Run `./gradlew assemble` to build the entire Polaris project without running tests. After the build completes, +the bundle JAR can be found under: plugins/spark/v3.5/spark/build//libs/. 
+To start Spark using the bundle JAR, specify it with the `--jars` option as shown below: + +```shell +bin/spark-shell \ +--jars \ +--packages org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.12:3.3.1 \ +--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \ +--conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \ +--conf spark.sql.catalog..warehouse= \ +--conf spark.sql.catalog..header.X-Iceberg-Access-Delegation=vended-credentials \ +--conf spark.sql.catalog.=org.apache.polaris.spark.SparkCatalog \ +--conf spark.sql.catalog..uri=http://localhost:8181/api/catalog \ +--conf spark.sql.catalog..credential="root:secret" \ +--conf spark.sql.catalog..scope='PRINCIPAL_ROLE:ALL' \ +--conf spark.sql.catalog..token-refresh-enabled=true \ +--conf spark.sql.sources.useV1SourceList='' +``` + +# Current Limitations +The following describes the current limitations of the Polaris Spark client: + +## General Limitations +1. The Polaris Spark client only supports Iceberg and Delta tables. It does not support other table formats like CSV, JSON, etc. +2. Generic tables (non-Iceberg tables) do not currently support credential vending. + +## Delta Table Limitations +1. Create table as select (CTAS) is not supported for Delta tables. As a result, the `saveAsTable` method of `Dataframe` + is also not supported, since it relies on the CTAS support. +2. Create a Delta table without explicit location is not supported. +3. Rename a Delta table is not supported. +4. ALTER TABLE ... SET LOCATION is not supported for DELTA table. \ No newline at end of file diff --git a/plugins/spark/v3.5/integration/build.gradle.kts b/plugins/spark/v3.5/integration/build.gradle.kts index f7c9892086..925dc189d2 100644 --- a/plugins/spark/v3.5/integration/build.gradle.kts +++ b/plugins/spark/v3.5/integration/build.gradle.kts @@ -91,7 +91,9 @@ dependencies { testImplementation(enforcedPlatform("org.scala-lang:scala-library:${scalaLibraryVersion}")) testImplementation(enforcedPlatform("org.scala-lang:scala-reflect:${scalaLibraryVersion}")) testImplementation(libs.javax.servlet.api) - testImplementation(libs.antlr4.runtime) + + // Spark 3.5 and Delta 3.3 require ANTLR 4.9.3 + testRuntimeOnly(pluginlibs.antlr4.runtime.spark35) } tasks.named("intTest").configure { diff --git a/plugins/spark/v4.0/README.md b/plugins/spark/v4.0/README.md new file mode 100644 index 0000000000..09986ad4bf --- /dev/null +++ b/plugins/spark/v4.0/README.md @@ -0,0 +1,123 @@ + + +# Polaris Spark 4.0 Plugin + +The Polaris Spark 4.0 plugin provides a SparkCatalog class, which communicates with the Polaris +REST endpoints, and provides implementations for Apache Spark's +[TableCatalog](https://github.com/apache/spark/blob/v4.0.1/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java), +[ViewCatalog](https://github.com/apache/spark/blob/v4.0.1/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/ViewCatalog.java), +[SupportsNamespaces](https://github.com/apache/spark/blob/v4.0.1/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsNamespaces.java). + +This plugin depends on iceberg-spark-runtime-4.0_2.13:1.10.0. 
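
For reference, the same catalog settings used in the `spark-shell` commands later in this README can also be applied programmatically when building a `SparkSession`. The sketch below is a minimal, hypothetical Java example assuming a local Polaris server on `localhost:8181`, a catalog named `polaris`, and that the Polaris Spark 4.0 client, `iceberg-aws-bundle`, and `delta-spark` artifacts are already on the classpath (for example via `--packages` or `--jars`); it is not part of the plugin itself, just an illustration of the configuration keys documented below.

```java
import org.apache.spark.sql.SparkSession;

public class PolarisSparkExample {
  public static void main(String[] args) {
    // Mirrors the --conf flags from the spark-shell examples in this README; values are illustrative.
    SparkSession spark =
        SparkSession.builder()
            .appName("polaris-spark-4.0-example")
            .master("local[*]")
            .config(
                "spark.sql.extensions",
                "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,"
                    + "io.delta.sql.DeltaSparkSessionExtension")
            .config(
                "spark.sql.catalog.spark_catalog",
                "org.apache.spark.sql.delta.catalog.DeltaCatalog")
            // Register the Polaris Spark catalog under the name "polaris".
            .config("spark.sql.catalog.polaris", "org.apache.polaris.spark.SparkCatalog")
            .config("spark.sql.catalog.polaris.uri", "http://localhost:8181/api/catalog")
            .config("spark.sql.catalog.polaris.warehouse", "polaris")
            .config(
                "spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation",
                "vended-credentials")
            // Must match the credentials the local server was bootstrapped with
            // (root:s3cr3t by default, as noted above).
            .config("spark.sql.catalog.polaris.credential", "root:s3cr3t")
            .config("spark.sql.catalog.polaris.scope", "PRINCIPAL_ROLE:ALL")
            .config("spark.sql.catalog.polaris.token-refresh-enabled", "true")
            .config("spark.sql.sources.useV1SourceList", "")
            .getOrCreate();

    // Simple smoke test against the Polaris catalog.
    spark.sql("SHOW NAMESPACES IN polaris").show();
    spark.stop();
  }
}
```

The sections below show the equivalent `spark-shell` invocations, which are the supported way to launch Spark with the plugin.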
+ +# Start Spark with local Polaris service using the Polaris Spark plugin +The following command starts a Polaris server for local testing, it runs on localhost:8181 with default +realm `POLARIS` and root credentials `root:s3cr3t`: +```shell +./gradlew run +``` + +Once the local server is running, you can start Spark with the Polaris Spark plugin using either the `--packages` +option with the Polaris Spark package, or the `--jars` option with the Polaris Spark bundle JAR. + +The following sections explain how to build and run Spark with both the Polaris package and the bundle JAR. + +# Build and run with Polaris spark package locally +The Polaris Spark 4.0 client source code is located in plugins/spark/v4.0/spark. To use the Polaris Spark package +with Spark, you first need to publish the source JAR to your local Maven repository. + +Run the following command to build the Polaris Spark project and publish the source JAR to your local Maven repository: +- `./gradlew assemble` -- build the whole Polaris project without running tests +- `./gradlew publishToMavenLocal` -- publish Polaris project source JAR to local Maven repository + +```shell +bin/spark-shell \ +--packages org.apache.polaris:polaris-spark-_:,org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.13:4.0.0 \ +--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \ +--conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \ +--conf spark.sql.catalog..warehouse= \ +--conf spark.sql.catalog..header.X-Iceberg-Access-Delegation=vended-credentials \ +--conf spark.sql.catalog.=org.apache.polaris.spark.SparkCatalog \ +--conf spark.sql.catalog..uri=http://localhost:8181/api/catalog \ +--conf spark.sql.catalog..credential="root:secret" \ +--conf spark.sql.catalog..scope='PRINCIPAL_ROLE:ALL' \ +--conf spark.sql.catalog..token-refresh-enabled=true \ +--conf spark.sql.sources.useV1SourceList='' +``` + +The Polaris version is defined in the `versions.txt` file located in the root directory of the Polaris project. +Assume the following values: +- `spark_version`: 4.0 +- `scala_version`: 2.13 (only Scala 2.13 is supported for Spark 4.0) +- `polaris_version`: 1.2.0-incubating-SNAPSHOT +- `catalog-name`: `polaris` + The Spark command would look like following: + +```shell +bin/spark-shell \ +--packages org.apache.polaris:polaris-spark-4.0_2.13:1.2.0-incubating-SNAPSHOT,org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.13:4.0.0 \ +--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \ +--conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \ +--conf spark.sql.catalog.polaris.warehouse=polaris \ +--conf spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation=vended-credentials \ +--conf spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog \ +--conf spark.sql.catalog.polaris.uri=http://localhost:8181/api/catalog \ +--conf spark.sql.catalog.polaris.credential="root:secret" \ +--conf spark.sql.catalog.polaris.scope='PRINCIPAL_ROLE:ALL' \ +--conf spark.sql.catalog.polaris.token-refresh-enabled=true \ +--conf spark.sql.sources.useV1SourceList='' +``` + +# Build and run with Polaris spark bundle JAR +The polaris-spark project also provides a Spark bundle JAR for the `--jars` use case. 
The resulting JAR will follow this naming format: +polaris-spark-_--bundle.jar +For example: polaris-spark-4.0_2.13-1.2.0-incubating-SNAPSHOT-bundle.jar + +Run `./gradlew assemble` to build the entire Polaris project without running tests. After the build completes, +the bundle JAR can be found under: plugins/spark/v4.0/spark/build/2.13/libs/. +To start Spark using the bundle JAR, specify it with the `--jars` option as shown below: + +```shell +bin/spark-shell \ +--jars \ +--packages org.apache.iceberg:iceberg-aws-bundle:1.10.0,io.delta:delta-spark_2.13:4.0.0 \ +--conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension \ +--conf spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog \ +--conf spark.sql.catalog..warehouse= \ +--conf spark.sql.catalog..header.X-Iceberg-Access-Delegation=vended-credentials \ +--conf spark.sql.catalog.=org.apache.polaris.spark.SparkCatalog \ +--conf spark.sql.catalog..uri=http://localhost:8181/api/catalog \ +--conf spark.sql.catalog..credential="root:secret" \ +--conf spark.sql.catalog..scope='PRINCIPAL_ROLE:ALL' \ +--conf spark.sql.catalog..token-refresh-enabled=true \ +--conf spark.sql.sources.useV1SourceList='' +``` + +# Limitations +The Polaris Spark 4.0 client supports catalog management for both Iceberg and Delta tables, it routes all Iceberg table +requests to the Iceberg REST endpoints, and routes all Delta table requests to the Generic Table REST endpoints. + +The Spark 4.0 Client requires Delta Lake 4.0.0 or higher to work with Delta tables. +Following describes the current functionality limitations of the Polaris Spark 4.0 client: +1. Create table as select (CTAS) is not supported for Delta tables. As a result, the `saveAsTable` method of `Dataframe` is also not supported, since it relies on the CTAS support. +2. Create a Delta table without explicit location is not supported. +3. Rename a Delta table is not supported. +4. ALTER TABLE ... SET LOCATION is not supported for DELTA table. +5. For other non-Iceberg tables like csv, it is not supported today. diff --git a/plugins/spark/v4.0/integration/build.gradle.kts b/plugins/spark/v4.0/integration/build.gradle.kts new file mode 100644 index 0000000000..b84ac3e5cd --- /dev/null +++ b/plugins/spark/v4.0/integration/build.gradle.kts @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +plugins { + alias(libs.plugins.quarkus) + id("org.kordamp.gradle.jandex") + id("polaris-runtime") +} + +// get version information +val sparkMajorVersion = "4.0" +val scalaVersion = getAndUseScalaVersionForProject() +val icebergVersion = pluginlibs.versions.iceberg.get() +val spark40Version = pluginlibs.versions.spark40.get() +val scalaLibraryVersion = pluginlibs.versions.scala213.get() + +dependencies { + // must be enforced to get a consistent and validated set of dependencies + implementation(enforcedPlatform(libs.quarkus.bom)) { + exclude(group = "org.antlr", module = "antlr4-runtime") + exclude(group = "org.scala-lang", module = "scala-library") + exclude(group = "org.scala-lang", module = "scala-reflect") + } + + // For test configurations, exclude jakarta.servlet-api from Quarkus BOM + // to allow Spark 4.0's version (5.0.0) which includes SingleThreadModel + testImplementation(platform(libs.quarkus.bom)) { + exclude(group = "jakarta.servlet", module = "jakarta.servlet-api") + } + + implementation(project(":polaris-runtime-service")) + + testImplementation( + "org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersion}:${icebergVersion}" + ) + testImplementation(project(":polaris-spark-${sparkMajorVersion}_${scalaVersion}")) + + testImplementation(project(":polaris-api-management-model")) + + testImplementation(project(":polaris-runtime-test-common")) + + testImplementation("org.apache.spark:spark-sql_${scalaVersion}:${spark40Version}") { + // exclude log4j dependencies. Explicit dependencies for the log4j libraries are + // enforced below to ensure the version compatibility + exclude("org.apache.logging.log4j", "log4j-slf4j2-impl") + exclude("org.apache.logging.log4j", "log4j-1.2-api") + exclude("org.apache.logging.log4j", "log4j-core") + exclude("org.slf4j", "jul-to-slf4j") + } + // enforce the usage of log4j 2.24.3. 
This is for the log4j-api compatibility + // of spark-sql dependency + testRuntimeOnly("org.apache.logging.log4j:log4j-core:2.25.2") + + testImplementation("io.delta:delta-spark_${scalaVersion}:4.0.0") + + testImplementation(platform(libs.jackson.bom)) + testImplementation("com.fasterxml.jackson.jakarta.rs:jackson-jakarta-rs-json-provider") + + testImplementation(testFixtures(project(":polaris-runtime-service"))) + + testImplementation(platform(libs.quarkus.bom)) + testImplementation("io.quarkus:quarkus-junit5") + testImplementation("io.quarkus:quarkus-rest-client") + testImplementation("io.quarkus:quarkus-rest-client-jackson") + + testImplementation(platform(libs.awssdk.bom)) + testImplementation("software.amazon.awssdk:glue") + testImplementation("software.amazon.awssdk:kms") + testImplementation("software.amazon.awssdk:dynamodb") + + testImplementation(platform(libs.testcontainers.bom)) + testImplementation("org.testcontainers:testcontainers") + testImplementation(libs.s3mock.testcontainers) + + // Required for Spark integration tests + testImplementation(enforcedPlatform("org.scala-lang:scala-library:${scalaLibraryVersion}")) + testImplementation(enforcedPlatform("org.scala-lang:scala-reflect:${scalaLibraryVersion}")) + testImplementation(libs.javax.servlet.api) + + // Spark 4.0 and Delta 4.0 require ANTLR 4.13.1 + testRuntimeOnly(pluginlibs.antlr4.runtime.spark40) +} + +// Force jakarta.servlet-api to 5.0.0 for Spark 4.0 compatibility +// Spark 4.0 requires version 5.0.0 which includes SingleThreadModel +// Quarkus BOM forces it to 6.x which removed SingleThreadModel +configurations.named("intTestRuntimeClasspath") { + resolutionStrategy { force("jakarta.servlet:jakarta.servlet-api:5.0.0") } +} + +tasks.named("intTest").configure { + if (System.getenv("AWS_REGION") == null) { + environment("AWS_REGION", "us-west-2") + } + // Note: the test secrets are referenced in + // org.apache.polaris.service.it.ServerManager + environment("POLARIS_BOOTSTRAP_CREDENTIALS", "POLARIS,test-admin,test-secret") + jvmArgs("--add-exports", "java.base/sun.nio.ch=ALL-UNNAMED") + // Need to allow a java security manager after Java 21, for Subject.getSubject to work + // "getSubject is supported only if a security manager is allowed". + systemProperty("java.security.manager", "allow") + // Same issue as above: allow a java security manager after Java 21 + // (this setting is for the application under test, while the setting above is for test code). + systemProperty("quarkus.test.arg-line", "-Djava.security.manager=allow") + val logsDir = project.layout.buildDirectory.get().asFile.resolve("logs") + // delete files from previous runs + doFirst { + // delete log files written by Polaris + logsDir.deleteRecursively() + // delete quarkus.log file (captured Polaris stdout/stderr) + project.layout.buildDirectory.get().asFile.resolve("quarkus.log").delete() + } + // This property is not honored in a per-profile application.properties file, + // so we need to set it here. 
+ systemProperty("quarkus.log.file.path", logsDir.resolve("polaris.log").absolutePath) + // For Spark integration tests + addSparkJvmOptions() +} diff --git a/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/PolarisManagementClient.java b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/PolarisManagementClient.java new file mode 100644 index 0000000000..cc0f177f7e --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/PolarisManagementClient.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import static java.util.concurrent.TimeUnit.MINUTES; +import static org.apache.polaris.service.it.ext.PolarisServerManagerLoader.polarisServerManager; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.jakarta.rs.json.JacksonJsonProvider; +import jakarta.ws.rs.client.Client; +import jakarta.ws.rs.client.ClientBuilder; +import java.util.Map; +import java.util.Random; +import org.apache.iceberg.rest.HTTPClient; +import org.apache.iceberg.rest.RESTClient; +import org.apache.iceberg.rest.auth.AuthSession; +import org.apache.iceberg.rest.auth.OAuth2Util; +import org.apache.iceberg.rest.responses.OAuthTokenResponse; +import org.apache.polaris.service.it.env.ClientCredentials; +import org.apache.polaris.service.it.env.ManagementApi; +import org.apache.polaris.service.it.env.PolarisApiEndpoints; + +/** + * This class provides a REST client for the Polaris Management service endpoints and its auth-token + * endpoint, which is used in Spark client tests to run commands that Spark SQL can’t issue directly + * (e.g., createCatalog). + */ +public final class PolarisManagementClient implements AutoCloseable { + private final PolarisApiEndpoints endpoints; + private final Client client; + // Use an alphanumeric ID for widest compatibility in HTTP and SQL. + // Use MAX_RADIX for shorter output. 
+ private final String clientId = + Long.toString(Math.abs(new Random().nextLong()), Character.MAX_RADIX); + // initialization an Iceberg rest client for fetch token + private final RESTClient restClient; + + private PolarisManagementClient(PolarisApiEndpoints endpoints) { + this.endpoints = endpoints; + + this.client = + ClientBuilder.newBuilder() + .readTimeout(5, MINUTES) + .connectTimeout(1, MINUTES) + .register(new JacksonJsonProvider(new ObjectMapper())) + .build(); + + this.restClient = HTTPClient.builder(Map.of()).uri(endpoints.catalogApiEndpoint()).build(); + } + + public static PolarisManagementClient managementClient(PolarisApiEndpoints endpoints) { + return new PolarisManagementClient(endpoints); + } + + /** This method should be used by test code to make top-level entity names. */ + public String newEntityName(String hint) { + return polarisServerManager().transformEntityName(hint + "_" + clientId); + } + + public ManagementApi managementApi(String authToken) { + return new ManagementApi(client, endpoints, authToken, endpoints.managementApiEndpoint()); + } + + public ManagementApi managementApi(ClientCredentials credentials) { + return managementApi(obtainToken(credentials)); + } + + /** Requests an access token from the Polaris server for the given {@link ClientCredentials}. */ + public String obtainToken(ClientCredentials credentials) { + OAuthTokenResponse response = + OAuth2Util.fetchToken( + restClient.withAuthSession(AuthSession.EMPTY), + Map.of(), + String.format("%s:%s", credentials.clientId(), credentials.clientSecret()), + "PRINCIPAL_ROLE:ALL", + endpoints.catalogApiEndpoint() + "/v1/oauth/tokens", + Map.of("grant_type", "client_credentials")); + return response.token(); + } + + @Override + public void close() throws Exception { + client.close(); + restClient.close(); + } +} diff --git a/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogBaseIT.java b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogBaseIT.java new file mode 100644 index 0000000000..fb5dac805a --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogBaseIT.java @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark.quarkus.it; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; +import io.quarkus.test.junit.QuarkusIntegrationTest; +import java.util.Arrays; +import java.util.Map; +import org.apache.iceberg.exceptions.BadRequestException; +import org.apache.iceberg.exceptions.NamespaceNotEmptyException; +import org.apache.iceberg.spark.SupportsReplaceView; +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.apache.spark.sql.catalyst.analysis.NoSuchViewException; +import org.apache.spark.sql.connector.catalog.CatalogPlugin; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.catalog.NamespaceChange; +import org.apache.spark.sql.connector.catalog.StagingTableCatalog; +import org.apache.spark.sql.connector.catalog.SupportsNamespaces; +import org.apache.spark.sql.connector.catalog.View; +import org.apache.spark.sql.connector.catalog.ViewCatalog; +import org.apache.spark.sql.connector.catalog.ViewChange; +import org.apache.spark.sql.connector.catalog.ViewInfo; +import org.apache.spark.sql.types.StructType; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * This integration directly performs operations using the SparkCatalog instance, instead of going + * through Spark SQL interface. This provides a more direct testing capability against the Polaris + * SparkCatalog operations, some operations like listNamespaces under a namespace can not be + * triggered through a SQL interface directly with Spark. + */ +@QuarkusIntegrationTest +public abstract class SparkCatalogBaseIT extends SparkIntegrationBase { + private static StructType schema = new StructType().add("id", "long").add("name", "string"); + protected StagingTableCatalog tableCatalog = null; + protected SupportsNamespaces namespaceCatalog = null; + protected ViewCatalog viewCatalog = null; + protected SupportsReplaceView replaceViewCatalog = null; + + @BeforeEach + protected void loadCatalogs() { + Preconditions.checkArgument(spark != null, "No active spark found"); + Preconditions.checkArgument(catalogName != null, "No catalogName found"); + CatalogPlugin catalogPlugin = spark.sessionState().catalogManager().catalog(catalogName); + tableCatalog = (StagingTableCatalog) catalogPlugin; + namespaceCatalog = (SupportsNamespaces) catalogPlugin; + viewCatalog = (ViewCatalog) catalogPlugin; + replaceViewCatalog = (SupportsReplaceView) catalogPlugin; + } + + @Test + void testNamespaceOperations() throws Exception { + String[][] lv1ns = new String[][] {{"l1ns1"}, {"l1ns2"}}; + String[][] lv2ns1 = new String[][] {{"l1ns1", "l2ns1"}, {"l1ns1", "l2ns2"}}; + String[][] lv2ns2 = new String[][] {{"l1ns2", "l2ns3"}}; + + // create the namespaces + for (String[] namespace : lv1ns) { + namespaceCatalog.createNamespace(namespace, Maps.newHashMap()); + } + for (String[] namespace : lv2ns1) { + namespaceCatalog.createNamespace(namespace, Maps.newHashMap()); + } + for (String[] namespace : lv2ns2) { + namespaceCatalog.createNamespace(namespace, Maps.newHashMap()); + } + + // list namespaces under root + String[][] lv1nsResult = namespaceCatalog.listNamespaces(); + assertThat(lv1nsResult.length).isEqualTo(lv1ns.length); + for (String[] namespace : lv1ns) { + assertThat(Arrays.asList(lv1nsResult)).contains(namespace); + } + // list namespace under l1ns1 + 
String[][] lv2ns1Result = namespaceCatalog.listNamespaces(lv1ns[0]); + assertThat(lv2ns1Result.length).isEqualTo(lv2ns1.length); + for (String[] namespace : lv2ns1) { + assertThat(Arrays.asList(lv2ns1Result)).contains(namespace); + } + // list namespace under l1ns2 + String[][] lv2ns2Result = namespaceCatalog.listNamespaces(lv1ns[1]); + assertThat(lv2ns2Result.length).isEqualTo(lv2ns2.length); + for (String[] namespace : lv2ns2) { + assertThat(Arrays.asList(lv2ns2Result)).contains(namespace); + } + // no namespace under l1ns2.l2ns3 + assertThat(namespaceCatalog.listNamespaces(lv2ns2[0]).length).isEqualTo(0); + + // drop the nested namespace under lv1ns[1] + namespaceCatalog.dropNamespace(lv2ns2[0], true); + assertThat(namespaceCatalog.listNamespaces(lv1ns[1]).length).isEqualTo(0); + namespaceCatalog.dropNamespace(lv1ns[1], true); + assertThatThrownBy(() -> namespaceCatalog.listNamespaces(lv1ns[1])) + .isInstanceOf(NoSuchNamespaceException.class); + + // directly drop lv1ns[0] should fail + assertThatThrownBy(() -> namespaceCatalog.dropNamespace(lv1ns[0], true)) + .isInstanceOfAny( + BadRequestException.class, // Iceberg < 1.9.0 + NamespaceNotEmptyException.class // Iceberg >= 1.9.0 + ); + for (String[] namespace : lv2ns1) { + namespaceCatalog.dropNamespace(namespace, true); + } + namespaceCatalog.dropNamespace(lv1ns[0], true); + + // no more namespace available + assertThat(namespaceCatalog.listNamespaces().length).isEqualTo(0); + } + + @Test + void testAlterNamespace() throws Exception { + String[] namespace = new String[] {"ns1"}; + Map metadata = Maps.newHashMap(); + metadata.put("owner", "user1"); + + namespaceCatalog.createNamespace(namespace, metadata); + assertThat(namespaceCatalog.loadNamespaceMetadata(namespace)) + .contains(Map.entry("owner", "user1")); + + namespaceCatalog.alterNamespace(namespace, NamespaceChange.setProperty("owner", "new-user")); + assertThat(namespaceCatalog.loadNamespaceMetadata(namespace)) + .contains(Map.entry("owner", "new-user")); + + // drop the namespace + namespaceCatalog.dropNamespace(namespace, true); + } + + @Test + void testBasicViewOperations() throws Exception { + String[] namespace = new String[] {"ns"}; + namespaceCatalog.createNamespace(namespace, Maps.newHashMap()); + + Identifier viewIdentifier = Identifier.of(namespace, "test-view"); + String viewSql = "select id from test-table where id < 3"; + ViewInfo viewInfo = + new ViewInfo( + viewIdentifier, + viewSql, + catalogName, + namespace, + schema, + new String[0], + new String[0], + new String[0], + Maps.newHashMap()); + viewCatalog.createView(viewInfo); + + // load the view + View view = viewCatalog.loadView(viewIdentifier); + assertThat(view.query()).isEqualTo(viewSql); + assertThat(view.schema()).isEqualTo(schema); + + // alter the view properties + viewCatalog.alterView(viewIdentifier, ViewChange.setProperty("owner", "user1")); + view = viewCatalog.loadView(viewIdentifier); + assertThat(view.properties()).contains(Map.entry("owner", "user1")); + + // rename the view + Identifier newIdentifier = Identifier.of(namespace, "new-view"); + viewCatalog.renameView(viewIdentifier, newIdentifier); + assertThatThrownBy(() -> viewCatalog.loadView(viewIdentifier)) + .isInstanceOf(NoSuchViewException.class); + view = viewCatalog.loadView(newIdentifier); + assertThat(view.query()).isEqualTo(viewSql); + assertThat(view.schema()).isEqualTo(schema); + + // replace the view + String newSql = "select id from test-table where id == 3"; + Map properties = Maps.newHashMap(); + properties.put("owner", 
"test-user"); + replaceViewCatalog.replaceView( + newIdentifier, + newSql, + catalogName, + namespace, + schema, + new String[0], + new String[0], + new String[0], + properties); + view = viewCatalog.loadView(newIdentifier); + assertThat(view.query()).isEqualTo(newSql); + assertThat(view.properties()).contains(Map.entry("owner", "test-user")); + + // drop the view + viewCatalog.dropView(newIdentifier); + assertThatThrownBy(() -> viewCatalog.loadView(newIdentifier)) + .isInstanceOf(NoSuchViewException.class); + } + + @Test + void testListViews() throws Exception { + String[] l1ns = new String[] {"ns"}; + namespaceCatalog.createNamespace(l1ns, Maps.newHashMap()); + + // create a new namespace under the default NS + String[] l2ns = new String[] {"ns", "nsl2"}; + namespaceCatalog.createNamespace(l2ns, Maps.newHashMap()); + // create one view under l1 + String view1Name = "test-view1"; + String view1SQL = "select id from test-table where id >= 3"; + ViewInfo viewInfo1 = + new ViewInfo( + Identifier.of(l1ns, view1Name), + view1SQL, + catalogName, + l1ns, + schema, + new String[0], + new String[0], + new String[0], + Maps.newHashMap()); + viewCatalog.createView(viewInfo1); + // create two views under the l2 namespace + String[] nsl2ViewNames = new String[] {"test-view2", "test-view3"}; + String[] nsl2ViewSQLs = + new String[] { + "select id from test-table where id == 3", "select id from test-table where id < 3" + }; + for (int i = 0; i < nsl2ViewNames.length; i++) { + ViewInfo viewInfo = + new ViewInfo( + Identifier.of(l2ns, nsl2ViewNames[i]), + nsl2ViewSQLs[i], + catalogName, + l2ns, + schema, + new String[0], + new String[0], + new String[0], + Maps.newHashMap()); + viewCatalog.createView(viewInfo); + } + // list views under l1ns + Identifier[] l1Views = viewCatalog.listViews(l1ns); + assertThat(l1Views.length).isEqualTo(1); + assertThat(l1Views[0].name()).isEqualTo(view1Name); + + // list views under l2ns + Identifier[] l2Views = viewCatalog.listViews(l2ns); + assertThat(l2Views.length).isEqualTo(nsl2ViewSQLs.length); + for (String name : nsl2ViewNames) { + assertThat(Arrays.asList(l2Views)).contains(Identifier.of(l2ns, name)); + } + + // drop namespace fails since there are views under it + assertThatThrownBy(() -> namespaceCatalog.dropNamespace(l2ns, true)) + .isInstanceOfAny( + BadRequestException.class, // Iceberg < 1.9.0 + NamespaceNotEmptyException.class // Iceberg >= 1.9.0 + ); + // drop the views + for (String name : nsl2ViewNames) { + viewCatalog.dropView(Identifier.of(l2ns, name)); + } + namespaceCatalog.dropNamespace(l2ns, true); + viewCatalog.dropView(Identifier.of(l1ns, view1Name)); + namespaceCatalog.dropNamespace(l1ns, true); + } +} diff --git a/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogIcebergIT.java b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogIcebergIT.java new file mode 100644 index 0000000000..812d8f19d5 --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogIcebergIT.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import io.quarkus.test.junit.QuarkusIntegrationTest; +import org.apache.polaris.service.it.ext.SparkSessionBuilder; +import org.apache.spark.sql.SparkSession; + +@QuarkusIntegrationTest +public class SparkCatalogIcebergIT extends SparkCatalogBaseIT { + /** Initialize the spark catalog to use the iceberg spark catalog. */ + @Override + protected SparkSession buildSparkSession() { + return SparkSessionBuilder.buildWithTestDefaults() + .withWarehouse(warehouseDir) + .addCatalog(catalogName, "org.apache.iceberg.spark.SparkCatalog", endpoints, sparkToken) + .getOrCreate(); + } +} diff --git a/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogPolarisIT.java b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogPolarisIT.java new file mode 100644 index 0000000000..97a4c222db --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkCatalogPolarisIT.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import io.quarkus.test.junit.QuarkusIntegrationTest; + +@QuarkusIntegrationTest +public class SparkCatalogPolarisIT extends SparkCatalogBaseIT {} diff --git a/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkDeltaIT.java b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkDeltaIT.java new file mode 100644 index 0000000000..7beacb1141 --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkDeltaIT.java @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import io.quarkus.test.junit.QuarkusIntegrationTest; +import java.io.File; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.List; +import org.apache.commons.io.FileUtils; +import org.apache.polaris.service.it.env.IntegrationTestsHelper; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.delta.DeltaAnalysisException; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +@QuarkusIntegrationTest +public class SparkDeltaIT extends SparkIntegrationBase { + private String defaultNs; + private String tableRootDir; + + private String getTableLocation(String tableName) { + return String.format("%s/%s", tableRootDir, tableName); + } + + private String getTableNameWithRandomSuffix() { + return generateName("deltatb"); + } + + @BeforeEach + public void createDefaultResources(@TempDir Path tempDir) { + spark.sparkContext().setLogLevel("WARN"); + defaultNs = generateName("delta"); + // create a default namespace + sql("CREATE NAMESPACE %s", defaultNs); + sql("USE NAMESPACE %s", defaultNs); + tableRootDir = + IntegrationTestsHelper.getTemporaryDirectory(tempDir).resolve(defaultNs).getPath(); + } + + @AfterEach + public void cleanupDeltaData() { + // clean up delta data + File dirToDelete = new File(tableRootDir); + FileUtils.deleteQuietly(dirToDelete); + sql("DROP NAMESPACE %s", defaultNs); + } + + @Test + public void testBasicTableOperations() { + // create a regular delta table + String deltatb1 = "deltatb1"; + sql( + "CREATE TABLE %s (id INT, name STRING) USING DELTA LOCATION '%s'", + deltatb1, getTableLocation(deltatb1)); + sql("INSERT INTO %s VALUES (1, 'anna'), (2, 'bob')", deltatb1); + List results = sql("SELECT * FROM %s WHERE id > 1 ORDER BY id DESC", deltatb1); + assertThat(results.size()).isEqualTo(1); + assertThat(results.get(0)).isEqualTo(new Object[] {2, "bob"}); + + // create a detla table with partition + String deltatb2 = "deltatb2"; + sql( + "CREATE TABLE %s (name String, age INT, country STRING) USING DELTA PARTITIONED BY (country) LOCATION '%s'", + deltatb2, getTableLocation(deltatb2)); + sql( + "INSERT INTO %s VALUES ('anna', 10, 'US'), ('james', 32, 'US'), ('yan', 16, 'CHINA')", + deltatb2); + results = sql("SELECT name, country FROM %s ORDER BY age", deltatb2); + assertThat(results.size()).isEqualTo(3); + assertThat(results.get(0)).isEqualTo(new Object[] {"anna", "US"}); + assertThat(results.get(1)).isEqualTo(new Object[] {"yan", "CHINA"}); + assertThat(results.get(2)).isEqualTo(new Object[] {"james", "US"}); + + // verify the partition dir is created + List subDirs = 
listDirs(getTableLocation(deltatb2)); + assertThat(subDirs).contains("_delta_log", "country=CHINA", "country=US"); + + // test listTables + List tables = sql("SHOW TABLES"); + assertThat(tables.size()).isEqualTo(2); + assertThat(tables) + .contains( + new Object[] {defaultNs, deltatb1, false}, new Object[] {defaultNs, deltatb2, false}); + + sql("DROP TABLE %s", deltatb1); + sql("DROP TABLE %s", deltatb2); + tables = sql("SHOW TABLES"); + assertThat(tables.size()).isEqualTo(0); + } + + @Test + public void testAlterOperations() { + String deltatb = getTableNameWithRandomSuffix(); + sql( + "CREATE TABLE %s (id INT, name STRING) USING DELTA LOCATION '%s'", + deltatb, getTableLocation(deltatb)); + sql("INSERT INTO %s VALUES (1, 'anna'), (2, 'bob')", deltatb); + + // test alter columns + // add two new columns to the table + sql("Alter TABLE %s ADD COLUMNS (city STRING, age INT)", deltatb); + // add one more row to the table + sql("INSERT INTO %s VALUES (3, 'john', 'SFO', 20)", deltatb); + // verify the table now have 4 columns with correct result + List results = sql("SELECT * FROM %s ORDER BY id", deltatb); + assertThat(results.size()).isEqualTo(3); + assertThat(results).contains(new Object[] {1, "anna", null, null}); + assertThat(results).contains(new Object[] {2, "bob", null, null}); + assertThat(results).contains(new Object[] {3, "john", "SFO", 20}); + + // drop and rename column require set the delta.columnMapping property + sql("ALTER TABLE %s SET TBLPROPERTIES ('delta.columnMapping.mode' = 'name')", deltatb); + // drop column age + sql("Alter TABLE %s DROP COLUMN age", deltatb); + // verify the table now have 3 columns with correct result + results = sql("SELECT * FROM %s ORDER BY id", deltatb); + assertThat(results.size()).isEqualTo(3); + assertThat(results).contains(new Object[] {1, "anna", null}); + assertThat(results).contains(new Object[] {2, "bob", null}); + assertThat(results).contains(new Object[] {3, "john", "SFO"}); + + // rename column city to address + sql("Alter TABLE %s RENAME COLUMN city TO address", deltatb); + // verify column address exists + results = sql("SELECT id, address FROM %s ORDER BY id", deltatb); + assertThat(results.size()).isEqualTo(3); + assertThat(results).contains(new Object[] {1, null}); + assertThat(results).contains(new Object[] {2, null}); + assertThat(results).contains(new Object[] {3, "SFO"}); + + // test alter properties + sql( + "ALTER TABLE %s SET TBLPROPERTIES ('description' = 'people table', 'test-owner' = 'test-user')", + deltatb); + List tableInfo = sql("DESCRIBE TABLE EXTENDED %s", deltatb); + // find the table properties result + String properties = null; + for (Object[] info : tableInfo) { + if (info[0].equals("Table Properties")) { + properties = (String) info[1]; + break; + } + } + assertThat(properties).contains("description=people table,test-owner=test-user"); + sql("DROP TABLE %s", deltatb); + } + + @Test + public void testUnsupportedAlterTableOperations() { + String deltatb = getTableNameWithRandomSuffix(); + sql( + "CREATE TABLE %s (name String, age INT, country STRING) USING DELTA PARTITIONED BY (country) LOCATION '%s'", + deltatb, getTableLocation(deltatb)); + + // ALTER TABLE ... RENAME TO ... fails + assertThatThrownBy(() -> sql("ALTER TABLE %s RENAME TO new_delta", deltatb)) + .isInstanceOf(UnsupportedOperationException.class); + + // ALTER TABLE ... SET LOCATION ... 
fails + assertThatThrownBy(() -> sql("ALTER TABLE %s SET LOCATION '/tmp/new/path'", deltatb)) + .isInstanceOf(DeltaAnalysisException.class); + + sql("DROP TABLE %s", deltatb); + } + + @Test + public void testUnsupportedTableCreateOperations() { + String deltatb = getTableNameWithRandomSuffix(); + // create delta table with no location + assertThatThrownBy(() -> sql("CREATE TABLE %s (id INT, name STRING) USING DELTA", deltatb)) + .isInstanceOf(UnsupportedOperationException.class); + + // CTAS fails + assertThatThrownBy( + () -> + sql( + "CREATE TABLE %s USING DELTA LOCATION '%s' AS SELECT 1 AS id", + deltatb, getTableLocation(deltatb))) + .isInstanceOf(IllegalArgumentException.class); + } + + @Test + public void testDataframeSaveOperations() { + List data = Arrays.asList(RowFactory.create("Alice", 30), RowFactory.create("Bob", 25)); + StructType schema = + new StructType( + new StructField[] { + new StructField("name", DataTypes.StringType, false, Metadata.empty()), + new StructField("age", DataTypes.IntegerType, false, Metadata.empty()) + }); + Dataset df = spark.createDataFrame(data, schema); + + String deltatb = getTableNameWithRandomSuffix(); + // saveAsTable requires support for delta requires CTAS support for third party catalog + // in delta catalog, which is currently not supported. + assertThatThrownBy( + () -> + df.write() + .format("delta") + .option("path", getTableLocation(deltatb)) + .saveAsTable(deltatb)) + .isInstanceOf(IllegalArgumentException.class); + + // verify regular dataframe saving still works + df.write().format("delta").save(getTableLocation(deltatb)); + + // verify the partition dir is created + List subDirs = listDirs(getTableLocation(deltatb)); + assertThat(subDirs).contains("_delta_log"); + + // verify we can create a table out of the existing delta location + sql("CREATE TABLE %s USING DELTA LOCATION '%s'", deltatb, getTableLocation(deltatb)); + List tables = sql("SHOW TABLES"); + assertThat(tables.size()).isEqualTo(1); + assertThat(tables).contains(new Object[] {defaultNs, deltatb, false}); + + sql("INSERT INTO %s VALUES ('Anna', 11)", deltatb); + + List results = sql("SELECT * FROM %s ORDER BY name", deltatb); + assertThat(results.size()).isEqualTo(3); + assertThat(results.get(0)).isEqualTo(new Object[] {"Alice", 30}); + assertThat(results.get(1)).isEqualTo(new Object[] {"Anna", 11}); + assertThat(results.get(2)).isEqualTo(new Object[] {"Bob", 25}); + + sql("DROP TABLE %s", deltatb); + } +} diff --git a/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIT.java b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIT.java new file mode 100644 index 0000000000..a4e060a52f --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIT.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import io.quarkus.test.junit.QuarkusIntegrationTest; +import java.io.File; +import java.nio.file.Path; +import java.util.List; +import org.apache.commons.io.FileUtils; +import org.apache.polaris.service.it.env.IntegrationTestsHelper; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +@QuarkusIntegrationTest +public class SparkIT extends SparkIntegrationBase { + @Test + public void testNamespaces() { + List namespaces = sql("SHOW NAMESPACES"); + assertThat(namespaces.size()).isEqualTo(0); + + String[] l1NS = new String[] {"l1ns1", "l1ns2"}; + for (String ns : l1NS) { + sql("CREATE NAMESPACE %s", ns); + } + namespaces = sql("SHOW NAMESPACES"); + assertThat(namespaces.size()).isEqualTo(2); + for (String ns : l1NS) { + assertThat(namespaces).contains(new Object[] {ns}); + } + String l2ns = "l2ns"; + // create a nested namespace + sql("CREATE NAMESPACE %s.%s", l1NS[0], l2ns); + // spark show namespace only shows + namespaces = sql("SHOW NAMESPACES"); + assertThat(namespaces.size()).isEqualTo(2); + + // can not drop l1NS before the nested namespace is dropped + assertThatThrownBy(() -> sql("DROP NAMESPACE %s", l1NS[0])) + .hasMessageContaining(String.format("Namespace %s is not empty", l1NS[0])); + sql("DROP NAMESPACE %s.%s", l1NS[0], l2ns); + + for (String ns : l1NS) { + sql("DROP NAMESPACE %s", ns); + } + + // no namespace available after all drop + namespaces = sql("SHOW NAMESPACES"); + assertThat(namespaces.size()).isEqualTo(0); + } + + @Test + public void testCreatDropView() { + String namespace = generateName("ns"); + // create namespace ns + sql("CREATE NAMESPACE %s", namespace); + sql("USE %s", namespace); + + // create two views under the namespace + String view1Name = "testView1"; + String view2Name = "testView2"; + sql("CREATE VIEW %s AS SELECT 1 AS id", view1Name); + sql("CREATE VIEW %s AS SELECT 10 AS id", view2Name); + List views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(2); + assertThat(views).contains(new Object[] {namespace, view1Name, false}); + assertThat(views).contains(new Object[] {namespace, view2Name, false}); + + // drop the views + sql("DROP VIEW %s", view1Name); + views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(1); + assertThat(views).contains(new Object[] {namespace, view2Name, false}); + + sql("DROP VIEW %s", view2Name); + views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(0); + + sql("DROP NAMESPACE %s", namespace); + } + + @Test + public void renameIcebergViewAndTable() { + String namespace = generateName("ns"); + sql("CREATE NAMESPACE %s", namespace); + sql("USE %s", namespace); + + // create one view and one table + String viewName = "originalView"; + sql("CREATE VIEW %s AS SELECT 1 AS id", viewName); + + String icebergTable = "iceberg_table"; + sql("CREATE TABLE %s (col1 int, col2 string)", icebergTable); + + // verify view and table is showing correctly + List views = sql("SHOW VIEWS"); + 
assertThat(views.size()).isEqualTo(1); + assertThat(views).contains(new Object[] {namespace, viewName, false}); + + List tables = sql("SHOW TABLES"); + assertThat(tables.size()).isEqualTo(1); + assertThat(tables).contains(new Object[] {namespace, icebergTable, false}); + + // rename the view + String renamedView = "renamedView"; + sql("ALTER VIEW %s RENAME TO %s", viewName, renamedView); + views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(1); + assertThat(views).contains(new Object[] {namespace, renamedView, false}); + + // rename the table + String newIcebergTable = "iceberg_table_new"; + sql("ALTER TABLE %s RENAME TO %s", icebergTable, newIcebergTable); + tables = sql("SHOW TABLES"); + assertThat(tables.size()).isEqualTo(1); + assertThat(tables).contains(new Object[] {namespace, newIcebergTable, false}); + + // clean up the resources + sql("DROP VIEW %s", renamedView); + sql("DROP TABLE %s", newIcebergTable); + sql("DROP NAMESPACE %s", namespace); + } + + @Test + public void testMixedTableAndViews(@TempDir Path tempDir) { + String namespace = generateName("ns"); + sql("CREATE NAMESPACE %s", namespace); + sql("USE %s", namespace); + + // create one iceberg table, iceberg view and one delta table + String icebergTable = "icebergtb"; + sql("CREATE TABLE %s (col1 int, col2 String)", icebergTable); + sql("INSERT INTO %s VALUES (1, 'a'), (2, 'b')", icebergTable); + + String viewName = "icebergview"; + sql("CREATE VIEW %s AS SELECT col1 + 2 AS col1, col2 FROM %s", viewName, icebergTable); + + String deltaTable = "deltatb"; + String deltaDir = + IntegrationTestsHelper.getTemporaryDirectory(tempDir).resolve(namespace).getPath(); + sql( + "CREATE TABLE %s (col1 int, col2 int) using delta location '%s/%s'", + deltaTable, deltaDir, deltaTable); + sql("INSERT INTO %s VALUES (1, 3), (2, 5), (11, 20)", deltaTable); + // join the iceberg and delta table + List joinResult = + sql( + "SELECT icebergtb.col1 as id, icebergtb.col2 as str_col, deltatb.col2 as int_col from icebergtb inner join deltatb on icebergtb.col1 = deltatb.col1 order by id"); + assertThat(joinResult.get(0)).isEqualTo(new Object[] {1, "a", 3}); + assertThat(joinResult.get(1)).isEqualTo(new Object[] {2, "b", 5}); + + // show tables shows all tables + List tables = sql("SHOW TABLES"); + assertThat(tables.size()).isEqualTo(2); + assertThat(tables) + .contains( + new Object[] {namespace, icebergTable, false}, + new Object[] {namespace, deltaTable, false}); + + // verify the table and view content + List results = sql("SELECT * FROM %s ORDER BY col1", icebergTable); + assertThat(results.size()).isEqualTo(2); + assertThat(results.get(0)).isEqualTo(new Object[] {1, "a"}); + assertThat(results.get(1)).isEqualTo(new Object[] {2, "b"}); + + // verify the table and view content + results = sql("SELECT * FROM %s ORDER BY col1", viewName); + assertThat(results.size()).isEqualTo(2); + assertThat(results.get(0)).isEqualTo(new Object[] {3, "a"}); + assertThat(results.get(1)).isEqualTo(new Object[] {4, "b"}); + + List views = sql("SHOW VIEWS"); + assertThat(views.size()).isEqualTo(1); + assertThat(views).contains(new Object[] {namespace, viewName, false}); + + // drop views and tables + sql("DROP TABLE %s", icebergTable); + sql("DROP TABLE %s", deltaTable); + sql("DROP VIEW %s", viewName); + sql("DROP NAMESPACE %s", namespace); + + // clean up delta directory + File dirToDelete = new File(deltaDir); + FileUtils.deleteQuietly(dirToDelete); + } +} diff --git 
a/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIntegrationBase.java b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIntegrationBase.java new file mode 100644 index 0000000000..7eda4f704a --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/java/org/apache/polaris/spark/quarkus/it/SparkIntegrationBase.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.quarkus.it; + +import com.google.common.collect.ImmutableList; +import com.google.errorprone.annotations.FormatMethod; +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.nio.file.Path; +import java.util.List; +import java.util.UUID; +import java.util.stream.Collectors; +import java.util.stream.IntStream; +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.filefilter.DirectoryFileFilter; +import org.apache.commons.io.filefilter.FalseFileFilter; +import org.apache.polaris.core.admin.model.AwsStorageConfigInfo; +import org.apache.polaris.core.admin.model.Catalog; +import org.apache.polaris.core.admin.model.CatalogProperties; +import org.apache.polaris.core.admin.model.PolarisCatalog; +import org.apache.polaris.core.admin.model.StorageConfigInfo; +import org.apache.polaris.service.it.env.ClientCredentials; +import org.apache.polaris.service.it.env.IntegrationTestsHelper; +import org.apache.polaris.service.it.env.ManagementApi; +import org.apache.polaris.service.it.env.PolarisApiEndpoints; +import org.apache.polaris.service.it.ext.PolarisIntegrationTestExtension; +import org.apache.polaris.service.it.ext.SparkSessionBuilder; +import org.apache.polaris.test.commons.s3mock.S3Mock; +import org.apache.spark.sql.Dataset; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.SparkSession; +import org.intellij.lang.annotations.Language; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.io.TempDir; +import org.slf4j.LoggerFactory; + +@ExtendWith(PolarisIntegrationTestExtension.class) +public abstract class SparkIntegrationBase { + protected static final S3Mock s3Container = new S3Mock(); + protected static SparkSession spark; + protected PolarisApiEndpoints endpoints; + protected PolarisManagementClient client; + protected ManagementApi managementApi; + protected String catalogName; + protected String sparkToken; + + protected URI warehouseDir; + + @BeforeAll + public static void setup() throws IOException { + s3Container.start(); + } + + @AfterAll + public static void cleanup() { + 
s3Container.stop(); + } + + @BeforeEach + public void before( + PolarisApiEndpoints apiEndpoints, ClientCredentials credentials, @TempDir Path tempDir) { + endpoints = apiEndpoints; + client = PolarisManagementClient.managementClient(endpoints); + sparkToken = client.obtainToken(credentials); + managementApi = client.managementApi(credentials); + + warehouseDir = IntegrationTestsHelper.getTemporaryDirectory(tempDir).resolve("spark-warehouse"); + + catalogName = client.newEntityName("spark_catalog"); + + AwsStorageConfigInfo awsConfigModel = + AwsStorageConfigInfo.builder() + .setRoleArn("arn:aws:iam::123456789012:role/my-role") + .setExternalId("externalId") + .setUserArn("userArn") + .setStorageType(StorageConfigInfo.StorageTypeEnum.S3) + .setAllowedLocations(List.of("s3://my-old-bucket/path/to/data")) + .build(); + CatalogProperties props = new CatalogProperties("s3://my-bucket/path/to/data"); + props.putAll(s3Container.getS3ConfigProperties()); + props.put("polaris.config.drop-with-purge.enabled", "true"); + props.put("polaris.config.namespace-custom-location.enabled", "true"); + Catalog catalog = + PolarisCatalog.builder() + .setType(Catalog.TypeEnum.INTERNAL) + .setName(catalogName) + .setProperties(props) + .setStorageConfigInfo(awsConfigModel) + .build(); + + managementApi.createCatalog(catalog); + + spark = buildSparkSession(); + + onSpark("USE " + catalogName); + } + + protected SparkSession buildSparkSession() { + return SparkSessionBuilder.buildWithTestDefaults() + .withExtensions( + "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension") + .withConfig( + "spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") + .withWarehouse(warehouseDir) + .addCatalog(catalogName, "org.apache.polaris.spark.SparkCatalog", endpoints, sparkToken) + .getOrCreate(); + } + + @AfterEach + public void after() throws Exception { + cleanupCatalog(catalogName); + try { + SparkSession.clearDefaultSession(); + SparkSession.clearActiveSession(); + spark.close(); + } catch (Exception e) { + LoggerFactory.getLogger(getClass()).error("Unable to close spark session", e); + } + + client.close(); + } + + protected void cleanupCatalog(String catalogName) { + onSpark("USE " + catalogName); + List namespaces = onSpark("SHOW NAMESPACES").collectAsList(); + for (Row namespace : namespaces) { + List tables = onSpark("SHOW TABLES IN " + namespace.getString(0)).collectAsList(); + for (Row table : tables) { + onSpark("DROP TABLE " + namespace.getString(0) + "." + table.getString(1)); + } + List views = onSpark("SHOW VIEWS IN " + namespace.getString(0)).collectAsList(); + for (Row view : views) { + onSpark("DROP VIEW " + namespace.getString(0) + "." + view.getString(1)); + } + onSpark("DROP NAMESPACE " + namespace.getString(0)); + } + + managementApi.deleteCatalog(catalogName); + } + + @FormatMethod + protected List sql(String query, Object... 
args) { + List rows = spark.sql(String.format(query, args)).collectAsList(); + if (rows.isEmpty()) { + return ImmutableList.of(); + } + return rowsToJava(rows); + } + + protected List rowsToJava(List rows) { + return rows.stream().map(this::toJava).collect(Collectors.toList()); + } + + private Object[] toJava(Row row) { + return IntStream.range(0, row.size()) + .mapToObj( + pos -> { + if (row.isNullAt(pos)) { + return null; + } + + Object value = row.get(pos); + if (value instanceof Row valueRow) { + return toJava(valueRow); + } else if (value instanceof scala.collection.Seq) { + return row.getList(pos); + } else if (value instanceof scala.collection.Map) { + return row.getJavaMap(pos); + } else { + return value; + } + }) + .toArray(Object[]::new); + } + + /** List the name of directories under a given path non-recursively. */ + protected List listDirs(String path) { + File directory = new File(path); + return FileUtils.listFilesAndDirs( + directory, FalseFileFilter.INSTANCE, DirectoryFileFilter.DIRECTORY) + .stream() + .map(File::getName) + .toList(); + } + + /** Generate a string name with given prefix and a random suffix */ + protected String generateName(String prefix) { + return prefix + "_" + UUID.randomUUID().toString().replaceAll("-", ""); + } + + protected static Dataset onSpark(@Language("SQL") String sql) { + return spark.sql(sql); + } +} diff --git a/plugins/spark/v4.0/integration/src/intTest/resources/META-INF/services/org.apache.polaris.service.it.ext.PolarisServerManager b/plugins/spark/v4.0/integration/src/intTest/resources/META-INF/services/org.apache.polaris.service.it.ext.PolarisServerManager new file mode 100644 index 0000000000..b3dd7d7c06 --- /dev/null +++ b/plugins/spark/v4.0/integration/src/intTest/resources/META-INF/services/org.apache.polaris.service.it.ext.PolarisServerManager @@ -0,0 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +org.apache.polaris.service.it.ServerManager diff --git a/plugins/spark/v4.0/regtests/Dockerfile b/plugins/spark/v4.0/regtests/Dockerfile new file mode 100644 index 0000000000..f095aa0349 --- /dev/null +++ b/plugins/spark/v4.0/regtests/Dockerfile @@ -0,0 +1,49 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+FROM docker.io/apache/spark:4.0.1-scala2.13-java17-ubuntu
+ARG POLARIS_HOST=polaris
+ENV POLARIS_HOST=$POLARIS_HOST
+ENV SPARK_HOME=/opt/spark
+ENV CURRENT_SCALA_VERSION='2.13'
+ENV LANGUAGE='en_US:en'
+ENV HOME=/home/spark
+
+USER root
+RUN apt update
+RUN apt-get install -y diffutils wget curl
+RUN mkdir -p /home/spark && \
+    chown -R spark /home/spark && \
+    mkdir -p /tmp/polaris-regtests && \
+    chown -R spark /tmp/polaris-regtests
+RUN mkdir /opt/spark/conf && chmod -R 777 /opt/spark/conf
+
+USER spark
+
+WORKDIR /home/spark/polaris
+
+COPY --chown=spark ./v4.0 /home/spark/polaris/v4.0
+
+# /home/spark/regtests might not be writable in all situations, see https://github.com/apache/polaris/pull/205
+USER root
+RUN chmod -R go+rwx /home/spark/polaris
+RUN chmod -R 777 ./v4.0/regtests
+USER spark
+
+ENTRYPOINT ["./v4.0/regtests/run.sh"]
diff --git a/plugins/spark/v4.0/regtests/README.md b/plugins/spark/v4.0/regtests/README.md
new file mode 100644
index 0000000000..6587b8ccdb
--- /dev/null
+++ b/plugins/spark/v4.0/regtests/README.md
@@ -0,0 +1,83 @@
+
+
+# End-to-end regression tests
+
+regtests provides basic end-to-end tests for spark_sql using the Spark client jars.
+
+Regression tests are either run in Docker, using docker-compose to orchestrate the tests, or
+locally.
+
+**NOTE** regtests are intended as lightweight tests that ensure the jars can be used to start
+Spark and run basic SQL commands. Please use the integration tests for detailed testing.
+
+## Prerequisites
+
+It is recommended to clean the `regtests/output` directory before running tests. This can be done by
+running:
+
+```shell
+rm -rf ./plugins/spark/v4.0/regtests/output && mkdir -p ./plugins/spark/v4.0/regtests/output && chmod -R 777 ./plugins/spark/v4.0/regtests/output
+```
+
+## Run Tests With Docker Compose
+
+Tests can be run with docker-compose using the provided `./plugins/spark/v4.0/regtests/docker-compose.yml` file, as
+follows:
+
+```shell
+./gradlew assemble publishToMavenLocal
+./gradlew \
+  :polaris-server:assemble \
+  :polaris-server:quarkusAppPartsBuild --rerun \
+  -Dquarkus.container-image.build=true
+docker compose -f ./plugins/spark/v4.0/regtests/docker-compose.yml up --build --exit-code-from regtest
+```
+
+In this setup, a Polaris container will be started in a docker-compose group, using the image
+previously built by the Gradle build. Then another container, including a Spark SQL shell, will run
+the tests. The exit code will be the same as the exit code of the Spark container.
+
+This is the flow used in CI and should be done locally before pushing to GitHub to ensure that no
+environmental factors contribute to the outcome of the tests.
+
+**Important**: if you are also using minikube, for example to test the Helm chart, you may need to
+_unset_ the Docker environment that was pointing to the Minikube Docker daemon, otherwise the image
+will be built by the Minikube Docker daemon and will not be available to the local Docker daemon.
+This can be done by running, _before_ building the image and running the tests:
+
+```shell
+eval $(minikube -p minikube docker-env --unset)
+```
+
+## Run Tests Locally
+
+Regression tests can be run locally as well, using the test harness.
+
+To run regression tests locally, run the following:
+- `./gradlew assemble publishToMavenLocal` -- build the Polaris project and Spark client jars, and publish them to the local Maven repository.
+- `./gradlew run` -- start a Polaris server on localhost:8181.
+- `env POLARIS_HOST=localhost ./plugins/spark/v4.0/regtests/run.sh` -- run regtests.
+
+Note: the regression tests expect Polaris to run with certain options, e.g. with support for `FILE`
+storage, default realm `POLARIS` and root credentials `root:s3cr3t`; if you run the above command,
+this will be the case. If you run Polaris in a different way, make sure that Polaris is configured
+appropriately.
diff --git a/plugins/spark/v4.0/regtests/docker-compose.yml b/plugins/spark/v4.0/regtests/docker-compose.yml
new file mode 100644
index 0000000000..32381d1408
--- /dev/null
+++ b/plugins/spark/v4.0/regtests/docker-compose.yml
@@ -0,0 +1,50 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+services:
+  polaris:
+    image: apache/polaris:latest
+    ports:
+      - "8181"
+      - "8182"
+    environment:
+      AWS_REGION: us-west-2
+      POLARIS_BOOTSTRAP_CREDENTIALS: POLARIS,root,s3cr3t
+      quarkus.log.file.enable: "false"
+      quarkus.otel.sdk.disabled: "true"
+      polaris.features."ALLOW_INSECURE_STORAGE_TYPES": "true"
+      polaris.features."SUPPORTED_CATALOG_STORAGE_TYPES": "[\"FILE\",\"S3\",\"GCS\",\"AZURE\"]"
+      polaris.readiness.ignore-severe-issues: "true"
+    healthcheck:
+      test: ["CMD", "curl", "http://localhost:8182/q/health"]
+      interval: 10s
+      timeout: 10s
+      retries: 5
+  regtest:
+    build:
+      context: ../..
+      dockerfile: v4.0/regtests/Dockerfile
+      args:
+        POLARIS_HOST: polaris
+    depends_on:
+      polaris:
+        condition: service_healthy
+    volumes:
+      - ~/.m2:/home/spark/.m2
+      - ./output:/tmp/polaris-regtests/
diff --git a/plugins/spark/v4.0/regtests/run.sh b/plugins/spark/v4.0/regtests/run.sh
new file mode 100755
index 0000000000..184145757b
--- /dev/null
+++ b/plugins/spark/v4.0/regtests/run.sh
@@ -0,0 +1,155 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# Run without args to run all tests. +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +SPARK_ROOT_DIR=$(dirname ${SCRIPT_DIR}) +export SPARK_LOCAL_HOSTNAME=localhost # avoid VPN messing up driver local IP address binding + +FMT_RED='\033[0;31m' +FMT_GREEN='\033[0;32m' +FMT_NC='\033[0m' + +function loginfo() { + echo "$(date): ${@}" +} +function loggreen() { + echo -e "${FMT_GREEN}$(date): ${@}${FMT_NC}" +} +function logred() { + echo -e "${FMT_RED}$(date): ${@}${FMT_NC}" +} + +# Allow bearer token to be provided if desired +if [[ -z "$REGTEST_ROOT_BEARER_TOKEN" ]]; then + if ! output=$(curl -X POST -H "Polaris-Realm: POLARIS" "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \ + -d "grant_type=client_credentials" \ + -d "client_id=root" \ + -d "client_secret=s3cr3t" \ + -d "scope=PRINCIPAL_ROLE:ALL"); then + logred "Error: Failed to retrieve bearer token" + exit 1 + fi + + token=$(echo "$output" | awk -F\" '{print $4}') + + if [ "$token" == "unauthorized_client" ]; then + logred "Error: Failed to retrieve bearer token" + exit 1 + fi + + export REGTEST_ROOT_BEARER_TOKEN=$token +fi + +echo "Root bearer token: ${REGTEST_ROOT_BEARER_TOKEN}" + +NUM_FAILURES=0 + +SCALA_VERSIONS=("2.12" "2.13") +if [[ -n "$CURRENT_SCALA_VERSION" ]]; then + SCALA_VERSIONS=("${CURRENT_SCALA_VERSION}") +fi +SPARK_MAJOR_VERSION="4.0" +SPARK_VERSION="4.0.1" + +SPARK_SHELL_OPTIONS=("PACKAGE" "JAR") + +for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do + echo "RUN REGRESSION TEST FOR SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION}, SPARK_VERSION=${SPARK_VERSION}, SCALA_VERSION=${SCALA_VERSION}" + # find the project jar + SPARK_DIR=${SPARK_ROOT_DIR}/spark + JAR_PATH=$(find ${SPARK_DIR} -name "polaris-spark-${SPARK_MAJOR_VERSION}_${SCALA_VERSION}-*.*-bundle.jar" -print -quit) + echo "find jar ${JAR_PATH}" + + # extract the polaris + JAR_NAME=$(basename "$JAR_PATH") + echo "JAR_NAME=${JAR_NAME}" + POLARIS_VERSION=$(echo "$JAR_NAME" | sed -n 's/.*-\([0-9][^-]*.*\)-bundle\.jar/\1/p') + echo "$POLARIS_VERSION" + + SPARK_EXISTS="TRUE" + if [ -z "${SPARK_HOME}" ]; then + SPARK_EXISTS="FALSE" + fi + + for SPARK_SHELL_OPTION in "${SPARK_SHELL_OPTIONS[@]}"; do + # clean up the default configuration if exists + if [ -f "${SPARK_HOME}" ]; then + SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf" + if [ -f ${SPARK_CONF} ]; then + rm ${SPARK_CONF} + fi + fi + + if [ "${SPARK_SHELL_OPTION}" == "PACKAGE" ]; then + # run the setup without jar configuration + source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} + else + source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --jar ${JAR_PATH} + fi + + # run the spark_sql test + loginfo "Starting test spark_sql.sh" + + TEST_FILE="spark_sql.sh" + TEST_SHORTNAME="spark_sql" + TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}" + TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr" + TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout" + + 
mkdir -p ${TEST_TMPDIR} + if (( ${VERBOSE} )); then + ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee ${TEST_STDOUT} + else + ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' > ${TEST_STDOUT} + fi + loginfo "Test run concluded for ${TEST_SUITE}:${TEST_SHORTNAME}" + + TEST_REF="$(realpath ${SCRIPT_DIR})/${TEST_SHORTNAME}.ref" + if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then + loggreen "Test SUCCEEDED: ${TEST_SUITE}:${TEST_SHORTNAME}" + else + logred "Test FAILED: ${TEST_SUITE}:${TEST_SHORTNAME}" + echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh + echo "meld ${TEST_STDOUT} ${TEST_REF}" >> ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh + chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh + logred "To compare and fix diffs (if 'meld' installed): ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh" + logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}" + logred "See stderr from test run for additional diagnostics: ${TEST_STDERR}" + diff ${TEST_STDOUT} ${TEST_REF} + NUM_FAILURES=$(( NUM_FAILURES + 1 )) + fi + done + + # clean up + if [ "${SPARK_EXISTS}" = "FALSE" ]; then + rm -rf ${SPARK_HOME} + export SPARK_HOME="" + fi +done + +# clean the output dir +rm -rf ${SCRIPT_DIR}/output + +loginfo "Tests completed with ${NUM_FAILURES} failures" +if (( ${NUM_FAILURES} > 0 )); then + exit 1 +else + exit 0 +fi diff --git a/plugins/spark/v4.0/regtests/setup.sh b/plugins/spark/v4.0/regtests/setup.sh new file mode 100755 index 0000000000..6564809cb1 --- /dev/null +++ b/plugins/spark/v4.0/regtests/setup.sh @@ -0,0 +1,194 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +################################### +# Idempotent setup for spark regression tests. Run manually or let run.sh auto-run. 
+# +# Warning - first time setup may download large amounts of files +# Warning - may clobber conf/spark-defaults.conf +# Warning - it will set the SPARK_HOME environment variable with the spark setup +# +# The script can be called independently like following +# ./setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --jar ${JAR_PATH} +# Required Parameters: +# --sparkVersion : the spark version to setup +# --scalaVersion : the scala version of spark to setup +# --jar : path to the local Polaris Spark client jar +# + +set -x + +# Fix HOME directory for Ivy cache (Apache Spark Docker image sets HOME to /nonexistent) +export HOME=/home/spark + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +SPARK_VERSION=4.0.1 +SCALA_VERSION=2.12 +POLARIS_CLIENT_JAR="" +POLARIS_VERSION="" +while [[ $# -gt 0 ]]; do + case "$1" in + --sparkVersion) + SPARK_VERSION="$2" + shift # past argument + shift # past value + ;; + --scalaVersion) + SCALA_VERSION="$2" + shift # past argument + shift # past value + ;; + --polarisVersion) + POLARIS_VERSION="$2" + shift # past argument + shift # past value + ;; + --jar) + POLARIS_CLIENT_JAR="$2" + shift # past argument + shift # past value + ;; + --) shift; + break + ;; + esac +done + +echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION} POLARIS_VERSION=${POLARIS_VERSION} POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR}" + +if [ "$SCALA_VERSION" == "2.12" ]; then + SPARK_DISTRIBUTION=spark-${SPARK_VERSION}-bin-hadoop3 +else + SPARK_DISTRIBUTION=spark-${SPARK_VERSION}-bin-hadoop3-scala${SCALA_VERSION} +fi + +echo "Getting spark distribution ${SPARK_DISTRIBUTION}" + +if [ -z "${SPARK_HOME}" ]; then + SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION}) +fi +SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf" +DERBY_HOME="/tmp/derby" + +# Ensure binaries are downloaded locally +echo 'Verifying Spark binaries...' +if ! [ -f ${SPARK_HOME}/bin/spark-sql ]; then + echo 'Setting up Spark...' + if [ -z "${SPARK_VERSION}" ] || [ -z "${SPARK_DISTRIBUTION}" ]; then + echo 'SPARK_VERSION or SPARK_DISTRIBUTION not set. Please set SPARK_VERSION and SPARK_DISTRIBUTION to the desired version.' + exit 1 + fi + if ! [ -f ~/${SPARK_DISTRIBUTION}.tgz ]; then + echo 'Downloading spark distro...' + wget -O ~/${SPARK_DISTRIBUTION}.tgz https://www.apache.org/dyn/closer.lua/spark/spark-${SPARK_VERSION}/${SPARK_DISTRIBUTION}.tgz?action=download + if ! [ -f ~/${SPARK_DISTRIBUTION}.tgz ]; then + if [[ "${OSTYPE}" == "darwin"* ]]; then + echo "Detected OS: mac. Running 'brew install wget' to try again." + brew install wget + wget -O ~/${SPARK_DISTRIBUTION}.tgz https://www.apache.org/dyn/closer.lua/spark/spark-${SPARK_VERSION}/${SPARK_DISTRIBUTION}.tgz?action=download + fi + fi + else + echo 'Found existing Spark tarball' + fi + # check if the download was successful + if ! [ -f ~/${SPARK_DISTRIBUTION}.tgz ]; then + echo 'Failed to download Spark distribution. Please check the logs.' + exit 1 + fi + tar xzvf ~/${SPARK_DISTRIBUTION}.tgz -C ~/${TEST_ROOT_DIR} + if [ $? -ne 0 ]; then + echo 'Failed to extract Spark distribution. Please check the logs.' + exit 1 + else + echo 'Extracted Spark distribution.' + rm ~/${SPARK_DISTRIBUTION}.tgz + fi + SPARK_HOME=$(realpath ~/${SPARK_DISTRIBUTION}) + SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf" +else + echo 'Verified Spark distro already installed.' 
+fi + +echo "SPARK_HOME=${SPARK_HOME}" +echo "SPARK_CONF=${SPARK_CONF}" + +# Ensure Spark boilerplate conf is set +echo 'Verifying Spark conf...' +if grep 'POLARIS_TESTCONF_V5' ${SPARK_CONF} 2>/dev/null; then + echo 'Verified spark conf' +else + echo 'Setting spark conf...' + # Instead of clobbering existing spark conf, just comment it all out in case it was customized carefully. + sed -i 's/^/# /' ${SPARK_CONF} + +# If POLARIS_CLIENT_JAR is provided, set the spark conf to use the jars configuration. +# Otherwise use the packages setting +if [[ -z "$POLARIS_CLIENT_JAR" ]]; then + cat << EOF >> ${SPARK_CONF} +# POLARIS Spark client test conf +spark.jars.packages org.apache.polaris:polaris-spark-4.0_$SCALA_VERSION:$POLARIS_VERSION,io.delta:delta-spark_${SCALA_VERSION}:3.2.1 +EOF +else + cat << EOF >> ${SPARK_CONF} +# POLARIS Spark client test conf +spark.jars $POLARIS_CLIENT_JAR +spark.jars.packages io.delta:delta-spark_${SCALA_VERSION}:3.2.1 +EOF +fi + +cat << EOF >> ${SPARK_CONF} + +spark.sql.variable.substitute true + +spark.driver.extraJavaOptions -Dderby.system.home=${DERBY_HOME} + +# Set Ivy cache directory to a writable location +spark.jars.ivy /home/spark/.ivy2 + +spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions,io.delta.sql.DeltaSparkSessionExtension +# this configuration is needed for delta table +spark.sql.catalog.spark_catalog=org.apache.spark.sql.delta.catalog.DeltaCatalog +spark.sql.catalog.polaris=org.apache.polaris.spark.SparkCatalog +spark.sql.catalog.polaris.uri=http://${POLARIS_HOST:-localhost}:8181/api/catalog +# this configuration is currently only used for iceberg tables, generic tables currently +# don't support credential vending +spark.sql.catalog.polaris.header.X-Iceberg-Access-Delegation=vended-credentials +spark.sql.catalog.polaris.client.region=us-west-2 +# configuration required to ensure DataSourceV2 load works correctly for +# different table formats +spark.sql.sources.useV1SourceList='' +EOF + echo 'Success!' +fi + +# cleanup derby home if existed +if [ -d "${DERBY_HOME}" ]; then + echo "Directory ${DERBY_HOME} exists. Deleting it..." + rm -rf "${DERBY_HOME}" +fi + +echo "Launch spark-sql at ${SPARK_HOME}/bin/spark-sql" +# bootstrap dependencies so that future queries don't need to wait for the downloads. 
+# this is mostly useful for building the Docker image with all needed dependencies +${SPARK_HOME}/bin/spark-sql -e "SELECT 1" + +# ensure SPARK_HOME is setup for later tests +export SPARK_HOME=$SPARK_HOME diff --git a/plugins/spark/v4.0/regtests/spark_sql.ref b/plugins/spark/v4.0/regtests/spark_sql.ref new file mode 100644 index 0000000000..7d9c3efa48 --- /dev/null +++ b/plugins/spark/v4.0/regtests/spark_sql.ref @@ -0,0 +1,57 @@ +{"defaults":{"default-base-location":"file:///tmp/spark_catalog"},"overrides":{"prefix":"spark_sql_catalog"},"endpoints":["GET /v1/{prefix}/namespaces","GET /v1/{prefix}/namespaces/{namespace}","HEAD /v1/{prefix}/namespaces/{namespace}","POST /v1/{prefix}/namespaces","POST /v1/{prefix}/namespaces/{namespace}/properties","DELETE /v1/{prefix}/namespaces/{namespace}","GET /v1/{prefix}/namespaces/{namespace}/tables","GET /v1/{prefix}/namespaces/{namespace}/tables/{table}","HEAD /v1/{prefix}/namespaces/{namespace}/tables/{table}","POST /v1/{prefix}/namespaces/{namespace}/tables","POST /v1/{prefix}/namespaces/{namespace}/tables/{table}","DELETE /v1/{prefix}/namespaces/{namespace}/tables/{table}","POST /v1/{prefix}/tables/rename","POST /v1/{prefix}/namespaces/{namespace}/register","POST /v1/{prefix}/namespaces/{namespace}/tables/{table}/metrics","POST /v1/{prefix}/transactions/commit","GET /v1/{prefix}/namespaces/{namespace}/views","GET /v1/{prefix}/namespaces/{namespace}/views/{view}","HEAD /v1/{prefix}/namespaces/{namespace}/views/{view}","POST /v1/{prefix}/namespaces/{namespace}/views","POST /v1/{prefix}/namespaces/{namespace}/views/{view}","DELETE /v1/{prefix}/namespaces/{namespace}/views/{view}","POST /v1/{prefix}/views/rename","GET polaris/v1/{prefix}/namespaces/{namespace}/generic-tables","POST polaris/v1/{prefix}/namespaces/{namespace}/generic-tables","DELETE polaris/v1/{prefix}/namespaces/{namespace}/generic-tables/{generic-table}","GET polaris/v1/{prefix}/namespaces/{namespace}/generic-tables/{generic-table}","GET /polaris/v1/{prefix}/namespaces/{namespace}/policies","POST /polaris/v1/{prefix}/namespaces/{namespace}/policies","GET /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","PUT /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","DELETE /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}","PUT /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}/mappings","POST /polaris/v1/{prefix}/namespaces/{namespace}/policies/{policy-name}/mappings","GET /polaris/v1/{prefix}/applicable-policies"]} +Catalog created +spark-sql ()> use polaris; +spark-sql ()> create namespace db1; +spark-sql ()> create namespace db2; +spark-sql ()> show namespaces; +db1 +db2 +spark-sql ()> + > create namespace db1.schema1; +spark-sql ()> show namespaces in db1; +db1.schema1 +spark-sql ()> + > create table db1.schema1.iceberg_tb (col1 int); +spark-sql ()> show tables in db1; +spark-sql ()> show tables in db1.schema1; +iceberg_tb +spark-sql ()> + > use db1.schema1; +spark-sql (db1.schema1)> insert into iceberg_tb values (123), (234), (111); +spark-sql (db1.schema1)> select * from iceberg_tb order by col1; +111 +123 +234 +spark-sql (db1.schema1)> + > create table delta_tb1(col1 string) using delta location 'file:///tmp/spark_catalog/delta_tb1'; +spark-sql (db1.schema1)> insert into delta_tb1 values ('ab'), ('bb'), ('dd'); +spark-sql (db1.schema1)> select * from delta_tb1 order by col1; +ab +bb +dd +spark-sql (db1.schema1)> + > show tables; +iceberg_tb +delta_tb1 +spark-sql (db1.schema1)> + > use db1; +spark-sql (db1)> create 
table delta_tb2(col1 int) using delta location 'file:///tmp/spark_catalog/delta_tb2'; +spark-sql (db1)> insert into delta_tb2 values (1), (2), (3) order by col1; +spark-sql (db1)> select * from delta_tb2; +1 +2 +3 +spark-sql (db1)> + > show tables; +delta_tb2 +spark-sql (db1)> show tables in db1.schema1; +iceberg_tb +delta_tb1 +spark-sql (db1)> + > drop table db1.schema1.iceberg_tb; +spark-sql (db1)> drop table db1.schema1.delta_tb1; +spark-sql (db1)> drop namespace db1.schema1; +spark-sql (db1)> drop table delta_tb2; +spark-sql (db1)> drop namespace db1; +spark-sql (db1)> drop namespace db2; +spark-sql (db1)> diff --git a/plugins/spark/v4.0/regtests/spark_sql.sh b/plugins/spark/v4.0/regtests/spark_sql.sh new file mode 100755 index 0000000000..fe036664cd --- /dev/null +++ b/plugins/spark/v4.0/regtests/spark_sql.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN}" + +CATALOG_NAME="spark_sql_catalog" +curl -i -X POST -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs \ + -d '{"name": "spark_sql_catalog", "id": 100, "type": "INTERNAL", "readOnly": false, "properties": {"default-base-location": "file:///tmp/spark_catalog"}, "storageConfigInfo": {"storageType": "FILE", "allowedLocations": ["file:///tmp"]}}' > /dev/stderr + +# Add TABLE_WRITE_DATA to the catalog's catalog_admin role since by default it can only manage access and metadata +curl -i -X PUT -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/${CATALOG_NAME}/catalog-roles/catalog_admin/grants \ + -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' > /dev/stderr + +curl -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/config?warehouse=${CATALOG_NAME}" +echo +echo "Catalog created" +cat << EOF | ${SPARK_HOME}/bin/spark-sql -S --conf spark.sql.catalog.polaris.token="${SPARK_BEARER_TOKEN}" --conf spark.sql.catalog.polaris.warehouse=${CATALOG_NAME} +use polaris; +create namespace db1; +create namespace db2; +show namespaces; + +create namespace db1.schema1; +show namespaces in db1; + +create table db1.schema1.iceberg_tb (col1 int); +show tables in db1; +show tables in db1.schema1; + +use db1.schema1; +insert into iceberg_tb values (123), (234), (111); +select * from iceberg_tb order by col1; + +create table delta_tb1(col1 string) using delta location 'file:///tmp/spark_catalog/delta_tb1'; +insert into delta_tb1 values ('ab'), 
('bb'), ('dd'); +select * from delta_tb1 order by col1; + +show tables; + +use db1; +create table delta_tb2(col1 int) using delta location 'file:///tmp/spark_catalog/delta_tb2'; +insert into delta_tb2 values (1), (2), (3) order by col1; +select * from delta_tb2; + +show tables; +show tables in db1.schema1; + +drop table db1.schema1.iceberg_tb; +drop table db1.schema1.delta_tb1; +drop namespace db1.schema1; +drop table delta_tb2; +drop namespace db1; +drop namespace db2; +EOF + +# clean up the spark_catalog dir +rm -rf /tmp/spark_catalog/ + +curl -i -X DELETE -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" -H 'Accept: application/json' -H 'Content-Type: application/json' \ + http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/${CATALOG_NAME} > /dev/stderr diff --git a/plugins/spark/v4.0/spark/BUNDLE-LICENSE b/plugins/spark/v4.0/spark/BUNDLE-LICENSE new file mode 100644 index 0000000000..05c71437e3 --- /dev/null +++ b/plugins/spark/v4.0/spark/BUNDLE-LICENSE @@ -0,0 +1,583 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +This product includes code from Apache Iceberg. + +* plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisRESTCatalog.java +* plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/SparkCatalog.java + +Copyright: 2017-2025 The Apache Software Foundation +Home page: https://iceberg.apache.org +License: https://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache Iceberg. + +Copyright: 2017-2025 The Apache Software Foundation +Project URL: https://iceberg.apache.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache Spark. + +Copyright: 2014 and onwards The Apache Software Foundation +Project URL: https://spark.apache.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Guava. + +Copyright: 2006-2020 The Guava Authors +Project URL: https://github.com/google/guava +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains jspecify. + +Copyright: Google LLC - SpotBugs Team +Project URL: https://github.com/jspecify/jspecify +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache Datasketches. + +Copyright: 2020 The Apache Software Foundation + 2015-2018 Yahoo + 2019 Verizon Media +Project URL: https://datasketches.apache.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache Parquet. 
+ +Copyright: 2014-2024 The Apache Software Foundation +Project URL: https://parquet.apache.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains FastUtil. + +Copyright: 2002-2014 Sebastiano Vigna +Project URL: http://fastutil.di.unimi.it/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache ORC. + +Copyright: 2013 and onwards The Apache Software Foundation. +Project URL: https://orc.apache.org +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache Arrow. + +Copyright: 2016-2025 The Apache Software Foundation +Project URL: https://arrow.apache.org +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Caffeine by Ben Manes. + +Copyright: 2014-2019 Ben Manes and contributors +Project URL: https://github.com/ben-manes/caffeine +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains RoaringBitmap. + +Copyright: (c) 2013-... the RoaringBitmap authors +Project URL: https://github.com/RoaringBitmap/RoaringBitmap +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains failsafe. + +Copyright: Jonathan Halterman and friends +Project URL: https://failsafe.dev/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Project Nessie. + +Copyright: 2015-2025 Dremio Corporation +Project URL: https://projectnessie.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache Avro. + +Copyright: 2010-2019 The Apache Software Foundation +Project URL: https://avro.apache.org +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains the Jackson JSON processor. + +Copyright: 2007-2020 Tatu Saloranta and other contributors +Project URL: http://jackson.codehaus.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Airlift Aircompressor. + +Copyright: 2011-2020 Aircompressor authors. +Project URL: https://github.com/airlift/aircompressor +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache HttpComponents Client. + +Copyright: 1999-2022 The Apache Software Foundation. 
+Project URL: https://hc.apache.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Netty's buffer library. + +Copyright: 2014-2020 The Netty Project +Project URL: https://netty.io/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Apache Thrift. + +Copyright: 2006-2017 The Apache Software Foundation. +Project URL: https://thrift.apache.org/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Jetbrains Annotations. + +Copyright: 2000-2020 JetBrains s.r.o. +Project URL: https://github.com/JetBrains/java-annotations +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Google FlatBuffers. + +Copyright: 2013-2020 Google Inc. +Home page: https://google.github.io/flatbuffers/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This binary artifact contains ThreeTen. + +Copyright: 2007-present, Stephen Colebourne & Michael Nascimento Santos. +Project URL: https://www.threeten.org/threeten-extra/ +License: BSD 3-Clause +| All rights reserved. +| +| * Redistribution and use in source and binary forms, with or without +| modification, are permitted provided that the following conditions are met: +| +| * Redistributions of source code must retain the above copyright notice, +| this list of conditions and the following disclaimer. +| +| * Redistributions in binary form must reproduce the above copyright notice, +| this list of conditions and the following disclaimer in the documentation +| and/or other materials provided with the distribution. +| +| * Neither the name of JSR-310 nor the names of its contributors +| may be used to endorse or promote products derived from this software +| without specific prior written permission. +| +| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +| CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +| EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +| PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +| PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +| LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +| NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This binary artifact contains Eclipse Collections. + +Copyright: 2021 Goldman Sachs. 
+Project URL: https://github.com/eclipse-collections/eclipse-collections/ +License: EDL 1.0 - http://www.eclipse.org/org/documents/edl-v10.php + +-------------------------------------------------------------------------------- + +This binary artifact contains checkerframework checker-qual Annotations. + +Copyright: 2004-2019 the Checker Framework developers +Project URL: https://github.com/typetools/checker-framework +License: MIT License +| The annotations are licensed under the MIT License. (The text of this +| license appears below.) More specifically, all the parts of the Checker +| Framework that you might want to include with your own program use the +| MIT License. This is the checker-qual.jar file and all the files that +| appear in it: every file in a qual/ directory, plus utility files such +| as NullnessUtil.java, RegexUtil.java, SignednessUtil.java, etc. +| In addition, the cleanroom implementations of third-party annotations, +| which the Checker Framework recognizes as aliases for its own +| annotations, are licensed under the MIT License. +| +| Permission is hereby granted, free of charge, to any person obtaining a copy +| of this software and associated documentation files (the "Software"), to deal +| in the Software without restriction, including without limitation the rights +| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +| copies of the Software, and to permit persons to whom the Software is +| furnished to do so, subject to the following conditions: +| +| The above copyright notice and this permission notice shall be included in +| all copies or substantial portions of the Software. +| +| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +| THE SOFTWARE. + +-------------------------------------------------------------------------------- + +This binary artifact contains Google Error Prone Annotations. + +Copyright: Copyright 2011-2019 The Error Prone Authors +Project URL: https://github.com/google/error-prone +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Eclipse MicroProfile OpenAPI. + +Copyright: 2017 Contributors to the Eclipse Foundation +Project URL: https://github.com/microprofile/microprofile-open-api +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Jakarta Annotation. + +Project URL: https://projects.eclipse.org/projects/ee4j.ca +License: EPL 2.0 - https://www.eclipse.org/legal/epl-2.0 + +-------------------------------------------------------------------------------- + +This binary artifact contains Jakarta Validation. + +Project URL: https://beanvalidation.org +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Jakarta Servlet. 
+ +Project URL: https://projects.eclipse.org/projects/ee4j.servlet +License: EPL 2.0 - https://www.eclipse.org/legal/epl-2.0 + +-------------------------------------------------------------------------------- + +This binary artifact contains Micrometer. + +Copyright: 2017-Present VMware, Inc. All Rights Reserved. +Project URL: https://github.com/micrometer-metrics/micrometer +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Commons Compress. + +Copyright: 2002-2025 The Apache Software Foundation +Project URL: https://commons.apache.org/proper/commons-compress/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Commons Codec. + +Copyright: 2002-2025 The Apache Software Foundation +Project URL: https://commons.apache.org/proper/commons-codec/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains SLF4J. + +Copyright: 2004-2022 QOS.ch Sarl (Switzerland) +Project URL: http://www.slf4j.org +License: MIT License +| Copyright (c) 2004-2022 QOS.ch Sarl (Switzerland) +| All rights reserved. +| +| Permission is hereby granted, free of charge, to any person obtaining +| a copy of this software and associated documentation files (the +| "Software"), to deal in the Software without restriction, including +| without limitation the rights to use, copy, modify, merge, publish, +| distribute, sublicense, and/or sell copies of the Software, and to +| permit persons to whom the Software is furnished to do so, subject to +| the following conditions: +| +| The above copyright notice and this permission notice shall be +| included in all copies or substantial portions of the Software. +| +| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +| EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +| MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +| NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +| LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +| OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +| WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +This binary artifact contains j2objc. + +Project URL: https://github.com/google/j2objc/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Commons IO. + +Copyright: 2002-2025 The Apache Software Foundation +Project URL: https://commons.apache.org/proper/commons-io/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains Commons Lang3. + +Copyright: 2001-2025 The Apache Software Foundation +Project URL: https://commons.apache.org/proper/commons-lang/ +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- + +This binary artifact contains OpenHFT. 
+ +Copyright: 2014 Higher Frequency Trading http://www.higherfrequencytrading.com +Project URL: https://github.com/OpenHFT +License: Apache License 2.0 - https://www.apache.org/licenses/LICENSE-2.0.txt + +-------------------------------------------------------------------------------- diff --git a/plugins/spark/v4.0/spark/BUNDLE-NOTICE b/plugins/spark/v4.0/spark/BUNDLE-NOTICE new file mode 100644 index 0000000000..9138e5e894 --- /dev/null +++ b/plugins/spark/v4.0/spark/BUNDLE-NOTICE @@ -0,0 +1,483 @@ +Apache Polaris (incubating) +Copyright 2025 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +The initial code for the Polaris project was donated +to the ASF by Snowflake Inc. (https://www.snowflake.com/) copyright 2024. + +-------------------------------------------------------------------------------- + +This binary artifact includes Project Nessie with the following in its NOTICE +file: +| Nessie +| Copyright 2015-2025 Dremio Corporation +| +| --------------------------------------- +| This project includes code from Apache Polaris (incubating), with the following in its NOTICE file: +| +| | Apache Polaris (incubating) +| | Copyright 2024 The Apache Software Foundation +| | +| | This product includes software developed at +| | The Apache Software Foundation (http://www.apache.org/). +| | +| | The initial code for the Polaris project was donated +| | to the ASF by Snowflake Inc. (https://www.snowflake.com/) copyright 2024. + +-------------------------------------------------------------------------------- + +This binary artifact contains the Jackson JSON processor with the following in its NOTICE +file: +| # Jackson JSON processor +| +| Jackson is a high-performance, Free/Open Source JSON processing library. +| It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has +| been in development since 2007. +| It is currently developed by a community of developers. +| +| ## Copyright +| +| Copyright 2007-, Tatu Saloranta (tatu.saloranta@iki.fi) +| +| ## Licensing +| +| Jackson 2.x core and extension components are licensed under Apache License 2.0 +| To find the details that apply to this artifact see the accompanying LICENSE file. +| +| ## Credits +| +| A list of contributors may be found from CREDITS(-2.x) file, which is included +| in some artifacts (usually source distributions); but is always available +| from the source code management (SCM) system project uses. +| +| ## FastDoubleParser +| +| jackson-core bundles a shaded copy of FastDoubleParser . +| That code is available under an MIT license +| under the following copyright. +| +| Copyright © 2023 Werner Randelshofer, Switzerland. MIT License. +| +| See FastDoubleParser-NOTICE for details of other source code included in FastDoubleParser +| and the licenses and copyrights that apply to that code. + +-------------------------------------------------------------------------------- + +This binary artifact contains Airlift Aircompressor with the following in its NOTICE +file: + +| Snappy Copyright Notices +| ========================= +| +| * Copyright 2011 Dain Sundstrom +| * Copyright 2011, Google Inc. +| +| +| Snappy License +| =============== +| Copyright 2011, Google Inc. +| All rights reserved. 
+| +| Redistribution and use in source and binary forms, with or without +| modification, are permitted provided that the following conditions are +| met: +| +| * Redistributions of source code must retain the above copyright +| notice, this list of conditions and the following disclaimer. +| * Redistributions in binary form must reproduce the above +| copyright notice, this list of conditions and the following disclaimer +| in the documentation and/or other materials provided with the +| distribution. +| * Neither the name of Google Inc. nor the names of its +| contributors may be used to endorse or promote products derived from +| this software without specific prior written permission. +| +| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +| "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +| OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +| SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +| LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +| OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This binary artifact contains Netty's buffer library with the following in its NOTICE +file: +| The Netty Project +| ================= +| +| Please visit the Netty web site for more information: +| +| * https://netty.io/ +| +| Copyright 2014 The Netty Project +| +| The Netty Project licenses this file to you under the Apache License, +| version 2.0 (the "License"); you may not use this file except in compliance +| with the License. You may obtain a copy of the License at: +| +| http://www.apache.org/licenses/LICENSE-2.0 +| +| Unless required by applicable law or agreed to in writing, software +| distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +| WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +| License for the specific language governing permissions and limitations +| under the License. +| +| Also, please refer to each LICENSE..txt file, which is located in +| the 'license' directory of the distribution file, for the license terms of the +| components that this product depends on. +| +| ------------------------------------------------------------------------------- +| This product contains the extensions to Java Collections Framework which has +| been derived from the works by JSR-166 EG, Doug Lea, and Jason T. 
Greene: +| +| * LICENSE: +| * license/LICENSE.jsr166y.txt (Public Domain) +| * HOMEPAGE: +| * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/ +| * http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/ +| +| This product contains a modified version of Robert Harder's Public Domain +| Base64 Encoder and Decoder, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.base64.txt (Public Domain) +| * HOMEPAGE: +| * http://iharder.sourceforge.net/current/java/base64/ +| +| This product contains a modified portion of 'Webbit', an event based +| WebSocket and HTTP server, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.webbit.txt (BSD License) +| * HOMEPAGE: +| * https://github.com/joewalnes/webbit +| +| This product contains a modified portion of 'SLF4J', a simple logging +| facade for Java, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.slf4j.txt (MIT License) +| * HOMEPAGE: +| * http://www.slf4j.org/ +| +| This product contains a modified portion of 'Apache Harmony', an open source +| Java SE, which can be obtained at: +| +| * NOTICE: +| * license/NOTICE.harmony.txt +| * LICENSE: +| * license/LICENSE.harmony.txt (Apache License 2.0) +| * HOMEPAGE: +| * http://archive.apache.org/dist/harmony/ +| +| This product contains a modified portion of 'jbzip2', a Java bzip2 compression +| and decompression library written by Matthew J. Francis. It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.jbzip2.txt (MIT License) +| * HOMEPAGE: +| * https://code.google.com/p/jbzip2/ +| +| This product contains a modified portion of 'libdivsufsort', a C API library to construct +| the suffix array and the Burrows-Wheeler transformed string for any input string of +| a constant-size alphabet written by Yuta Mori. It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.libdivsufsort.txt (MIT License) +| * HOMEPAGE: +| * https://github.com/y-256/libdivsufsort +| +| This product contains a modified portion of Nitsan Wakart's 'JCTools', Java Concurrency Tools for the JVM, +| which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.jctools.txt (ASL2 License) +| * HOMEPAGE: +| * https://github.com/JCTools/JCTools +| +| This product optionally depends on 'JZlib', a re-implementation of zlib in +| pure Java, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.jzlib.txt (BSD style License) +| * HOMEPAGE: +| * http://www.jcraft.com/jzlib/ +| +| This product optionally depends on 'Compress-LZF', a Java library for encoding and +| decoding data in LZF format, written by Tatu Saloranta. It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.compress-lzf.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://github.com/ning/compress +| +| This product optionally depends on 'lz4', a LZ4 Java compression +| and decompression library written by Adrien Grand. It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.lz4.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://github.com/jpountz/lz4-java +| +| This product optionally depends on 'lzma-java', a LZMA Java compression +| and decompression library, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.lzma-java.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://github.com/jponge/lzma-java +| +| This product contains a modified portion of 'jfastlz', a Java port of FastLZ compression +| and decompression library written by William Kinney. 
It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.jfastlz.txt (MIT License) +| * HOMEPAGE: +| * https://code.google.com/p/jfastlz/ +| +| This product contains a modified portion of and optionally depends on 'Protocol Buffers', Google's data +| interchange format, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.protobuf.txt (New BSD License) +| * HOMEPAGE: +| * https://github.com/google/protobuf +| +| This product optionally depends on 'Bouncy Castle Crypto APIs' to generate +| a temporary self-signed X.509 certificate when the JVM does not provide the +| equivalent functionality. It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.bouncycastle.txt (MIT License) +| * HOMEPAGE: +| * http://www.bouncycastle.org/ +| +| This product optionally depends on 'Snappy', a compression library produced +| by Google Inc, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.snappy.txt (New BSD License) +| * HOMEPAGE: +| * https://github.com/google/snappy +| +| This product optionally depends on 'JBoss Marshalling', an alternative Java +| serialization API, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.jboss-marshalling.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://github.com/jboss-remoting/jboss-marshalling +| +| This product optionally depends on 'Caliper', Google's micro- +| benchmarking framework, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.caliper.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://github.com/google/caliper +| +| This product optionally depends on 'Apache Commons Logging', a logging +| framework, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.commons-logging.txt (Apache License 2.0) +| * HOMEPAGE: +| * http://commons.apache.org/logging/ +| +| This product optionally depends on 'Apache Log4J', a logging framework, which +| can be obtained at: +| +| * LICENSE: +| * license/LICENSE.log4j.txt (Apache License 2.0) +| * HOMEPAGE: +| * http://logging.apache.org/log4j/ +| +| This product optionally depends on 'Aalto XML', an ultra-high performance +| non-blocking XML processor, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.aalto-xml.txt (Apache License 2.0) +| * HOMEPAGE: +| * http://wiki.fasterxml.com/AaltoHome +| +| This product contains a modified version of 'HPACK', a Java implementation of +| the HTTP/2 HPACK algorithm written by Twitter. It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.hpack.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://github.com/twitter/hpack +| +| This product contains a modified version of 'HPACK', a Java implementation of +| the HTTP/2 HPACK algorithm written by Cory Benfield. It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.hyper-hpack.txt (MIT License) +| * HOMEPAGE: +| * https://github.com/python-hyper/hpack/ +| +| This product contains a modified version of 'HPACK', a Java implementation of +| the HTTP/2 HPACK algorithm written by Tatsuhiro Tsujikawa. 
It can be obtained at: +| +| * LICENSE: +| * license/LICENSE.nghttp2-hpack.txt (MIT License) +| * HOMEPAGE: +| * https://github.com/nghttp2/nghttp2/ +| +| This product contains a modified portion of 'Apache Commons Lang', a Java library +| provides utilities for the java.lang API, which can be obtained at: +| +| * LICENSE: +| * license/LICENSE.commons-lang.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://commons.apache.org/proper/commons-lang/ +| +| +| This product contains the Maven wrapper scripts from 'Maven Wrapper', that provides an easy way to ensure a user has everything necessary to run the Maven build. +| +| * LICENSE: +| * license/LICENSE.mvn-wrapper.txt (Apache License 2.0) +| * HOMEPAGE: +| * https://github.com/takari/maven-wrapper +| +| This product contains the dnsinfo.h header file, that provides a way to retrieve the system DNS configuration on MacOS. +| This private header is also used by Apple's open source +| mDNSResponder (https://opensource.apple.com/tarballs/mDNSResponder/). +| +| * LICENSE: +| * license/LICENSE.dnsinfo.txt (Apache License 2.0) +| * HOMEPAGE: +| * http://www.opensource.apple.com/source/configd/configd-453.19/dnsinfo/dnsinfo.h + +-------------------------------------------------------------------------------- + +This binary artifact contains Eclipse MicroProfile OpenAPI with the following in its NOTICE +file: +| ========================================================================= +| == NOTICE file corresponding to section 4(d) of the Apache License, == +| == Version 2.0, in this case for MicroProfile OpenAPI == +| ========================================================================= +| +| The majority of this software were originally based on the following: +| * Swagger Core +| https://github.com/swagger-api/swagger-core +| under Apache License, v2.0 +| +| +| SPDXVersion: SPDX-2.1 +| PackageName: Eclipse MicroProfile +| PackageHomePage: http://www.eclipse.org/microprofile +| PackageLicenseDeclared: Apache-2.0 +| +| PackageCopyrightText: +| Arthur De Magalhaes arthurdm@ca.ibm.com +| + +-------------------------------------------------------------------------------- + +This binary artifact contains Jakarta Validation with the following in its NOTICE +file: +| # Notices for Eclipse Jakarta Validation +| +| This content is produced and maintained by the Eclipse Jakarta Validation +| project. +| +| * Project home: https://projects.eclipse.org/projects/ee4j.validation +| +| ## Trademarks +| +| Jakarta Validation is a trademark of the Eclipse Foundation. +| +| ## Copyright +| +| All content is the property of the respective authors or their employers. For +| more information regarding authorship of content, please consult the listed +| source code repository logs. +| +| ## Declared Project Licenses +| +| This program and the accompanying materials are made available under the terms +| of the Apache License, Version 2.0 which is available at +| https://www.apache.org/licenses/LICENSE-2.0. +| +| SPDX-License-Identifier: Apache-2.0 +| +| ## Source Code +| +| The project maintains the following source code repositories: +| +| * [The specification repository](https://github.com/jakartaee/validation-spec) +| * [The API repository](https://github.com/jakartaee/validation) +| * [The TCK repository](https://github.com/jakartaee/validation-tck) +| +| ## Third-party Content +| +| This project leverages the following third party content. 
+| +| Test dependencies: +| +| * [TestNG](https://github.com/cbeust/testng) - Apache License 2.0 +| * [JCommander](https://github.com/cbeust/jcommander) - Apache License 2.0 +| * [SnakeYAML](https://bitbucket.org/asomov/snakeyaml/src) - Apache License 2.0 +| + +-------------------------------------------------------------------------------- + +This binary artifact contains Micrometer with the following in its NOTICE +file: +| Micrometer +| +| Copyright (c) 2017-Present VMware, Inc. All Rights Reserved. +| +| Licensed under the Apache License, Version 2.0 (the "License"); +| you may not use this file except in compliance with the License. +| You may obtain a copy of the License at +| +| https://www.apache.org/licenses/LICENSE-2.0 +| +| Unless required by applicable law or agreed to in writing, software +| distributed under the License is distributed on an "AS IS" BASIS, +| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +| See the License for the specific language governing permissions and +| limitations under the License. +| +| ------------------------------------------------------------------------------- +| +| This product contains a modified portion of 'io.netty.util.internal.logging', +| in the Netty/Common library distributed by The Netty Project: +| +| * Copyright 2013 The Netty Project +| * License: Apache License v2.0 +| * Homepage: https://netty.io +| +| This product contains a modified portion of 'StringUtils.isBlank()', +| in the Commons Lang library distributed by The Apache Software Foundation: +| +| * Copyright 2001-2019 The Apache Software Foundation +| * License: Apache License v2.0 +| * Homepage: https://commons.apache.org/proper/commons-lang/ +| +| This product contains a modified portion of 'JsonUtf8Writer', +| in the Moshi library distributed by Square, Inc: +| +| * Copyright 2010 Google Inc. +| * License: Apache License v2.0 +| * Homepage: https://github.com/square/moshi +| +| This product contains a modified portion of the 'org.springframework.lang' +| package in the Spring Framework library, distributed by VMware, Inc: +| +| * Copyright 2002-2019 the original author or authors. +| * License: Apache License v2.0 +| * Homepage: https://spring.io/projects/spring-framework + +-------------------------------------------------------------------------------- diff --git a/plugins/spark/v4.0/spark/build.gradle.kts b/plugins/spark/v4.0/spark/build.gradle.kts new file mode 100644 index 0000000000..6be6cb5ae4 --- /dev/null +++ b/plugins/spark/v4.0/spark/build.gradle.kts @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +import com.github.jengelman.gradle.plugins.shadow.tasks.ShadowJar + +plugins { id("polaris-client") } + +checkstyle { + configProperties = + mapOf( + "org.checkstyle.google.suppressionfilter.config" to + project.file("checkstyle_suppressions.xml").absolutePath + ) +} + +// get version information +val sparkMajorVersion = "4.0" +val scalaVersion = getAndUseScalaVersionForProject() +val icebergVersion = pluginlibs.versions.iceberg.get() +val spark40Version = pluginlibs.versions.spark40.get() + +val scalaLibraryVersion = pluginlibs.versions.scala213.get() + +dependencies { + // TODO: extract a polaris-rest module as a thin layer for + // client to depends on. + implementation(project(":polaris-core")) { isTransitive = false } + + implementation( + "org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersion}:${icebergVersion}" + ) + + compileOnly("org.scala-lang:scala-library:${scalaLibraryVersion}") + compileOnly("org.scala-lang:scala-reflect:${scalaLibraryVersion}") + compileOnly("org.apache.spark:spark-sql_${scalaVersion}:${spark40Version}") { + // exclude log4j dependencies + exclude("org.apache.logging.log4j", "log4j-slf4j2-impl") + exclude("org.apache.logging.log4j", "log4j-api") + exclude("org.apache.logging.log4j", "log4j-1.2-api") + exclude("org.slf4j", "jul-to-slf4j") + } + + compileOnly(libs.jakarta.annotation.api) + compileOnly(libs.jakarta.validation.api) + + testImplementation(platform(libs.junit.bom)) + testImplementation("org.junit.jupiter:junit-jupiter") + testImplementation(libs.assertj.core) + testImplementation(libs.mockito.core) + + testImplementation( + "org.apache.iceberg:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersion}:${icebergVersion}" + ) + testImplementation("org.apache.spark:spark-sql_${scalaVersion}:${spark40Version}") { + // exclude log4j dependencies. Explicit dependencies for the log4j libraries are + // enforced below to ensure the version compatibility + exclude("org.apache.logging.log4j", "log4j-slf4j2-impl") + exclude("org.apache.logging.log4j", "log4j-api") + exclude("org.apache.logging.log4j", "log4j-1.2-api") + exclude("org.apache.logging.log4j", "log4j-core") + exclude("org.slf4j", "jul-to-slf4j") + } + // enforce the usage of log4j 2.25.2. This is for the log4j-api compatibility + // of spark-sql dependency + testRuntimeOnly("org.apache.logging.log4j:log4j-api:2.25.2") + testRuntimeOnly("org.apache.logging.log4j:log4j-core:2.25.2") +} + +tasks.register("createPolarisSparkJar") { + archiveClassifier = "bundle" + isZip64 = true + + // pack both the source code and dependencies + from(sourceSets.main.get().output) + configurations = listOf(project.configurations.runtimeClasspath.get()) + + // recursively remove all LICENSE and NOTICE file under META-INF, includes + // directories contains 'license' in the name + exclude("META-INF/**/*LICENSE*") + exclude("META-INF/**/*NOTICE*") + // exclude the top level LICENSE, LICENSE-*.txt and NOTICE + exclude("LICENSE*") + exclude("NOTICE*") + + // add polaris customized LICENSE and NOTICE for the bundle jar at top level. Note that the + // customized LICENSE and NOTICE file are called BUNDLE-LICENSE and BUNDLE-NOTICE, + // and renamed to LICENSE and NOTICE after include, this is to avoid the file + // being excluded due to the exclude pattern matching used above. 
+ from("${projectDir}/BUNDLE-LICENSE") { rename { "LICENSE" } } + from("${projectDir}/BUNDLE-NOTICE") { rename { "NOTICE" } } +} + +// ensure the shadow jar job (which will automatically run license addition) is run for both +// `assemble` and `build` task +tasks.named("assemble") { dependsOn("createPolarisSparkJar") } + +tasks.named("build") { dependsOn("createPolarisSparkJar") } diff --git a/plugins/spark/v4.0/spark/checkstyle_suppressions.xml b/plugins/spark/v4.0/spark/checkstyle_suppressions.xml new file mode 100644 index 0000000000..d6f9482ea7 --- /dev/null +++ b/plugins/spark/v4.0/spark/checkstyle_suppressions.xml @@ -0,0 +1,32 @@ + + + + + + + + diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisCatalog.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisCatalog.java new file mode 100644 index 0000000000..08116c9e66 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisCatalog.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark; + +import java.util.List; +import java.util.Map; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.polaris.spark.rest.GenericTable; + +public interface PolarisCatalog { + List listGenericTables(Namespace ns); + + GenericTable loadGenericTable(TableIdentifier identifier); + + boolean dropGenericTable(TableIdentifier identifier); + + GenericTable createGenericTable( + TableIdentifier identifier, + String format, + String baseLocation, + String doc, + Map props); +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisRESTCatalog.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisRESTCatalog.java new file mode 100644 index 0000000000..5be0f6952e --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisRESTCatalog.java @@ -0,0 +1,247 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; +import java.io.Closeable; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.io.CloseableGroup; +import org.apache.iceberg.rest.Endpoint; +import org.apache.iceberg.rest.ErrorHandlers; +import org.apache.iceberg.rest.HTTPClient; +import org.apache.iceberg.rest.RESTClient; +import org.apache.iceberg.rest.ResourcePaths; +import org.apache.iceberg.rest.auth.OAuth2Util; +import org.apache.iceberg.rest.responses.ConfigResponse; +import org.apache.iceberg.util.EnvironmentUtil; +import org.apache.iceberg.util.PropertyUtil; +import org.apache.polaris.core.rest.PolarisEndpoints; +import org.apache.polaris.core.rest.PolarisResourcePaths; +import org.apache.polaris.spark.rest.CreateGenericTableRESTRequest; +import org.apache.polaris.spark.rest.CreateGenericTableRequest; +import org.apache.polaris.spark.rest.GenericTable; +import org.apache.polaris.spark.rest.ListGenericTablesRESTResponse; +import org.apache.polaris.spark.rest.LoadGenericTableRESTResponse; + +/** + * [[PolarisRESTCatalog]] talks to Polaris REST APIs, and implements the PolarisCatalog interfaces, + * which are generic table related APIs at this moment. This class doesn't interact with any Spark + * objects. + */ +public class PolarisRESTCatalog implements PolarisCatalog, Closeable { + public static final String REST_PAGE_SIZE = "rest-page-size"; + + private final Function, RESTClient> clientBuilder; + + private RESTClient restClient = null; + private CloseableGroup closeables = null; + private Set endpoints; + private OAuth2Util.AuthSession catalogAuth = null; + private PolarisResourcePaths pathGenerator = null; + private Integer pageSize = null; + + // the default endpoints to config if server doesn't specify the 'endpoints' configuration. + private static final Set DEFAULT_ENDPOINTS = PolarisEndpoints.GENERIC_TABLE_ENDPOINTS; + + public PolarisRESTCatalog() { + this(config -> HTTPClient.builder(config).uri(config.get(CatalogProperties.URI)).build()); + } + + public PolarisRESTCatalog(Function, RESTClient> clientBuilder) { + this.clientBuilder = clientBuilder; + } + + public void initialize(Map unresolved, OAuth2Util.AuthSession catalogAuth) { + Preconditions.checkArgument(unresolved != null, "Invalid configuration: null"); + + // Resolve any configuration that is supplied by environment variables. + // For example: if we have an entity ("key", "env:envVar") in the unresolved, + // and envVar is configured to envValue in system env. After resolve, we got + // entity ("key", "envValue"). 
+ Map props = EnvironmentUtil.resolveAll(unresolved); + + // TODO: switch to use authManager once iceberg dependency is updated to 1.9.0 + this.catalogAuth = catalogAuth; + + ConfigResponse config; + try (RESTClient initClient = clientBuilder.apply(props).withAuthSession(catalogAuth)) { + config = fetchConfig(initClient, catalogAuth.headers(), props); + } catch (IOException e) { + throw new UncheckedIOException("Failed to close HTTP client", e); + } + + // call getConfig to get the server configurations + Map mergedProps = config.merge(props); + if (config.endpoints().isEmpty()) { + this.endpoints = DEFAULT_ENDPOINTS; + } else { + this.endpoints = ImmutableSet.copyOf(config.endpoints()); + } + + this.pathGenerator = PolarisResourcePaths.forCatalogProperties(mergedProps); + this.restClient = clientBuilder.apply(mergedProps).withAuthSession(catalogAuth); + + this.pageSize = PropertyUtil.propertyAsNullableInt(mergedProps, REST_PAGE_SIZE); + if (pageSize != null) { + Preconditions.checkArgument( + pageSize > 0, "Invalid value for %s, must be a positive integer", REST_PAGE_SIZE); + } + + this.closeables = new CloseableGroup(); + this.closeables.addCloseable(this.restClient); + this.closeables.setSuppressCloseFailure(true); + } + + protected static ConfigResponse fetchConfig( + RESTClient client, Map headers, Map properties) { + // send the client's warehouse location to the service to keep in sync + // this is needed for cases where the warehouse is configured at client side, + // and used by Polaris server as catalog name. + ImmutableMap.Builder queryParams = ImmutableMap.builder(); + if (properties.containsKey(CatalogProperties.WAREHOUSE_LOCATION)) { + queryParams.put( + CatalogProperties.WAREHOUSE_LOCATION, + properties.get(CatalogProperties.WAREHOUSE_LOCATION)); + } + + ConfigResponse configResponse = + client.get( + ResourcePaths.config(), + queryParams.build(), + ConfigResponse.class, + headers, + ErrorHandlers.defaultErrorHandler()); + configResponse.validate(); + return configResponse; + } + + @Override + public void close() throws IOException { + if (closeables != null) { + closeables.close(); + } + } + + @Override + public List listGenericTables(Namespace ns) { + Endpoint.check(endpoints, PolarisEndpoints.V1_LIST_GENERIC_TABLES); + + Map queryParams = Maps.newHashMap(); + ImmutableList.Builder tables = ImmutableList.builder(); + String pageToken = ""; + if (pageSize != null) { + queryParams.put("pageSize", String.valueOf(pageSize)); + } + + do { + queryParams.put("pageToken", pageToken); + ListGenericTablesRESTResponse response = + restClient + .withAuthSession(this.catalogAuth) + .get( + pathGenerator.genericTables(ns), + queryParams, + ListGenericTablesRESTResponse.class, + Map.of(), + ErrorHandlers.namespaceErrorHandler()); + pageToken = response.getNextPageToken(); + tables.addAll(response.getIdentifiers()); + } while (pageToken != null); + + return tables.build(); + } + + @Override + public boolean dropGenericTable(TableIdentifier identifier) { + Endpoint.check(endpoints, PolarisEndpoints.V1_DELETE_GENERIC_TABLE); + + try { + restClient + .withAuthSession(this.catalogAuth) + .delete( + pathGenerator.genericTable(identifier), + null, + Map.of(), + ErrorHandlers.tableErrorHandler()); + return true; + } catch (NoSuchTableException e) { + return false; + } + } + + @Override + public GenericTable createGenericTable( + TableIdentifier identifier, + String format, + String baseLocation, + String doc, + Map props) { + Endpoint.check(endpoints, PolarisEndpoints.V1_CREATE_GENERIC_TABLE); + 
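+    // The request built below is POSTed to the generic-table endpoint resolved by
+    // pathGenerator.genericTables(identifier.namespace()); an illustrative (hypothetical) JSON payload:
+    //   {"name": "tbl", "format": "delta", "base-location": "s3://bucket/tbl", "properties": {}}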
CreateGenericTableRESTRequest request = + new CreateGenericTableRESTRequest( + CreateGenericTableRequest.builder() + .setName(identifier.name()) + .setFormat(format) + .setBaseLocation(baseLocation) + .setDoc(doc) + .setProperties(props) + .build()); + + LoadGenericTableRESTResponse response = + restClient + .withAuthSession(this.catalogAuth) + .post( + pathGenerator.genericTables(identifier.namespace()), + request, + LoadGenericTableRESTResponse.class, + Map.of(), + ErrorHandlers.tableErrorHandler()); + + return response.getTable(); + } + + @Override + public GenericTable loadGenericTable(TableIdentifier identifier) { + Endpoint.check(endpoints, PolarisEndpoints.V1_LOAD_GENERIC_TABLE); + LoadGenericTableRESTResponse response = + restClient + .withAuthSession(this.catalogAuth) + .get( + pathGenerator.genericTable(identifier), + null, + LoadGenericTableRESTResponse.class, + Map.of(), + ErrorHandlers.tableErrorHandler()); + + return response.getTable(); + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisSparkCatalog.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisSparkCatalog.java new file mode 100644 index 0000000000..771c191c05 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/PolarisSparkCatalog.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark; + +import java.util.Map; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.spark.Spark3Util; +// Use the spec class defined at client side under the rest package. +// The spec classes used at client side and server side are different in +// terms of import, where the client side uses the shaded jackson library +// from iceberg-spark-runtime. +import org.apache.polaris.spark.rest.GenericTable; +import org.apache.polaris.spark.utils.PolarisCatalogUtils; +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableCatalog; +import org.apache.spark.sql.connector.catalog.TableChange; +import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A spark TableCatalog Implementation interacts with Polaris specific APIs only. 
The APIs it + * interacts with is generic table APIs, and all table operations performed in this class are + * expected to be for non-iceberg tables. + */ +public class PolarisSparkCatalog implements TableCatalog { + private static final Logger LOGGER = LoggerFactory.getLogger(PolarisSparkCatalog.class); + + private PolarisCatalog polarisCatalog = null; + private String catalogName = null; + + public PolarisSparkCatalog(PolarisCatalog polarisCatalog) { + this.polarisCatalog = polarisCatalog; + } + + @Override + public void initialize(String name, CaseInsensitiveStringMap options) { + this.catalogName = name; + } + + @Override + public String name() { + return catalogName; + } + + @Override + public Table loadTable(Identifier identifier) throws NoSuchTableException { + try { + GenericTable genericTable = + this.polarisCatalog.loadGenericTable(Spark3Util.identifierToTableIdentifier(identifier)); + return PolarisCatalogUtils.loadSparkTable(genericTable); + } catch (org.apache.iceberg.exceptions.NoSuchTableException e) { + throw new NoSuchTableException(identifier); + } + } + + @Override + @SuppressWarnings({"deprecation", "RedundantSuppression"}) + public Table createTable( + Identifier identifier, + StructType schema, + Transform[] transforms, + Map properties) + throws TableAlreadyExistsException, NoSuchNamespaceException { + try { + String format = properties.get(PolarisCatalogUtils.TABLE_PROVIDER_KEY); + + String baseLocation; + // Extract the base table location from the spark properties. + // Spark pass the table base location either with the + // TableCatalog.PROP_LOCATION key, or with "path" key if created + // with the path option. + if (properties.get(TableCatalog.PROP_LOCATION) != null) { + baseLocation = properties.get(TableCatalog.PROP_LOCATION); + if (properties.get(PolarisCatalogUtils.TABLE_PATH_KEY) != null) { + LOGGER.debug( + "Both location and path are propagated in the table properties, location {}, path {}", + baseLocation, + properties.get(PolarisCatalogUtils.TABLE_PATH_KEY)); + } + } else { + baseLocation = properties.get(PolarisCatalogUtils.TABLE_PATH_KEY); + } + GenericTable genericTable = + this.polarisCatalog.createGenericTable( + Spark3Util.identifierToTableIdentifier(identifier), + format, + baseLocation, + null, + properties); + return PolarisCatalogUtils.loadSparkTable(genericTable); + } catch (AlreadyExistsException e) { + throw new TableAlreadyExistsException(identifier); + } + } + + @Override + public Table alterTable(Identifier identifier, TableChange... 
changes) + throws NoSuchTableException { + // alterTable currently is not supported for generic tables + throw new UnsupportedOperationException("alterTable operation is not supported"); + } + + @Override + public boolean purgeTable(Identifier ident) { + // purgeTable for generic table will only do a drop without purge + return dropTable(ident); + } + + @Override + public boolean dropTable(Identifier identifier) { + return this.polarisCatalog.dropGenericTable(Spark3Util.identifierToTableIdentifier(identifier)); + } + + @Override + public void renameTable(Identifier from, Identifier to) + throws NoSuchTableException, TableAlreadyExistsException { + throw new UnsupportedOperationException("renameTable operation is not supported"); + } + + @Override + public Identifier[] listTables(String[] namespace) { + try { + return this.polarisCatalog.listGenericTables(Namespace.of(namespace)).stream() + .map(ident -> Identifier.of(ident.namespace().levels(), ident.name())) + .toArray(Identifier[]::new); + } catch (UnsupportedOperationException ex) { + return new Identifier[0]; + } + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/SparkCatalog.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/SparkCatalog.java new file mode 100644 index 0000000000..ab7ff21026 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/SparkCatalog.java @@ -0,0 +1,354 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Maps; +import java.util.Arrays; +import java.util.Map; +import java.util.stream.Stream; +import org.apache.arrow.util.VisibleForTesting; +import org.apache.iceberg.CatalogProperties; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.rest.auth.OAuth2Util; +import org.apache.iceberg.spark.SupportsReplaceView; +import org.apache.iceberg.util.PropertyUtil; +import org.apache.polaris.spark.utils.DeltaHelper; +import org.apache.polaris.spark.utils.PolarisCatalogUtils; +import org.apache.spark.sql.catalyst.analysis.NamespaceAlreadyExistsException; +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; +import org.apache.spark.sql.catalyst.analysis.NoSuchViewException; +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException; +import org.apache.spark.sql.catalyst.analysis.ViewAlreadyExistsException; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.catalog.NamespaceChange; +import org.apache.spark.sql.connector.catalog.StagedTable; +import org.apache.spark.sql.connector.catalog.StagingTableCatalog; +import org.apache.spark.sql.connector.catalog.SupportsNamespaces; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableCatalog; +import org.apache.spark.sql.connector.catalog.TableChange; +import org.apache.spark.sql.connector.catalog.View; +import org.apache.spark.sql.connector.catalog.ViewCatalog; +import org.apache.spark.sql.connector.catalog.ViewChange; +import org.apache.spark.sql.connector.catalog.ViewInfo; +import org.apache.spark.sql.connector.expressions.Transform; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +/** + * SparkCatalog Implementation that is able to interact with both Iceberg SparkCatalog and Polaris + * SparkCatalog. All namespaces and view related operations continue goes through the Iceberg + * SparkCatalog. For table operations, depends on the table format, the operation can be achieved + * with interaction with both Iceberg and Polaris SparkCatalog. + */ +public class SparkCatalog + implements StagingTableCatalog, + TableCatalog, + SupportsNamespaces, + ViewCatalog, + SupportsReplaceView { + + @VisibleForTesting protected String catalogName = null; + @VisibleForTesting protected org.apache.iceberg.spark.SparkCatalog icebergsSparkCatalog = null; + @VisibleForTesting protected PolarisSparkCatalog polarisSparkCatalog = null; + @VisibleForTesting protected DeltaHelper deltaHelper = null; + + @Override + public String name() { + return catalogName; + } + + /** + * Check whether invalid catalog configuration is provided, and return an option map with catalog + * type configured correctly. This function mainly validates two parts: 1) No customized catalog + * implementation is provided. 2) No non-rest catalog type is configured. 
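+   * For example (illustrative): an options map that sets "catalog-impl", or sets "type" to
+   * anything other than "rest", is rejected, while an absent "type" is resolved to "rest" in the
+   * returned option map.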
+ */ + @VisibleForTesting + public CaseInsensitiveStringMap validateAndResolveCatalogOptions( + CaseInsensitiveStringMap options) { + Preconditions.checkArgument( + options.get(CatalogProperties.CATALOG_IMPL) == null, + "Customized catalog implementation is not supported and not needed, please remove the configuration!"); + + String catalogType = + PropertyUtil.propertyAsString( + options, CatalogUtil.ICEBERG_CATALOG_TYPE, CatalogUtil.ICEBERG_CATALOG_TYPE_REST); + Preconditions.checkArgument( + catalogType.equals(CatalogUtil.ICEBERG_CATALOG_TYPE_REST), + "Only rest catalog type is allowed, but got catalog type: " + + catalogType + + ". Either configure the type to rest or remove the config"); + + Map resolvedOptions = Maps.newHashMap(); + resolvedOptions.putAll(options); + // when no catalog type is configured, iceberg uses hive by default. Here, we make sure the + // type is set to rest since we only support rest catalog. + resolvedOptions.put(CatalogUtil.ICEBERG_CATALOG_TYPE, CatalogUtil.ICEBERG_CATALOG_TYPE_REST); + + return new CaseInsensitiveStringMap(resolvedOptions); + } + + /** + * Initialize REST Catalog for Iceberg and Polaris, this is the only catalog type supported by + * Polaris at this moment. + */ + private void initRESTCatalog(String name, CaseInsensitiveStringMap options) { + CaseInsensitiveStringMap resolvedOptions = validateAndResolveCatalogOptions(options); + + // initialize the icebergSparkCatalog + this.icebergsSparkCatalog = new org.apache.iceberg.spark.SparkCatalog(); + this.icebergsSparkCatalog.initialize(name, resolvedOptions); + + // initialize the polaris spark catalog + OAuth2Util.AuthSession catalogAuth = + PolarisCatalogUtils.getAuthSession(this.icebergsSparkCatalog); + PolarisRESTCatalog restCatalog = new PolarisRESTCatalog(); + restCatalog.initialize(options, catalogAuth); + this.polarisSparkCatalog = new PolarisSparkCatalog(restCatalog); + this.polarisSparkCatalog.initialize(name, resolvedOptions); + } + + @Override + public void initialize(String name, CaseInsensitiveStringMap options) { + this.catalogName = name; + initRESTCatalog(name, options); + this.deltaHelper = new DeltaHelper(options); + } + + @Override + public Table loadTable(Identifier ident) throws NoSuchTableException { + try { + return this.icebergsSparkCatalog.loadTable(ident); + } catch (NoSuchTableException e) { + return this.polarisSparkCatalog.loadTable(ident); + } + } + + @Override + @SuppressWarnings({"deprecation"}) + public Table createTable( + Identifier ident, StructType schema, Transform[] transforms, Map properties) + throws TableAlreadyExistsException, NoSuchNamespaceException { + String provider = properties.get(PolarisCatalogUtils.TABLE_PROVIDER_KEY); + if (PolarisCatalogUtils.useIceberg(provider)) { + return this.icebergsSparkCatalog.createTable(ident, schema, transforms, properties); + } else { + if (PolarisCatalogUtils.isTableWithSparkManagedLocation(properties)) { + throw new UnsupportedOperationException( + "Create table without location key is not supported by Polaris. Please provide location or path on table creation."); + } + + if (PolarisCatalogUtils.useDelta(provider)) { + // For delta table, we load the delta catalog to help dealing with the + // delta log creation. 
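+        // Illustrative SQL that reaches this branch (an explicit location or path is required
+        // for non-Iceberg tables, per the check above; catalog and table names are hypothetical):
+        //   CREATE TABLE polaris_catalog.ns.tbl (id INT) USING delta LOCATION 's3://bucket/ns/tbl';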
+ TableCatalog deltaCatalog = deltaHelper.loadDeltaCatalog(this.polarisSparkCatalog); + return deltaCatalog.createTable(ident, schema, transforms, properties); + } else { + return this.polarisSparkCatalog.createTable(ident, schema, transforms, properties); + } + } + } + + @Override + public Table alterTable(Identifier ident, TableChange... changes) throws NoSuchTableException { + try { + return this.icebergsSparkCatalog.alterTable(ident, changes); + } catch (NoSuchTableException e) { + Table table = this.polarisSparkCatalog.loadTable(ident); + String provider = table.properties().get(PolarisCatalogUtils.TABLE_PROVIDER_KEY); + if (PolarisCatalogUtils.useDelta(provider)) { + // For delta table, most of the alter operations is a delta log manipulation, + // we load the delta catalog to help handling the alter table operation. + // NOTE: This currently doesn't work for changing file location and file format + // using ALTER TABLE ...SET LOCATION, and ALTER TABLE ... SET FILEFORMAT. + TableCatalog deltaCatalog = deltaHelper.loadDeltaCatalog(this.polarisSparkCatalog); + return deltaCatalog.alterTable(ident, changes); + } + return this.polarisSparkCatalog.alterTable(ident); + } + } + + @Override + public boolean dropTable(Identifier ident) { + return this.icebergsSparkCatalog.dropTable(ident) || this.polarisSparkCatalog.dropTable(ident); + } + + @Override + public void renameTable(Identifier from, Identifier to) + throws NoSuchTableException, TableAlreadyExistsException { + try { + this.icebergsSparkCatalog.renameTable(from, to); + } catch (NoSuchTableException e) { + this.polarisSparkCatalog.renameTable(from, to); + } + } + + @Override + public void invalidateTable(Identifier ident) { + this.icebergsSparkCatalog.invalidateTable(ident); + } + + @Override + public boolean purgeTable(Identifier ident) { + if (this.icebergsSparkCatalog.purgeTable(ident)) { + return true; + } else { + return this.polarisSparkCatalog.purgeTable(ident); + } + } + + @Override + public Identifier[] listTables(String[] namespace) { + Identifier[] icebergIdents = this.icebergsSparkCatalog.listTables(namespace); + Identifier[] genericTableIdents = this.polarisSparkCatalog.listTables(namespace); + + return Stream.concat(Arrays.stream(icebergIdents), Arrays.stream(genericTableIdents)) + .toArray(Identifier[]::new); + } + + @Override + @SuppressWarnings({"deprecation", "RedundantSuppression"}) + public StagedTable stageCreate( + Identifier ident, StructType schema, Transform[] transforms, Map properties) + throws TableAlreadyExistsException { + return this.icebergsSparkCatalog.stageCreate(ident, schema, transforms, properties); + } + + @Override + public StagedTable stageReplace( + Identifier ident, StructType schema, Transform[] transforms, Map properties) + throws NoSuchTableException { + return this.icebergsSparkCatalog.stageReplace(ident, schema, transforms, properties); + } + + @Override + public StagedTable stageCreateOrReplace( + Identifier ident, StructType schema, Transform[] transforms, Map properties) { + return this.icebergsSparkCatalog.stageCreateOrReplace(ident, schema, transforms, properties); + } + + @Override + public String[] defaultNamespace() { + return this.icebergsSparkCatalog.defaultNamespace(); + } + + @Override + public String[][] listNamespaces() { + return this.icebergsSparkCatalog.listNamespaces(); + } + + @Override + public String[][] listNamespaces(String[] namespace) throws NoSuchNamespaceException { + return this.icebergsSparkCatalog.listNamespaces(namespace); + } + + @Override + public Map 
loadNamespaceMetadata(String[] namespace) + throws NoSuchNamespaceException { + return this.icebergsSparkCatalog.loadNamespaceMetadata(namespace); + } + + @Override + public void createNamespace(String[] namespace, Map metadata) + throws NamespaceAlreadyExistsException { + this.icebergsSparkCatalog.createNamespace(namespace, metadata); + } + + @Override + public void alterNamespace(String[] namespace, NamespaceChange... changes) + throws NoSuchNamespaceException { + this.icebergsSparkCatalog.alterNamespace(namespace, changes); + } + + @Override + public boolean dropNamespace(String[] namespace, boolean cascade) + throws NoSuchNamespaceException { + return this.icebergsSparkCatalog.dropNamespace(namespace, cascade); + } + + @Override + public Identifier[] listViews(String... namespace) { + return this.icebergsSparkCatalog.listViews(namespace); + } + + @Override + public View loadView(Identifier ident) throws NoSuchViewException { + return this.icebergsSparkCatalog.loadView(ident); + } + + @Override + public View createView(ViewInfo viewInfo) + throws ViewAlreadyExistsException, NoSuchNamespaceException { + return this.icebergsSparkCatalog.createView(viewInfo); + } + + @Override + public View alterView(Identifier ident, ViewChange... changes) + throws NoSuchViewException, IllegalArgumentException { + return this.icebergsSparkCatalog.alterView(ident, changes); + } + + @Override + public boolean dropView(Identifier ident) { + return this.icebergsSparkCatalog.dropView(ident); + } + + @Override + public void renameView(Identifier fromIdentifier, Identifier toIdentifier) + throws NoSuchViewException, ViewAlreadyExistsException { + this.icebergsSparkCatalog.renameView(fromIdentifier, toIdentifier); + } + + @Override + public View replaceView(ViewInfo viewInfo, boolean orCreate) + throws NoSuchNamespaceException, NoSuchViewException { + return this.icebergsSparkCatalog.replaceView(viewInfo, orCreate); + } + + // This method is required by SupportsReplaceView interface from Iceberg + // It provides backward compatibility with the old API signature + @Override + public View replaceView( + Identifier ident, + String sql, + String currentCatalog, + String[] currentNamespace, + StructType schema, + String[] queryColumnNames, + String[] columnAliases, + String[] columnComments, + Map properties) + throws NoSuchNamespaceException, NoSuchViewException { + // Delegate to the new API by creating a ViewInfo object + ViewInfo viewInfo = + new ViewInfo( + ident, + sql, + currentCatalog, + currentNamespace, + schema, + queryColumnNames, + columnAliases, + columnComments, + properties); + return this.icebergsSparkCatalog.replaceView(viewInfo, false); + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRESTRequest.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRESTRequest.java new file mode 100644 index 0000000000..644fcc1c1d --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRESTRequest.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.rest; + +import java.util.Map; +import org.apache.iceberg.rest.RESTRequest; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonProperty; + +/** + * RESTRequest definition for CreateGenericTable which extends the iceberg RESTRequest. This is + * currently required because the Iceberg HTTPClient requires the request and response to be a class + * of RESTRequest and RESTResponse. + */ +public class CreateGenericTableRESTRequest extends CreateGenericTableRequest + implements RESTRequest { + + @JsonCreator + public CreateGenericTableRESTRequest( + @JsonProperty(value = "name", required = true) String name, + @JsonProperty(value = "format", required = true) String format, + @JsonProperty(value = "base-location") String baseLocation, + @JsonProperty(value = "doc") String doc, + @JsonProperty(value = "properties") Map properties) { + super(name, format, baseLocation, doc, properties); + } + + public CreateGenericTableRESTRequest(CreateGenericTableRequest request) { + this( + request.getName(), + request.getFormat(), + request.getBaseLocation(), + request.getDoc(), + request.getProperties()); + } + + @Override + public void validate() {} +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRequest.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRequest.java new file mode 100644 index 0000000000..9d4021012f --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/CreateGenericTableRequest.java @@ -0,0 +1,187 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark.rest; + +import jakarta.validation.constraints.NotNull; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonProperty; + +// TODO: auto generate the class based on spec +public class CreateGenericTableRequest { + + @NotNull private final String name; + @NotNull private final String format; + private final String baseLocation; + private final String doc; + private final Map properties; + + /** */ + @JsonProperty(value = "name", required = true) + public String getName() { + return name; + } + + /** */ + @JsonProperty(value = "format", required = true) + public String getFormat() { + return format; + } + + /** */ + @JsonProperty(value = "base-location") + public String getBaseLocation() { + return baseLocation; + } + + /** */ + @JsonProperty(value = "doc") + public String getDoc() { + return doc; + } + + /** */ + @JsonProperty(value = "properties") + public Map getProperties() { + return properties; + } + + @JsonCreator + public CreateGenericTableRequest( + @JsonProperty(value = "name", required = true) String name, + @JsonProperty(value = "format", required = true) String format, + @JsonProperty(value = "base-location") String baseLocation, + @JsonProperty(value = "doc") String doc, + @JsonProperty(value = "properties") Map properties) { + this.name = name; + this.format = format; + this.baseLocation = baseLocation; + this.doc = doc; + this.properties = Objects.requireNonNullElse(properties, new HashMap<>()); + } + + public CreateGenericTableRequest(String name, String format) { + this.name = name; + this.format = format; + this.baseLocation = null; + this.doc = null; + this.properties = new HashMap<>(); + } + + public static Builder builder() { + return new Builder(); + } + + public static Builder builder(String name, String format) { + return new Builder(name, format); + } + + public static final class Builder { + private String name; + private String format; + private String baseLocation; + private String doc; + private Map properties; + + private Builder() {} + + private Builder(String name, String format) { + this.name = name; + this.format = format; + } + + public Builder setName(String name) { + this.name = name; + return this; + } + + public Builder setFormat(String format) { + this.format = format; + return this; + } + + public Builder setBaseLocation(String baseLocation) { + this.baseLocation = baseLocation; + return this; + } + + public Builder setDoc(String doc) { + this.doc = doc; + return this; + } + + public Builder setProperties(Map properties) { + this.properties = properties; + return this; + } + + public CreateGenericTableRequest build() { + CreateGenericTableRequest inst = + new CreateGenericTableRequest(name, format, baseLocation, doc, properties); + return inst; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof CreateGenericTableRequest)) { + return false; + } + CreateGenericTableRequest createGenericTableRequest = (CreateGenericTableRequest) o; + return Objects.equals(this.name, createGenericTableRequest.name) + && Objects.equals(this.format, createGenericTableRequest.format) + && Objects.equals(this.baseLocation, createGenericTableRequest.baseLocation) + && Objects.equals(this.doc, createGenericTableRequest.doc) + && Objects.equals(this.properties, createGenericTableRequest.properties); + } 
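+  // Illustrative only: shows how the builder above and the CreateGenericTableRESTRequest wrapper
+  // defined earlier in this patch fit together; the table name, format, location and doc below
+  // are placeholder values.
+  private static CreateGenericTableRESTRequest exampleRestRequest() {
+    CreateGenericTableRequest request =
+        CreateGenericTableRequest.builder("customer_events", "delta")
+            .setBaseLocation("s3://example-bucket/warehouse/customer_events")
+            .setDoc("illustrative generic table")
+            .build();
+    // Iceberg's HTTPClient only accepts RESTRequest/RESTResponse types, hence the wrapper.
+    return new CreateGenericTableRESTRequest(request);
+  }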
+ + @Override + public int hashCode() { + return Objects.hash(name, format, baseLocation, doc, properties); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("class CreateGenericTableRequest {\n"); + + sb.append(" name: ").append(toIndentedString(name)).append("\n"); + sb.append(" format: ").append(toIndentedString(format)).append("\n"); + sb.append(" baseLocation: ").append(toIndentedString(baseLocation)).append("\n"); + sb.append(" doc: ").append(toIndentedString(doc)).append("\n"); + sb.append(" properties: ").append(toIndentedString(properties)).append("\n"); + sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces (except the first line). + */ + private String toIndentedString(Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/GenericTable.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/GenericTable.java new file mode 100644 index 0000000000..27ad3bab6f --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/GenericTable.java @@ -0,0 +1,186 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark.rest; + +import jakarta.validation.constraints.NotNull; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonProperty; + +// TODO: auto generate the class based on spec +public class GenericTable { + + @NotNull private final String name; + @NotNull private final String format; + private final String baseLocation; + private final String doc; + private final Map properties; + + /** */ + @JsonProperty(value = "name", required = true) + public String getName() { + return name; + } + + /** */ + @JsonProperty(value = "format", required = true) + public String getFormat() { + return format; + } + + /** */ + @JsonProperty(value = "base-location") + public String getBaseLocation() { + return baseLocation; + } + + /** */ + @JsonProperty(value = "doc") + public String getDoc() { + return doc; + } + + /** */ + @JsonProperty(value = "properties") + public Map getProperties() { + return properties; + } + + @JsonCreator + public GenericTable( + @JsonProperty(value = "name", required = true) String name, + @JsonProperty(value = "format", required = true) String format, + @JsonProperty(value = "base-location") String baseLocation, + @JsonProperty(value = "doc") String doc, + @JsonProperty(value = "properties") Map properties) { + this.name = name; + this.format = format; + this.baseLocation = baseLocation; + this.doc = doc; + this.properties = Objects.requireNonNullElse(properties, new HashMap<>()); + } + + public GenericTable(String name, String format) { + this.name = name; + this.format = format; + this.baseLocation = null; + this.doc = null; + this.properties = new HashMap<>(); + } + + public static Builder builder() { + return new Builder(); + } + + public static Builder builder(String name, String format) { + return new Builder(name, format); + } + + public static final class Builder { + private String name; + private String format; + private String baseLocation; + private String doc; + private Map properties; + + private Builder() {} + + private Builder(String name, String format) { + this.name = name; + this.format = format; + } + + public Builder setName(String name) { + this.name = name; + return this; + } + + public Builder setFormat(String format) { + this.format = format; + return this; + } + + public Builder setBaseLocation(String baseLocation) { + this.baseLocation = baseLocation; + return this; + } + + public Builder setDoc(String doc) { + this.doc = doc; + return this; + } + + public Builder setProperties(Map properties) { + this.properties = properties; + return this; + } + + public GenericTable build() { + GenericTable inst = new GenericTable(name, format, baseLocation, doc, properties); + return inst; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof GenericTable)) { + return false; + } + GenericTable genericTable = (GenericTable) o; + return Objects.equals(this.name, genericTable.name) + && Objects.equals(this.format, genericTable.format) + && Objects.equals(this.baseLocation, genericTable.baseLocation) + && Objects.equals(this.doc, genericTable.doc) + && Objects.equals(this.properties, genericTable.properties); + } + + @Override + public int hashCode() { + return Objects.hash(name, format, baseLocation, doc, properties); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); 
+ sb.append("class GenericTable {\n"); + + sb.append(" name: ").append(toIndentedString(name)).append("\n"); + sb.append(" format: ").append(toIndentedString(format)).append("\n"); + sb.append(" baseLocation: ").append(toIndentedString(baseLocation)).append("\n"); + sb.append(" doc: ").append(toIndentedString(doc)).append("\n"); + sb.append(" properties: ").append(toIndentedString(properties)).append("\n"); + sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces (except the first line). + */ + private String toIndentedString(Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/ListGenericTablesRESTResponse.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/ListGenericTablesRESTResponse.java new file mode 100644 index 0000000000..55205d30f5 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/ListGenericTablesRESTResponse.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.rest; + +import java.util.Set; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.rest.RESTResponse; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonProperty; + +/** + * RESTResponse definition for ListGenericTable which extends the iceberg RESTResponse. This is + * currently required because the Iceberg HTTPClient requires the request and response to be a class + * of RESTRequest and RESTResponse. + */ +public class ListGenericTablesRESTResponse extends ListGenericTablesResponse + implements RESTResponse { + + @JsonCreator + public ListGenericTablesRESTResponse( + @JsonProperty(value = "next-page-token") String nextPageToken, + @JsonProperty(value = "identifiers") Set identifiers) { + super(nextPageToken, identifiers); + } + + @Override + public void validate() {} +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/ListGenericTablesResponse.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/ListGenericTablesResponse.java new file mode 100644 index 0000000000..1e7369ed45 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/ListGenericTablesResponse.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.rest; + +import jakarta.validation.Valid; +import java.util.LinkedHashSet; +import java.util.Objects; +import java.util.Set; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonProperty; + +// TODO: auto generate the class based on spec +public class ListGenericTablesResponse { + + private final String nextPageToken; + @Valid private final Set<@Valid TableIdentifier> identifiers; + + /** + * An opaque token that allows clients to make use of pagination for list APIs (e.g. ListTables). + * Clients may initiate the first paginated request by sending an empty query parameter + * `pageToken` to the server. Servers that support pagination should identify the + * `pageToken` parameter and return a `next-page-token` in the response if + * there are more results available. After the initial request, the value of + * `next-page-token` from each response must be used as the `pageToken` + * parameter value for the next request. The server must return `null` value for the + * `next-page-token` in the last response. Servers that support pagination must return + * all results in a single response with the value of `next-page-token` set to + * `null` if the query parameter `pageToken` is not set in the request. + * Servers that do not support pagination should ignore the `pageToken` parameter and + * return all results in a single response. The `next-page-token` must be omitted from + * the response. Clients must interpret either `null` or missing response value of + * `next-page-token` as the end of the listing results. 
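+   *
+   * <p>Illustrative pagination loop (a sketch; {@code fetchPage} is a placeholder for whatever
+   * client call returns this response):
+   *
+   * <pre>{@code
+   * String pageToken = null;
+   * do {
+   *   ListGenericTablesResponse page = fetchPage(namespace, pageToken);
+   *   page.getIdentifiers().forEach(System.out::println);
+   *   pageToken = page.getNextPageToken();
+   * } while (pageToken != null);
+   * }</pre>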
+ */ + @JsonProperty(value = "next-page-token") + public String getNextPageToken() { + return nextPageToken; + } + + /** */ + @JsonProperty(value = "identifiers") + public Set<@Valid TableIdentifier> getIdentifiers() { + return identifiers; + } + + @JsonCreator + public ListGenericTablesResponse( + @JsonProperty(value = "next-page-token") String nextPageToken, + @JsonProperty(value = "identifiers") Set<@Valid TableIdentifier> identifiers) { + this.nextPageToken = nextPageToken; + this.identifiers = Objects.requireNonNullElse(identifiers, new LinkedHashSet<>()); + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + private String nextPageToken; + private Set<@Valid TableIdentifier> identifiers; + + private Builder() {} + + public Builder setNextPageToken(String nextPageToken) { + this.nextPageToken = nextPageToken; + return this; + } + + public Builder setIdentifiers(Set<@Valid TableIdentifier> identifiers) { + this.identifiers = identifiers; + return this; + } + + public ListGenericTablesResponse build() { + ListGenericTablesResponse inst = new ListGenericTablesResponse(nextPageToken, identifiers); + return inst; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof ListGenericTablesResponse)) { + return false; + } + ListGenericTablesResponse listGenericTablesResponse = (ListGenericTablesResponse) o; + return Objects.equals(this.nextPageToken, listGenericTablesResponse.nextPageToken) + && Objects.equals(this.identifiers, listGenericTablesResponse.identifiers); + } + + @Override + public int hashCode() { + return Objects.hash(nextPageToken, identifiers); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("class ListGenericTablesResponse {\n"); + + sb.append(" nextPageToken: ").append(toIndentedString(nextPageToken)).append("\n"); + sb.append(" identifiers: ").append(toIndentedString(identifiers)).append("\n"); + sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces (except the first line). + */ + private String toIndentedString(Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableRESTResponse.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableRESTResponse.java new file mode 100644 index 0000000000..ae9999dd58 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableRESTResponse.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark.rest; + +import org.apache.iceberg.rest.RESTResponse; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonProperty; + +/** + * RESTResponse definition for LoadGenericTable which extends the iceberg RESTResponse. This is + * currently required because the Iceberg HTTPClient requires the request and response to be a class + * of RESTRequest and RESTResponse. + */ +public class LoadGenericTableRESTResponse extends LoadGenericTableResponse implements RESTResponse { + + @JsonCreator + public LoadGenericTableRESTResponse( + @JsonProperty(value = "table", required = true) GenericTable table) { + super(table); + } + + @Override + public void validate() {} +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableResponse.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableResponse.java new file mode 100644 index 0000000000..1923db1225 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/rest/LoadGenericTableResponse.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark.rest; + +import jakarta.validation.Valid; +import jakarta.validation.constraints.NotNull; +import java.util.Objects; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonCreator; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonProperty; + +// TODO: auto generate the class based on spec +public class LoadGenericTableResponse { + + @NotNull @Valid private final GenericTable table; + + /** */ + @JsonProperty(value = "table", required = true) + public GenericTable getTable() { + return table; + } + + @JsonCreator + public LoadGenericTableResponse( + @JsonProperty(value = "table", required = true) GenericTable table) { + this.table = table; + } + + public static Builder builder() { + return new Builder(); + } + + public static Builder builder(GenericTable table) { + return new Builder(table); + } + + public static final class Builder { + private GenericTable table; + + private Builder() {} + + private Builder(GenericTable table) { + this.table = table; + } + + public Builder setTable(GenericTable table) { + this.table = table; + return this; + } + + public LoadGenericTableResponse build() { + LoadGenericTableResponse inst = new LoadGenericTableResponse(table); + return inst; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (!(o instanceof LoadGenericTableResponse)) { + return false; + } + LoadGenericTableResponse loadGenericTableResponse = (LoadGenericTableResponse) o; + return Objects.equals(this.table, loadGenericTableResponse.table); + } + + @Override + public int hashCode() { + return Objects.hash(table); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("class LoadGenericTableResponse {\n"); + + sb.append(" table: ").append(toIndentedString(table)).append("\n"); + sb.append("}"); + return sb.toString(); + } + + /** + * Convert the given object to string with each line indented by 4 spaces (except the first line). + */ + private String toIndentedString(Object o) { + if (o == null) { + return "null"; + } + return o.toString().replace("\n", "\n "); + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/utils/DeltaHelper.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/utils/DeltaHelper.java new file mode 100644 index 0000000000..2974384247 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/utils/DeltaHelper.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark.utils; + +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import org.apache.iceberg.common.DynConstructors; +import org.apache.polaris.spark.PolarisSparkCatalog; +import org.apache.spark.sql.connector.catalog.DelegatingCatalogExtension; +import org.apache.spark.sql.connector.catalog.TableCatalog; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class DeltaHelper { + private static final Logger LOG = LoggerFactory.getLogger(DeltaHelper.class); + + public static final String DELTA_CATALOG_IMPL_KEY = "delta-catalog-impl"; + private static final String DEFAULT_DELTA_CATALOG_CLASS = + "org.apache.spark.sql.delta.catalog.DeltaCatalog"; + + private TableCatalog deltaCatalog = null; + private String deltaCatalogImpl = DEFAULT_DELTA_CATALOG_CLASS; + + public DeltaHelper(CaseInsensitiveStringMap options) { + if (options.get(DELTA_CATALOG_IMPL_KEY) != null) { + this.deltaCatalogImpl = options.get(DELTA_CATALOG_IMPL_KEY); + } + } + + public TableCatalog loadDeltaCatalog(PolarisSparkCatalog polarisSparkCatalog) { + if (this.deltaCatalog != null) { + return this.deltaCatalog; + } + + DynConstructors.Ctor ctor; + try { + ctor = DynConstructors.builder(TableCatalog.class).impl(deltaCatalogImpl).buildChecked(); + } catch (NoSuchMethodException e) { + throw new IllegalArgumentException( + String.format("Cannot initialize Delta Catalog %s: %s", deltaCatalogImpl, e.getMessage()), + e); + } + + try { + this.deltaCatalog = ctor.newInstance(); + } catch (ClassCastException e) { + throw new IllegalArgumentException( + String.format( + "Cannot initialize Delta Catalog, %s does not implement Table Catalog.", + deltaCatalogImpl), + e); + } + + // set the polaris spark catalog as the delegate catalog of delta catalog + ((DelegatingCatalogExtension) this.deltaCatalog).setDelegateCatalog(polarisSparkCatalog); + + // We want to behave exactly the same as unity catalog for Delta. However, DeltaCatalog + // implementation today is hard coded for unity catalog. Following issue is used to track + // the extension of the usage https://github.com/delta-io/delta/issues/4306. + // Here, we use reflection to set the isUnityCatalog to true for exactly same behavior as + // unity catalog for now. + try { + // isUnityCatalog is a lazy val, access the compute method for the lazy val + // make sure the method is triggered before the value is set, otherwise, the + // value will be overwritten later when the method is triggered. 
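+      // (For a Scala `lazy val x`, the compiler emits a synthetic `x$lzycompute()` method plus a
+      // backing field `x`; invoking the compute method here forces initialization so that the
+      // reflective write to the field further below is not clobbered by a later lazy init.)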
+ String methodGetName = "isUnityCatalog" + "$lzycompute"; + Method method = this.deltaCatalog.getClass().getDeclaredMethod(methodGetName); + method.setAccessible(true); + // invoke the lazy methods before it is set + method.invoke(this.deltaCatalog); + } catch (NoSuchMethodException e) { + LOG.warn("No lazy compute method found for variable isUnityCatalog"); + } catch (Exception e) { + throw new RuntimeException("Failed to invoke the lazy compute methods for isUnityCatalog", e); + } + + try { + Field field = this.deltaCatalog.getClass().getDeclaredField("isUnityCatalog"); + field.setAccessible(true); + field.set(this.deltaCatalog, true); + } catch (NoSuchFieldException e) { + throw new RuntimeException( + "Failed find the isUnityCatalog field, delta-spark version >= 3.2.1 is required", e); + } catch (IllegalAccessException e) { + throw new RuntimeException("Failed to set the isUnityCatalog field", e); + } + + return this.deltaCatalog; + } +} diff --git a/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/utils/PolarisCatalogUtils.java b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/utils/PolarisCatalogUtils.java new file mode 100644 index 0000000000..98016b71fd --- /dev/null +++ b/plugins/spark/v4.0/spark/src/main/java/org/apache/polaris/spark/utils/PolarisCatalogUtils.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark.utils; + +import com.google.common.collect.Maps; +import java.lang.reflect.Field; +import java.util.Map; +import org.apache.iceberg.CachingCatalog; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.rest.RESTCatalog; +import org.apache.iceberg.rest.RESTSessionCatalog; +import org.apache.iceberg.rest.auth.OAuth2Util; +import org.apache.iceberg.spark.SparkCatalog; +import org.apache.polaris.spark.rest.GenericTable; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableCatalog; +import org.apache.spark.sql.connector.catalog.TableProvider; +import org.apache.spark.sql.execution.datasources.DataSource; +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +public class PolarisCatalogUtils { + public static final String TABLE_PROVIDER_KEY = "provider"; + public static final String TABLE_PATH_KEY = "path"; + + /** Check whether the table provider is iceberg. */ + public static boolean useIceberg(String provider) { + return provider == null || "iceberg".equalsIgnoreCase(provider); + } + + /** Check whether the table provider is delta. 
*/ + public static boolean useDelta(String provider) { + return "delta".equalsIgnoreCase(provider); + } + + /** + * For tables whose location is managed by Spark Session Catalog, there will be no location or + * path in the properties. + */ + public static boolean isTableWithSparkManagedLocation(Map properties) { + boolean hasLocationClause = properties.containsKey(TableCatalog.PROP_LOCATION); + boolean hasPathClause = properties.containsKey(TABLE_PATH_KEY); + return !hasLocationClause && !hasPathClause; + } + + /** + * Load spark table using DataSourceV2. + * + * @return V2Table if DataSourceV2 is available for the table format. For delta table, it returns + * DeltaTableV2. + */ + public static Table loadSparkTable(GenericTable genericTable) { + SparkSession sparkSession = SparkSession.active(); + TableProvider provider = + DataSource.lookupDataSourceV2(genericTable.getFormat(), sparkSession.sessionState().conf()) + .get(); + Map properties = genericTable.getProperties(); + boolean hasLocationClause = properties.get(TableCatalog.PROP_LOCATION) != null; + boolean hasPathClause = properties.get(TABLE_PATH_KEY) != null; + Map tableProperties = Maps.newHashMap(); + tableProperties.putAll(properties); + if (!hasPathClause) { + // DataSourceV2 requires the path property on table loading. However, spark today + // doesn't create the corresponding path property if the path keyword is not + // provided by user when location is provided. Here, we duplicate the location + // property as path to make sure the table can be loaded. + if (genericTable.getBaseLocation() != null && !genericTable.getBaseLocation().isEmpty()) { + tableProperties.put(TABLE_PATH_KEY, genericTable.getBaseLocation()); + } else if (hasLocationClause) { + tableProperties.put(TABLE_PATH_KEY, properties.get(TableCatalog.PROP_LOCATION)); + } + } + return DataSourceV2Utils.getTableFromProvider( + provider, new CaseInsensitiveStringMap(tableProperties), scala.Option.empty()); + } + + /** + * Get the catalogAuth field inside the RESTSessionCatalog used by Iceberg Spark Catalog use + * reflection. TODO: Deprecate this function once the iceberg client is updated to 1.9.0 to use + * AuthManager and the capability of injecting an AuthManger is available. 
Related iceberg PR: + * https://github.com/apache/iceberg/pull/12655 + */ + public static OAuth2Util.AuthSession getAuthSession(SparkCatalog sparkCatalog) { + try { + Field icebergCatalogField = sparkCatalog.getClass().getDeclaredField("icebergCatalog"); + icebergCatalogField.setAccessible(true); + Catalog icebergCatalog = (Catalog) icebergCatalogField.get(sparkCatalog); + RESTCatalog icebergRestCatalog; + if (icebergCatalog instanceof CachingCatalog) { + Field catalogField = icebergCatalog.getClass().getDeclaredField("catalog"); + catalogField.setAccessible(true); + icebergRestCatalog = (RESTCatalog) catalogField.get(icebergCatalog); + } else { + icebergRestCatalog = (RESTCatalog) icebergCatalog; + } + + Field sessionCatalogField = icebergRestCatalog.getClass().getDeclaredField("sessionCatalog"); + sessionCatalogField.setAccessible(true); + RESTSessionCatalog sessionCatalog = + (RESTSessionCatalog) sessionCatalogField.get(icebergRestCatalog); + + Field authField = sessionCatalog.getClass().getDeclaredField("catalogAuth"); + authField.setAccessible(true); + return (OAuth2Util.AuthSession) authField.get(sessionCatalog); + } catch (Exception e) { + throw new RuntimeException("Failed to get the catalogAuth from the Iceberg SparkCatalog", e); + } + } +} diff --git a/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/NoopDeltaCatalog.java b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/NoopDeltaCatalog.java new file mode 100644 index 0000000000..f698615e67 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/NoopDeltaCatalog.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark; + +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; +import org.apache.spark.sql.connector.catalog.DelegatingCatalogExtension; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableChange; + +/** + * This is a fake delta catalog class that is used for testing. This class is a noop class that + * directly passes all calls to the delegate CatalogPlugin configured as part of + * DelegatingCatalogExtension. + */ +public class NoopDeltaCatalog extends DelegatingCatalogExtension { + // This is a mock of isUnityCatalog scala val in + // org.apache.spark.sql.delta.catalog.DeltaCatalog. + private boolean isUnityCatalog = false; + + @Override + public Table alterTable(Identifier ident, TableChange... 
changes) throws NoSuchTableException { + return super.loadTable(ident); + } +} diff --git a/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/PolarisInMemoryCatalog.java b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/PolarisInMemoryCatalog.java new file mode 100644 index 0000000000..2d71d9cb6e --- /dev/null +++ b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/PolarisInMemoryCatalog.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark; + +import com.google.common.collect.Maps; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentMap; +import java.util.stream.Collectors; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.exceptions.AlreadyExistsException; +import org.apache.iceberg.exceptions.NoSuchNamespaceException; +import org.apache.iceberg.exceptions.NoSuchTableException; +import org.apache.iceberg.inmemory.InMemoryCatalog; +import org.apache.polaris.spark.rest.GenericTable; + +/** InMemory implementation for the Polaris Catalog. This class is mainly used by testing. */ +public class PolarisInMemoryCatalog extends InMemoryCatalog implements PolarisCatalog { + private final ConcurrentMap genericTables; + + public PolarisInMemoryCatalog() { + this.genericTables = Maps.newConcurrentMap(); + } + + @Override + public List listGenericTables(Namespace ns) { + return this.genericTables.keySet().stream() + .filter(t -> t.namespace().equals(ns)) + .sorted(Comparator.comparing(TableIdentifier::toString)) + .collect(Collectors.toList()); + } + + @Override + public GenericTable loadGenericTable(TableIdentifier identifier) { + GenericTable table = this.genericTables.get(identifier); + if (table == null) { + throw new NoSuchTableException("Generic table does not exist: %s", identifier); + } + + return table; + } + + @Override + public boolean dropGenericTable(TableIdentifier identifier) { + return null != this.genericTables.remove(identifier); + } + + @Override + public GenericTable createGenericTable( + TableIdentifier identifier, + String format, + String baseLocation, + String doc, + Map props) { + if (!namespaceExists(identifier.namespace())) { + throw new NoSuchNamespaceException( + "Cannot create generic table %s. 
Namespace does not exist: %s", + identifier, identifier.namespace()); + } + + GenericTable previous = + this.genericTables.putIfAbsent( + identifier, + GenericTable.builder() + .setName(identifier.name()) + .setFormat(format) + .setBaseLocation(baseLocation) + .setProperties(props) + .build()); + + if (previous != null) { + throw new AlreadyExistsException("Generic table already exists: %s", identifier); + } + + return this.genericTables.get(identifier); + } +} diff --git a/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/SparkCatalogTest.java b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/SparkCatalogTest.java new file mode 100644 index 0000000000..708bf60e7c --- /dev/null +++ b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/SparkCatalogTest.java @@ -0,0 +1,642 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.polaris.spark; + +import static org.apache.iceberg.CatalogProperties.CATALOG_IMPL; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import com.google.common.collect.Maps; +import java.util.Arrays; +import java.util.Map; +import java.util.UUID; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.CatalogUtil; +import org.apache.iceberg.actions.DeleteReachableFiles; +import org.apache.iceberg.catalog.Catalog; +import org.apache.iceberg.spark.SparkUtil; +import org.apache.iceberg.spark.actions.DeleteReachableFilesSparkAction; +import org.apache.iceberg.spark.actions.SparkActions; +import org.apache.iceberg.spark.source.SparkTable; +import org.apache.polaris.spark.utils.DeltaHelper; +import org.apache.polaris.spark.utils.PolarisCatalogUtils; +import org.apache.spark.SparkContext; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.catalyst.analysis.NoSuchNamespaceException; +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException; +import org.apache.spark.sql.catalyst.analysis.NoSuchViewException; +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException; +import org.apache.spark.sql.connector.catalog.Identifier; +import org.apache.spark.sql.connector.catalog.NamespaceChange; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.catalog.TableCatalog; +import org.apache.spark.sql.connector.catalog.TableChange; +import org.apache.spark.sql.connector.catalog.TableProvider; +import org.apache.spark.sql.connector.catalog.V1Table; +import org.apache.spark.sql.connector.catalog.View; +import org.apache.spark.sql.connector.catalog.ViewChange; +import org.apache.spark.sql.connector.catalog.ViewInfo; +import org.apache.spark.sql.connector.expressions.Transform; +import 
org.apache.spark.sql.execution.datasources.DataSource; +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Utils; +import org.apache.spark.sql.internal.SQLConf; +import org.apache.spark.sql.internal.SessionState; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; +import org.mockito.MockedStatic; +import org.mockito.Mockito; +import scala.Option; + +public class SparkCatalogTest { + private static class InMemoryIcebergSparkCatalog extends org.apache.iceberg.spark.SparkCatalog { + private PolarisInMemoryCatalog inMemoryCatalog = null; + + @Override + protected Catalog buildIcebergCatalog(String name, CaseInsensitiveStringMap options) { + PolarisInMemoryCatalog inMemoryCatalog = new PolarisInMemoryCatalog(); + inMemoryCatalog.initialize(name, options); + + this.inMemoryCatalog = inMemoryCatalog; + + return inMemoryCatalog; + } + + public PolarisInMemoryCatalog getInMemoryCatalog() { + return this.inMemoryCatalog; + } + } + + /** + * And SparkCatalog implementation that uses InMemory catalog implementation for both Iceberg and + * Polaris + */ + private static class InMemorySparkCatalog extends SparkCatalog { + @Override + public void initialize(String name, CaseInsensitiveStringMap options) { + this.catalogName = name; + // initialize the InMemory icebergSparkCatalog + this.icebergsSparkCatalog = new InMemoryIcebergSparkCatalog(); + this.icebergsSparkCatalog.initialize(name, options); + + // initialize the polarisSparkCatalog with PolarisSparkCatalog + this.polarisSparkCatalog = + new PolarisSparkCatalog( + ((InMemoryIcebergSparkCatalog) this.icebergsSparkCatalog).getInMemoryCatalog()); + this.polarisSparkCatalog.initialize(name, options); + + this.deltaHelper = new DeltaHelper(options); + } + } + + private InMemorySparkCatalog catalog; + private String catalogName; + + private static final String[] defaultNS = new String[] {"ns"}; + private static StructType defaultSchema = + new StructType().add("id", "long").add("name", "string"); + + @BeforeEach + public void setup() throws Exception { + catalogName = "test_" + UUID.randomUUID(); + Map catalogConfig = Maps.newHashMap(); + catalogConfig.put(CATALOG_IMPL, "org.apache.iceberg.inmemory.InMemoryCatalog"); + catalogConfig.put("cache-enabled", "false"); + catalogConfig.put( + DeltaHelper.DELTA_CATALOG_IMPL_KEY, "org.apache.polaris.spark.NoopDeltaCatalog"); + catalog = new InMemorySparkCatalog(); + Configuration conf = new Configuration(); + try (MockedStatic mockedStaticSparkSession = + Mockito.mockStatic(SparkSession.class); + MockedStatic mockedSparkUtil = Mockito.mockStatic(SparkUtil.class)) { + SparkSession mockedSession = Mockito.mock(SparkSession.class); + mockedStaticSparkSession.when(SparkSession::active).thenReturn(mockedSession); + mockedStaticSparkSession + .when(SparkSession::getActiveSession) + .thenReturn(Option.apply(mockedSession)); + mockedSparkUtil + .when(() -> SparkUtil.hadoopConfCatalogOverrides(mockedSession, catalogName)) + .thenReturn(conf); + SparkContext mockedContext = Mockito.mock(SparkContext.class); + Mockito.when(mockedSession.sparkContext()).thenReturn(mockedContext); + Mockito.when(mockedContext.applicationId()).thenReturn("appId"); + Mockito.when(mockedContext.sparkUser()).thenReturn("test-user"); + Mockito.when(mockedContext.version()).thenReturn("4.0"); + + 
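+      // The static mocks above stand in for a running Spark application: the Iceberg SparkCatalog
+      // resolves the active SparkSession (for Hadoop configuration overrides and application
+      // context) while initializing, so those lookups are stubbed before initialize() runs below.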
catalog.initialize(catalogName, new CaseInsensitiveStringMap(catalogConfig)); + } + catalog.createNamespace(defaultNS, Maps.newHashMap()); + } + + @Test + void testCatalogValidation() { + Map catalogConfigWithImpl = Maps.newHashMap(); + catalogConfigWithImpl.put(CATALOG_IMPL, "org.apache.iceberg.inmemory.InMemoryCatalog"); + catalogConfigWithImpl.put("cache-enabled", "false"); + SparkCatalog testCatalog = new SparkCatalog(); + assertThatThrownBy( + () -> + testCatalog.validateAndResolveCatalogOptions( + new CaseInsensitiveStringMap(catalogConfigWithImpl))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Customized catalog implementation is not supported and not needed"); + + Map catalogConfigInvalidType = Maps.newHashMap(); + catalogConfigInvalidType.put(CatalogUtil.ICEBERG_CATALOG_TYPE, "hive"); + assertThatThrownBy( + () -> + testCatalog.validateAndResolveCatalogOptions( + new CaseInsensitiveStringMap(catalogConfigInvalidType))) + .isInstanceOf(IllegalArgumentException.class); + + CaseInsensitiveStringMap resolvedMap = + testCatalog.validateAndResolveCatalogOptions( + new CaseInsensitiveStringMap(Maps.newHashMap())); + assertThat(resolvedMap.get(CatalogUtil.ICEBERG_CATALOG_TYPE)) + .isEqualTo(CatalogUtil.ICEBERG_CATALOG_TYPE_REST); + } + + @Test + void testCreateAndLoadNamespace() throws Exception { + String[] namespace = new String[] {"ns1"}; + Map metadata = Maps.newHashMap(); + metadata.put("key1", "value1"); + + // no namespace can be found + assertThatThrownBy(() -> catalog.loadNamespaceMetadata(namespace)) + .isInstanceOf(NoSuchNamespaceException.class); + + // create the namespace + catalog.createNamespace(namespace, metadata); + + Map nsMetadata = catalog.loadNamespaceMetadata(namespace); + assertThat(nsMetadata).contains(Map.entry("key1", "value1")); + } + + @Test + void testDropAndListNamespaces() throws Exception { + String[][] lv1ns = new String[][] {{"l1ns1"}, {"l1ns2"}}; + String[][] lv2ns1 = new String[][] {{"l1ns1", "l2ns1"}, {"l1ns1", "l2ns2"}}; + String[][] lv2ns2 = new String[][] {{"l1ns2", "l2ns3"}}; + + // create the namespaces + for (String[] namespace : lv1ns) { + catalog.createNamespace(namespace, Maps.newHashMap()); + } + for (String[] namespace : lv2ns1) { + catalog.createNamespace(namespace, Maps.newHashMap()); + } + for (String[] namespace : lv2ns2) { + catalog.createNamespace(namespace, Maps.newHashMap()); + } + + // list namespaces under root + String[][] lv1nsResult = catalog.listNamespaces(); + assertThat(lv1nsResult.length).isEqualTo(lv1ns.length + 1); + assertThat(Arrays.asList(lv1nsResult)).contains(defaultNS); + for (String[] namespace : lv1ns) { + assertThat(Arrays.asList(lv1nsResult)).contains(namespace); + } + // list namespace under l1ns1 + String[][] lv2ns1Result = catalog.listNamespaces(lv1ns[0]); + assertThat(lv2ns1Result.length).isEqualTo(lv2ns1.length); + for (String[] namespace : lv2ns1) { + assertThat(Arrays.asList(lv2ns1Result)).contains(namespace); + } + // list namespace under l1ns2 + String[][] lv2ns2Result = catalog.listNamespaces(lv1ns[1]); + assertThat(lv2ns2Result.length).isEqualTo(lv2ns2.length); + for (String[] namespace : lv2ns2) { + assertThat(Arrays.asList(lv2ns2Result)).contains(namespace); + } + // no namespace under l1ns2.l2ns3 + assertThat(catalog.listNamespaces(lv2ns2[0]).length).isEqualTo(0); + + // drop l1ns2 + catalog.dropNamespace(lv2ns2[0], true); + assertThat(catalog.listNamespaces(lv1ns[1]).length).isEqualTo(0); + + catalog.dropNamespace(lv1ns[1], true); + assertThatThrownBy(() -> 
catalog.listNamespaces(lv1ns[1])) + .isInstanceOf(NoSuchNamespaceException.class); + } + + @Test + void testAlterNamespace() throws Exception { + String[] namespace = new String[] {"ns1"}; + Map metadata = Maps.newHashMap(); + metadata.put("orig_key1", "orig_value1"); + + catalog.createNamespace(namespace, metadata); + assertThat(catalog.loadNamespaceMetadata(namespace)) + .contains(Map.entry("orig_key1", "orig_value1")); + + catalog.alterNamespace(namespace, NamespaceChange.setProperty("new_key", "new_value")); + assertThat(catalog.loadNamespaceMetadata(namespace)) + .contains(Map.entry("new_key", "new_value")); + } + + @Test + void testStageOperations() throws Exception { + Identifier createId = Identifier.of(defaultNS, "iceberg-table-create"); + Map icebergProperties = Maps.newHashMap(); + icebergProperties.put("provider", "iceberg"); + icebergProperties.put(TableCatalog.PROP_LOCATION, "file:///tmp/path/to/iceberg-table/"); + StructType iceberg_schema = new StructType().add("boolType", "boolean"); + + catalog.stageCreate(createId, iceberg_schema, new Transform[0], icebergProperties); + + catalog.stageCreateOrReplace(createId, iceberg_schema, new Transform[0], icebergProperties); + } + + @Test + void testBasicViewOperations() throws Exception { + Identifier viewIdentifier = Identifier.of(defaultNS, "test-view"); + String viewSql = "select id from test-table where id < 3"; + StructType schema = new StructType().add("id", "long"); + ViewInfo viewInfo = + new ViewInfo( + viewIdentifier, + viewSql, + catalogName, + defaultNS, + schema, + new String[0], + new String[0], + new String[0], + Maps.newHashMap()); + catalog.createView(viewInfo); + + // load the view + View view = catalog.loadView(viewIdentifier); + assertThat(view.query()).isEqualTo(viewSql); + assertThat(view.schema()).isEqualTo(schema); + + // alter the view properties + catalog.alterView(viewIdentifier, ViewChange.setProperty("view_key1", "view_value1")); + view = catalog.loadView(viewIdentifier); + assertThat(view.properties()).contains(Map.entry("view_key1", "view_value1")); + + // rename the view + Identifier newIdentifier = Identifier.of(defaultNS, "new-view"); + catalog.renameView(viewIdentifier, newIdentifier); + assertThatThrownBy(() -> catalog.loadView(viewIdentifier)) + .isInstanceOf(NoSuchViewException.class); + view = catalog.loadView(newIdentifier); + assertThat(view.query()).isEqualTo(viewSql); + assertThat(view.schema()).isEqualTo(schema); + + // replace the view + String newSql = "select id from test-table where id == 3"; + Map properties = Maps.newHashMap(); + properties.put("key1", "value1"); + catalog.replaceView( + newIdentifier, + newSql, + catalogName, + defaultNS, + schema, + new String[0], + new String[0], + new String[0], + properties); + view = catalog.loadView(newIdentifier); + assertThat(view.query()).isEqualTo(newSql); + assertThat(view.properties()).contains(Map.entry("key1", "value1")); + + // drop the view + catalog.dropView(newIdentifier); + assertThatThrownBy(() -> catalog.loadView(newIdentifier)) + .isInstanceOf(NoSuchViewException.class); + } + + @Test + void testListViews() throws Exception { + // create a new namespace under the default NS + String[] namespace = new String[] {"ns", "nsl2"}; + catalog.createNamespace(namespace, Maps.newHashMap()); + // create under defaultNS + String view1Name = "test-view1"; + String view1SQL = "select id from test-table where id >= 3"; + ViewInfo viewInfo1 = + new ViewInfo( + Identifier.of(defaultNS, view1Name), + view1SQL, + catalogName, + defaultNS, + 
defaultSchema,
+            new String[0],
+            new String[0],
+            new String[0],
+            Maps.newHashMap());
+    catalog.createView(viewInfo1);
+    // create two views under ns.nsl2
+    String[] nsl2ViewNames = new String[] {"test-view2", "test-view3"};
+    String[] nsl2ViewSQLs =
+        new String[] {
+          "select id from test-table where id == 3", "select id from test-table where id < 3"
+        };
+    for (int i = 0; i < nsl2ViewNames.length; i++) {
+      ViewInfo viewInfo =
+          new ViewInfo(
+              Identifier.of(namespace, nsl2ViewNames[i]),
+              nsl2ViewSQLs[i],
+              catalogName,
+              namespace,
+              defaultSchema,
+              new String[0],
+              new String[0],
+              new String[0],
+              Maps.newHashMap());
+      catalog.createView(viewInfo);
+    }
+    // list views under defaultNS
+    Identifier[] l1Views = catalog.listViews(defaultNS);
+    assertThat(l1Views.length).isEqualTo(1);
+    assertThat(l1Views[0].name()).isEqualTo(view1Name);
+
+    // list views under ns.nsl2
+    Identifier[] l2Views = catalog.listViews(namespace);
+    assertThat(l2Views.length).isEqualTo(nsl2ViewSQLs.length);
+    for (String name : nsl2ViewNames) {
+      assertThat(Arrays.asList(l2Views)).contains(Identifier.of(namespace, name));
+    }
+  }
+
+  @Test
+  void testIcebergTableOperations() throws Exception {
+    Identifier identifier = Identifier.of(defaultNS, "iceberg-table");
+    createAndValidateGenericTableWithLoad(catalog, identifier, defaultSchema, "iceberg");
+
+    // load the table
+    Table table = catalog.loadTable(identifier);
+    // verify iceberg SparkTable is loaded
+    assertThat(table).isInstanceOf(SparkTable.class);
+
+    Identifier[] icebergTables = catalog.listTables(defaultNS);
+    assertThat(icebergTables.length).isEqualTo(1);
+    assertThat(icebergTables[0]).isEqualTo(Identifier.of(defaultNS, "iceberg-table"));
+
+    // verify create table with the same identifier fails with spark TableAlreadyExistsException
+    Map<String, String> newProperties = Maps.newHashMap();
+    newProperties.put(PolarisCatalogUtils.TABLE_PROVIDER_KEY, "iceberg");
+    newProperties.put(TableCatalog.PROP_LOCATION, "file:///tmp/path/to/table/");
+    assertThatThrownBy(
+            () -> catalog.createTable(identifier, defaultSchema, new Transform[0], newProperties))
+        .isInstanceOf(TableAlreadyExistsException.class);
+
+    // drop the iceberg table
+    catalog.dropTable(identifier);
+    assertThatThrownBy(() -> catalog.loadTable(identifier))
+        .isInstanceOf(NoSuchTableException.class);
+    assertThat(catalog.listTables(defaultNS)).isEmpty();
+  }
+
+  @ParameterizedTest
+  @ValueSource(strings = {"delta", "csv"})
+  void testCreateAndLoadGenericTable(String format) throws Exception {
+    Identifier identifier = Identifier.of(defaultNS, "generic-test-table");
+    createAndValidateGenericTableWithLoad(catalog, identifier, defaultSchema, format);
+
+    Identifier[] icebergTables = catalog.listTables(defaultNS);
+    assertThat(icebergTables.length).isEqualTo(1);
+    assertThat(icebergTables[0]).isEqualTo(Identifier.of(defaultNS, "generic-test-table"));
+
+    Map<String, String> newProperties = Maps.newHashMap();
+    newProperties.put(PolarisCatalogUtils.TABLE_PROVIDER_KEY, "parquet");
+    newProperties.put(TableCatalog.PROP_LOCATION, "file:///tmp/path/to/table/");
+    assertThatThrownBy(
+            () -> catalog.createTable(identifier, defaultSchema, new Transform[0], newProperties))
+        .isInstanceOf(TableAlreadyExistsException.class);
+
+    // drop the generic table
+    catalog.dropTable(identifier);
+    assertThatThrownBy(() -> catalog.loadTable(identifier))
+        .isInstanceOf(NoSuchTableException.class);
+    assertThat(catalog.listTables(defaultNS)).isEmpty();
+  }
+
+  @Test
+  void testMixedTables() throws Exception {
+    // create two iceberg tables, and three non-iceberg tables
+    String[] tableNames = new String[] {"iceberg1", "iceberg2", "delta1", "csv1", "delta2"};
+    String[] tableFormats = new String[] {"iceberg", null, "delta", "csv", "delta"};
+    for (int i = 0; i < tableNames.length; i++) {
+      Identifier identifier = Identifier.of(defaultNS, tableNames[i]);
+      createAndValidateGenericTableWithLoad(catalog, identifier, defaultSchema, tableFormats[i]);
+    }
+
+    // list all tables
+    Identifier[] tableIdents = catalog.listTables(defaultNS);
+    assertThat(tableIdents.length).isEqualTo(tableNames.length);
+    for (String name : tableNames) {
+      assertThat(tableIdents).contains(Identifier.of(defaultNS, name));
+    }
+
+    // drop the iceberg2 and delta2 tables
+    catalog.dropTable(Identifier.of(defaultNS, "iceberg2"));
+    catalog.dropTable(Identifier.of(defaultNS, "delta2"));
+
+    String[] remainingTableNames = new String[] {"iceberg1", "delta1", "csv1"};
+    Identifier[] remainingTableIdents = catalog.listTables(defaultNS);
+    assertThat(remainingTableIdents.length).isEqualTo(remainingTableNames.length);
+    for (String name : remainingTableNames) {
+      assertThat(remainingTableIdents).contains(Identifier.of(defaultNS, name));
+    }
+
+    // drop the remaining tables
+    for (String name : remainingTableNames) {
+      catalog.dropTable(Identifier.of(defaultNS, name));
+    }
+    assertThat(catalog.listTables(defaultNS)).isEmpty();
+  }
+
+  @Test
+  void testAlterAndRenameTable() throws Exception {
+    String icebergTableName = "iceberg-table";
+    String deltaTableName = "delta-table";
+    String csvTableName = "csv-table";
+    Identifier icebergIdent = Identifier.of(defaultNS, icebergTableName);
+    Identifier deltaIdent = Identifier.of(defaultNS, deltaTableName);
+    Identifier csvIdent = Identifier.of(defaultNS, csvTableName);
+    createAndValidateGenericTableWithLoad(catalog, icebergIdent, defaultSchema, "iceberg");
+    createAndValidateGenericTableWithLoad(catalog, deltaIdent, defaultSchema, "delta");
+    createAndValidateGenericTableWithLoad(catalog, csvIdent, defaultSchema, "csv");
+
+    // verify alter iceberg table
+    Table newIcebergTable =
+        catalog.alterTable(icebergIdent, TableChange.setProperty("iceberg_key", "iceberg_value"));
+    assertThat(newIcebergTable).isInstanceOf(SparkTable.class);
+    assertThat(newIcebergTable.properties()).contains(Map.entry("iceberg_key", "iceberg_value"));
+
+    // verify rename iceberg table works
+    Identifier newIcebergIdent = Identifier.of(defaultNS, "new-iceberg-table");
+    catalog.renameTable(icebergIdent, newIcebergIdent);
+    assertThatThrownBy(() -> catalog.loadTable(icebergIdent))
+        .isInstanceOf(NoSuchTableException.class);
+    Table icebergTable = catalog.loadTable(newIcebergIdent);
+    assertThat(icebergTable).isInstanceOf(SparkTable.class);
+
+    // verify alter delta table is a no-op, and alter csv table throws an exception
+    SQLConf conf = new SQLConf();
+    try (MockedStatic<SparkSession> mockedStaticSparkSession =
+            Mockito.mockStatic(SparkSession.class);
+        MockedStatic<DataSource> mockedStaticDS = Mockito.mockStatic(DataSource.class);
+        MockedStatic<DataSourceV2Utils> mockedStaticDSV2 =
+            Mockito.mockStatic(DataSourceV2Utils.class)) {
+      SparkSession mockedSession = Mockito.mock(SparkSession.class);
+      mockedStaticSparkSession.when(SparkSession::active).thenReturn(mockedSession);
+      SessionState mockedState = Mockito.mock(SessionState.class);
+      Mockito.when(mockedSession.sessionState()).thenReturn(mockedState);
+      Mockito.when(mockedState.conf()).thenReturn(conf);
+
+      TableProvider deltaProvider = Mockito.mock(TableProvider.class);
+      mockedStaticDS
+          .when(() -> DataSource.lookupDataSourceV2(Mockito.eq("delta"), Mockito.any()))
+          .thenReturn(Option.apply(deltaProvider));
+      V1Table deltaTable = Mockito.mock(V1Table.class);
+      Map<String, String> deltaProps = Maps.newHashMap();
+      deltaProps.put(PolarisCatalogUtils.TABLE_PROVIDER_KEY, "delta");
+      deltaProps.put(TableCatalog.PROP_LOCATION, "file:///tmp/delta/path/to/table/test-delta/");
+      Mockito.when(deltaTable.properties()).thenReturn(deltaProps);
+      mockedStaticDSV2
+          .when(
+              () ->
+                  DataSourceV2Utils.getTableFromProvider(
+                      Mockito.eq(deltaProvider), Mockito.any(), Mockito.any()))
+          .thenReturn(deltaTable);
+
+      Table delta =
+          catalog.alterTable(deltaIdent, TableChange.setProperty("delta_key", "delta_value"));
+      assertThat(delta).isInstanceOf(V1Table.class);
+
+      TableProvider csvProvider = Mockito.mock(TableProvider.class);
+      mockedStaticDS
+          .when(() -> DataSource.lookupDataSourceV2(Mockito.eq("csv"), Mockito.any()))
+          .thenReturn(Option.apply(csvProvider));
+      Map<String, String> csvProps = Maps.newHashMap();
+      csvProps.put(PolarisCatalogUtils.TABLE_PROVIDER_KEY, "csv");
+      V1Table csvTable = Mockito.mock(V1Table.class);
+      Mockito.when(csvTable.properties()).thenReturn(csvProps);
+      mockedStaticDSV2
+          .when(
+              () ->
+                  DataSourceV2Utils.getTableFromProvider(
+                      Mockito.eq(csvProvider), Mockito.any(), Mockito.any()))
+          .thenReturn(csvTable);
+      assertThatThrownBy(
+              () -> catalog.alterTable(csvIdent, TableChange.setProperty("csv_key", "csv_value")))
+          .isInstanceOf(UnsupportedOperationException.class);
+    }
+
+    // verify rename non-iceberg table is not supported
+    assertThatThrownBy(
+            () -> catalog.renameTable(deltaIdent, Identifier.of(defaultNS, "new-delta-table")))
+        .isInstanceOf(UnsupportedOperationException.class);
+    assertThatThrownBy(
+            () -> catalog.renameTable(csvIdent, Identifier.of(defaultNS, "new-csv-table")))
+        .isInstanceOf(UnsupportedOperationException.class);
+  }
+
+  @Test
+  void testPurgeInvalidateTable() throws Exception {
+    Identifier icebergIdent = Identifier.of(defaultNS, "iceberg-table");
+    Identifier deltaIdent = Identifier.of(defaultNS, "delta-table");
+    createAndValidateGenericTableWithLoad(catalog, icebergIdent, defaultSchema, "iceberg");
+    createAndValidateGenericTableWithLoad(catalog, deltaIdent, defaultSchema, "delta");
+
+    // invalidateTable is currently a no-op
+    catalog.invalidateTable(icebergIdent);
+    catalog.invalidateTable(deltaIdent);
+
+    Identifier[] tableIdents = catalog.listTables(defaultNS);
+    assertThat(tableIdents.length).isEqualTo(2);
+
+    // verify purgeTable drops the table
+    catalog.purgeTable(deltaIdent);
+    assertThat(catalog.listTables(defaultNS).length).isEqualTo(1);
+
+    // purge iceberg table triggers file deletion
+    try (MockedStatic<SparkActions> mockedStaticActions = Mockito.mockStatic(SparkActions.class)) {
+      SparkActions actions = Mockito.mock(SparkActions.class);
+      DeleteReachableFilesSparkAction deleteAction =
+          Mockito.mock(DeleteReachableFilesSparkAction.class);
+      mockedStaticActions.when(SparkActions::get).thenReturn(actions);
+      Mockito.when(actions.deleteReachableFiles(Mockito.any())).thenReturn(deleteAction);
+      Mockito.when(deleteAction.io(Mockito.any())).thenReturn(deleteAction);
+      Mockito.when(deleteAction.execute())
+          .thenReturn(Mockito.mock(DeleteReachableFiles.Result.class));
+
+      catalog.purgeTable(icebergIdent);
+    }
+    assertThat(catalog.listTables(defaultNS).length).isEqualTo(0);
+  }
+
+  private void createAndValidateGenericTableWithLoad(
+      InMemorySparkCatalog sparkCatalog, Identifier identifier, StructType schema, String format)
+      throws Exception {
+    Map<String, String> properties =
Maps.newHashMap(); + properties.put(PolarisCatalogUtils.TABLE_PROVIDER_KEY, format); + properties.put( + TableCatalog.PROP_LOCATION, + String.format("file:///tmp/delta/path/to/table/%s/", identifier.name())); + + SQLConf conf = new SQLConf(); + try (MockedStatic mockedStaticSparkSession = + Mockito.mockStatic(SparkSession.class); + MockedStatic mockedStaticDS = Mockito.mockStatic(DataSource.class); + MockedStatic mockedStaticDSV2 = + Mockito.mockStatic(DataSourceV2Utils.class)) { + SparkSession mockedSession = Mockito.mock(SparkSession.class); + mockedStaticSparkSession.when(SparkSession::active).thenReturn(mockedSession); + SessionState mockedState = Mockito.mock(SessionState.class); + Mockito.when(mockedSession.sessionState()).thenReturn(mockedState); + Mockito.when(mockedState.conf()).thenReturn(conf); + + TableProvider provider = Mockito.mock(TableProvider.class); + mockedStaticDS + .when(() -> DataSource.lookupDataSourceV2(Mockito.eq(format), Mockito.any())) + .thenReturn(Option.apply(provider)); + V1Table table = Mockito.mock(V1Table.class); + mockedStaticDSV2 + .when( + () -> + DataSourceV2Utils.getTableFromProvider( + Mockito.eq(provider), Mockito.any(), Mockito.any())) + .thenReturn(table); + Table createdTable = + sparkCatalog.createTable(identifier, schema, new Transform[0], properties); + Table loadedTable = sparkCatalog.loadTable(identifier); + + // verify the create and load table result + if (PolarisCatalogUtils.useIceberg(format)) { + // iceberg SparkTable is returned for iceberg tables + assertThat(createdTable).isInstanceOf(SparkTable.class); + assertThat(loadedTable).isInstanceOf(SparkTable.class); + } else { + // Spark V1 table is returned for non-iceberg tables + assertThat(createdTable).isInstanceOf(V1Table.class); + assertThat(loadedTable).isInstanceOf(V1Table.class); + } + } + } +} diff --git a/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/rest/DeserializationTest.java b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/rest/DeserializationTest.java new file mode 100644 index 0000000000..0f7d3c99b3 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/test/java/org/apache/polaris/spark/rest/DeserializationTest.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.polaris.spark.rest; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Maps; +import java.util.Map; +import java.util.Set; +import java.util.stream.Stream; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.rest.RESTSerializers; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.JsonAutoDetect; +import org.apache.iceberg.shaded.com.fasterxml.jackson.annotation.PropertyAccessor; +import org.apache.iceberg.shaded.com.fasterxml.jackson.core.JsonFactory; +import org.apache.iceberg.shaded.com.fasterxml.jackson.core.JsonFactoryBuilder; +import org.apache.iceberg.shaded.com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.iceberg.shaded.com.fasterxml.jackson.databind.DeserializationFeature; +import org.apache.iceberg.shaded.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.iceberg.shaded.com.fasterxml.jackson.databind.PropertyNamingStrategies; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +public class DeserializationTest { + private ObjectMapper mapper; + private static final JsonFactory FACTORY = + new JsonFactoryBuilder() + .configure(JsonFactory.Feature.INTERN_FIELD_NAMES, false) + .configure(JsonFactory.Feature.FAIL_ON_SYMBOL_HASH_OVERFLOW, false) + .build(); + + @BeforeEach + public void setUp() { + // NOTE: This is the same setting as iceberg RESTObjectMapper.java. However, + // RESTObjectMapper is not a public class, therefore, we duplicate the + // setting here for serialization and deserialization tests. 
+ mapper = new ObjectMapper(FACTORY); + mapper.setVisibility(PropertyAccessor.FIELD, JsonAutoDetect.Visibility.ANY); + mapper.setVisibility(PropertyAccessor.CREATOR, JsonAutoDetect.Visibility.ANY); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + mapper.setPropertyNamingStrategy(new PropertyNamingStrategies.KebabCaseStrategy()); + RESTSerializers.registerAll(mapper); + } + + @ParameterizedTest + @MethodSource("genericTableTestCases") + public void testLoadGenericTableRESTResponse( + String baseLocation, String doc, Map properties) + throws JsonProcessingException { + GenericTable.Builder tableBuilder = + GenericTable.builder() + .setFormat("delta") + .setName("test-table") + .setProperties(properties) + .setDoc(doc); + if (baseLocation != null) { + tableBuilder.setBaseLocation(baseLocation); + } + GenericTable table = tableBuilder.build(); + LoadGenericTableRESTResponse response = new LoadGenericTableRESTResponse(table); + String json = mapper.writeValueAsString(response); + LoadGenericTableRESTResponse deserializedResponse = + mapper.readValue(json, LoadGenericTableRESTResponse.class); + assertThat(deserializedResponse.getTable().getFormat()).isEqualTo("delta"); + assertThat(deserializedResponse.getTable().getName()).isEqualTo("test-table"); + assertThat(deserializedResponse.getTable().getDoc()).isEqualTo(doc); + assertThat(deserializedResponse.getTable().getProperties().size()).isEqualTo(properties.size()); + assertThat(deserializedResponse.getTable().getBaseLocation()).isEqualTo(baseLocation); + } + + @ParameterizedTest + @MethodSource("genericTableTestCases") + public void testCreateGenericTableRESTRequest( + String baseLocation, String doc, Map properties) + throws JsonProcessingException { + CreateGenericTableRESTRequest request = + new CreateGenericTableRESTRequest( + CreateGenericTableRequest.builder() + .setName("test-table") + .setFormat("delta") + .setDoc(doc) + .setBaseLocation(baseLocation) + .setProperties(properties) + .build()); + String json = mapper.writeValueAsString(request); + CreateGenericTableRESTRequest deserializedRequest = + mapper.readValue(json, CreateGenericTableRESTRequest.class); + assertThat(deserializedRequest.getName()).isEqualTo("test-table"); + assertThat(deserializedRequest.getFormat()).isEqualTo("delta"); + assertThat(deserializedRequest.getDoc()).isEqualTo(doc); + assertThat(deserializedRequest.getProperties().size()).isEqualTo(properties.size()); + assertThat(deserializedRequest.getBaseLocation()).isEqualTo(baseLocation); + } + + @Test + public void testListGenericTablesRESTResponse() throws JsonProcessingException { + Namespace namespace = Namespace.of("test-ns"); + Set idents = + ImmutableSet.of( + TableIdentifier.of(namespace, "table1"), + TableIdentifier.of(namespace, "table2"), + TableIdentifier.of(namespace, "table3")); + + // page token is null + ListGenericTablesRESTResponse response = new ListGenericTablesRESTResponse(null, idents); + String json = mapper.writeValueAsString(response); + ListGenericTablesRESTResponse deserializedResponse = + mapper.readValue(json, ListGenericTablesRESTResponse.class); + assertThat(deserializedResponse.getNextPageToken()).isNull(); + assertThat(deserializedResponse.getIdentifiers().size()).isEqualTo(idents.size()); + for (TableIdentifier identifier : idents) { + assertThat(deserializedResponse.getIdentifiers()).contains(identifier); + } + + // page token is not null + response = new ListGenericTablesRESTResponse("page-token", idents); + json = mapper.writeValueAsString(response); 
+ deserializedResponse = mapper.readValue(json, ListGenericTablesRESTResponse.class); + assertThat(deserializedResponse.getNextPageToken()).isEqualTo("page-token"); + for (TableIdentifier identifier : idents) { + assertThat(deserializedResponse.getIdentifiers()).contains(identifier); + } + } + + @Test + public void testLoadGenericTableRestResponse() throws JsonProcessingException { + LoadGenericTableRESTResponse request = + new LoadGenericTableRESTResponse( + GenericTable.builder().setName("test-table").setFormat("delta").build()); + String json = mapper.writeValueAsString(request); + LoadGenericTableRESTResponse deserializedResponse = + mapper.readValue(json, LoadGenericTableRESTResponse.class); + assertThat(deserializedResponse.getTable().getName()).isEqualTo("test-table"); + } + + private static Stream genericTableTestCases() { + var doc = "table for testing"; + var properties = Maps.newHashMap(); + properties.put("location", "s3://path/to/table/"); + var baseLocation = "s3://path/to/table/"; + return Stream.of( + Arguments.of(null, doc, properties), + Arguments.of(baseLocation, doc, properties), + Arguments.of(null, null, Maps.newHashMap()), + Arguments.of(baseLocation, doc, Maps.newHashMap()), + Arguments.of(baseLocation, null, properties)); + } +} diff --git a/plugins/spark/v4.0/spark/src/test/resources/logback-test.xml b/plugins/spark/v4.0/spark/src/test/resources/logback-test.xml new file mode 100644 index 0000000000..b7e97bb826 --- /dev/null +++ b/plugins/spark/v4.0/spark/src/test/resources/logback-test.xml @@ -0,0 +1,32 @@ + + + + + + + %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n + + + + + + diff --git a/runtime/spark-tests/build.gradle.kts b/runtime/spark-tests/build.gradle.kts index 884475c221..fb77c10f4f 100644 --- a/runtime/spark-tests/build.gradle.kts +++ b/runtime/spark-tests/build.gradle.kts @@ -56,7 +56,9 @@ dependencies { testImplementation(enforcedPlatform(libs.scala212.lang.library)) testImplementation(enforcedPlatform(libs.scala212.lang.reflect)) testImplementation(libs.javax.servlet.api) - testImplementation(libs.antlr4.runtime) + + // Spark 3.5 and Delta 3.3 require ANTLR 4.9.3 + testRuntimeOnly(libs.antlr4.runtime) } tasks.named("intTest").configure { diff --git a/settings.gradle.kts b/settings.gradle.kts index 83347575d3..f1987c3c54 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -68,7 +68,9 @@ val sparkVersions = sparkScalaVersions["sparkVersions"].toString().split(",").ma val noSourceChecksProjects = mutableSetOf() for (sparkVersion in sparkVersions) { - val scalaVersions = sparkScalaVersions["scalaVersions"].toString().split(",").map { it.trim() } + val scalaVersionsKey = "scalaVersions.${sparkVersion}" + val scalaVersionsStr = sparkScalaVersions[scalaVersionsKey].toString() + val scalaVersions = scalaVersionsStr.split(",").map { it.trim() } var first = true for (scalaVersion in scalaVersions) { val sparkArtifactId = "polaris-spark-${sparkVersion}_${scalaVersion}"