Commit b450c1c

[infra] Update pyspark java iceberg library to 1.6.0 (#1462)
* update pyspark java iceberg library to 1.6.0
* fix test
* add reminder
* make link
1 parent 9f47077 commit b450c1c

File tree

3 files changed (+5 -9 lines changed)

dev/Dockerfile

Lines changed: 1 addition & 0 deletions

@@ -36,6 +36,7 @@ ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip:$
 RUN mkdir -p ${HADOOP_HOME} && mkdir -p ${SPARK_HOME} && mkdir -p /home/iceberg/spark-events
 WORKDIR ${SPARK_HOME}
 
+# Remember to also update `tests/conftest`'s spark setting
 ENV SPARK_VERSION=3.5.3
 ENV ICEBERG_SPARK_RUNTIME_VERSION=3.5_2.12
 ENV ICEBERG_VERSION=1.6.0

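The new comment is a manual reminder that the Iceberg pin now lives in two places. Below is a minimal sketch (hypothetical, not part of this commit) of a check that would fail when the two pins drift apart; the file paths match this repo, but the regexes are assumptions about the surrounding text:

    # check_iceberg_pins.py -- hypothetical sync check, not in this commit
    import pathlib
    import re

    dockerfile = pathlib.Path("dev/Dockerfile").read_text()
    conftest = pathlib.Path("tests/conftest.py").read_text()

    # Matches the `ENV ICEBERG_VERSION=1.6.0` line in dev/Dockerfile
    docker_pin = re.search(r"^ENV ICEBERG_VERSION=(\S+)$", dockerfile, re.MULTILINE).group(1)
    # Matches the `iceberg_version = "1.6.0"` line in tests/conftest.py
    conftest_pin = re.search(r'iceberg_version = "([^"]+)"', conftest).group(1)

    assert docker_pin == conftest_pin, f"Iceberg pins diverge: {docker_pin} != {conftest_pin}"
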
tests/conftest.py

Lines changed: 2 additions & 1 deletion

@@ -2240,9 +2240,10 @@ def spark() -> "SparkSession":
 
     from pyspark.sql import SparkSession
 
+    # Remember to also update `dev/Dockerfile`
     spark_version = ".".join(importlib.metadata.version("pyspark").split(".")[:2])
     scala_version = "2.12"
-    iceberg_version = "1.4.3"
+    iceberg_version = "1.6.0"
 
     os.environ["PYSPARK_SUBMIT_ARGS"] = (
         f"--packages org.apache.iceberg:iceberg-spark-runtime-{spark_version}_{scala_version}:{iceberg_version},"

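For reference, with a pyspark 3.5.x install the fixture above resolves the runtime package to org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.6.0. A standalone sketch of the same version arithmetic (assumes pyspark is installed so importlib.metadata can see it):

    # Standalone sketch of the fixture's version arithmetic.
    import importlib.metadata

    # "3.5.3" -> "3.5": only major.minor selects the Spark runtime artifact
    spark_version = ".".join(importlib.metadata.version("pyspark").split(".")[:2])
    scala_version = "2.12"
    iceberg_version = "1.6.0"

    package = f"org.apache.iceberg:iceberg-spark-runtime-{spark_version}_{scala_version}:{iceberg_version}"
    print(package)  # org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.6.0
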
tests/integration/test_deletes.py

Lines changed: 2 additions & 8 deletions

@@ -237,9 +237,7 @@ def test_delete_partitioned_table_positional_deletes(spark: SparkSession, sessio
     # Will rewrite a data file without the positional delete
     tbl.delete(EqualTo("number", 40))
 
-    # One positional delete has been added, but an OVERWRITE status is set
-    # https://github.com/apache/iceberg/issues/10122
-    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ["append", "overwrite", "overwrite"]
+    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ["append", "delete", "overwrite"]
     assert tbl.scan().to_arrow().to_pydict() == {"number_partitioned": [10], "number": [20]}
 
 
@@ -410,8 +408,6 @@ def test_overwrite_partitioned_table(spark: SparkSession, session_catalog: RestC
     # Will rewrite a data file without the positional delete
     tbl.overwrite(arrow_tbl, "number_partitioned == 10")
 
-    # One positional delete has been added, but an OVERWRITE status is set
-    # https://github.com/apache/iceberg/issues/10122
     assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()] == ["append", "delete", "append"]
     assert tbl.scan().to_arrow().to_pydict() == {"number_partitioned": [10, 10, 20], "number": [4, 5, 3]}
 
@@ -461,13 +457,11 @@ def test_partitioned_table_positional_deletes_sequence_number(spark: SparkSessio
     # Will rewrite a data file without a positional delete
     tbl.delete(EqualTo("number", 201))
 
-    # One positional delete has been added, but an OVERWRITE status is set
-    # https://github.com/apache/iceberg/issues/10122
     snapshots = tbl.snapshots()
     assert len(snapshots) == 3
 
     # Snapshots produced by Spark
-    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()[0:2]] == ["append", "overwrite"]
+    assert [snapshot.summary.operation.value for snapshot in tbl.snapshots()[0:2]] == ["append", "delete"]
 
     # Will rewrite one parquet file
     assert snapshots[2].summary == Summary(

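The comments deleted in all three hunks pointed at https://github.com/apache/iceberg/issues/10122: under the previously pinned Iceberg 1.4.3, a Spark delete that only added a positional delete file was still recorded with an OVERWRITE snapshot status, so the tests had to expect "overwrite". Under 1.6.0 the same commits are reported as "delete", which is why the expected operation sequences change. A sketch of how the assertions read the history, using the same PyIceberg calls as the tests (assumes `tbl` is a loaded pyiceberg Table, as in the fixtures these tests rely on):

    # Collect each snapshot's operation in commit order; mirrors the asserts above.
    operations = [snapshot.summary.operation.value for snapshot in tbl.snapshots()]
    print(operations)  # e.g. ["append", "delete", "overwrite"] under Iceberg 1.6.0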