File tree Expand file tree Collapse file tree 1 file changed +11
-9
lines changed
Expand file tree Collapse file tree 1 file changed +11
-9
lines changed
Original file line number | Diff line number | Diff line change
22 22 from pyiceberg.schema import Schema
23 23 from pyiceberg.types import FixedType, NestedField, UUIDType
2424
25- spark = SparkSession .builder .getOrCreate ()
25+ # This configuration is important: without it we get many small
26+ # Parquet files with a single row each. When a positional delete
27+ # hits a Parquet file with only one row, the Parquet file gets
28+ # dropped instead of getting a merge-on-read delete file.
29+ spark = (
30+ SparkSession
31+ .builder
32+ .config ("spark.sql.shuffle.partitions" , "1" )
33+ .config ("spark.default.parallelism" , "1" )
34+ .getOrCreate ()
35+ )
2636
2737catalogs = {
2838 'rest' : load_catalog (
120130 """
121131 )
122132
123- # Partitioning is not really needed, but there is a bug:
124- # https://github.com/apache/iceberg/pull/7685
125- spark .sql (f"ALTER TABLE { catalog_name } .default.test_positional_mor_deletes ADD PARTITION FIELD years(dt) AS dt_years" )
126-
127133 spark .sql (
128134 f"""
129135 INSERT INTO { catalog_name } .default.test_positional_mor_deletes
168174 """
169175 )
170176
171- # Partitioning is not really needed, but there is a bug:
172- # https://github.com/apache/iceberg/pull/7685
173- spark .sql (f"ALTER TABLE { catalog_name } .default.test_positional_mor_double_deletes ADD PARTITION FIELD years(dt) AS dt_years" )
174-
175177 spark .sql (
176178 f"""
177179 INSERT INTO { catalog_name } .default.test_positional_mor_double_deletes
You can’t perform that action at this time.
0 commit comments