Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyiceberg/io/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2089,6 +2089,9 @@ def __init__(self, iceberg_type: PrimitiveType, physical_type_string: str, trunc
physical_type_string == "FLOAT" and expected_physical_type == "DOUBLE"
):
pass
# Allow DECIMAL to be stored as FIXED_LEN_BYTE_ARRAY
elif physical_type_string == "FIXED_LEN_BYTE_ARRAY" and expected_physical_type in ("INT32", "INT64"):
pass
else:
raise ValueError(
f"Unexpected physical type {physical_type_string} for {iceberg_type}, expected {expected_physical_type}"
Expand Down
30 changes: 30 additions & 0 deletions tests/io/test_pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2188,6 +2188,36 @@ def test_stats_aggregator_update_max(vals: List[Any], primitive_type: PrimitiveT
assert stats.current_max == expected_result


@pytest.mark.parametrize(
    "iceberg_type, physical_type_string, should_succeed",
    [
        # Physical type is exactly the one expected for the Iceberg type
        (IntegerType(), "INT32", True),
        # INT32 on disk is accepted where INT64 is expected (promotion)
        (LongType(), "INT32", True),
        # FLOAT on disk is accepted where DOUBLE is expected (promotion)
        (DoubleType(), "FLOAT", True),
        # Decimal expecting INT32 may be stored as FIXED_LEN_BYTE_ARRAY
        (DecimalType(precision=2, scale=2), "FIXED_LEN_BYTE_ARRAY", True),
        # Decimal expecting INT64 may be stored as FIXED_LEN_BYTE_ARRAY
        (DecimalType(precision=12, scale=2), "FIXED_LEN_BYTE_ARRAY", True),
        # Rejected: INT64 on disk cannot stand in for an expected INT32
        (IntegerType(), "INT64", False),
    ],
)
def test_stats_aggregator_conditionally_allowed_types(
    iceberg_type: PrimitiveType, physical_type_string: str, should_succeed: bool
) -> None:
    """Check which physical/Iceberg type pairings ``StatsAggregator`` accepts.

    Accepted pairings must yield an aggregator that records the Iceberg type
    and starts with no min/max; rejected pairings must raise ``ValueError``
    mentioning the unexpected physical type.
    """
    # Handle the rejection case first so the happy path reads straight through.
    if not should_succeed:
        with pytest.raises(ValueError, match="Unexpected physical type"):
            StatsAggregator(iceberg_type, physical_type_string)
        return

    aggregator = StatsAggregator(iceberg_type, physical_type_string)
    assert aggregator.primitive_type == iceberg_type
    # A freshly built aggregator has not observed any values yet.
    assert aggregator.current_min is None
    assert aggregator.current_max is None


def test_bin_pack_arrow_table(arrow_table_with_null: pa.Table) -> None:
# default packs to 1 bin since the table is small
bin_packed = bin_pack_arrow_table(
Expand Down