Skip to content

Commit a7136bf

Browse files
committed
Merge branch 'main' of github.com:apache/iceberg-python into fd-rust
2 parents 39dee4a + 75ef45d commit a7136bf

File tree

4 files changed

+20
-14
lines changed

4 files changed

+20
-14
lines changed

pyiceberg/expressions/parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,7 @@
8282
BETWEEN = CaselessKeyword("between")
8383

8484
unquoted_identifier = Word(alphas + "_", alphanums + "_$")
85-
quoted_identifier = QuotedString('"', escChar="\\", unquoteResults=True)
85+
quoted_identifier = QuotedString('"', esc_char="\\", unquote_results=True)  # esc_char is the snake_case successor of escChar; esc_quote is a different option (embedded-quote sequence)
8686

8787

8888
@quoted_identifier.set_parse_action

pyiceberg/io/pyarrow.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2681,7 +2681,7 @@ def bin_pack_arrow_table(tbl: pa.Table, target_file_size: int) -> Iterator[list[
26812681
from pyiceberg.utils.bin_packing import PackingIterator
26822682

26832683
avg_row_size_bytes = tbl.nbytes / tbl.num_rows
2684-
target_rows_per_file = target_file_size // avg_row_size_bytes
2684+
target_rows_per_file = max(1, int(target_file_size / avg_row_size_bytes))
26852685
batches = tbl.to_batches(max_chunksize=target_rows_per_file)
26862686
bin_packed_record_batches = PackingIterator(
26872687
items=batches,

tests/integration/test_writes/test_writes.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1650,18 +1650,18 @@ def test_merge_manifests_file_content(session_catalog: Catalog, arrow_table_with
16501650
for i in range(3):
16511651
tbl_a_data_file = tbl_a_entries["data_file"][i]
16521652
assert tbl_a_data_file["column_sizes"] == [
1653-
(1, 49),
1654-
(2, 78),
1655-
(3, 128),
1656-
(4, 94),
1657-
(5, 118),
1658-
(6, 94),
1659-
(7, 118),
1660-
(8, 118),
1661-
(9, 118),
1662-
(10, 94),
1663-
(11, 78),
1664-
(12, 109),
1653+
(1, 51),
1654+
(2, 80),
1655+
(3, 130),
1656+
(4, 96),
1657+
(5, 120),
1658+
(6, 96),
1659+
(7, 120),
1660+
(8, 120),
1661+
(9, 120),
1662+
(10, 96),
1663+
(11, 80),
1664+
(12, 111),
16651665
]
16661666
assert tbl_a_data_file["content"] == 0
16671667
assert tbl_a_data_file["equality_ids"] is None

tests/io/test_pyarrow.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2248,6 +2248,12 @@ def test_bin_pack_arrow_table(arrow_table_with_null: pa.Table) -> None:
22482248
assert len(list(bin_packed)) == 5
22492249

22502250

2251+
def test_bin_pack_arrow_table_target_size_smaller_than_row(arrow_table_with_null: pa.Table) -> None:
2252+
bin_packed = list(bin_pack_arrow_table(arrow_table_with_null, target_file_size=1))
2253+
assert len(bin_packed) == arrow_table_with_null.num_rows
2254+
assert sum(batch.num_rows for bin_ in bin_packed for batch in bin_) == arrow_table_with_null.num_rows
2255+
2256+
22512257
def test_schema_mismatch_type(table_schema_simple: Schema) -> None:
22522258
other_schema = pa.schema(
22532259
(

0 commit comments

Comments
 (0)