Skip to content

Commit 436389a

Browse files
committed
do not materialize entire files in a batch reader
1 parent d3eb149 commit 436389a

File tree

1 file changed

+2
-7
lines changed

1 file changed

+2
-7
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 2 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1726,16 +1726,11 @@ def to_record_batches(self, tasks: Iterable[FileScanTask]) -> Iterator[pa.Record
17261726
deletes_per_file = _read_all_delete_files(self._io, tasks)
17271727

17281728
total_row_count = 0
1729-
executor = ExecutorFactory.get_or_create()
17301729

1731-
def batches_for_task(task: FileScanTask) -> List[pa.RecordBatch]:
1732-
# Materialize the iterator here to ensure execution happens within the executor.
1733-
# Otherwise, the iterator would be lazily consumed later (in the main thread),
1734-
# defeating the purpose of using executor.map.
1735-
return list(self._record_batches_from_scan_tasks_and_deletes([task], deletes_per_file))
17361730

17371731
limit_reached = False
1738-
for batches in executor.map(batches_for_task, tasks):
1732+
for task in tasks:
1733+
batches = self._record_batches_from_scan_tasks_and_deletes([task], deletes_per_file)
17391734
for batch in batches:
17401735
current_batch_size = len(batch)
17411736
if self._limit is not None and total_row_count + current_batch_size >= self._limit:

0 commit comments

Comments (0)