Skip to content

Commit 1399e83

Browse files
committed
perf: do not copy metadata for each data file in summary
f Please enter the commit message for your changes. Lines starting
1 parent d3eb149 commit 1399e83

File tree

1 file changed

+9
-8
lines changed

1 file changed

+9
-8
lines changed

pyiceberg/table/update/snapshot.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -240,8 +240,11 @@ def _write_delete_manifest() -> List[ManifestFile]:
240240
def _summary(self, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> Summary:
241241
from pyiceberg.table import TableProperties
242242

243+
# avoid copying metadata for each data file
244+
table_metadata = self._transaction.table_metadata
245+
243246
partition_summary_limit = int(
244-
self._transaction.table_metadata.properties.get(
247+
table_metadata.properties.get(
245248
TableProperties.WRITE_PARTITION_SUMMARY_LIMIT, TableProperties.WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT
246249
)
247250
)
@@ -250,23 +253,21 @@ def _summary(self, snapshot_properties: Dict[str, str] = EMPTY_DICT) -> Summary:
250253
for data_file in self._added_data_files:
251254
ssc.add_file(
252255
data_file=data_file,
253-
partition_spec=self._transaction.table_metadata.spec(),
254-
schema=self._transaction.table_metadata.schema(),
256+
partition_spec=table_metadata.spec(),
257+
schema=table_metadata.schema(),
255258
)
256259

257260
if len(self._deleted_data_files) > 0:
258-
specs = self._transaction.table_metadata.specs()
261+
specs = table_metadata.specs()
259262
for data_file in self._deleted_data_files:
260263
ssc.remove_file(
261264
data_file=data_file,
262265
partition_spec=specs[data_file.spec_id],
263-
schema=self._transaction.table_metadata.schema(),
266+
schema=table_metadata.schema(),
264267
)
265268

266269
previous_snapshot = (
267-
self._transaction.table_metadata.snapshot_by_id(self._parent_snapshot_id)
268-
if self._parent_snapshot_id is not None
269-
else None
270+
table_metadata.snapshot_by_id(self._parent_snapshot_id) if self._parent_snapshot_id is not None else None
270271
)
271272

272273
return update_snapshot_summaries(

0 commit comments

Comments (0)