|
22 | 22 | from enum import Enum |
23 | 23 | from typing import TYPE_CHECKING, Any, DefaultDict, Dict, Iterable, List, Mapping, Optional |
24 | 24 |
|
25 | | -from pydantic import Field, PrivateAttr, model_serializer |
| 25 | +from pydantic import Field, PrivateAttr, field_validator, model_serializer, model_validator |
26 | 26 |
|
27 | 27 | from pyiceberg.io import FileIO |
28 | 28 | from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, _manifests |
@@ -237,14 +237,55 @@ def __eq__(self, other: Any) -> bool: |
237 | 237 |
|
238 | 238 |
|
239 | 239 | class Snapshot(IcebergBaseModel): |
| 240 | + """Represents a snapshot of an Iceberg table at a specific point in time. |
| 241 | +
|
| 242 | + A snapshot tracks the state of a table, including all data and delete files, |
| 243 | + at the time the snapshot was created. |
| 244 | + """ |
| 245 | + |
240 | 246 | snapshot_id: int = Field(alias="snapshot-id") |
241 | 247 | parent_snapshot_id: Optional[int] = Field(alias="parent-snapshot-id", default=None) |
242 | 248 | sequence_number: Optional[int] = Field(alias="sequence-number", default=INITIAL_SEQUENCE_NUMBER) |
243 | 249 | timestamp_ms: int = Field(alias="timestamp-ms", default_factory=lambda: int(time.time() * 1000)) |
244 | 250 | manifest_list: str = Field(alias="manifest-list", description="Location of the snapshot's manifest list file") |
| 251 | + first_row_id: Optional[int] = Field( |
| 252 | + alias="first-row-id", |
| 253 | + default=None, |
| 254 | + description="The row-id of the first newly added row in this snapshot. Returns None when row lineage is not supported.", |
| 255 | + ) |
| 256 | + added_rows: Optional[int] = Field( |
| 257 | + alias="added-rows", |
| 258 | + default=None, |
| 259 | + description=( |
| 260 | + "The upper bound of number of rows with assigned row IDs in this snapshot. Returns None if the value was not stored." |
| 261 | + ), |
| 262 | + ) |
245 | 263 | summary: Optional[Summary] = Field(default=None) |
246 | 264 | schema_id: Optional[int] = Field(alias="schema-id", default=None) |
247 | 265 |
|
@field_validator("first_row_id")
@classmethod
def validate_first_row_id(cls, v: Optional[int]) -> Optional[int]:
    """Reject a negative first-row-id.

    Args:
        v: The candidate first-row-id, or None when it was not supplied.

    Returns:
        The value unchanged when it is None or non-negative.

    Raises:
        ValueError: If the value is negative.
    """
    # An absent value is acceptable; only a supplied value is range-checked.
    if v is None:
        return v
    if v < 0:
        raise ValueError(f"Invalid first-row-id (cannot be negative): {v}")
    return v
| 273 | + |
@field_validator("added_rows")
@classmethod
def validate_added_rows(cls, v: Optional[int]) -> Optional[int]:
    """Reject a negative added-rows count.

    Args:
        v: The candidate added-rows count, or None when it was not supplied.

    Returns:
        The value unchanged when it is None or non-negative.

    Raises:
        ValueError: If the value is negative.
    """
    is_negative = v is not None and v < 0
    if not is_negative:
        return v
    raise ValueError(f"Invalid added-rows (cannot be negative): {v}")
| 281 | + |
@model_validator(mode="after")
def validate_row_lineage_fields(self) -> "Snapshot":
    """Check that added_rows is present whenever first_row_id is set.

    Returns:
        This instance, unchanged, when the two fields are consistent.

    Raises:
        ValueError: If first_row_id is set but added_rows is None.
    """
    # With no first_row_id there is nothing to cross-check.
    if self.first_row_id is None:
        return self
    if self.added_rows is None:
        raise ValueError("Invalid added-rows (required when first-row-id is set): None")
    return self
| 288 | + |
248 | 289 | def __str__(self) -> str: |
249 | 290 | """Return the string representation of the Snapshot class.""" |
250 | 291 | operation = f"{self.summary.operation}: " if self.summary else "" |
|
0 commit comments