apache
diff --git a/.github/workflows/nightly-pypi-build.yml b/.github/workflows/nightly-pypi-build.yml
Lines changed: 1 addition & 1 deletion
diff --git a/mkdocs/docs/how-to-release.md b/mkdocs/docs/how-to-release.md
Lines changed: 9 additions & 1 deletion
diff --git a/poetry.lock b/poetry.lock
Lines changed: 610 additions & 500 deletions
diff --git a/pyiceberg/catalog/glue.py b/pyiceberg/catalog/glue.py
Lines changed: 7 additions & 12 deletions
diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py
Lines changed: 3 additions & 3 deletions
diff --git a/pyiceberg/expressions/__init__.py b/pyiceberg/expressions/__init__.py
Lines changed: 48 additions & 11 deletions
diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py
Lines changed: 15 additions & 6 deletions
diff --git a/pyiceberg/table/snapshots.py b/pyiceberg/table/snapshots.py
Lines changed: 22 additions & 0 deletions
diff --git a/pyiceberg/table/update/__init__.py b/pyiceberg/table/update/__init__.py
Lines changed: 16 additions & 0 deletions
@@ -71,7 +71,7 @@ jobs:
 
     steps:
     - name: Download all the artifacts
-      uses: actions/download-artifact@v5
+      uses: actions/download-artifact@v6
       with:
         merge-multiple: true
         path: dist/
 
@@ -30,6 +30,7 @@ This guide outlines the process for releasing PyIceberg in accordance with the [
 ## Requirements
 
 * A GPG key must be registered and published in the [Apache Iceberg KEYS file](https://downloads.apache.org/iceberg/KEYS). Follow [the instructions for setting up a GPG key and uploading it to the KEYS file](#set-up-gpg-key-and-upload-to-apache-iceberg-keys-file).
+    * Permission to update the `KEYS` artifact in the [Apache release distribution](https://dist.apache.org/repos/dist/release/iceberg/) (requires Iceberg PMC privileges).
 * SVN Access
     * Permission to upload artifacts to the [Apache development distribution](https://dist.apache.org/repos/dist/dev/iceberg/) (requires Apache Committer access).
     * Permission to upload artifacts to the [Apache release distribution](https://dist.apache.org/repos/dist/release/iceberg/) (requires Apache PMC access).
@@ -405,5 +406,12 @@ cd icebergsvn
 echo "" >> KEYS # append a newline
 gpg --list-sigs <YOUR KEY ID HERE> >> KEYS # append signatures
 gpg --armor --export <YOUR KEY ID HERE> >> KEYS # append public key block
-svn commit -m "add key for <YOUR NAME HERE>"
+svn commit -m "add key for <YOUR NAME HERE>" # this requires Iceberg PMC privileges
 ```
+
+<!-- prettier-ignore-start -->
+
+!!! note
+    Updating the `KEYS` artifact in the `release/` distribution requires Iceberg PMC privileges. Please work with a PMC member to update the file.
+
+<!-- prettier-ignore-end -->
@@ -355,34 +355,29 @@ def __init__(self, name: str, client: Optional["GlueClient"] = None, **propertie
                 _register_glue_catalog_id_with_glue_client(self.glue, glue_catalog_id)
 
     def _convert_glue_to_iceberg(self, glue_table: "TableTypeDef") -> Table:
-        properties: Properties = glue_table["Parameters"]
-
-        database_name = glue_table.get("DatabaseName", None)
-        if database_name is None:
+        if (database_name := glue_table.get("DatabaseName")) is None:
             raise ValueError("Glue table is missing DatabaseName property")
 
-        parameters = glue_table.get("Parameters", None)
-        if parameters is None:
-            raise ValueError("Glue table is missing Parameters property")
+        if (table_name := glue_table.get("Name")) is None:
+            raise ValueError("Glue table is missing Name property")
 
-        table_name = glue_table["Name"]
+        if (parameters := glue_table.get("Parameters")) is None:
+            raise ValueError("Glue table is missing Parameters property")
 
-        if TABLE_TYPE not in properties:
+        if (glue_table_type := parameters.get(TABLE_TYPE)) is None:
             raise NoSuchPropertyException(
                 f"Property {TABLE_TYPE} missing, could not determine type: {database_name}.{table_name}"
             )
-        glue_table_type = properties[TABLE_TYPE]
 
         if glue_table_type.lower() != ICEBERG:
             raise NoSuchIcebergTableError(
                 f"Property table_type is {glue_table_type}, expected {ICEBERG}: {database_name}.{table_name}"
             )
 
-        if METADATA_LOCATION not in properties:
+        if (metadata_location := parameters.get(METADATA_LOCATION)) is None:
             raise NoSuchPropertyException(
                 f"Table property {METADATA_LOCATION} is missing, cannot find metadata for: {database_name}.{table_name}"
             )
-        metadata_location = properties[METADATA_LOCATION]
 
         io = self._load_file_io(location=metadata_location)
         file = io.new_input(metadata_location)
 
@@ -238,6 +238,9 @@ def _create_session(self) -> Session:
         """Create a request session with provided catalog configuration."""
         session = Session()
 
+        # Set HTTP headers
+        self._config_headers(session)
+
         # Sets the client side and server side SSL cert verification, if provided as properties.
         if ssl_config := self.properties.get(SSL):
             if ssl_ca_bundle := ssl_config.get(CA_BUNDLE):
@@ -265,9 +268,6 @@ def _create_session(self) -> Session:
         else:
             session.auth = AuthManagerAdapter(self._create_legacy_oauth2_auth_manager(session))
 
-        # Set HTTP headers
-        self._config_headers(session)
-
         # Configure SigV4 Request Signing
         if property_as_bool(self.properties, SIGV4, False):
             self._init_sigv4(session)
 
@@ -33,7 +33,7 @@
 )
 from typing import Literal as TypingLiteral
 
-from pydantic import Field
+from pydantic import ConfigDict, Field
 
 from pyiceberg.expressions.literals import (
     AboveMax,
@@ -302,12 +302,19 @@ def __getnewargs__(self) -> Tuple[BooleanExpression, BooleanExpression]:
         return (self.left, self.right)
 
 
-class Or(BooleanExpression):
+class Or(IcebergBaseModel, BooleanExpression):
     """OR operation expression - logical disjunction."""
 
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    type: TypingLiteral["or"] = Field(default="or", alias="type")
     left: BooleanExpression
     right: BooleanExpression
 
+    def __init__(self, left: BooleanExpression, right: BooleanExpression, *rest: BooleanExpression) -> None:
+        if isinstance(self, Or) and not hasattr(self, "left") and not hasattr(self, "right"):
+            super().__init__(left=left, right=right)  # guarded: __new__ can hand back an existing, already-populated Or
+
     def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: BooleanExpression) -> BooleanExpression:  # type: ignore
         if rest:
             return _build_balanced_tree(Or, (left, right, *rest))
@@ -319,10 +326,12 @@ def __new__(cls, left: BooleanExpression, right: BooleanExpression, *rest: Boole
             return left
         else:
             obj = super().__new__(cls)
-            obj.left = left
-            obj.right = right
             return obj
 
+    def __str__(self) -> str:
+        """Return ``ClassName(left=..., right=...)`` with both operands rendered via ``repr()``."""
+        return f"{str(self.__class__.__name__)}(left={repr(self.left)}, right={repr(self.right)})"
+
     def __eq__(self, other: Any) -> bool:
         """Return the equality of two instances of the Or class."""
         return self.left == other.left and self.right == other.right if isinstance(other, Or) else False
@@ -341,22 +350,31 @@ def __getnewargs__(self) -> Tuple[BooleanExpression, BooleanExpression]:
         return (self.left, self.right)
 
 
-class Not(BooleanExpression):
+class Not(IcebergBaseModel, BooleanExpression):
     """NOT operation expression - logical negation."""
 
-    child: BooleanExpression
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
+    type: TypingLiteral["not"] = Field(default="not")
+    child: BooleanExpression = Field()
+
+    def __init__(self, child: BooleanExpression, **_: Any) -> None:
+        super().__init__(child=child)  # only ``child`` is forwarded; anything captured by ``**_`` is dropped
 
-    def __new__(cls, child: BooleanExpression) -> BooleanExpression:  # type: ignore
+    def __new__(cls, child: BooleanExpression, **_: Any) -> BooleanExpression:  # type: ignore
         if child is AlwaysTrue():
             return AlwaysFalse()
         elif child is AlwaysFalse():
             return AlwaysTrue()
         elif isinstance(child, Not):
             return child.child
         obj = super().__new__(cls)
-        obj.child = child
         return obj
 
+    def __str__(self) -> str:
+        """Return ``Not(child=...)`` using the child's default string formatting (``__repr__`` uses ``repr()``)."""
+        return f"Not(child={self.child})"
+
     def __repr__(self) -> str:
         """Return the string representation of the Not class."""
         return f"Not(child={repr(self.child)})"
@@ -373,8 +391,6 @@ def __getnewargs__(self) -> Tuple[BooleanExpression]:
         """Pickle the Not class."""
         return (self.child,)
 
-    """TRUE expression."""
-
 
 class AlwaysTrue(BooleanExpression, Singleton, IcebergRootModel[str]):
     """TRUE expression."""
@@ -447,7 +463,20 @@ def bind(self, schema: Schema, case_sensitive: bool = True) -> BooleanExpression
     def as_bound(self) -> Type[BoundPredicate[L]]: ...
 
 
-class UnaryPredicate(UnboundPredicate[Any], ABC):
+class UnaryPredicate(IcebergBaseModel, UnboundPredicate[Any], ABC):
+    type: str
+
+    model_config = {"arbitrary_types_allowed": True}
+
+    def __init__(self, term: Union[str, UnboundTerm[Any]]):
+        unbound = _to_unbound_term(term)  # str-or-term input is normalised by this helper (defined outside this view)
+        super().__init__(term=unbound)
+
+    def __str__(self) -> str:
+        """Return ``ClassName(term=...)`` with the term rendered via ``str()``."""
+        # Uses the concrete class name, so subclasses such as IsNull print under their own name
+        return f"{str(self.__class__.__name__)}(term={str(self.term)})"
+
     def bind(self, schema: Schema, case_sensitive: bool = True) -> BoundUnaryPredicate[Any]:
         bound_term = self.term.bind(schema, case_sensitive)
         return self.as_bound(bound_term)
@@ -506,6 +535,8 @@ def as_unbound(self) -> Type[NotNull]:
 
 
 class IsNull(UnaryPredicate):
+    type: str = "is-null"
+
     def __invert__(self) -> NotNull:
         """Transform the Expression into its negated version."""
         return NotNull(self.term)
@@ -516,6 +547,8 @@ def as_bound(self) -> Type[BoundIsNull[L]]:
 
 
 class NotNull(UnaryPredicate):
+    type: str = "not-null"
+
     def __invert__(self) -> IsNull:
         """Transform the Expression into its negated version."""
         return IsNull(self.term)
@@ -558,6 +591,8 @@ def as_unbound(self) -> Type[NotNaN]:
 
 
 class IsNaN(UnaryPredicate):
+    type: str = "is-nan"
+
     def __invert__(self) -> NotNaN:
         """Transform the Expression into its negated version."""
         return NotNaN(self.term)
@@ -568,6 +603,8 @@ def as_bound(self) -> Type[BoundIsNaN[L]]:
 
 
 class NotNaN(UnaryPredicate):
+    type: str = "not-nan"
+
     def __invert__(self) -> IsNaN:
         """Transform the Expression into its negated version."""
         return IsNaN(self.term)
 
@@ -2082,13 +2082,18 @@ def __init__(self, iceberg_type: PrimitiveType, physical_type_string: str, trunc
         self.trunc_length = trunc_length
 
         expected_physical_type = _primitive_to_physical(iceberg_type)
+
+        # TODO: Refactor to use promotion logic
         if expected_physical_type != physical_type_string:
             # Allow promotable physical types
             # INT32 -> INT64 and FLOAT -> DOUBLE are safe type casts
             if (physical_type_string == "INT32" and expected_physical_type == "INT64") or (
                 physical_type_string == "FLOAT" and expected_physical_type == "DOUBLE"
             ):
                 pass
+            # Allow DECIMAL to be stored as FIXED_LEN_BYTE_ARRAY, INT32 or INT64
+            elif physical_type_string == "FIXED_LEN_BYTE_ARRAY" and expected_physical_type in ("INT32", "INT64"):
+                pass
             else:
                 raise ValueError(
                     f"Unexpected physical type {physical_type_string} for {iceberg_type}, expected {expected_physical_type}"
@@ -2506,12 +2511,16 @@ def data_file_statistics_from_parquet_metadata(
 
                     if isinstance(stats_col.iceberg_type, DecimalType) and statistics.physical_type != "FIXED_LEN_BYTE_ARRAY":
                         scale = stats_col.iceberg_type.scale
-                        col_aggs[field_id].update_min(
-                            unscaled_to_decimal(statistics.min_raw, scale)
-                        ) if statistics.min_raw is not None else None
-                        col_aggs[field_id].update_max(
-                            unscaled_to_decimal(statistics.max_raw, scale)
-                        ) if statistics.max_raw is not None else None
+                        (
+                            col_aggs[field_id].update_min(unscaled_to_decimal(statistics.min_raw, scale))
+                            if statistics.min_raw is not None
+                            else None
+                        )
+                        (
+                            col_aggs[field_id].update_max(unscaled_to_decimal(statistics.max_raw, scale))
+                            if statistics.max_raw is not None
+                            else None
+                        )
                     else:
                         col_aggs[field_id].update_min(statistics.min)
                         col_aggs[field_id].update_max(statistics.max)
 
@@ -244,6 +244,12 @@ class Snapshot(IcebergBaseModel):
     manifest_list: str = Field(alias="manifest-list", description="Location of the snapshot's manifest list file")
     summary: Optional[Summary] = Field(default=None)
     schema_id: Optional[int] = Field(alias="schema-id", default=None)
+    first_row_id: Optional[int] = Field(
+        alias="first-row-id", default=None, description="assigned to the first row in the first data file in the first manifest"
+    )
+    added_rows: Optional[int] = Field(
+        alias="added-rows", default=None, description="The upper bound of the number of rows with assigned row IDs"
+    )
 
     def __str__(self) -> str:
         """Return the string representation of the Snapshot class."""
@@ -253,6 +259,22 @@ def __str__(self) -> str:
         result_str = f"{operation}id={self.snapshot_id}{parent_id}{schema_id}"
         return result_str
 
+    def __repr__(self) -> str:
+        """Return a ``Snapshot(...)`` string of the core fields, leaving out optional fields that are unset."""
+        fields = [
+            f"snapshot_id={self.snapshot_id}",
+            f"parent_snapshot_id={self.parent_snapshot_id}",
+            f"sequence_number={self.sequence_number}",
+            f"timestamp_ms={self.timestamp_ms}",
+            f"manifest_list='{self.manifest_list}'",
+            f"summary={repr(self.summary)}" if self.summary else None,  # truthiness check, unlike the ``is not None`` checks below
+            f"schema_id={self.schema_id}" if self.schema_id is not None else None,
+            f"first_row_id={self.first_row_id}" if self.first_row_id is not None else None,
+            f"added_rows={self.added_rows}" if self.added_rows is not None else None,
+        ]
+        filtered_fields = [field for field in fields if field is not None]
+        return f"Snapshot({', '.join(filtered_fields)})"
+
     def manifests(self, io: FileIO) -> List[ManifestFile]:
         """Return the manifests for the given snapshot."""
         return list(_manifests(io, self.manifest_list))
 
@@ -437,13 +437,29 @@ def _(update: AddSnapshotUpdate, base_metadata: TableMetadata, context: _TableMe
             f"Cannot add snapshot with sequence number {update.snapshot.sequence_number} "
             f"older than last sequence number {base_metadata.last_sequence_number}"
         )
+    elif base_metadata.format_version >= 3 and update.snapshot.first_row_id is None:
+        raise ValueError("Cannot add snapshot without first row id")
+    elif (
+        base_metadata.format_version >= 3
+        and update.snapshot.first_row_id is not None
+        and base_metadata.next_row_id is not None
+        and update.snapshot.first_row_id < base_metadata.next_row_id
+    ):
+        raise ValueError(
+            f"Cannot add a snapshot with first row id smaller than the table's next-row-id {update.snapshot.first_row_id} < {base_metadata.next_row_id}"
+        )
 
     context.add_update(update)
     return base_metadata.model_copy(
         update={
             "last_updated_ms": update.snapshot.timestamp_ms,
             "last_sequence_number": update.snapshot.sequence_number,
             "snapshots": base_metadata.snapshots + [update.snapshot],
+            "next_row_id": base_metadata.next_row_id + update.snapshot.added_rows
+            if base_metadata.format_version >= 3
+            and base_metadata.next_row_id is not None
+            and update.snapshot.added_rows is not None
+            else None,
         }
     )