apache
diff --git a/‎pyiceberg/table/update/schema.py‎
Lines changed: 125 additions & 24 deletions b/‎pyiceberg/table/update/schema.py‎
Lines changed: 125 additions & 24 deletions
diff --git a/‎tests/catalog/test_sql.py‎
Lines changed: 20 additions & 4 deletions b/‎tests/catalog/test_sql.py‎
Lines changed: 20 additions & 4 deletions
@@ -90,6 +90,15 @@ class UpdateSchema(UpdateTableMetadata["UpdateSchema"]):
     _allow_incompatible_changes: bool
     _case_sensitive: bool
 
+    # Store user intent for retry support
+    _column_additions: list[tuple[str | tuple[str, ...], IcebergType, str | None, bool, L | None]]
+    _column_updates: list[tuple[str | tuple[str, ...], IcebergType | None, bool | None, str | None]]
+    _column_deletions: list[str | tuple[str, ...]]
+    _column_renames: list[tuple[str | tuple[str, ...], str]]
+    _move_operations: list[tuple[str, str | tuple[str, ...], str | tuple[str, ...] | None]]
+    _optional_columns: list[str | tuple[str, ...]]
+    _default_value_updates: list[tuple[str | tuple[str, ...], L | None]]
+
     def __init__(
         self,
         transaction: Transaction,
@@ -99,22 +108,40 @@ def __init__(
         name_mapping: NameMapping | None = None,
     ) -> None:
         super().__init__(transaction)
-
+        self._transaction = transaction
+        self._allow_incompatible_changes = allow_incompatible_changes
+        self._case_sensitive = case_sensitive
+        self._name_mapping = name_mapping
+        self._provided_schema = schema  # Store for _reset_state
+
+        # Initialize user intent storage
+        self._column_additions = []
+        self._column_updates = []
+        self._column_deletions = []
+        self._column_renames = []
+        self._move_operations = []
+        self._optional_columns = []
+        self._default_value_updates = []
+        self._identifier_field_updates: set[str] | None = None
+
+        # Initialize state from metadata
+        self._init_state_from_metadata(schema)
+
+    def _init_state_from_metadata(self, schema: Schema | None = None) -> None:
+        """Initialize or reinitialize state from current transaction metadata."""
         if isinstance(schema, Schema):
             self._schema = schema
             self._last_column_id = itertools.count(1 + schema.highest_field_id)
         else:
             self._schema = self._transaction.table_metadata.schema()
             self._last_column_id = itertools.count(1 + self._transaction.table_metadata.last_column_id)
 
-        self._name_mapping = name_mapping
         self._identifier_field_names = self._schema.identifier_field_names()
 
         self._adds = {}
         self._updates = {}
         self._deletes = set()
         self._moves = {}
-
         self._added_name_to_id = {}
 
         def get_column_name(field_id: int) -> str:
@@ -127,9 +154,39 @@ def get_column_name(field_id: int) -> str:
             field_id: get_column_name(parent_field_id) for field_id, parent_field_id in self._schema._lazy_id_to_parent.items()
         }
 
-        self._allow_incompatible_changes = allow_incompatible_changes
-        self._case_sensitive = case_sensitive
-        self._transaction = transaction
+    def _reset_state(self) -> None:
+        """Reset state for retry, rebuilding from refreshed metadata."""
+        self._init_state_from_metadata(self._provided_schema)
+
+        for path, field_type, doc, required, default_value in self._column_additions:
+            self._do_add_column(path, field_type, doc, required, default_value)
+
+        for path in self._column_deletions:
+            self._do_delete_column(path)
+
+        for path_from, new_name in self._column_renames:
+            self._do_rename_column(path_from, new_name)
+
+        for path, field_type, required, doc in self._column_updates:
+            self._do_update_column(path, field_type, required, doc)
+
+        for path in self._optional_columns:
+            self._set_column_requirement(path, required=False)
+
+        for path, default_value in self._default_value_updates:
+            self._set_column_default_value(path, default_value)
+
+        for op, path, other_path in self._move_operations:
+            if op == "first":
+                self._do_move_first(path)
+            elif op == "before":
+                self._do_move_before(path, other_path)  # type: ignore
+            elif op == "after":
+                self._do_move_after(path, other_path)  # type: ignore
+
+        # Restore identifier fields if they were explicitly set
+        if self._identifier_field_updates is not None:
+            self._identifier_field_names = self._identifier_field_updates.copy()
 
     def case_sensitive(self, case_sensitive: bool) -> UpdateSchema:
         """Determine if the case of schema needs to be considered when comparing column names.
@@ -186,6 +243,19 @@ def add_column(
         Returns:
             This for method chaining.
         """
+        self._column_additions.append((path, field_type, doc, required, default_value))
+        self._do_add_column(path, field_type, doc, required, default_value)
+        return self
+
+    def _do_add_column(
+        self,
+        path: str | tuple[str, ...],
+        field_type: IcebergType,
+        doc: str | None,
+        required: bool,
+        default_value: L | None,
+    ) -> None:
+        """Internal method to add a column. Used by add_column and _reset_state."""
         if isinstance(path, str):
             if "." in path:
                 raise ValueError(f"Cannot add column with ambiguous name: {path}, provide a tuple instead")
@@ -256,8 +326,6 @@ def add_column(
         else:
             self._adds[parent_id] = [field]
 
-        return self
-
     def delete_column(self, path: str | tuple[str, ...]) -> UpdateSchema:
         """Delete a column from a table.
 
@@ -267,6 +335,12 @@ def delete_column(self, path: str | tuple[str, ...]) -> UpdateSchema:
         Returns:
             The UpdateSchema with the delete operation staged.
         """
+        self._column_deletions.append(path)
+        self._do_delete_column(path)
+        return self
+
+    def _do_delete_column(self, path: str | tuple[str, ...]) -> None:
+        """Internal method to delete a column. Used by delete_column and _reset_state."""
         name = (path,) if isinstance(path, str) else path
         full_name = ".".join(name)
 
@@ -279,8 +353,6 @@ def delete_column(self, path: str | tuple[str, ...]) -> UpdateSchema:
 
         self._deletes.add(field.field_id)
 
-        return self
-
     def set_default_value(self, path: str | tuple[str, ...], default_value: L | None) -> UpdateSchema:
         """Set the default value of a column.
 
@@ -290,8 +362,8 @@ def set_default_value(self, path: str | tuple[str, ...], default_value: L | None
         Returns:
             The UpdateSchema with the delete operation staged.
         """
+        self._default_value_updates.append((path, default_value))
         self._set_column_default_value(path, default_value)
-
         return self
 
     def rename_column(self, path_from: str | tuple[str, ...], new_name: str) -> UpdateSchema:
@@ -304,6 +376,12 @@ def rename_column(self, path_from: str | tuple[str, ...], new_name: str) -> Upda
         Returns:
             The UpdateSchema with the rename operation staged.
         """
+        self._column_renames.append((path_from, new_name))
+        self._do_rename_column(path_from, new_name)
+        return self
+
+    def _do_rename_column(self, path_from: str | tuple[str, ...], new_name: str) -> None:
+        """Internal method to rename a column. Used by rename_column and _reset_state."""
         path_from = ".".join(path_from) if isinstance(path_from, tuple) else path_from
         field_from = self._schema.find_field(path_from, self._case_sensitive)
 
@@ -338,8 +416,6 @@ def rename_column(self, path_from: str | tuple[str, ...], new_name: str) -> Upda
             new_identifier_path = f"{from_field_correct_casing[: -len(field_from.name)]}{new_name}"
             self._identifier_field_names.add(new_identifier_path)
 
-        return self
-
     def make_column_optional(self, path: str | tuple[str, ...]) -> UpdateSchema:
         """Make a column optional.
 
@@ -349,10 +425,12 @@ def make_column_optional(self, path: str | tuple[str, ...]) -> UpdateSchema:
         Returns:
             The UpdateSchema with the requirement change staged.
         """
+        self._optional_columns.append(path)
         self._set_column_requirement(path, required=False)
         return self
 
     def set_identifier_fields(self, *fields: str) -> None:
+        self._identifier_field_updates = set(fields)
         self._identifier_field_names = set(fields)
 
     def _set_column_requirement(self, path: str | tuple[str, ...], required: bool) -> None:
@@ -454,12 +532,25 @@ def update_column(
         Returns:
             The UpdateSchema with the type update staged.
         """
-        path = (path,) if isinstance(path, str) else path
-        full_name = ".".join(path)
-
         if field_type is None and required is None and doc is None:
             return self
 
+        # Store intent for retry support
+        self._column_updates.append((path, field_type, required, doc))
+        self._do_update_column(path, field_type, required, doc)
+        return self
+
+    def _do_update_column(
+        self,
+        path: str | tuple[str, ...],
+        field_type: IcebergType | None,
+        required: bool | None,
+        doc: str | None,
+    ) -> None:
+        """Internal method to update a column. Used by update_column and _reset_state."""
+        path = (path,) if isinstance(path, str) else path
+        full_name = ".".join(path)
+
         field = self._schema.find_field(full_name, self._case_sensitive)
 
         if field.field_id in self._deletes:
@@ -500,8 +591,6 @@ def update_column(
         if required is not None:
             self._set_column_requirement(path, required=required)
 
-        return self
-
     def _find_for_move(self, name: str) -> int | None:
         try:
             return self._schema.find_field(name, self._case_sensitive).field_id
@@ -544,6 +633,12 @@ def move_first(self, path: str | tuple[str, ...]) -> UpdateSchema:
         Returns:
             The UpdateSchema with the move operation staged.
         """
+        self._move_operations.append(("first", path, None))
+        self._do_move_first(path)
+        return self
+
+    def _do_move_first(self, path: str | tuple[str, ...]) -> None:
+        """Internal method to move a field to first position. Used by move_first and _reset_state."""
         full_name = ".".join(path) if isinstance(path, tuple) else path
 
         field_id = self._find_for_move(full_name)
@@ -553,8 +648,6 @@ def move_first(self, path: str | tuple[str, ...]) -> UpdateSchema:
 
         self._move(_Move(field_id=field_id, full_name=full_name, op=_MoveOperation.First))
 
-        return self
-
     def move_before(self, path: str | tuple[str, ...], before_path: str | tuple[str, ...]) -> UpdateSchema:
         """Move the field to before another field.
 
@@ -564,6 +657,12 @@ def move_before(self, path: str | tuple[str, ...], before_path: str | tuple[str,
         Returns:
             The UpdateSchema with the move operation staged.
         """
+        self._move_operations.append(("before", path, before_path))
+        self._do_move_before(path, before_path)
+        return self
+
+    def _do_move_before(self, path: str | tuple[str, ...], before_path: str | tuple[str, ...]) -> None:
+        """Internal method to move a field before another. Used by move_before and _reset_state."""
         full_name = ".".join(path) if isinstance(path, tuple) else path
         field_id = self._find_for_move(full_name)
 
@@ -587,8 +686,6 @@ def move_before(self, path: str | tuple[str, ...], before_path: str | tuple[str,
 
         self._move(_Move(field_id=field_id, full_name=full_name, other_field_id=before_field_id, op=_MoveOperation.Before))
 
-        return self
-
     def move_after(self, path: str | tuple[str, ...], after_name: str | tuple[str, ...]) -> UpdateSchema:
         """Move the field to after another field.
 
@@ -598,6 +695,12 @@ def move_after(self, path: str | tuple[str, ...], after_name: str | tuple[str, .
         Returns:
             The UpdateSchema with the move operation staged.
         """
+        self._move_operations.append(("after", path, after_name))
+        self._do_move_after(path, after_name)
+        return self
+
+    def _do_move_after(self, path: str | tuple[str, ...], after_name: str | tuple[str, ...]) -> None:
+        """Internal method to move a field after another. Used by move_after and _reset_state."""
         full_name = ".".join(path) if isinstance(path, tuple) else path
 
         field_id = self._find_for_move(full_name)
@@ -616,8 +719,6 @@ def move_after(self, path: str | tuple[str, ...], after_name: str | tuple[str, .
 
         self._move(_Move(field_id=field_id, full_name=full_name, other_field_id=after_field_id, op=_MoveOperation.After))
 
-        return self
-
     def _commit(self) -> UpdatesAndRequirements:
         """Apply the pending changes and commit."""
         from pyiceberg.table import TableProperties
 
@@ -1423,6 +1423,15 @@ def test_append_table(catalog: SqlCatalog, table_schema_simple: Schema, table_id
     ],
 )
 def test_concurrent_commit_table(catalog: SqlCatalog, table_schema_simple: Schema, table_identifier: Identifier) -> None:
+    """Test that concurrent schema updates are resolved via retry (Java-like behavior).
+
+    When two writers concurrently update the schema:
+    1. First writer succeeds
+    2. Second writer's first attempt fails (stale schema_id)
+    3. Second writer retries with refreshed metadata and succeeds
+
+    This matches Java's BaseTransaction.applyUpdates() behavior.
+    """
     namespace = Catalog.namespace_from(table_identifier)
     catalog.create_namespace(namespace)
     table_a = catalog.create_table(table_identifier, table_schema_simple)
@@ -1431,10 +1440,17 @@ def test_concurrent_commit_table(catalog: SqlCatalog, table_schema_simple: Schem
     with table_a.update_schema() as update:
         update.add_column(path="b", field_type=IntegerType())
 
-    with pytest.raises(CommitFailedException, match="Requirement failed: current schema id has changed: expected 0, found 1"):
-        # This one should fail since it already has been updated
-        with table_b.update_schema() as update:
-            update.add_column(path="c", field_type=IntegerType())
+    # With retry support, this now succeeds after refreshing metadata
+    # (Java-like behavior: retry resolves conflicts)
+    with table_b.update_schema() as update:
+        update.add_column(path="c", field_type=IntegerType())
+
+    # Verify both columns were added
+    table_a.refresh()
+    field_names = [f.name for f in table_a.schema().fields]
+    assert "b" in field_names
+    assert "c" in field_names
+    assert table_a.schema().schema_id == 2
 
 
 @pytest.mark.parametrize(