diff --git a/.cargo/audit.toml b/.cargo/audit.toml index 09e2d35c50..71354ea3a5 100644 --- a/.cargo/audit.toml +++ b/.cargo/audit.toml @@ -33,4 +33,9 @@ ignore = [ # # Introduced by object_store, see https://github.com/apache/arrow-rs-object-store/issues/564 "RUSTSEC-2025-0134", + # `rand` unsoundness with custom logger using `rand::rng()` + # + # Direct dependency upgraded to 0.9.3+. Transitive rand 0.8.5 remains + # from reqsign/sqllogictest/rustc-hash — no 0.8.x patch exists. + "RUSTSEC-2026-0097", ] diff --git a/.github/actions/overwrite-package-version/action.yml b/.github/actions/overwrite-package-version/action.yml index 8a2739456e..aed736ecf9 100644 --- a/.github/actions/overwrite-package-version/action.yml +++ b/.github/actions/overwrite-package-version/action.yml @@ -25,7 +25,7 @@ runs: using: "composite" steps: - name: Setup Python - uses: actions/setup-python@v5 + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 with: python-version: '3.12' diff --git a/.github/actions/setup-builder/action.yml b/.github/actions/setup-builder/action.yml index 532174590f..e961ed6335 100644 --- a/.github/actions/setup-builder/action.yml +++ b/.github/actions/setup-builder/action.yml @@ -26,8 +26,8 @@ runs: using: "composite" steps: - name: Setup specified Rust toolchain - shell: bash if: ${{ inputs.rust-version != '' }} + shell: bash env: RUST_VERSION: ${{ inputs.rust-version }} run: | diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 23c9b239ee..03235972dd 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -17,6 +17,15 @@ version: 2 updates: + # Maintain dependencies for GitHub Actions + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" + day: "sunday" + cooldown: + default-days: 7 + # Maintain dependencies for iceberg - package-ecosystem: "cargo" directory: "/" @@ -35,3 +44,5 @@ updates: patterns: - "arrow*" - "parquet" + cooldown: + default-days: 7 diff --git a/.github/workflows/asf-allowlist-check.yml b/.github/workflows/asf-allowlist-check.yml new file mode 100644 index 0000000000..65dbe8bcbe --- /dev/null +++ b/.github/workflows/asf-allowlist-check.yml @@ -0,0 +1,46 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Verifies all GitHub Actions refs are on the ASF allowlist. +# Actions not on the allowlist silently fail with "Startup failure" — no logs, +# no notifications, and PRs may appear green because no checks ran. 
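
A note on the audit.toml entry at the top of this diff: the direct `rand` upgrade it mentions lands later in this change set as `rand = "0.8.5"` -> `rand = "0.9.3"` in the workspace Cargo.toml. As a minimal, illustrative sketch (not code from this repository) of what changes at call sites between rand 0.8 and 0.9:

```rust
use rand::Rng;

fn main() {
    // rand 0.8:              rand 0.9:
    //   rand::thread_rng()     rand::rng()
    //   rng.gen::<u64>()       rng.random::<u64>()
    //   rng.gen_range(a..b)    rng.random_range(a..b)
    // (`gen` was renamed because it is a reserved keyword in Rust 2024.)
    let mut rng = rand::rng();
    let n: u64 = rng.random();
    let roll: u32 = rng.random_range(1..=6);
    println!("n={n} roll={roll}");
}
```
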
+# See https://github.com/apache/infrastructure-actions/issues/574 +name: "ASF Allowlist Check" + +on: + pull_request: + paths: + - ".github/**" + push: + branches: + - main + paths: + - ".github/**" + +permissions: + contents: read + +jobs: + asf-allowlist-check: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false + - uses: apache/infrastructure-actions/allowlist-check@4e9c961f587f72b170874b6f5cd4ac15f7f26eb8 # main diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index abe0c377c5..3f9865ed8a 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -37,18 +37,23 @@ on: - cron: '0 0 * * *' permissions: + # All other permissions are set to none contents: read + checks: write + issues: write jobs: security_audit: runs-on: ubuntu-latest if: github.repository == 'apache/iceberg-rust' steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Setup Rust toolchain uses: ./.github/actions/setup-builder with: rust-version: stable - - uses: rustsec/audit-check@v2.0.0 + - uses: rustsec/audit-check@69366f33c96575abad1ee0dba8212993eecbe998 # v2.0.0 with: token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/bindings_python_ci.yml b/.github/workflows/bindings_python_ci.yml index efd4a78098..4483a53310 100644 --- a/.github/workflows/bindings_python_ci.yml +++ b/.github/workflows/bindings_python_ci.yml @@ -47,7 +47,9 @@ jobs: check-rust: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Check format working-directory: "bindings/python" run: cargo fmt --all -- --check @@ -58,8 +60,10 @@ jobs: check-python: runs-on: ubuntu-slim steps: - - uses: actions/checkout@v6 - - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false + - uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0 with: version: "0.9.3" enable-cache: true @@ -85,16 +89,18 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v6 - - uses: actions/setup-python@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: python-version: 3.12 - - uses: PyO3/maturin-action@v1 + - uses: PyO3/maturin-action@e83996d129638aa358a18fbd1dfb82f0b0fb5d3b # v1.51.0 with: working-directory: "bindings/python" command: build args: --out dist -i python3.12 # Explicitly set interpreter; manylinux containers have multiple Pythons and maturin may pick an older one - - uses: astral-sh/setup-uv@5a095e7a2014a4212f075830d4f7277575a9d098 + - uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8.0.0 with: version: "0.9.3" enable-cache: true diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ea2257b676..1949015462 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,19 +53,21 @@ jobs: - ubuntu-latest - macos-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Check License Header - 
uses: apache/skywalking-eyes/header@v0.8.0 + uses: apache/skywalking-eyes/header@61275cc80d0798a405cb070f7d3a8aaf7cf2c2c1 # v0.8.0 - name: Check toml format run: make check-toml - name: Install protoc - uses: arduino/setup-protoc@v3 + uses: arduino/setup-protoc@c65c819552d16ad3c9b72d9dfd5ba5237b9c906b # v3 with: repo-token: ${{ secrets.GITHUB_TOKEN }} @@ -91,16 +93,18 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Cache Rust artifacts - uses: Swatinem/rust-cache@v2 + uses: swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2 - name: Install protoc - uses: arduino/setup-protoc@v3 + uses: arduino/setup-protoc@c65c819552d16ad3c9b72d9dfd5ba5237b9c906b # v3 with: repo-token: ${{ secrets.GITHUB_TOKEN }} @@ -117,13 +121,15 @@ jobs: - macos-latest - windows-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Cache Rust artifacts - uses: Swatinem/rust-cache@v2 + uses: swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2 - name: Build run: cargo build -p iceberg --no-default-features @@ -138,24 +144,26 @@ jobs: - { name: "doc", args: "--doc --all-features --workspace" } name: Tests (${{ matrix.test-suite.name }}) steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Install protoc - uses: arduino/setup-protoc@v3 + uses: arduino/setup-protoc@c65c819552d16ad3c9b72d9dfd5ba5237b9c906b # v3 with: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Cache Rust artifacts - uses: Swatinem/rust-cache@v2 + uses: swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2 with: key: ${{ matrix.test-suite.name }} - name: Install cargo-nextest if: matrix.test-suite.name == 'default' - uses: taiki-e/install-action@v2 + uses: taiki-e/install-action@0abfcd587b70a713fdaa7fb502c885e2112acb15 # v2.75.7 with: tool: cargo-nextest @@ -164,6 +172,7 @@ jobs: run: make docker-up - name: Run tests + shell: bash env: # Disable debug info to speed up compilation and reduce artifact size RUSTFLAGS: "-C debuginfo=0" @@ -182,9 +191,11 @@ jobs: name: Verify MSRV runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Install protoc - uses: arduino/setup-protoc@v3 + uses: arduino/setup-protoc@c65c819552d16ad3c9b72d9dfd5ba5237b9c906b # v3 with: repo-token: ${{ secrets.GITHUB_TOKEN }} - name: Get MSRV diff --git a/.github/workflows/ci_typos.yml b/.github/workflows/ci_typos.yml index 8031cd8ca9..089ddfe8e2 100644 --- a/.github/workflows/ci_typos.yml +++ b/.github/workflows/ci_typos.yml @@ -43,6 +43,8 @@ jobs: env: FORCE_COLOR: 1 steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Check typos - uses: crate-ci/typos@v1.44.0 + uses: crate-ci/typos@02ea592e44b3a53c302f697cddca7641cd051c3d # v1.45.0 diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 17bfd8bf3d..7e9c8208c8 100644 --- a/.github/workflows/codeql.yml 
+++ b/.github/workflows/codeql.yml @@ -41,14 +41,16 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v6 + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Initialize CodeQL - uses: github/codeql-action/init@v4 + uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2 with: languages: actions - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v4 + uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2 with: category: "/language:actions" diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 26f61118b7..83e1031d17 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -32,6 +32,7 @@ permissions: jobs: publish: runs-on: ubuntu-latest + environment: publish strategy: max-parallel: 1 # Publish package one by one instead of flooding the registry matrix: @@ -46,7 +47,9 @@ jobs: - "crates/catalog/sql" - "crates/integrations/datafusion" steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Get MSRV id: get-msrv @@ -61,6 +64,19 @@ jobs: working-directory: ${{ matrix.package }} # Only publish if it's a tag and the tag is not a pre-release if: ${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }} - run: cargo publish --all-features + run: cargo publish --all-features # zizmor: ignore[use-trusted-publishing] -- https://github.com/apache/iceberg-rust/issues/1539 + shell: bash env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} + + # Trigger Python release after crate publishing completes. + # Only runs for tag pushes; for manual Python releases, use workflow_dispatch on release_python.yml directly. 
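
The `release_tag` forwarded to the called workflow here is validated below in release_python.yml against `^v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)?$`. For readers who prefer the rule spelled out, a hedged Rust equivalent using the workspace's `regex` crate (the function name is hypothetical, not part of the repository):

```rust
use regex::Regex;

/// Accepts v<major>.<minor>.<patch>, optionally suffixed with -rc.<num>,
/// mirroring the bash `[[ =~ ]]` check in release_python.yml.
fn is_valid_release_tag(tag: &str) -> bool {
    let re = Regex::new(r"^v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)?$").unwrap();
    re.is_match(tag)
}

fn main() {
    assert!(is_valid_release_tag("v0.4.0"));
    assert!(is_valid_release_tag("v0.4.0-rc.1"));
    assert!(!is_valid_release_tag("0.4.0")); // missing `v` prefix
    assert!(!is_valid_release_tag("v0.4.0-rc")); // rc without a number
}
```
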
+ release-python: + needs: [publish] + if: ${{ startsWith(github.ref, 'refs/tags/') }} + permissions: + contents: read + id-token: write # Required for PyPI trusted publishing in the called workflow + uses: ./.github/workflows/release_python.yml + with: + release_tag: ${{ github.ref_name }} diff --git a/.github/workflows/release_python.yml b/.github/workflows/release_python.yml index abf8b52b6d..0638cff6df 100644 --- a/.github/workflows/release_python.yml +++ b/.github/workflows/release_python.yml @@ -18,10 +18,12 @@ name: Publish Python 🐍 distribution 📦 to PyPI on: - workflow_run: - workflows: ["Publish"] # Trigger this workflow after the "publish.yml" workflow completes - types: - - completed + workflow_call: + inputs: + release_tag: + description: 'Release tag (e.g., v0.4.0 or v0.4.0-rc.1)' + required: true + type: string workflow_dispatch: inputs: release_tag: @@ -33,37 +35,24 @@ permissions: contents: read jobs: - check-cargo-publish: - runs-on: ubuntu-latest - # Only run if the triggering workflow succeeded OR if manually triggered - if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }} - steps: - - run: echo 'The Publish workflow passed or was manually triggered' - validate-release-tag: runs-on: ubuntu-latest - needs: [check-cargo-publish] outputs: cargo-version: ${{ steps.validate.outputs.cargo-version }} is-rc: ${{ steps.validate.outputs.is-rc }} steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 if: ${{ github.event_name == 'workflow_dispatch' }} + with: + persist-credentials: false - name: Validate release tag format id: validate - # Use input for workflow_dispatch, otherwise use `workflow_run.head_branch` - # Note, `workflow_run.head_branch` does not contain `refs/tags/` prefix, just the tag name, i.e. `v0.4.0` or `v0.4.0-rc.1` # Valid formats: v<major>.<minor>.<patch> OR v<major>.<minor>.<patch>-rc.<num> + shell: bash env: - DISPATCH_RELEASE_TAG: ${{ github.event.inputs.release_tag }} - RUN_HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }} + RELEASE_TAG: ${{ inputs.release_tag }} run: | - if [ "${{ github.event_name }}" == "workflow_dispatch" ]; then - RELEASE_TAG="$DISPATCH_RELEASE_TAG" - else - RELEASE_TAG="$RUN_HEAD_BRANCH" - fi echo "Validating release tag: $RELEASE_TAG" if [[ !
"$RELEASE_TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+(-rc\.[0-9]+)?$ ]]; then echo "❌ Invalid release tag format: $RELEASE_TAG" @@ -114,7 +103,9 @@ jobs: runs-on: ubuntu-latest needs: [validate-release-tag] steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Install toml-cli if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }} @@ -124,19 +115,22 @@ jobs: if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }} working-directory: "bindings/python" run: | - echo "Setting cargo version to: ${{ needs.validate-release-tag.outputs.cargo-version }}" - toml set Cargo.toml package.version ${{ needs.validate-release-tag.outputs.cargo-version }} > Cargo.toml.tmp + echo "Setting cargo version to: ${NEEDS_VALIDATE_RELEASE_TAG_OUTPUTS_CARGO_VERSION}" + toml set Cargo.toml package.version "${NEEDS_VALIDATE_RELEASE_TAG_OUTPUTS_CARGO_VERSION}" > Cargo.toml.tmp # doing this explicitly to avoid issue in Windows where `mv` does not overwrite existing file rm Cargo.toml mv Cargo.toml.tmp Cargo.toml + shell: bash + env: + NEEDS_VALIDATE_RELEASE_TAG_OUTPUTS_CARGO_VERSION: ${{ needs.validate-release-tag.outputs.cargo-version }} - - uses: PyO3/maturin-action@v1 + - uses: PyO3/maturin-action@e83996d129638aa358a18fbd1dfb82f0b0fb5d3b # v1.51.0 with: working-directory: "bindings/python" command: sdist args: -o dist - name: Upload sdist - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: wheels-sdist path: bindings/python/dist @@ -158,7 +152,9 @@ jobs: } - { os: ubuntu-latest, target: "armv7l" } steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Install toml-cli if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }} @@ -167,14 +163,17 @@ jobs: - name: Set cargo version for RC if: ${{ needs.validate-release-tag.outputs.is-rc == 'true' }} working-directory: "bindings/python" + shell: bash + env: + CARGO_VERSION: ${{ needs.validate-release-tag.outputs.cargo-version }} run: | - echo "Setting cargo version to: ${{ needs.validate-release-tag.outputs.cargo-version }}" - toml set Cargo.toml package.version ${{ needs.validate-release-tag.outputs.cargo-version }} > Cargo.toml.tmp + echo "Setting cargo version to: $CARGO_VERSION" + toml set Cargo.toml package.version "$CARGO_VERSION" > Cargo.toml.tmp # doing this explicitly to avoid issue in Windows where `mv` does not overwrite existing file rm Cargo.toml mv Cargo.toml.tmp Cargo.toml - - uses: actions/setup-python@v6 + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: python-version: 3.12 - name: Get MSRV @@ -185,7 +184,7 @@ jobs: uses: ./.github/actions/setup-builder with: rust-version: ${{ steps.get-msrv.outputs.msrv }} - - uses: PyO3/maturin-action@v1 + - uses: PyO3/maturin-action@e83996d129638aa358a18fbd1dfb82f0b0fb5d3b # v1.51.0 with: target: ${{ matrix.target }} manylinux: ${{ matrix.manylinux || 'auto' }} @@ -193,7 +192,7 @@ jobs: command: build args: --release -o dist -i python3.12 # Explicitly set interpreter; manylinux containers have multiple Pythons and maturin may pick an older one - name: Upload wheels - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: wheels-${{ matrix.os }}-${{ matrix.target }} path: bindings/python/dist @@ -212,13 +211,13 @@ jobs: steps: - name: 
Download all the dists - uses: actions/download-artifact@v8 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8 with: pattern: wheels-* merge-multiple: true path: bindings/python/dist - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: skip-existing: true packages-dir: bindings/python/dist diff --git a/.github/workflows/release_python_nightly.yml b/.github/workflows/release_python_nightly.yml index 595cb42d05..26b034554c 100644 --- a/.github/workflows/release_python_nightly.yml +++ b/.github/workflows/release_python_nightly.yml @@ -27,6 +27,7 @@ permissions: jobs: set-version: + if: github.repository == 'apache/iceberg-rust' || github.event_name == 'workflow_dispatch' # Run on schedule for apache repo, or on manual dispatch from any repo runs-on: ubuntu-latest outputs: timestamp: ${{ steps.set-ts.outputs.TIMESTAMP }} @@ -37,30 +38,30 @@ jobs: sdist: needs: set-version - if: github.repository == 'apache/iceberg-rust' # Only run for apache repo runs-on: ubuntu-latest steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - uses: ./.github/actions/overwrite-package-version # Overwrite package version with timestamp with: timestamp: ${{ needs.set-version.outputs.TIMESTAMP }} - - uses: PyO3/maturin-action@v1 + - uses: PyO3/maturin-action@e83996d129638aa358a18fbd1dfb82f0b0fb5d3b # v1.51.0 with: working-directory: "bindings/python" command: sdist args: -o dist - name: Upload sdist - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: wheels-sdist path: bindings/python/dist wheels: needs: set-version - if: github.repository == 'apache/iceberg-rust' # Only run for apache repo runs-on: "${{ matrix.os }}" strategy: max-parallel: 15 @@ -76,13 +77,15 @@ jobs: } - { os: ubuntu-latest, target: "armv7l" } steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - uses: ./.github/actions/overwrite-package-version # Overwrite package version with timestamp with: timestamp: ${{ needs.set-version.outputs.TIMESTAMP }} - - uses: actions/setup-python@v6 + - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 with: python-version: 3.12 @@ -95,7 +98,7 @@ jobs: with: rust-version: ${{ steps.get-msrv.outputs.msrv }} - - uses: PyO3/maturin-action@v1 + - uses: PyO3/maturin-action@e83996d129638aa358a18fbd1dfb82f0b0fb5d3b # v1.51.0 with: target: ${{ matrix.target }} manylinux: ${{ matrix.manylinux || 'auto' }} @@ -104,12 +107,13 @@ jobs: args: --release -o dist -i python3.12 # Explicitly set interpreter; manylinux containers have multiple Pythons and maturin may pick an older one - name: Upload wheels - uses: actions/upload-artifact@v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: wheels-${{ matrix.os }}-${{ matrix.target }} path: bindings/python/dist testpypi-publish: + if: github.repository == 'apache/iceberg-rust' # Only run for apache repo needs: [sdist, wheels] runs-on: ubuntu-latest @@ -122,7 +126,7 @@ jobs: steps: - name: Download all the dists - uses: actions/download-artifact@v8 + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8 with: pattern: wheels-* merge-multiple: true @@ -132,7 +136,7 @@ jobs: - name: Publish to TestPyPI 
id: publish-testpypi continue-on-error: true - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 with: repository-url: https://test.pypi.org/legacy/ skip-existing: true diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index e2afce4c71..c3d3f18294 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -32,7 +32,7 @@ jobs: if: github.repository_owner == 'apache' runs-on: ubuntu-24.04 steps: - - uses: actions/stale@v10.2.0 + - uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10.2.0 with: # stale issues stale-issue-label: 'stale,security' diff --git a/.github/workflows/website.yml b/.github/workflows/website.yml index 59bd2c6f2c..71fb9503c9 100644 --- a/.github/workflows/website.yml +++ b/.github/workflows/website.yml @@ -39,15 +39,17 @@ jobs: permissions: contents: write steps: - - uses: actions/checkout@v6 + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false - name: Setup mdBook - uses: peaceiris/actions-mdbook@v2 + uses: peaceiris/actions-mdbook@ee69d230fe19748b7abf22df32acaa93833fad08 # v2 with: mdbook-version: "0.4.36" - name: Install protoc - uses: arduino/setup-protoc@v3 + uses: arduino/setup-protoc@c65c819552d16ad3c9b72d9dfd5ba5237b9c906b # v3 with: repo-token: ${{ secrets.GITHUB_TOKEN }} @@ -64,7 +66,7 @@ jobs: cp -r target/doc ./website/book/api - name: Deploy to gh-pages - uses: peaceiris/actions-gh-pages@v4.0.0 + uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0 if: github.event_name == 'push' && github.ref_name == 'main' with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/zizmor.yml b/.github/workflows/zizmor.yml new file mode 100644 index 0000000000..313835fcbe --- /dev/null +++ b/.github/workflows/zizmor.yml @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +name: GitHub Actions Security Analysis with zizmor 🌈 + +on: + push: + branches: ["main"] + pull_request: + branches: ["**"] + +permissions: {} + +jobs: + zizmor: + name: Run zizmor 🌈 + runs-on: ubuntu-latest + permissions: {} + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Run zizmor 🌈 + uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2 + with: + advanced-security: false diff --git a/.typos.toml b/.typos.toml index 407ce8168c..36996a553a 100644 --- a/.typos.toml +++ b/.typos.toml @@ -18,5 +18,9 @@ [type.rust] extend-ignore-identifiers-re = ["^bimap$"] +[default.extend-words] +ags = "ags" +AGS = "AGS" + [files] extend-exclude = ["**/testdata", "CHANGELOG.md"] diff --git a/Cargo.lock b/Cargo.lock index a24ef04626..98bdd58fc0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -193,7 +193,7 @@ dependencies = [ "miniz_oxide", "num-bigint", "quad-rand", - "rand 0.9.2", + "rand 0.9.4", "regex-lite", "serde", "serde_bytes", @@ -665,9 +665,9 @@ dependencies = [ [[package]] name = "aws-sdk-s3tables" -version = "1.53.0" +version = "1.54.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91febb29f5287a7b723dbacca6d81b1086b8ac0af6b35b873539ee19c74827f" +checksum = "2e0ec266873694efc365debded01f44e27a0de3946a3ac15d24c489759e5ddf8" dependencies = [ "aws-credential-types", "aws-runtime", @@ -1127,6 +1127,20 @@ name = "bytemuck" version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "byteorder" @@ -1629,9 +1643,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de9f8117889ba9503440f1dd79ebab32ba52ccf1720bb83cd718a29d4edc0d16" +checksum = "93db0e623840612f7f2cd757f7e8a8922064192363732c88692e0870016e141b" dependencies = [ "arrow", "arrow-schema", @@ -1673,7 +1687,7 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand 0.9.2", + "rand 0.9.4", "regex", "sqlparser", "tempfile", @@ -1685,9 +1699,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be893b73a13671f310ffcc8da2c546b81efcc54c22e0382c0a28aa3537017137" +checksum = "37cefde60b26a7f4ff61e9d2ff2833322f91df2b568d7238afe67bde5bdffb66" dependencies = [ "arrow", "async-trait", @@ -1710,9 +1724,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830487b51ed83807d6b32d6325f349c3144ae0c9bf772cf2a712db180c31d5e6" +checksum = "17e112307715d6a7a331111a4c2330ff54bc237183511c319e3708a4cff431fb" dependencies = [ "arrow", "async-trait", @@ -1761,9 +1775,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0d7663f3af955292f8004e74bcaf8f7ea3d66cc38438749615bb84815b61a293" +checksum = "d72a11ca44a95e1081870d3abb80c717496e8a7acb467a1d3e932bb636af5cc2" dependencies = [ "ahash", "apache-avro", @@ -1788,9 +1802,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f590205c7e32fe1fea48dd53ffb406e56ae0e7a062213a3ac848db8771641bd" +checksum = "89f4afaed29670ec4fd6053643adc749fe3f4bc9d1ce1b8c5679b22c67d12def" dependencies = [ "futures", "log", @@ -1799,9 +1813,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fde1e030a9dc87b743c806fbd631f5ecfa2ccaa4ffb61fa19144a07fea406b79" +checksum = "e9fb386e1691355355a96419978a0022b7947b44d4a24a6ea99f00b6b485cbb6" dependencies = [ "arrow", "async-compression", @@ -1825,7 +1839,7 @@ dependencies = [ "liblzma", "log", "object_store", - "rand 0.9.2", + "rand 0.9.4", "tokio", "tokio-util", "url", @@ -1834,9 +1848,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "331ebae7055dc108f9b54994b93dff91f3a17445539efe5b74e89264f7b36e15" +checksum = "ffa6c52cfed0734c5f93754d1c0175f558175248bf686c944fb05c373e5fc096" dependencies = [ "arrow", "arrow-ipc", @@ -1858,9 +1872,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49dda81c79b6ba57b1853a9158abc66eb85a3aa1cede0c517dabec6d8a4ed3aa" +checksum = "a579c3bd290c66ea4b269493e75e8a3ed42c9c895a651f10210a29538aee50c4" dependencies = [ "apache-avro", "arrow", @@ -1878,9 +1892,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e0d475088325e2986876aa27bb30d0574f72a22955a527d202f454681d55c5c" +checksum = "503f29e0582c1fc189578d665ff57d9300da1f80c282777d7eb67bb79fb8cdca" dependencies = [ "arrow", "async-trait", @@ -1901,9 +1915,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea1520d81f31770f3ad6ee98b391e75e87a68a5bb90de70064ace5e0a7182fe8" +checksum = "e33804749abc8d0c8cb7473228483cb8070e524c6f6086ee1b85a64debe2b3d2" dependencies = [ "arrow", "async-trait", @@ -1925,9 +1939,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95be805d0742ab129720f4c51ad9242cd872599cdb076098b03f061fcdc7f946" +checksum = "32a8e0365e0e08e8ff94d912f0ababcf9065a1a304018ba90b1fc83c855b4997" dependencies = [ "arrow", "async-trait", @@ -1955,15 +1969,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c93ad9e37730d2c7196e68616f3f2dd3b04c892e03acd3a8eeca6e177f3c06a" +checksum = "8de6ac0df1662b9148ad3c987978b32cbec7c772f199b1d53520c8fa764a87ee" [[package]] name = "datafusion-execution" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"9437d3cd5d363f9319f8122182d4d233427de79c7eb748f23054c9aaa0fdd8df" +checksum = "c03c7fbdaefcca4ef6ffe425a5fc2325763bfb426599bb0bf4536466efabe709" dependencies = [ "arrow", "arrow-buffer", @@ -1978,16 +1992,16 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand 0.9.2", + "rand 0.9.4", "tempfile", "url", ] [[package]] name = "datafusion-expr" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67164333342b86521d6d93fa54081ee39839894fb10f7a700c099af96d7552cf" +checksum = "574b9b6977fedbd2a611cbff12e5caf90f31640ad9dc5870f152836d94bad0dd" dependencies = [ "arrow", "async-trait", @@ -2008,9 +2022,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab05fdd00e05d5a6ee362882546d29d6d3df43a6c55355164a7fbee12d163bc9" +checksum = "7d7c3adf3db8bf61e92eb90cb659c8e8b734593a8f7c8e12a843c7ddba24b87e" dependencies = [ "arrow", "datafusion-common", @@ -2021,9 +2035,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04fb863482d987cf938db2079e07ab0d3bb64595f28907a6c2f8671ad71cca7e" +checksum = "f28aa4e10384e782774b10e72aca4d93ef7b31aa653095d9d4536b0a3dbc51b6" dependencies = [ "arrow", "arrow-buffer", @@ -2044,7 +2058,7 @@ dependencies = [ "md-5", "memchr", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "regex", "sha2", "unicode-segmentation", @@ -2053,9 +2067,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "829856f4e14275fb376c104f27cbf3c3b57a9cfe24885d98677525f5e43ce8d6" +checksum = "00aa6217e56098ba84e0a338176fe52f0a84cca398021512c6c8c5eff806d0ad" dependencies = [ "ahash", "arrow", @@ -2075,9 +2089,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08af79cc3d2aa874a362fb97decfcbd73d687190cb096f16a6c85a7780cce311" +checksum = "b511250349407db7c43832ab2de63f5557b19a20dfd236b39ca2c04468b50d47" dependencies = [ "ahash", "arrow", @@ -2088,9 +2102,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "465ae3368146d49c2eda3e2c0ef114424c87e8a6b509ab34c1026ace6497e790" +checksum = "ef13a858e20d50f0a9bb5e96e7ac82b4e7597f247515bccca4fdd2992df0212a" dependencies = [ "arrow", "arrow-ord", @@ -2113,9 +2127,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6156e6b22fcf1784112fc0173f3ae6e78c8fdb4d3ed0eace9543873b437e2af6" +checksum = "72b40d3f5bbb3905f9ccb1ce9485a9595c77b69758a7c24d3ba79e334ff51e7e" dependencies = [ "arrow", "async-trait", @@ -2129,9 +2143,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca7baec14f866729012efb89011a6973f3a346dc8090c567bfcd328deff551c1" +checksum = "d4e88ec9d57c9b685d02f58bfee7be62d72610430ddcedb82a08e5d9925dbfb6" dependencies = [ "arrow", "datafusion-common", @@ 
-2147,9 +2161,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "159228c3280d342658466bb556dc24de30047fe1d7e559dc5d16ccc5324166f9" +checksum = "8307bb93519b1a91913723a1130cfafeee3f72200d870d88e91a6fc5470ede5c" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2157,9 +2171,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5427e5da5edca4d21ea1c7f50e1c9421775fe33d7d5726e5641a833566e7578" +checksum = "2e367e6a71051d0ebdd29b2f85d12059b38b1d1f172c6906e80016da662226bd" dependencies = [ "datafusion-doc", "quote", @@ -2168,9 +2182,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89099eefcd5b223ec685c36a41d35c69239236310d71d339f2af0fa4383f3f46" +checksum = "e929015451a67f77d9d8b727b2bf3a40c4445fdef6cdc53281d7d97c76888ace" dependencies = [ "arrow", "chrono", @@ -2188,9 +2202,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f222df5195d605d79098ef37bdd5323bff0131c9d877a24da6ec98dfca9fe36" +checksum = "4b1e68aba7a4b350401cfdf25a3d6f989ad898a7410164afe9ca52080244cb59" dependencies = [ "ahash", "arrow", @@ -2212,9 +2226,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40838625d63d9c12549d81979db3dd675d159055eb9135009ba272ab0e8d0f64" +checksum = "ea22315f33cf2e0adc104e8ec42e285f6ed93998d565c65e82fec6a9ee9f9db4" dependencies = [ "arrow", "datafusion-common", @@ -2227,9 +2241,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eacbcc4cfd502558184ed58fa3c72e775ec65bf077eef5fd2b3453db676f893c" +checksum = "b04b45ea8ad3ac2d78f2ea2a76053e06591c9629c7a603eda16c10649ecf4362" dependencies = [ "ahash", "arrow", @@ -2244,9 +2258,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d501d0e1d0910f015677121601ac177ec59272ef5c9324d1147b394988f40941" +checksum = "7cb13397809a425918f608dfe8653f332015a3e330004ab191b4404187238b95" dependencies = [ "arrow", "datafusion-common", @@ -2263,9 +2277,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "463c88ad6f1ecab1810f4c9f046898bee035b370137eb79b2b2db925e270631d" +checksum = "5edc023675791af9d5fb4cc4c24abf5f7bd3bd4dcf9e5bd90ea1eff6976dcc79" dependencies = [ "ahash", "arrow", @@ -2295,9 +2309,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2857618a0ecbd8cd0cf29826889edd3a25774ec26b2995fc3862095c95d88fc6" +checksum = "ac8c76860e355616555081cab5968cec1af7a80701ff374510860bcd567e365a" dependencies = [ "arrow", "datafusion-common", @@ -2312,9 
+2326,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef8637e35022c5c775003b3ab1debc6b4a8f0eb41b069bdd5475dd3aa93f6eba" +checksum = "5412111aa48e2424ba926112e192f7a6b7e4ccb450145d25ce5ede9f19dc491e" dependencies = [ "async-trait", "datafusion-common", @@ -2344,7 +2358,7 @@ dependencies = [ "datafusion-functions-nested", "log", "percent-encoding", - "rand 0.9.2", + "rand 0.9.4", "serde_json", "sha1", "sha2", @@ -2353,9 +2367,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12d9e9f16a1692a11c94bcc418191fa15fd2b4d72a0c1a0c607db93c0b84dd81" +checksum = "fa0d133ddf8b9b3b872acac900157f783e7b879fe9a6bccf389abebbfac45ec1" dependencies = [ "arrow", "bigdecimal", @@ -3056,6 +3070,7 @@ version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ + "bytemuck", "cfg-if", "crunchy", "num-traits", @@ -3356,11 +3371,12 @@ dependencies = [ "ordered-float 4.6.0", "parquet", "pretty_assertions", - "rand 0.8.5", + "rand 0.9.4", "regex", "reqwest", "roaring", "serde", + "serde_arrow", "serde_bytes", "serde_derive", "serde_json", @@ -3468,14 +3484,19 @@ name = "iceberg-catalog-s3tables" version = "0.9.0" dependencies = [ "anyhow", + "arrow-array", + "arrow-schema", "async-trait", "aws-config", "aws-sdk-s3tables", + "futures", "iceberg", "iceberg-storage-opendal", "iceberg_test_utils", "itertools 0.13.0", + "parquet", "tokio", + "uuid", ] [[package]] @@ -4130,6 +4151,21 @@ dependencies = [ "twox-hash", ] +[[package]] +name = "marrow" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5240d6977234968ff9ad254bfa73aa397fb51e41dcb22b1eb85835e9295485b" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "bytemuck", + "half", + "serde", +] + [[package]] name = "md-5" version = "0.10.6" @@ -4185,9 +4221,9 @@ dependencies = [ [[package]] name = "minijinja" -version = "2.18.0" +version = "2.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "328251e58ad8e415be6198888fc207502727dc77945806421ab34f35bf012e7d" +checksum = "805bfd7352166bae857ee569628b52bcd85a1cecf7810861ebceb1686b72b75d" dependencies = [ "memo-map", "serde", @@ -4205,9 +4241,9 @@ dependencies = [ [[package]] name = "mio" -version = "1.1.1" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", "wasi", @@ -4257,7 +4293,7 @@ dependencies = [ "hyper-util", "log", "pin-project-lite", - "rand 0.9.2", + "rand 0.9.4", "regex", "serde_json", "serde_urlencoded", @@ -4515,7 +4551,7 @@ dependencies = [ "parking_lot", "percent-encoding", "quick-xml 0.39.2", - "rand 0.10.0", + "rand 0.10.1", "reqwest", "ring", "rustls-pki-types", @@ -5171,7 +5207,7 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand 0.9.2", + "rand 0.9.4", "ring", "rustc-hash", "rustls", @@ -5251,9 +5287,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.2" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.5", @@ -5261,9 +5297,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" dependencies = [ "chacha20", "getrandom 0.4.2", @@ -5699,9 +5735,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.10" +version = "0.103.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" dependencies = [ "aws-lc-rs", "ring", @@ -5893,6 +5929,21 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_arrow" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2784e59a0315568e850cb01ddadf458f8c09e28d8cfc4880c2cc08f5dc3444e0" +dependencies = [ + "arrow-array", + "arrow-schema", + "bytemuck", + "chrono", + "half", + "marrow", + "serde", +] + [[package]] name = "serde_bytes" version = "0.11.19" @@ -6741,9 +6792,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.50.0" +version = "1.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" +checksum = "f66bf9585cda4b724d3e78ab34b73fb2bbaba9011b9bfdf69dc836382ea13b8c" dependencies = [ "bytes", "libc", @@ -6758,9 +6809,9 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.6.1" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c55a2eff8b69ce66c84f85e1da1c233edc36ceb85a2058d11b0d6a3c7e7569c" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ "proc-macro2", "quote", @@ -7247,7 +7298,7 @@ dependencies = [ "nix 0.29.0", "once_cell", "pin-project", - "rand 0.9.2", + "rand 0.9.4", "socket2 0.5.10", "thiserror 2.0.18", "tokio", diff --git a/Cargo.toml b/Cargo.toml index 778e69c9d9..7f612c44bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,7 +66,7 @@ cfg-if = "1" chrono = "0.4.41" clap = { version = "4.5.48", features = ["derive", "cargo"] } dashmap = "6" -datafusion = "53.0.0" +datafusion = "53.1.0" datafusion-cli = "53.0.0" datafusion-sqllogictest = "53.0.0" derive_builder = "0.20" @@ -108,7 +108,7 @@ ordered-float = "4" parquet = "58" pilota = "0.11.10" pretty_assertions = "1.4" -rand = "0.8.5" +rand = "0.9.3" regex = "1.11.3" reqwest = { version = "0.12.12", default-features = false, features = ["json"] } roaring = { version = "0.11" } diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 1b5c06f492..72ea322d7b 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -163,7 +163,7 @@ dependencies = [ "miniz_oxide", "num-bigint", "quad-rand", - "rand 0.9.2", + "rand 0.9.4", "regex-lite", "serde", "serde_bytes", @@ -1052,9 +1052,9 @@ dependencies = [ [[package]] name = "datafusion" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de9f8117889ba9503440f1dd79ebab32ba52ccf1720bb83cd718a29d4edc0d16" +checksum = 
"93db0e623840612f7f2cd757f7e8a8922064192363732c88692e0870016e141b" dependencies = [ "arrow", "arrow-schema", @@ -1095,7 +1095,7 @@ dependencies = [ "object_store", "parking_lot", "parquet", - "rand 0.9.2", + "rand 0.9.4", "regex", "sqlparser", "tempfile", @@ -1107,9 +1107,9 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be893b73a13671f310ffcc8da2c546b81efcc54c22e0382c0a28aa3537017137" +checksum = "37cefde60b26a7f4ff61e9d2ff2833322f91df2b568d7238afe67bde5bdffb66" dependencies = [ "arrow", "async-trait", @@ -1132,9 +1132,9 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830487b51ed83807d6b32d6325f349c3144ae0c9bf772cf2a712db180c31d5e6" +checksum = "17e112307715d6a7a331111a4c2330ff54bc237183511c319e3708a4cff431fb" dependencies = [ "arrow", "async-trait", @@ -1155,9 +1155,9 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d7663f3af955292f8004e74bcaf8f7ea3d66cc38438749615bb84815b61a293" +checksum = "d72a11ca44a95e1081870d3abb80c717496e8a7acb467a1d3e932bb636af5cc2" dependencies = [ "ahash", "arrow", @@ -1180,9 +1180,9 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f590205c7e32fe1fea48dd53ffb406e56ae0e7a062213a3ac848db8771641bd" +checksum = "89f4afaed29670ec4fd6053643adc749fe3f4bc9d1ce1b8c5679b22c67d12def" dependencies = [ "futures", "log", @@ -1191,9 +1191,9 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fde1e030a9dc87b743c806fbd631f5ecfa2ccaa4ffb61fa19144a07fea406b79" +checksum = "e9fb386e1691355355a96419978a0022b7947b44d4a24a6ea99f00b6b485cbb6" dependencies = [ "arrow", "async-compression", @@ -1217,7 +1217,7 @@ dependencies = [ "liblzma", "log", "object_store", - "rand 0.9.2", + "rand 0.9.4", "tokio", "tokio-util", "url", @@ -1226,9 +1226,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "331ebae7055dc108f9b54994b93dff91f3a17445539efe5b74e89264f7b36e15" +checksum = "ffa6c52cfed0734c5f93754d1c0175f558175248bf686c944fb05c373e5fc096" dependencies = [ "arrow", "arrow-ipc", @@ -1250,9 +1250,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e0d475088325e2986876aa27bb30d0574f72a22955a527d202f454681d55c5c" +checksum = "503f29e0582c1fc189578d665ff57d9300da1f80c282777d7eb67bb79fb8cdca" dependencies = [ "arrow", "async-trait", @@ -1273,9 +1273,9 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea1520d81f31770f3ad6ee98b391e75e87a68a5bb90de70064ace5e0a7182fe8" +checksum = "e33804749abc8d0c8cb7473228483cb8070e524c6f6086ee1b85a64debe2b3d2" dependencies = [ "arrow", "async-trait", @@ -1297,9 +1297,9 @@ dependencies = [ [[package]] name = 
"datafusion-datasource-parquet" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95be805d0742ab129720f4c51ad9242cd872599cdb076098b03f061fcdc7f946" +checksum = "32a8e0365e0e08e8ff94d912f0ababcf9065a1a304018ba90b1fc83c855b4997" dependencies = [ "arrow", "async-trait", @@ -1327,15 +1327,15 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c93ad9e37730d2c7196e68616f3f2dd3b04c892e03acd3a8eeca6e177f3c06a" +checksum = "8de6ac0df1662b9148ad3c987978b32cbec7c772f199b1d53520c8fa764a87ee" [[package]] name = "datafusion-execution" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9437d3cd5d363f9319f8122182d4d233427de79c7eb748f23054c9aaa0fdd8df" +checksum = "c03c7fbdaefcca4ef6ffe425a5fc2325763bfb426599bb0bf4536466efabe709" dependencies = [ "arrow", "arrow-buffer", @@ -1349,16 +1349,16 @@ dependencies = [ "log", "object_store", "parking_lot", - "rand 0.9.2", + "rand 0.9.4", "tempfile", "url", ] [[package]] name = "datafusion-expr" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67164333342b86521d6d93fa54081ee39839894fb10f7a700c099af96d7552cf" +checksum = "574b9b6977fedbd2a611cbff12e5caf90f31640ad9dc5870f152836d94bad0dd" dependencies = [ "arrow", "async-trait", @@ -1379,9 +1379,9 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab05fdd00e05d5a6ee362882546d29d6d3df43a6c55355164a7fbee12d163bc9" +checksum = "7d7c3adf3db8bf61e92eb90cb659c8e8b734593a8f7c8e12a843c7ddba24b87e" dependencies = [ "arrow", "datafusion-common", @@ -1422,9 +1422,9 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04fb863482d987cf938db2079e07ab0d3bb64595f28907a6c2f8671ad71cca7e" +checksum = "f28aa4e10384e782774b10e72aca4d93ef7b31aa653095d9d4536b0a3dbc51b6" dependencies = [ "arrow", "arrow-buffer", @@ -1445,7 +1445,7 @@ dependencies = [ "md-5", "memchr", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "regex", "sha2", "unicode-segmentation", @@ -1454,9 +1454,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "829856f4e14275fb376c104f27cbf3c3b57a9cfe24885d98677525f5e43ce8d6" +checksum = "00aa6217e56098ba84e0a338176fe52f0a84cca398021512c6c8c5eff806d0ad" dependencies = [ "ahash", "arrow", @@ -1476,9 +1476,9 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08af79cc3d2aa874a362fb97decfcbd73d687190cb096f16a6c85a7780cce311" +checksum = "b511250349407db7c43832ab2de63f5557b19a20dfd236b39ca2c04468b50d47" dependencies = [ "ahash", "arrow", @@ -1489,9 +1489,9 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "465ae3368146d49c2eda3e2c0ef114424c87e8a6b509ab34c1026ace6497e790" +checksum = "ef13a858e20d50f0a9bb5e96e7ac82b4e7597f247515bccca4fdd2992df0212a" 
dependencies = [ "arrow", "arrow-ord", @@ -1514,9 +1514,9 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6156e6b22fcf1784112fc0173f3ae6e78c8fdb4d3ed0eace9543873b437e2af6" +checksum = "72b40d3f5bbb3905f9ccb1ce9485a9595c77b69758a7c24d3ba79e334ff51e7e" dependencies = [ "arrow", "async-trait", @@ -1530,9 +1530,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca7baec14f866729012efb89011a6973f3a346dc8090c567bfcd328deff551c1" +checksum = "d4e88ec9d57c9b685d02f58bfee7be62d72610430ddcedb82a08e5d9925dbfb6" dependencies = [ "arrow", "datafusion-common", @@ -1548,9 +1548,9 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "159228c3280d342658466bb556dc24de30047fe1d7e559dc5d16ccc5324166f9" +checksum = "8307bb93519b1a91913723a1130cfafeee3f72200d870d88e91a6fc5470ede5c" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -1558,9 +1558,9 @@ dependencies = [ [[package]] name = "datafusion-macros" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5427e5da5edca4d21ea1c7f50e1c9421775fe33d7d5726e5641a833566e7578" +checksum = "2e367e6a71051d0ebdd29b2f85d12059b38b1d1f172c6906e80016da662226bd" dependencies = [ "datafusion-doc", "quote", @@ -1569,9 +1569,9 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89099eefcd5b223ec685c36a41d35c69239236310d71d339f2af0fa4383f3f46" +checksum = "e929015451a67f77d9d8b727b2bf3a40c4445fdef6cdc53281d7d97c76888ace" dependencies = [ "arrow", "chrono", @@ -1589,9 +1589,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f222df5195d605d79098ef37bdd5323bff0131c9d877a24da6ec98dfca9fe36" +checksum = "4b1e68aba7a4b350401cfdf25a3d6f989ad898a7410164afe9ca52080244cb59" dependencies = [ "ahash", "arrow", @@ -1613,9 +1613,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40838625d63d9c12549d81979db3dd675d159055eb9135009ba272ab0e8d0f64" +checksum = "ea22315f33cf2e0adc104e8ec42e285f6ed93998d565c65e82fec6a9ee9f9db4" dependencies = [ "arrow", "datafusion-common", @@ -1628,9 +1628,9 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eacbcc4cfd502558184ed58fa3c72e775ec65bf077eef5fd2b3453db676f893c" +checksum = "b04b45ea8ad3ac2d78f2ea2a76053e06591c9629c7a603eda16c10649ecf4362" dependencies = [ "ahash", "arrow", @@ -1645,9 +1645,9 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d501d0e1d0910f015677121601ac177ec59272ef5c9324d1147b394988f40941" +checksum = "7cb13397809a425918f608dfe8653f332015a3e330004ab191b4404187238b95" 
dependencies = [ "arrow", "datafusion-common", @@ -1664,9 +1664,9 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "463c88ad6f1ecab1810f4c9f046898bee035b370137eb79b2b2db925e270631d" +checksum = "5edc023675791af9d5fb4cc4c24abf5f7bd3bd4dcf9e5bd90ea1eff6976dcc79" dependencies = [ "ahash", "arrow", @@ -1719,7 +1719,7 @@ dependencies = [ "datafusion-proto-common", "object_store", "prost", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -1735,9 +1735,9 @@ dependencies = [ [[package]] name = "datafusion-pruning" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2857618a0ecbd8cd0cf29826889edd3a25774ec26b2995fc3862095c95d88fc6" +checksum = "ac8c76860e355616555081cab5968cec1af7a80701ff374510860bcd567e365a" dependencies = [ "arrow", "datafusion-common", @@ -1752,9 +1752,9 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef8637e35022c5c775003b3ab1debc6b4a8f0eb41b069bdd5475dd3aa93f6eba" +checksum = "5412111aa48e2424ba926112e192f7a6b7e4ccb450145d25ce5ede9f19dc491e" dependencies = [ "async-trait", "datafusion-common", @@ -1766,9 +1766,9 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "53.0.0" +version = "53.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12d9e9f16a1692a11c94bcc418191fa15fd2b4d72a0c1a0c607db93c0b84dd81" +checksum = "fa0d133ddf8b9b3b872acac900157f783e7b879fe9a6bccf389abebbfac45ec1" dependencies = [ "arrow", "bigdecimal", @@ -2435,7 +2435,7 @@ dependencies = [ "once_cell", "ordered-float 4.6.0", "parquet", - "rand 0.8.5", + "rand 0.9.4", "reqwest", "roaring", "serde", @@ -3587,7 +3587,7 @@ dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand 0.9.2", + "rand 0.9.4", "ring", "rustc-hash", "rustls", @@ -3647,9 +3647,9 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.2" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.5", @@ -3968,9 +3968,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.10" +version = "0.103.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" dependencies = [ "ring", "rustls-pki-types", diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index a7e0171337..5b3ccf3b39 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -203,7 +203,6 @@ impl GlueCatalog { // Use provided factory or default to OpenDalStorageFactory::S3 let factory = storage_factory.unwrap_or_else(|| { Arc::new(OpenDalStorageFactory::S3 { - configured_scheme: "s3a".to_string(), customized_credential_load: None, }) }); diff --git a/crates/catalog/hms/tests/hms_catalog_test.rs b/crates/catalog/hms/tests/hms_catalog_test.rs index f19cf7bff4..d0e6486ad8 100644 --- a/crates/catalog/hms/tests/hms_catalog_test.rs +++ b/crates/catalog/hms/tests/hms_catalog_test.rs @@ -23,7 +23,10 @@ use 
std::collections::HashMap; use std::sync::Arc; -use iceberg::io::{FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY}; +use iceberg::io::{ + FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS, S3_REGION, + S3_SECRET_ACCESS_KEY, +}; use iceberg::{Catalog, CatalogBuilder, Namespace, NamespaceIdent}; use iceberg_catalog_hms::{ HMS_CATALOG_PROP_THRIFT_TRANSPORT, HMS_CATALOG_PROP_URI, HMS_CATALOG_PROP_WAREHOUSE, @@ -56,11 +59,11 @@ async fn get_catalog() -> HmsCatalog { (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()), (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()), (S3_REGION.to_string(), "us-east-1".to_string()), + (S3_PATH_STYLE_ACCESS.to_string(), "true".to_string()), ]); // Wait for bucket to actually exist let file_io = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 { - configured_scheme: "s3a".to_string(), customized_credential_load: None, })) .with_props(props.clone()) @@ -79,7 +82,6 @@ async fn get_catalog() -> HmsCatalog { HmsCatalogBuilder::default() .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 { - configured_scheme: "s3a".to_string(), customized_credential_load: None, })) .load("hms", props) diff --git a/crates/catalog/loader/tests/common/mod.rs b/crates/catalog/loader/tests/common/mod.rs index 600cd9b6f4..1d40fef357 100644 --- a/crates/catalog/loader/tests/common/mod.rs +++ b/crates/catalog/loader/tests/common/mod.rs @@ -24,8 +24,8 @@ use std::fmt; use std::sync::Arc; use iceberg::io::{ - FileIOBuilder, LocalFsStorageFactory, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, - S3_SECRET_ACCESS_KEY, + FileIOBuilder, LocalFsStorageFactory, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS, + S3_REGION, S3_SECRET_ACCESS_KEY, }; use iceberg::memory::{MEMORY_CATALOG_WAREHOUSE, MemoryCatalogBuilder}; use iceberg::spec::{NestedField, PrimitiveType, Schema, Type}; @@ -229,10 +229,10 @@ async fn glue_catalog() -> GlueCatalog { (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()), (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()), (S3_REGION.to_string(), "us-east-1".to_string()), + (S3_PATH_STYLE_ACCESS.to_string(), "true".to_string()), ]); let file_io = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 { - configured_scheme: "s3a".to_string(), customized_credential_load: None, })) .with_props(props.clone()) @@ -280,10 +280,10 @@ async fn hms_catalog() -> HmsCatalog { (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()), (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()), (S3_REGION.to_string(), "us-east-1".to_string()), + (S3_PATH_STYLE_ACCESS.to_string(), "true".to_string()), ]); let file_io = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 { - configured_scheme: "s3a".to_string(), customized_credential_load: None, })) .with_props(props.clone()) @@ -300,7 +300,6 @@ async fn hms_catalog() -> HmsCatalog { HmsCatalogBuilder::default() .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 { - configured_scheme: "s3a".to_string(), customized_credential_load: None, })) .load("hms", props) diff --git a/crates/catalog/s3tables/Cargo.toml b/crates/catalog/s3tables/Cargo.toml index 2fe096fec9..dc7be3027f 100644 --- a/crates/catalog/s3tables/Cargo.toml +++ b/crates/catalog/s3tables/Cargo.toml @@ -39,6 +39,11 @@ iceberg-storage-opendal = { workspace = true, features = ["opendal-s3"] } [dev-dependencies] +arrow-array = { workspace = true } +arrow-schema = { workspace = true } +futures = { workspace = true } iceberg_test_utils = { path = "../../test_utils", features = ["tests"] } itertools = { workspace = 
true }
+parquet = { workspace = true }
 tokio = { workspace = true }
+uuid = { workspace = true }
diff --git a/crates/catalog/s3tables/src/catalog.rs b/crates/catalog/s3tables/src/catalog.rs
index b88bd77d29..cc43446943 100644
--- a/crates/catalog/s3tables/src/catalog.rs
+++ b/crates/catalog/s3tables/src/catalog.rs
@@ -202,7 +202,6 @@ impl S3TablesCatalog {
         // Use provided factory or default to OpenDalStorageFactory::S3
         let factory = storage_factory.unwrap_or_else(|| {
             Arc::new(OpenDalStorageFactory::S3 {
-                configured_scheme: "s3a".to_string(),
                 customized_credential_load: None,
             })
         });
@@ -707,6 +706,7 @@ where T: std::fmt::Debug {
 #[cfg(test)]
 mod tests {
+    use futures::TryStreamExt;
     use iceberg::spec::{NestedField, PrimitiveType, Schema, Type};
     use iceberg::transaction::{ApplyTransactionAction, Transaction};
@@ -1175,4 +1175,108 @@ mod tests {
             assert_eq!(err.message(), "Catalog name cannot be empty");
         }
     }
+
+    /// Verify that an S3 Table catalog can create a table, write data, load the same table, and read from it.
+    #[tokio::test]
+    async fn test_s3tables_create_table_write_load_table_read() {
+        use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder;
+        use iceberg::writer::file_writer::ParquetWriterBuilder;
+        use iceberg::writer::file_writer::location_generator::{
+            DefaultFileNameGenerator, DefaultLocationGenerator,
+        };
+        use iceberg::writer::file_writer::rolling_writer::RollingFileWriterBuilder;
+        use iceberg::writer::{IcebergWriter, IcebergWriterBuilder};
+
+        let catalog = match load_s3tables_catalog_from_env().await {
+            Ok(Some(c)) => c,
+            Ok(None) => return,
+            Err(e) => panic!("Error loading catalog: {e}"),
+        };
+
+        let ns = NamespaceIdent::new(format!("test_rw_{}", uuid::Uuid::new_v4().simple()));
+        catalog.create_namespace(&ns, HashMap::new()).await.unwrap();
+
+        let table_name = String::from("table");
+
+        let schema = Schema::builder()
+            .with_fields(vec![
+                NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(),
+            ])
+            .build()
+            .unwrap();
+        let creation = TableCreation::builder()
+            .name(table_name.clone())
+            .schema(schema)
+            .build();
+
+        let table = catalog.create_table(&ns, creation).await.unwrap();
+
+        // Write one row.
+        let arrow_schema: Arc<arrow_schema::Schema> = Arc::new(
+            table
+                .metadata()
+                .current_schema()
+                .as_ref()
+                .try_into()
+                .unwrap(),
+        );
+        let batch = arrow_array::RecordBatch::try_new(arrow_schema, vec![Arc::new(
+            arrow_array::Int32Array::from(vec![42]),
+        )])
+        .unwrap();
+
+        // Locations will be generated based on the table metadata, which will be using `s3://` for Amazon S3 Tables.
+        let location_generator = DefaultLocationGenerator::new(table.metadata().clone()).unwrap();
+        let file_name_generator = DefaultFileNameGenerator::new(
+            "test".to_string(),
+            None,
+            iceberg::spec::DataFileFormat::Parquet,
+        );
+        let parquet_writer_builder = ParquetWriterBuilder::new(
+            parquet::file::properties::WriterProperties::default(),
+            table.metadata().current_schema().clone(),
+        );
+        let rw = RollingFileWriterBuilder::new_with_default_file_size(
+            parquet_writer_builder,
+            table.file_io().clone(),
+            location_generator,
+            file_name_generator,
+        );
+        let mut writer = DataFileWriterBuilder::new(rw).build(None).await.unwrap();
+        writer.write(batch.clone()).await.unwrap();
+        let data_files = writer.close().await.unwrap();
+
+        let tx = Transaction::new(&table);
+        let tx = tx
+            .fast_append()
+            .add_data_files(data_files)
+            .apply(tx)
+            .unwrap();
+        tx.commit(&catalog).await.unwrap();
+
+        // Reload from catalog and read back.
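+        // load_table returns a fresh Table instance from the catalog, so the scan
+        // below exercises the full metadata -> manifest -> data file read path.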
+        let table_ident = TableIdent::new(ns.clone(), table_name.clone());
+        let reloaded = catalog.load_table(&table_ident).await.unwrap();
+        let batches: Vec<arrow_array::RecordBatch> = reloaded
+            .scan()
+            .select_all()
+            .build()
+            .expect("scan to be valid (snapshot exists, schema is OK)")
+            .to_arrow()
+            .await
+            .expect("scan tasks should be OK")
+            .try_collect()
+            .await
+            .expect("scan should complete successfully");
+
+        assert_eq!(batches.len(), 1);
+        assert_eq!(
+            batches[0], batch,
+            "read records should match records written earlier"
+        );
+
+        // Clean up.
+        catalog.purge_table(&table_ident).await.ok();
+        catalog.drop_namespace(&ns).await.ok();
+    }
 }
diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml
index aa1d0cd4a5..18729176dc 100644
--- a/crates/iceberg/Cargo.toml
+++ b/crates/iceberg/Cargo.toml
@@ -91,6 +91,7 @@ rand = { workspace = true }
 regex = { workspace = true }
 tempfile = { workspace = true }
 minijinja = { workspace = true }
+serde_arrow = { version = "0.14", features = ["arrow-58"] }
 
 [package.metadata.cargo-machete]
 # These dependencies are added to ensure minimal dependency version
diff --git a/crates/iceberg/src/arrow/int96.rs b/crates/iceberg/src/arrow/int96.rs
new file mode 100644
index 0000000000..63a7a30f1a
--- /dev/null
+++ b/crates/iceberg/src/arrow/int96.rs
@@ -0,0 +1,578 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! INT96 timestamp coercion for Parquet files.
+
+use std::sync::Arc;
+
+use arrow_schema::{
+    DataType, Field, Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, TimeUnit,
+};
+use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
+
+use crate::arrow::schema::{ArrowSchemaVisitor, DEFAULT_MAP_FIELD_NAME, visit_schema};
+use crate::error::Result;
+use crate::spec::{PrimitiveType, Schema, Type};
+use crate::{Error, ErrorKind};
+
+/// Coerce Arrow schema types for INT96 columns to match the Iceberg table schema.
+///
+/// arrow-rs defaults INT96 to `Timestamp(Nanosecond)`, which overflows i64 for dates outside
+/// ~1677-2262. We use arrow-rs's schema hint mechanism to read INT96 at the resolution
+/// specified by the Iceberg schema (`timestamp` → microsecond, `timestamp_ns` → nanosecond).
+///
+/// Iceberg Java handles this differently: it bypasses parquet-mr with a custom column reader
+/// (`GenericParquetReaders.TimestampInt96Reader`). We achieve the same result via schema hints.
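+///
+/// For scale: an i64 of nanoseconds since the Unix epoch can only represent
+/// instants between 1677-09-21 and 2262-04-11, while an i64 of microseconds
+/// covers roughly ±292,000 years, which is why microseconds are the safe default.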
+///
+/// References:
+/// - Iceberg spec primitive types: <https://iceberg.apache.org/spec/#primitive-types>
+/// - arrow-rs schema hint support: <https://docs.rs/parquet/latest/parquet/arrow/arrow_reader/struct.ArrowReaderOptions.html#method.with_schema>
+pub(crate) fn coerce_int96_timestamps(
+    arrow_schema: &ArrowSchemaRef,
+    iceberg_schema: &Schema,
+) -> Option<ArrowSchemaRef> {
+    let mut visitor = Int96CoercionVisitor::new(iceberg_schema);
+    let coerced = visit_schema(arrow_schema, &mut visitor).ok()?;
+    if visitor.changed {
+        Some(Arc::new(coerced))
+    } else {
+        None
+    }
+}
+
+/// Visitor that coerces `Timestamp(Nanosecond)` Arrow fields to the resolution
+/// indicated by the Iceberg schema.
+struct Int96CoercionVisitor<'a> {
+    iceberg_schema: &'a Schema,
+    // TODO(#2310): use FieldRef (Arc<Field>) once ArrowSchemaVisitor passes FieldRef.
+    field_stack: Vec<Field>,
+    changed: bool,
+}
+
+impl<'a> Int96CoercionVisitor<'a> {
+    fn new(iceberg_schema: &'a Schema) -> Self {
+        Self {
+            iceberg_schema,
+            field_stack: Vec::new(),
+            changed: false,
+        }
+    }
+
+    /// Determine the target TimeUnit for a Timestamp(Nanosecond) field based on the
+    /// Iceberg schema. Falls back to microsecond when field IDs are unavailable,
+    /// matching Iceberg Java behavior.
+    fn target_unit(&self, field: &Field) -> Option<TimeUnit> {
+        if !matches!(
+            field.data_type(),
+            DataType::Timestamp(TimeUnit::Nanosecond, _)
+        ) {
+            return None;
+        }
+
+        let target = field
+            .metadata()
+            .get(PARQUET_FIELD_ID_META_KEY)
+            .and_then(|id_str| id_str.parse::<i32>().ok())
+            .and_then(|field_id| self.iceberg_schema.field_by_id(field_id))
+            .and_then(|f| match &*f.field_type {
+                Type::Primitive(PrimitiveType::Timestamp | PrimitiveType::Timestamptz) => {
+                    Some(TimeUnit::Microsecond)
+                }
+                Type::Primitive(PrimitiveType::TimestampNs | PrimitiveType::TimestamptzNs) => {
+                    Some(TimeUnit::Nanosecond)
+                }
+                _ => None,
+            })
+            // Iceberg Java reads INT96 as microseconds by default
+            .unwrap_or(TimeUnit::Microsecond);
+
+        if target == TimeUnit::Nanosecond {
+            None
+        } else {
+            Some(target)
+        }
+    }
+}
+
+impl ArrowSchemaVisitor for Int96CoercionVisitor<'_> {
+    type T = Field;
+    type U = ArrowSchema;
+
+    fn before_field(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn after_field(&mut self, _field: &Field) -> Result<()> {
+        self.field_stack.pop();
+        Ok(())
+    }
+
+    fn before_list_element(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn after_list_element(&mut self, _field: &Field) -> Result<()> {
+        self.field_stack.pop();
+        Ok(())
+    }
+
+    fn before_map_key(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn after_map_key(&mut self, _field: &Field) -> Result<()> {
+        self.field_stack.pop();
+        Ok(())
+    }
+
+    fn before_map_value(&mut self, field: &Field) -> Result<()> {
+        self.field_stack.push(field.as_ref().clone());
+        Ok(())
+    }
+
+    fn after_map_value(&mut self, _field: &Field) -> Result<()> {
+        self.field_stack.pop();
+        Ok(())
+    }
+
+    fn schema(&mut self, schema: &ArrowSchema, values: Vec<Field>) -> Result<ArrowSchema> {
+        Ok(ArrowSchema::new_with_metadata(
+            values,
+            schema.metadata().clone(),
+        ))
+    }
+
+    fn r#struct(&mut self, _fields: &Fields, results: Vec<Field>) -> Result<Field> {
+        let field_info = self
+            .field_stack
+            .last()
+            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in struct"))?;
+        Ok(Field::new(
+            field_info.name(),
+            DataType::Struct(Fields::from(results)),
+            field_info.is_nullable(),
+        )
+        .with_metadata(field_info.metadata().clone()))
+    }
+
+    fn list(&mut self, list: &DataType, value: Field) -> Result<Field> {
+        let field_info = self
+            .field_stack
+            .last()
+            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in list"))?;
+        let list_type = match list {
+            DataType::List(_) => DataType::List(Arc::new(value)),
+            DataType::LargeList(_) => DataType::LargeList(Arc::new(value)),
+            DataType::FixedSizeList(_, size) => DataType::FixedSizeList(Arc::new(value), *size),
+            _ => {
+                return Err(Error::new(
+                    ErrorKind::Unexpected,
+                    format!("Expected list type, got {list}"),
+                ));
+            }
+        };
+        Ok(
+            Field::new(field_info.name(), list_type, field_info.is_nullable())
+                .with_metadata(field_info.metadata().clone()),
+        )
+    }
+
+    fn map(&mut self, map: &DataType, key_value: Field, value: Field) -> Result<Field> {
+        let field_info = self
+            .field_stack
+            .last()
+            .ok_or_else(|| Error::new(ErrorKind::Unexpected, "Field stack underflow in map"))?;
+        let sorted = match map {
+            DataType::Map(_, sorted) => *sorted,
+            _ => {
+                return Err(Error::new(
+                    ErrorKind::Unexpected,
+                    format!("Expected map type, got {map}"),
+                ));
+            }
+        };
+        let struct_field = Field::new(
+            DEFAULT_MAP_FIELD_NAME,
+            DataType::Struct(Fields::from(vec![key_value, value])),
+            false,
+        );
+        Ok(Field::new(
+            field_info.name(),
+            DataType::Map(Arc::new(struct_field), sorted),
+            field_info.is_nullable(),
+        )
+        .with_metadata(field_info.metadata().clone()))
+    }
+
+    fn primitive(&mut self, p: &DataType) -> Result<Field> {
+        let field_info = self.field_stack.last().ok_or_else(|| {
+            Error::new(ErrorKind::Unexpected, "Field stack underflow in primitive")
+        })?;
+
+        if let Some(target_unit) = self.target_unit(field_info) {
+            let tz = match field_info.data_type() {
+                DataType::Timestamp(_, tz) => tz.clone(),
+                _ => None,
+            };
+            self.changed = true;
+            Ok(Field::new(
+                field_info.name(),
+                DataType::Timestamp(target_unit, tz),
+                field_info.is_nullable(),
+            )
+            .with_metadata(field_info.metadata().clone()))
+        } else {
+            Ok(
+                Field::new(field_info.name(), p.clone(), field_info.is_nullable())
+                    .with_metadata(field_info.metadata().clone()),
+            )
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+    use std::sync::Arc;
+
+    use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit};
+    use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
+
+    use super::coerce_int96_timestamps;
+    use crate::spec::{ListType, MapType, NestedField, PrimitiveType, Schema, StructType, Type};
+
+    fn iceberg_schema_with_timestamp() -> Schema {
+        Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp)).into(),
+                NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(),
+            ])
+            .build()
+            .unwrap()
+    }
+
+    fn field_id_meta(id: i32) -> HashMap<String, String> {
+        HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), id.to_string())])
+    }
+
+    #[test]
+    fn test_coerce_timestamp_ns_to_us() {
+        let arrow_schema = Arc::new(ArrowSchema::new(vec![
+            Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true)
+                .with_metadata(field_id_meta(1)),
+            Field::new("id", DataType::Int32, false).with_metadata(field_id_meta(2)),
+        ]));
+        let iceberg = iceberg_schema_with_timestamp();
+
+        let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap();
+        assert_eq!(
+            coerced.field(0).data_type(),
+            &DataType::Timestamp(TimeUnit::Microsecond, None)
+        );
+        // Non-timestamp field unchanged
+        assert_eq!(coerced.field(1).data_type(), &DataType::Int32);
+    }
+
+    #[test]
+    fn test_coerce_timestamptz_ns_to_us() {
+        let iceberg = Schema::builder()
+            .with_schema_id(1)
+            .with_fields(vec![
+                NestedField::optional(1, "ts", 
Type::Primitive(PrimitiveType::Timestamptz)).into(), + ]) + .build() + .unwrap(); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new( + "ts", + DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())), + true, + ) + .with_metadata(field_id_meta(1)), + ])); + + let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap(); + assert_eq!( + coerced.field(0).data_type(), + &DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())) + ); + } + + #[test] + fn test_no_coercion_when_iceberg_is_timestamp_ns() { + let iceberg = Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::TimestampNs)).into(), + ]) + .build() + .unwrap(); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true) + .with_metadata(field_id_meta(1)), + ])); + + assert!(coerce_int96_timestamps(&arrow_schema, &iceberg).is_none()); + } + + #[test] + fn test_no_coercion_when_iceberg_is_timestamptz_ns() { + let iceberg = Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::TimestamptzNs)) + .into(), + ]) + .build() + .unwrap(); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new( + "ts", + DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into())), + true, + ) + .with_metadata(field_id_meta(1)), + ])); + + assert!(coerce_int96_timestamps(&arrow_schema, &iceberg).is_none()); + } + + #[test] + fn test_no_coercion_when_already_microsecond() { + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("ts", DataType::Timestamp(TimeUnit::Microsecond, None), true) + .with_metadata(field_id_meta(1)), + Field::new("id", DataType::Int32, false).with_metadata(field_id_meta(2)), + ])); + let iceberg = iceberg_schema_with_timestamp(); + + assert!(coerce_int96_timestamps(&arrow_schema, &iceberg).is_none()); + } + + // Without field IDs, the visitor can't look up the Iceberg type and falls back + // to microsecond to match Iceberg Java behavior. + #[test] + fn test_defaults_to_us_without_field_ids() { + let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new( + "ts", + DataType::Timestamp(TimeUnit::Nanosecond, None), + true, + )])); + let iceberg = iceberg_schema_with_timestamp(); + + let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap(); + assert_eq!( + coerced.field(0).data_type(), + &DataType::Timestamp(TimeUnit::Microsecond, None) + ); + } + + // Field ID exists but points to a non-timestamp Iceberg type. The field_by_id + // lookup succeeds but the match arm returns None, so unwrap_or falls back to + // microsecond. 
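+    // For example, an Arrow `Timestamp(Nanosecond)` column whose field ID resolves to
+    // an Iceberg `string` column is still rewritten to microseconds rather than left as-is.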
+ #[test] + fn test_defaults_to_us_when_iceberg_type_is_not_timestamp() { + let iceberg = Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build() + .unwrap(); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true) + .with_metadata(field_id_meta(1)), + ])); + + let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap(); + assert_eq!( + coerced.field(0).data_type(), + &DataType::Timestamp(TimeUnit::Microsecond, None) + ); + } + + #[test] + fn test_coerce_preserves_field_metadata() { + let mut meta = field_id_meta(1); + meta.insert("custom_key".to_string(), "custom_value".to_string()); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true) + .with_metadata(meta.clone()), + ])); + let iceberg = iceberg_schema_with_timestamp(); + + let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap(); + assert_eq!(coerced.field(0).metadata(), &meta); + } + + #[test] + fn test_coerce_timestamp_in_struct() { + let iceberg = Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required( + 1, + "data", + Type::Struct(StructType::new(vec![ + NestedField::optional(2, "ts", Type::Primitive(PrimitiveType::Timestamp)) + .into(), + ])), + ) + .into(), + ]) + .build() + .unwrap(); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new( + "data", + DataType::Struct( + vec![ + Field::new("ts", DataType::Timestamp(TimeUnit::Nanosecond, None), true) + .with_metadata(field_id_meta(2)), + ] + .into(), + ), + false, + ) + .with_metadata(field_id_meta(1)), + ])); + + let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap(); + let inner = match coerced.field(0).data_type() { + DataType::Struct(fields) => fields, + other => panic!("Expected Struct, got {other}"), + }; + assert_eq!( + inner[0].data_type(), + &DataType::Timestamp(TimeUnit::Microsecond, None) + ); + } + + #[test] + fn test_coerce_timestamp_in_list() { + let iceberg = Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::optional( + 1, + "timestamps", + Type::List(ListType { + element_field: NestedField::optional( + 2, + "element", + Type::Primitive(PrimitiveType::Timestamp), + ) + .into(), + }), + ) + .into(), + ]) + .build() + .unwrap(); + + let element_field = Field::new( + "element", + DataType::Timestamp(TimeUnit::Nanosecond, None), + true, + ) + .with_metadata(field_id_meta(2)); + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("timestamps", DataType::List(Arc::new(element_field)), true) + .with_metadata(field_id_meta(1)), + ])); + + let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap(); + let element_dt = match coerced.field(0).data_type() { + DataType::List(f) => f.data_type(), + other => panic!("Expected List, got {other}"), + }; + assert_eq!( + element_dt, + &DataType::Timestamp(TimeUnit::Microsecond, None) + ); + } + + #[test] + fn test_coerce_timestamp_in_map_value() { + let iceberg = Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::optional( + 1, + "ts_map", + Type::Map(MapType { + key_field: NestedField::required( + 2, + "key", + Type::Primitive(PrimitiveType::String), + ) + .into(), + value_field: NestedField::optional( + 3, + "value", + Type::Primitive(PrimitiveType::Timestamp), + ) + .into(), + }), + ) + .into(), + ]) + .build() + .unwrap(); + + 
let key_field = Field::new("key", DataType::Utf8, false).with_metadata(field_id_meta(2)); + let value_field = Field::new( + "value", + DataType::Timestamp(TimeUnit::Nanosecond, None), + true, + ) + .with_metadata(field_id_meta(3)); + let entries_field = Field::new( + "key_value", + DataType::Struct(vec![key_field, value_field].into()), + false, + ); + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new( + "ts_map", + DataType::Map(Arc::new(entries_field), false), + true, + ) + .with_metadata(field_id_meta(1)), + ])); + + let coerced = coerce_int96_timestamps(&arrow_schema, &iceberg).unwrap(); + let value_dt = match coerced.field(0).data_type() { + DataType::Map(entries, _) => match entries.data_type() { + DataType::Struct(fields) => fields[1].data_type().clone(), + other => panic!("Expected Struct inside Map, got {other}"), + }, + other => panic!("Expected Map, got {other}"), + }; + assert_eq!(value_dt, DataType::Timestamp(TimeUnit::Microsecond, None)); + } +} diff --git a/crates/iceberg/src/arrow/mod.rs b/crates/iceberg/src/arrow/mod.rs index c091c45177..7823320452 100644 --- a/crates/iceberg/src/arrow/mod.rs +++ b/crates/iceberg/src/arrow/mod.rs @@ -27,6 +27,7 @@ pub(crate) mod caching_delete_file_loader; pub mod delete_file_loader; pub(crate) mod delete_filter; +mod int96; mod reader; /// RecordBatch projection utilities pub mod record_batch_projector; diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs deleted file mode 100644 index 042a730e19..0000000000 --- a/crates/iceberg/src/arrow/reader.rs +++ /dev/null @@ -1,4670 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! 
Parquet file data reader
-
-use std::collections::{HashMap, HashSet};
-use std::ops::Range;
-use std::str::FromStr;
-use std::sync::Arc;
-
-use arrow_arith::boolean::{and, and_kleene, is_not_null, is_null, not, or, or_kleene};
-use arrow_array::{Array, ArrayRef, BooleanArray, Datum as ArrowDatum, RecordBatch, Scalar};
-use arrow_cast::cast::cast;
-use arrow_ord::cmp::{eq, gt, gt_eq, lt, lt_eq, neq};
-use arrow_schema::{
-    ArrowError, DataType, FieldRef, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef,
-};
-use arrow_string::like::starts_with;
-use bytes::Bytes;
-use fnv::FnvHashSet;
-use futures::future::BoxFuture;
-use futures::{FutureExt, StreamExt, TryFutureExt, TryStreamExt};
-use parquet::arrow::arrow_reader::{
-    ArrowPredicateFn, ArrowReaderMetadata, ArrowReaderOptions, RowFilter, RowSelection, RowSelector,
-};
-use parquet::arrow::async_reader::AsyncFileReader;
-use parquet::arrow::{PARQUET_FIELD_ID_META_KEY, ParquetRecordBatchStreamBuilder, ProjectionMask};
-use parquet::file::metadata::{
-    PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData,
-};
-use parquet::schema::types::{SchemaDescriptor, Type as ParquetType};
-use typed_builder::TypedBuilder;
-
-use crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader;
-use crate::arrow::record_batch_transformer::RecordBatchTransformerBuilder;
-use crate::arrow::{arrow_schema_to_schema, get_arrow_datum};
-use crate::delete_vector::DeleteVector;
-use crate::error::Result;
-use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit};
-use crate::expr::visitors::page_index_evaluator::PageIndexEvaluator;
-use crate::expr::visitors::row_group_metrics_evaluator::RowGroupMetricsEvaluator;
-use crate::expr::{BoundPredicate, BoundReference};
-use crate::io::{FileIO, FileMetadata, FileRead};
-use crate::metadata_columns::{RESERVED_FIELD_ID_FILE, is_metadata_field};
-use crate::scan::{ArrowRecordBatchStream, FileScanTask, FileScanTaskStream};
-use crate::spec::{Datum, NameMapping, NestedField, PrimitiveType, Schema, Type};
-use crate::utils::available_parallelism;
-use crate::{Error, ErrorKind};
-
-/// Default gap between byte ranges below which they are coalesced into a
-/// single request. Matches object_store's `OBJECT_STORE_COALESCE_DEFAULT`.
-const DEFAULT_RANGE_COALESCE_BYTES: u64 = 1024 * 1024;
-
-/// Default maximum number of coalesced byte ranges fetched concurrently.
-/// Matches object_store's `OBJECT_STORE_COALESCE_PARALLEL`.
-const DEFAULT_RANGE_FETCH_CONCURRENCY: usize = 10;
-
-/// Default number of bytes to prefetch when parsing Parquet footer metadata.
-/// Matches DataFusion's default `ParquetOptions::metadata_size_hint`.
-const DEFAULT_METADATA_SIZE_HINT: usize = 512 * 1024;
-
-/// Options for tuning Parquet file I/O.
-#[derive(Clone, Copy, Debug, TypedBuilder)]
-#[builder(field_defaults(setter(prefix = "with_")))]
-pub(crate) struct ParquetReadOptions {
-    /// Number of bytes to prefetch for parsing the Parquet metadata.
-    ///
-    /// This hint can help reduce the number of fetch requests. For more details see the
-    /// [ParquetMetaDataReader documentation](https://docs.rs/parquet/latest/parquet/file/metadata/struct.ParquetMetaDataReader.html#method.with_prefetch_hint).
-    ///
-    /// Defaults to 512 KiB, matching DataFusion's default `ParquetOptions::metadata_size_hint`.
-    #[builder(default = Some(DEFAULT_METADATA_SIZE_HINT))]
-    pub(crate) metadata_size_hint: Option<usize>,
-    /// Gap threshold for merging nearby byte ranges into a single request.
-    /// Ranges with gaps smaller than this value will be coalesced.
-    ///
-    /// Defaults to 1 MiB, matching object_store's `OBJECT_STORE_COALESCE_DEFAULT`.
-    #[builder(default = DEFAULT_RANGE_COALESCE_BYTES)]
-    pub(crate) range_coalesce_bytes: u64,
-    /// Maximum number of merged byte ranges to fetch concurrently.
-    ///
-    /// Defaults to 10, matching object_store's `OBJECT_STORE_COALESCE_PARALLEL`.
-    #[builder(default = DEFAULT_RANGE_FETCH_CONCURRENCY)]
-    pub(crate) range_fetch_concurrency: usize,
-    /// Whether to preload the column index when reading Parquet metadata.
-    #[builder(default = true)]
-    pub(crate) preload_column_index: bool,
-    /// Whether to preload the offset index when reading Parquet metadata.
-    #[builder(default = true)]
-    pub(crate) preload_offset_index: bool,
-    /// Whether to preload the page index when reading Parquet metadata.
-    #[builder(default = false)]
-    pub(crate) preload_page_index: bool,
-}
-
-impl ParquetReadOptions {
-    pub(crate) fn metadata_size_hint(&self) -> Option<usize> {
-        self.metadata_size_hint
-    }
-
-    pub(crate) fn range_coalesce_bytes(&self) -> u64 {
-        self.range_coalesce_bytes
-    }
-
-    pub(crate) fn range_fetch_concurrency(&self) -> usize {
-        self.range_fetch_concurrency
-    }
-
-    pub(crate) fn preload_column_index(&self) -> bool {
-        self.preload_column_index
-    }
-
-    pub(crate) fn preload_offset_index(&self) -> bool {
-        self.preload_offset_index
-    }
-
-    pub(crate) fn preload_page_index(&self) -> bool {
-        self.preload_page_index
-    }
-}
-
-/// Builder to create ArrowReader
-pub struct ArrowReaderBuilder {
-    batch_size: Option<usize>,
-    file_io: FileIO,
-    concurrency_limit_data_files: usize,
-    row_group_filtering_enabled: bool,
-    row_selection_enabled: bool,
-    parquet_read_options: ParquetReadOptions,
-}
-
-impl ArrowReaderBuilder {
-    /// Create a new ArrowReaderBuilder
-    pub fn new(file_io: FileIO) -> Self {
-        let num_cpus = available_parallelism().get();
-
-        ArrowReaderBuilder {
-            batch_size: None,
-            file_io,
-            concurrency_limit_data_files: num_cpus,
-            row_group_filtering_enabled: true,
-            row_selection_enabled: false,
-            parquet_read_options: ParquetReadOptions::builder().build(),
-        }
-    }
-
-    /// Sets the max number of in flight data files that are being fetched
-    pub fn with_data_file_concurrency_limit(mut self, val: usize) -> Self {
-        self.concurrency_limit_data_files = val;
-        self
-    }
-
-    /// Sets the desired size of batches in the response
-    /// to something other than the default
-    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
-        self.batch_size = Some(batch_size);
-        self
-    }
-
-    /// Determines whether to enable row group filtering.
-    pub fn with_row_group_filtering_enabled(mut self, row_group_filtering_enabled: bool) -> Self {
-        self.row_group_filtering_enabled = row_group_filtering_enabled;
-        self
-    }
-
-    /// Determines whether to enable row selection.
-    pub fn with_row_selection_enabled(mut self, row_selection_enabled: bool) -> Self {
-        self.row_selection_enabled = row_selection_enabled;
-        self
-    }
-
-    /// Provide a hint as to the number of bytes to prefetch for parsing the Parquet metadata
-    ///
-    /// This hint can help reduce the number of fetch requests. For more details see the
-    /// [ParquetMetaDataReader documentation](https://docs.rs/parquet/latest/parquet/file/metadata/struct.ParquetMetaDataReader.html#method.with_prefetch_hint).
-    pub fn with_metadata_size_hint(mut self, metadata_size_hint: usize) -> Self {
-        self.parquet_read_options.metadata_size_hint = Some(metadata_size_hint);
-        self
-    }
-
-    /// Sets the gap threshold for merging nearby byte ranges into a single request.
-    /// Ranges with gaps smaller than this value will be coalesced.
-    ///
-    /// Defaults to 1 MiB, matching object_store's OBJECT_STORE_COALESCE_DEFAULT.
-    pub fn with_range_coalesce_bytes(mut self, range_coalesce_bytes: u64) -> Self {
-        self.parquet_read_options.range_coalesce_bytes = range_coalesce_bytes;
-        self
-    }
-
-    /// Sets the maximum number of merged byte ranges to fetch concurrently.
-    ///
-    /// Defaults to 10, matching object_store's OBJECT_STORE_COALESCE_PARALLEL.
-    pub fn with_range_fetch_concurrency(mut self, range_fetch_concurrency: usize) -> Self {
-        self.parquet_read_options.range_fetch_concurrency = range_fetch_concurrency;
-        self
-    }
-
-    /// Build the ArrowReader.
-    pub fn build(self) -> ArrowReader {
-        ArrowReader {
-            batch_size: self.batch_size,
-            file_io: self.file_io.clone(),
-            delete_file_loader: CachingDeleteFileLoader::new(
-                self.file_io.clone(),
-                self.concurrency_limit_data_files,
-            ),
-            concurrency_limit_data_files: self.concurrency_limit_data_files,
-            row_group_filtering_enabled: self.row_group_filtering_enabled,
-            row_selection_enabled: self.row_selection_enabled,
-            parquet_read_options: self.parquet_read_options,
-        }
-    }
-}
-
-/// Reads data from Parquet files
-#[derive(Clone)]
-pub struct ArrowReader {
-    batch_size: Option<usize>,
-    file_io: FileIO,
-    delete_file_loader: CachingDeleteFileLoader,
-
-    /// the maximum number of data files that can be fetched at the same time
-    concurrency_limit_data_files: usize,
-
-    row_group_filtering_enabled: bool,
-    row_selection_enabled: bool,
-    parquet_read_options: ParquetReadOptions,
-}
-
-impl ArrowReader {
-    /// Take a stream of FileScanTasks and reads all the files.
-    /// Returns a stream of Arrow RecordBatches containing the data from the files
-    pub fn read(self, tasks: FileScanTaskStream) -> Result<ArrowRecordBatchStream> {
-        let file_io = self.file_io.clone();
-        let batch_size = self.batch_size;
-        let concurrency_limit_data_files = self.concurrency_limit_data_files;
-        let row_group_filtering_enabled = self.row_group_filtering_enabled;
-        let row_selection_enabled = self.row_selection_enabled;
-        let parquet_read_options = self.parquet_read_options;
-
-        // Fast-path for single concurrency to avoid overhead of try_flatten_unordered
-        let stream: ArrowRecordBatchStream = if concurrency_limit_data_files == 1 {
-            Box::pin(
-                tasks
-                    .and_then(move |task| {
-                        let file_io = file_io.clone();
-
-                        Self::process_file_scan_task(
-                            task,
-                            batch_size,
-                            file_io,
-                            self.delete_file_loader.clone(),
-                            row_group_filtering_enabled,
-                            row_selection_enabled,
-                            parquet_read_options,
-                        )
-                    })
-                    .map_err(|err| {
-                        Error::new(ErrorKind::Unexpected, "file scan task generate failed")
-                            .with_source(err)
-                    })
-                    .try_flatten(),
-            )
-        } else {
-            Box::pin(
-                tasks
-                    .map_ok(move |task| {
-                        let file_io = file_io.clone();
-
-                        Self::process_file_scan_task(
-                            task,
-                            batch_size,
-                            file_io,
-                            self.delete_file_loader.clone(),
-                            row_group_filtering_enabled,
-                            row_selection_enabled,
-                            parquet_read_options,
-                        )
-                    })
-                    .map_err(|err| {
-                        Error::new(ErrorKind::Unexpected, "file scan task generate failed")
-                            .with_source(err)
-                    })
-                    .try_buffer_unordered(concurrency_limit_data_files)
-                    .try_flatten_unordered(concurrency_limit_data_files),
-            )
-        };
-
-        Ok(stream)
-    }
-
-    async fn process_file_scan_task(
-        task: FileScanTask,
-        batch_size: Option<usize>,
-        file_io: FileIO,
-        delete_file_loader: CachingDeleteFileLoader,
-        row_group_filtering_enabled: bool,
-        row_selection_enabled: bool,
-        parquet_read_options: ParquetReadOptions,
-    ) -> Result<ArrowRecordBatchStream> {
-        let should_load_page_index =
-            (row_selection_enabled && task.predicate.is_some()) || !task.deletes.is_empty();
-        let mut parquet_read_options = parquet_read_options;
-        parquet_read_options.preload_page_index = should_load_page_index;
-
-        let delete_filter_rx =
-            delete_file_loader.load_deletes(&task.deletes, Arc::clone(&task.schema));
-
-        // Open the Parquet file once, loading its metadata
-        let (parquet_file_reader, arrow_metadata) = Self::open_parquet_file(
-            &task.data_file_path,
-            &file_io,
-            task.file_size_in_bytes,
-            parquet_read_options,
-        )
-        .await?;
-
-        // Check if Parquet file has embedded field IDs
-        // Corresponds to Java's ParquetSchemaUtil.hasIds()
-        // Reference: parquet/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java:118
-        let missing_field_ids = arrow_metadata
-            .schema()
-            .fields()
-            .iter()
-            .next()
-            .is_some_and(|f| f.metadata().get(PARQUET_FIELD_ID_META_KEY).is_none());
-
-        // Three-branch schema resolution strategy matching Java's ReadConf constructor
-        //
-        // Per Iceberg spec Column Projection rules:
-        // "Columns in Iceberg data files are selected by field id. The table schema's column
-        // names and order may change after a data file is written, and projection must be done
-        // using field ids."
-        // https://iceberg.apache.org/spec/#column-projection
-        //
-        // When Parquet files lack field IDs (e.g., Hive/Spark migrations via add_files),
-        // we must assign field IDs BEFORE reading data to enable correct projection.
-        //
-        // Java's ReadConf determines field ID strategy:
-        // - Branch 1: hasIds(fileSchema) → trust embedded field IDs, use pruneColumns()
-        // - Branch 2: nameMapping present → applyNameMapping(), then pruneColumns()
-        // - Branch 3: fallback → addFallbackIds(), then pruneColumnsFallback()
-        let arrow_metadata = if missing_field_ids {
-            // Parquet file lacks field IDs - must assign them before reading
-            let arrow_schema = if let Some(name_mapping) = &task.name_mapping {
-                // Branch 2: Apply name mapping to assign correct Iceberg field IDs
-                // Per spec rule #2: "Use schema.name-mapping.default metadata to map field id
-                // to columns without field id"
-                // Corresponds to Java's ParquetSchemaUtil.applyNameMapping()
-                apply_name_mapping_to_arrow_schema(
-                    Arc::clone(arrow_metadata.schema()),
-                    name_mapping,
-                )?
-            } else {
-                // Branch 3: No name mapping - use position-based fallback IDs
-                // Corresponds to Java's ParquetSchemaUtil.addFallbackIds()
-                add_fallback_field_ids_to_arrow_schema(arrow_metadata.schema())
-            };
-
-            let options = ArrowReaderOptions::new().with_schema(arrow_schema);
-            ArrowReaderMetadata::try_new(Arc::clone(arrow_metadata.metadata()), options).map_err(
-                |e| {
-                    Error::new(
-                        ErrorKind::Unexpected,
-                        "Failed to create ArrowReaderMetadata with field ID schema",
-                    )
-                    .with_source(e)
-                },
-            )?
-        } else {
-            // Branch 1: File has embedded field IDs - trust them
-            arrow_metadata
-        };
-
-        // Build the stream reader, reusing the already-opened file reader
-        let mut record_batch_stream_builder =
-            ParquetRecordBatchStreamBuilder::new_with_metadata(parquet_file_reader, arrow_metadata);
-
-        // Filter out metadata fields for Parquet projection (they don't exist in files)
-        let project_field_ids_without_metadata: Vec<i32> = task
-            .project_field_ids
-            .iter()
-            .filter(|&&id| !is_metadata_field(id))
-            .copied()
-            .collect();
-
-        // Create projection mask based on field IDs
-        // - If file has embedded IDs: field-ID-based projection (missing_field_ids=false)
-        // - If name mapping applied: field-ID-based projection (missing_field_ids=true but IDs now match)
-        // - If fallback IDs: position-based projection (missing_field_ids=true)
-        let projection_mask = Self::get_arrow_projection_mask(
-            &project_field_ids_without_metadata,
-            &task.schema,
-            record_batch_stream_builder.parquet_schema(),
-            record_batch_stream_builder.schema(),
-            missing_field_ids, // Whether to use position-based (true) or field-ID-based (false) projection
-        )?;
-
-        record_batch_stream_builder =
-            record_batch_stream_builder.with_projection(projection_mask.clone());
-
-        // RecordBatchTransformer performs any transformations required on the RecordBatches
-        // that come back from the file, such as type promotion, default column insertion,
-        // column re-ordering, partition constants, and virtual field addition (like _file)
-        let mut record_batch_transformer_builder =
-            RecordBatchTransformerBuilder::new(task.schema_ref(), task.project_field_ids());
-
-        // Add the _file metadata column if it's in the projected fields
-        if task.project_field_ids().contains(&RESERVED_FIELD_ID_FILE) {
-            let file_datum = Datum::string(task.data_file_path.clone());
-            record_batch_transformer_builder =
-                record_batch_transformer_builder.with_constant(RESERVED_FIELD_ID_FILE, file_datum);
-        }
-
-        if let (Some(partition_spec), Some(partition_data)) =
-            (task.partition_spec.clone(), task.partition.clone())
-        {
-            record_batch_transformer_builder =
-                record_batch_transformer_builder.with_partition(partition_spec, partition_data)?;
-        }
-
-        let mut
record_batch_transformer = record_batch_transformer_builder.build(); - - if let Some(batch_size) = batch_size { - record_batch_stream_builder = record_batch_stream_builder.with_batch_size(batch_size); - } - - let delete_filter = delete_filter_rx.await.unwrap()?; - let delete_predicate = delete_filter.build_equality_delete_predicate(&task).await?; - - // In addition to the optional predicate supplied in the `FileScanTask`, - // we also have an optional predicate resulting from equality delete files. - // If both are present, we logical-AND them together to form a single filter - // predicate that we can pass to the `RecordBatchStreamBuilder`. - let final_predicate = match (&task.predicate, delete_predicate) { - (None, None) => None, - (Some(predicate), None) => Some(predicate.clone()), - (None, Some(ref predicate)) => Some(predicate.clone()), - (Some(filter_predicate), Some(delete_predicate)) => { - Some(filter_predicate.clone().and(delete_predicate)) - } - }; - - // There are three possible sources for potential lists of selected RowGroup indices, - // and two for `RowSelection`s. - // Selected RowGroup index lists can come from three sources: - // * When task.start and task.length specify a byte range (file splitting); - // * When there are equality delete files that are applicable; - // * When there is a scan predicate and row_group_filtering_enabled = true. - // `RowSelection`s can be created in either or both of the following cases: - // * When there are positional delete files that are applicable; - // * When there is a scan predicate and row_selection_enabled = true - // Note that row group filtering from predicates only happens when - // there is a scan predicate AND row_group_filtering_enabled = true, - // but we perform row selection filtering if there are applicable - // equality delete files OR (there is a scan predicate AND row_selection_enabled), - // since the only implemented method of applying positional deletes is - // by using a `RowSelection`. - let mut selected_row_group_indices = None; - let mut row_selection = None; - - // Filter row groups based on byte range from task.start and task.length. - // If both start and length are 0, read the entire file (backwards compatibility). 
-        if task.start != 0 || task.length != 0 {
-            let byte_range_filtered_row_groups = Self::filter_row_groups_by_byte_range(
-                record_batch_stream_builder.metadata(),
-                task.start,
-                task.length,
-            )?;
-            selected_row_group_indices = Some(byte_range_filtered_row_groups);
-        }
-
-        if let Some(predicate) = final_predicate {
-            let (iceberg_field_ids, field_id_map) = Self::build_field_id_set_and_map(
-                record_batch_stream_builder.parquet_schema(),
-                &predicate,
-            )?;
-
-            let row_filter = Self::get_row_filter(
-                &predicate,
-                record_batch_stream_builder.parquet_schema(),
-                &iceberg_field_ids,
-                &field_id_map,
-            )?;
-            record_batch_stream_builder = record_batch_stream_builder.with_row_filter(row_filter);
-
-            if row_group_filtering_enabled {
-                let predicate_filtered_row_groups = Self::get_selected_row_group_indices(
-                    &predicate,
-                    record_batch_stream_builder.metadata(),
-                    &field_id_map,
-                    &task.schema,
-                )?;
-
-                // Merge predicate-based filtering with byte range filtering (if present)
-                // by taking the intersection of both filters
-                selected_row_group_indices = match selected_row_group_indices {
-                    Some(byte_range_filtered) => {
-                        // Keep only row groups that are in both filters
-                        let intersection: Vec<usize> = byte_range_filtered
-                            .into_iter()
-                            .filter(|idx| predicate_filtered_row_groups.contains(idx))
-                            .collect();
-                        Some(intersection)
-                    }
-                    None => Some(predicate_filtered_row_groups),
-                };
-            }
-
-            if row_selection_enabled {
-                row_selection = Some(Self::get_row_selection_for_filter_predicate(
-                    &predicate,
-                    record_batch_stream_builder.metadata(),
-                    &selected_row_group_indices,
-                    &field_id_map,
-                    &task.schema,
-                )?);
-            }
-        }
-
-        let positional_delete_indexes = delete_filter.get_delete_vector(&task);
-
-        if let Some(positional_delete_indexes) = positional_delete_indexes {
-            let delete_row_selection = {
-                let positional_delete_indexes = positional_delete_indexes.lock().unwrap();
-
-                Self::build_deletes_row_selection(
-                    record_batch_stream_builder.metadata().row_groups(),
-                    &selected_row_group_indices,
-                    &positional_delete_indexes,
-                )
-            }?;
-
-            // merge the row selection from the delete files with the row selection
-            // from the filter predicate, if there is one from the filter predicate
-            row_selection = match row_selection {
-                None => Some(delete_row_selection),
-                Some(filter_row_selection) => {
-                    Some(filter_row_selection.intersection(&delete_row_selection))
-                }
-            };
-        }
-
-        if let Some(row_selection) = row_selection {
-            record_batch_stream_builder =
-                record_batch_stream_builder.with_row_selection(row_selection);
-        }
-
-        if let Some(selected_row_group_indices) = selected_row_group_indices {
-            record_batch_stream_builder =
-                record_batch_stream_builder.with_row_groups(selected_row_group_indices);
-        }
-
-        // Build the batch stream and send all the RecordBatches that it generates
-        // to the requester.
-        let record_batch_stream =
-            record_batch_stream_builder
-                .build()?
-                .map(move |batch| match batch {
-                    Ok(batch) => {
-                        // Process the record batch (type promotion, column reordering, virtual fields, etc.)
-                        record_batch_transformer.process_record_batch(batch)
-                    }
-                    Err(err) => Err(err.into()),
-                });
-
-        Ok(Box::pin(record_batch_stream) as ArrowRecordBatchStream)
-    }
-
-    /// Opens a Parquet file and loads its metadata, returning both the reader and metadata.
-    /// The reader can be reused to build a `ParquetRecordBatchStreamBuilder` without
-    /// reopening the file.
-    pub(crate) async fn open_parquet_file(
-        data_file_path: &str,
-        file_io: &FileIO,
-        file_size_in_bytes: u64,
-        parquet_read_options: ParquetReadOptions,
-    ) -> Result<(ArrowFileReader, ArrowReaderMetadata)> {
-        let parquet_file = file_io.new_input(data_file_path)?;
-        let parquet_reader = parquet_file.reader().await?;
-        let mut reader = ArrowFileReader::new(
-            FileMetadata {
-                size: file_size_in_bytes,
-            },
-            parquet_reader,
-        )
-        .with_parquet_read_options(parquet_read_options);
-
-        let arrow_metadata = ArrowReaderMetadata::load_async(&mut reader, Default::default())
-            .await
-            .map_err(|e| {
-                Error::new(ErrorKind::Unexpected, "Failed to load Parquet metadata").with_source(e)
-            })?;
-
-        Ok((reader, arrow_metadata))
-    }
-
-    /// computes a `RowSelection` from positional delete indices.
-    ///
-    /// Using the Parquet page index, we build a `RowSelection` that rejects rows that are indicated
-    /// as having been deleted by a positional delete, taking into account any row groups that have
-    /// been skipped entirely by the filter predicate
-    fn build_deletes_row_selection(
-        row_group_metadata_list: &[RowGroupMetaData],
-        selected_row_groups: &Option<Vec<usize>>,
-        positional_deletes: &DeleteVector,
-    ) -> Result<RowSelection> {
-        let mut results: Vec<RowSelector> = Vec::new();
-        let mut selected_row_groups_idx = 0;
-        let mut current_row_group_base_idx: u64 = 0;
-        let mut delete_vector_iter = positional_deletes.iter();
-        let mut next_deleted_row_idx_opt = delete_vector_iter.next();
-
-        for (idx, row_group_metadata) in row_group_metadata_list.iter().enumerate() {
-            let row_group_num_rows = row_group_metadata.num_rows() as u64;
-            let next_row_group_base_idx = current_row_group_base_idx + row_group_num_rows;
-
-            // if row group selection is enabled,
-            if let Some(selected_row_groups) = selected_row_groups {
-                // if we've consumed all the selected row groups, we're done
-                if selected_row_groups_idx == selected_row_groups.len() {
-                    break;
-                }
-
-                if idx == selected_row_groups[selected_row_groups_idx] {
-                    // we're in a selected row group. Increment selected_row_groups_idx
-                    // so that next time around the for loop we're looking for the next
-                    // selected row group
-                    selected_row_groups_idx += 1;
-                } else {
-                    // Advance iterator past all deletes in the skipped row group.
-                    // advance_to() positions the iterator to the first delete >= next_row_group_base_idx.
-                    // However, if our cached next_deleted_row_idx_opt is in the skipped range,
-                    // we need to call next() to update the cache with the newly positioned value.
-                    delete_vector_iter.advance_to(next_row_group_base_idx);
-                    // Only update the cache if the cached value is stale (in the skipped range)
-                    if let Some(cached_idx) = next_deleted_row_idx_opt
-                        && cached_idx < next_row_group_base_idx
-                    {
-                        next_deleted_row_idx_opt = delete_vector_iter.next();
-                    }
-
-                    // still increment the current page base index but then skip to the next row group
-                    // in the file
-                    current_row_group_base_idx += row_group_num_rows;
-                    continue;
-                }
-            }
-
-            let mut next_deleted_row_idx = match next_deleted_row_idx_opt {
-                Some(next_deleted_row_idx) => {
-                    // if the index of the next deleted row is beyond this row group, add a selection for
-                    // the remainder of this row group and skip to the next row group
-                    if next_deleted_row_idx >= next_row_group_base_idx {
-                        results.push(RowSelector::select(row_group_num_rows as usize));
-                        current_row_group_base_idx += row_group_num_rows;
-                        continue;
-                    }
-
-                    next_deleted_row_idx
-                }
-
-                // If there are no more pos deletes, add a selector for the entirety of this row group.
-                _ => {
-                    results.push(RowSelector::select(row_group_num_rows as usize));
-                    current_row_group_base_idx += row_group_num_rows;
-                    continue;
-                }
-            };
-
-            let mut current_idx = current_row_group_base_idx;
-            'chunks: while next_deleted_row_idx < next_row_group_base_idx {
-                // `select` all rows that precede the next delete index
-                if current_idx < next_deleted_row_idx {
-                    let run_length = next_deleted_row_idx - current_idx;
-                    results.push(RowSelector::select(run_length as usize));
-                    current_idx += run_length;
-                }
-
-                // `skip` all consecutive deleted rows in the current row group
-                let mut run_length = 0;
-                while next_deleted_row_idx == current_idx
-                    && next_deleted_row_idx < next_row_group_base_idx
-                {
-                    run_length += 1;
-                    current_idx += 1;
-
-                    next_deleted_row_idx_opt = delete_vector_iter.next();
-                    next_deleted_row_idx = match next_deleted_row_idx_opt {
-                        Some(next_deleted_row_idx) => next_deleted_row_idx,
-                        _ => {
-                            // We've processed the final positional delete.
-                            // Conclude the skip and then break so that we select the remaining
-                            // rows in the row group and move on to the next row group
-                            results.push(RowSelector::skip(run_length));
-                            break 'chunks;
-                        }
-                    };
-                }
-                if run_length > 0 {
-                    results.push(RowSelector::skip(run_length));
-                }
-            }
-
-            if current_idx < next_row_group_base_idx {
-                results.push(RowSelector::select(
-                    (next_row_group_base_idx - current_idx) as usize,
-                ));
-            }
-
-            current_row_group_base_idx += row_group_num_rows;
-        }
-
-        Ok(results.into())
-    }
-
-    fn build_field_id_set_and_map(
-        parquet_schema: &SchemaDescriptor,
-        predicate: &BoundPredicate,
-    ) -> Result<(HashSet<i32>, HashMap<i32, usize>)> {
-        // Collects all Iceberg field IDs referenced in the filter predicate
-        let mut collector = CollectFieldIdVisitor {
-            field_ids: HashSet::default(),
-        };
-        visit(&mut collector, predicate)?;
-
-        let iceberg_field_ids = collector.field_ids();
-
-        // Without embedded field IDs, we fall back to position-based mapping for compatibility
-        let field_id_map = match build_field_id_map(parquet_schema)? {
-            Some(map) => map,
-            None => build_fallback_field_id_map(parquet_schema),
-        };
-
-        Ok((iceberg_field_ids, field_id_map))
-    }
-
-    /// Recursively extract leaf field IDs because Parquet projection works at the leaf column level.
-    /// Nested types (struct/list/map) are flattened in Parquet's columnar format.
-    fn include_leaf_field_id(field: &NestedField, field_ids: &mut Vec<i32>) {
-        match field.field_type.as_ref() {
-            Type::Primitive(_) => {
-                field_ids.push(field.id);
-            }
-            Type::Struct(struct_type) => {
-                for nested_field in struct_type.fields() {
-                    Self::include_leaf_field_id(nested_field, field_ids);
-                }
-            }
-            Type::List(list_type) => {
-                Self::include_leaf_field_id(&list_type.element_field, field_ids);
-            }
-            Type::Map(map_type) => {
-                Self::include_leaf_field_id(&map_type.key_field, field_ids);
-                Self::include_leaf_field_id(&map_type.value_field, field_ids);
-            }
-        }
-    }
-
-    fn get_arrow_projection_mask(
-        field_ids: &[i32],
-        iceberg_schema_of_task: &Schema,
-        parquet_schema: &SchemaDescriptor,
-        arrow_schema: &ArrowSchemaRef,
-        use_fallback: bool, // Whether file lacks embedded field IDs (e.g., migrated from Hive/Spark)
-    ) -> Result<ProjectionMask> {
-        fn type_promotion_is_valid(
-            file_type: Option<&PrimitiveType>,
-            projected_type: Option<&PrimitiveType>,
-        ) -> bool {
-            match (file_type, projected_type) {
-                (Some(lhs), Some(rhs)) if lhs == rhs => true,
-                (Some(PrimitiveType::Int), Some(PrimitiveType::Long)) => true,
-                (Some(PrimitiveType::Float), Some(PrimitiveType::Double)) => true,
-                (
-                    Some(PrimitiveType::Decimal {
-                        precision: file_precision,
-                        scale: file_scale,
-                    }),
-                    Some(PrimitiveType::Decimal {
-                        precision: requested_precision,
-                        scale: requested_scale,
-                    }),
-                ) if requested_precision >= file_precision && file_scale == requested_scale => true,
-                // Uuid will be store as Fixed(16) in parquet file, so the read back type will be Fixed(16).
-                (Some(PrimitiveType::Fixed(16)), Some(PrimitiveType::Uuid)) => true,
-                _ => false,
-            }
-        }
-
-        if field_ids.is_empty() {
-            return Ok(ProjectionMask::all());
-        }
-
-        if use_fallback {
-            // Position-based projection necessary because file lacks embedded field IDs
-            Self::get_arrow_projection_mask_fallback(field_ids, parquet_schema)
-        } else {
-            // Field-ID-based projection using embedded field IDs from Parquet metadata
-
-            // Parquet's columnar format requires leaf-level (not top-level struct/list/map) projection
-            let mut leaf_field_ids = vec![];
-            for field_id in field_ids {
-                let field = iceberg_schema_of_task.field_by_id(*field_id);
-                if let Some(field) = field {
-                    Self::include_leaf_field_id(field, &mut leaf_field_ids);
-                }
-            }
-
-            Self::get_arrow_projection_mask_with_field_ids(
-                &leaf_field_ids,
-                iceberg_schema_of_task,
-                parquet_schema,
-                arrow_schema,
-                type_promotion_is_valid,
-            )
-        }
-    }
-
-    /// Standard projection using embedded field IDs from Parquet metadata.
-    /// For iceberg-java compatibility with ParquetSchemaUtil.pruneColumns().
-    fn get_arrow_projection_mask_with_field_ids(
-        leaf_field_ids: &[i32],
-        iceberg_schema_of_task: &Schema,
-        parquet_schema: &SchemaDescriptor,
-        arrow_schema: &ArrowSchemaRef,
-        type_promotion_is_valid: fn(Option<&PrimitiveType>, Option<&PrimitiveType>) -> bool,
-    ) -> Result<ProjectionMask> {
-        let mut column_map = HashMap::new();
-        let fields = arrow_schema.fields();
-
-        // Pre-project only the fields that have been selected, possibly avoiding converting
-        // some Arrow types that are not yet supported.
-        let mut projected_fields: HashMap<FieldRef, i32> = HashMap::new();
-        let projected_arrow_schema = ArrowSchema::new_with_metadata(
-            fields.filter_leaves(|_, f| {
-                f.metadata()
-                    .get(PARQUET_FIELD_ID_META_KEY)
-                    .and_then(|field_id| i32::from_str(field_id).ok())
-                    .is_some_and(|field_id| {
-                        projected_fields.insert((*f).clone(), field_id);
-                        leaf_field_ids.contains(&field_id)
-                    })
-            }),
-            arrow_schema.metadata().clone(),
-        );
-        let iceberg_schema = arrow_schema_to_schema(&projected_arrow_schema)?;
-
-        fields.filter_leaves(|idx, field| {
-            let Some(field_id) = projected_fields.get(field).cloned() else {
-                return false;
-            };
-
-            let iceberg_field = iceberg_schema_of_task.field_by_id(field_id);
-            let parquet_iceberg_field = iceberg_schema.field_by_id(field_id);
-
-            if iceberg_field.is_none() || parquet_iceberg_field.is_none() {
-                return false;
-            }
-
-            if !type_promotion_is_valid(
-                parquet_iceberg_field
-                    .unwrap()
-                    .field_type
-                    .as_primitive_type(),
-                iceberg_field.unwrap().field_type.as_primitive_type(),
-            ) {
-                return false;
-            }
-
-            column_map.insert(field_id, idx);
-            true
-        });
-
-        // Schema evolution: New columns may not exist in old Parquet files.
-        // We only project existing columns; RecordBatchTransformer adds default/NULL values.
-        let mut indices = vec![];
-        for field_id in leaf_field_ids {
-            if let Some(col_idx) = column_map.get(field_id) {
-                indices.push(*col_idx);
-            }
-        }
-
-        if indices.is_empty() {
-            // Edge case: All requested columns are new (don't exist in file).
-            // Project all columns so RecordBatchTransformer has a batch to transform.
-            Ok(ProjectionMask::all())
-        } else {
-            Ok(ProjectionMask::leaves(parquet_schema, indices))
-        }
-    }
-
-    /// Fallback projection for Parquet files without field IDs.
-    /// Uses position-based matching: field ID N → column position N-1.
-    /// Projects entire top-level columns (including nested content) for iceberg-java compatibility.
-    fn get_arrow_projection_mask_fallback(
-        field_ids: &[i32],
-        parquet_schema: &SchemaDescriptor,
-    ) -> Result<ProjectionMask> {
-        // Position-based: field_id N → column N-1 (field IDs are 1-indexed)
-        let parquet_root_fields = parquet_schema.root_schema().get_fields();
-        let mut root_indices = vec![];
-
-        for field_id in field_ids.iter() {
-            let parquet_pos = (*field_id - 1) as usize;
-
-            if parquet_pos < parquet_root_fields.len() {
-                root_indices.push(parquet_pos);
-            }
-            // RecordBatchTransformer adds missing columns with NULL values
-        }
-
-        if root_indices.is_empty() {
-            Ok(ProjectionMask::all())
-        } else {
-            Ok(ProjectionMask::roots(parquet_schema, root_indices))
-        }
-    }
-
-    fn get_row_filter(
-        predicates: &BoundPredicate,
-        parquet_schema: &SchemaDescriptor,
-        iceberg_field_ids: &HashSet<i32>,
-        field_id_map: &HashMap<i32, usize>,
-    ) -> Result<RowFilter> {
-        // Collect Parquet column indices from field ids.
-        // If the field id is not found in Parquet schema, it will be ignored due to schema evolution.
-        let mut column_indices = iceberg_field_ids
-            .iter()
-            .filter_map(|field_id| field_id_map.get(field_id).cloned())
-            .collect::<Vec<_>>();
-        column_indices.sort();
-
-        // The converter that converts `BoundPredicates` to `ArrowPredicates`
-        let mut converter = PredicateConverter {
-            parquet_schema,
-            column_map: field_id_map,
-            column_indices: &column_indices,
-        };
-
-        // After collecting required leaf column indices used in the predicate,
-        // creates the projection mask for the Arrow predicates.
- let projection_mask = ProjectionMask::leaves(parquet_schema, column_indices.clone()); - let predicate_func = visit(&mut converter, predicates)?; - let arrow_predicate = ArrowPredicateFn::new(projection_mask, predicate_func); - Ok(RowFilter::new(vec![Box::new(arrow_predicate)])) - } - - fn get_selected_row_group_indices( - predicate: &BoundPredicate, - parquet_metadata: &Arc<ParquetMetaData>, - field_id_map: &HashMap<i32, usize>, - snapshot_schema: &Schema, - ) -> Result<Vec<usize>> { - let row_groups_metadata = parquet_metadata.row_groups(); - let mut results = Vec::with_capacity(row_groups_metadata.len()); - - for (idx, row_group_metadata) in row_groups_metadata.iter().enumerate() { - if RowGroupMetricsEvaluator::eval( - predicate, - row_group_metadata, - field_id_map, - snapshot_schema, - )? { - results.push(idx); - } - } - - Ok(results) - } - - fn get_row_selection_for_filter_predicate( - predicate: &BoundPredicate, - parquet_metadata: &Arc<ParquetMetaData>, - selected_row_groups: &Option<Vec<usize>>, - field_id_map: &HashMap<i32, usize>, - snapshot_schema: &Schema, - ) -> Result<RowSelection> { - let Some(column_index) = parquet_metadata.column_index() else { - return Err(Error::new( - ErrorKind::Unexpected, - "Parquet file metadata does not contain a column index", - )); - }; - - let Some(offset_index) = parquet_metadata.offset_index() else { - return Err(Error::new( - ErrorKind::Unexpected, - "Parquet file metadata does not contain an offset index", - )); - }; - - // If all row groups were filtered out, return an empty RowSelection (select no rows) - if let Some(selected_row_groups) = selected_row_groups - && selected_row_groups.is_empty() - { - return Ok(RowSelection::from(Vec::new())); - } - - let mut selected_row_groups_idx = 0; - - let page_index = column_index - .iter() - .enumerate() - .zip(offset_index) - .zip(parquet_metadata.row_groups()); - - let mut results = Vec::new(); - for (((idx, column_index), offset_index), row_group_metadata) in page_index { - if let Some(selected_row_groups) = selected_row_groups { - // skip row groups that aren't present in selected_row_groups - if idx == selected_row_groups[selected_row_groups_idx] { - selected_row_groups_idx += 1; - } else { - continue; - } - } - - let selections_for_page = PageIndexEvaluator::eval( - predicate, - column_index, - offset_index, - row_group_metadata, - field_id_map, - snapshot_schema, - )?; - - results.push(selections_for_page); - - if let Some(selected_row_groups) = selected_row_groups - && selected_row_groups_idx == selected_row_groups.len() - { - break; - } - } - - Ok(results.into_iter().flatten().collect::<Vec<_>>().into()) - } - - /// Filters row groups by byte range to support Iceberg's file splitting. - /// - /// Iceberg splits large files at row group boundaries, so we only read row groups - /// whose byte ranges overlap with [start, start+length). - fn filter_row_groups_by_byte_range( - parquet_metadata: &Arc<ParquetMetaData>, - start: u64, - length: u64, - ) -> Result<Vec<usize>> { - let row_groups = parquet_metadata.row_groups(); - let mut selected = Vec::new(); - let end = start + length; - - // Row groups are stored sequentially after the 4-byte magic header. - let mut current_byte_offset = 4u64; - - for (idx, row_group) in row_groups.iter().enumerate() { - let row_group_size = row_group.compressed_size() as u64; - let row_group_end = current_byte_offset + row_group_size; - - if current_byte_offset < end && start < row_group_end { - selected.push(idx); - } - - current_byte_offset = row_group_end; - } - - Ok(selected) - } -} - -/// Build the map of parquet field id to Parquet column index in the schema.
-/// Returns None if the Parquet file doesn't have field IDs embedded (e.g., migrated tables). -fn build_field_id_map(parquet_schema: &SchemaDescriptor) -> Result<Option<HashMap<i32, usize>>> { - let mut column_map = HashMap::new(); - - for (idx, field) in parquet_schema.columns().iter().enumerate() { - let field_type = field.self_type(); - match field_type { - ParquetType::PrimitiveType { basic_info, .. } => { - if !basic_info.has_id() { - return Ok(None); - } - column_map.insert(basic_info.id(), idx); - } - ParquetType::GroupType { .. } => { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Leaf column in schema should be primitive type but got {field_type:?}" - ), - )); - } - }; - } - - Ok(Some(column_map)) -} - -/// Build a fallback field ID map for Parquet files without embedded field IDs. -/// Position-based (1, 2, 3, ...) for compatibility with iceberg-java migrations. -fn build_fallback_field_id_map(parquet_schema: &SchemaDescriptor) -> HashMap<i32, usize> { - let mut column_map = HashMap::new(); - - // 1-indexed to match iceberg-java's convention - for (idx, _field) in parquet_schema.columns().iter().enumerate() { - let field_id = (idx + 1) as i32; - column_map.insert(field_id, idx); - } - - column_map -} - -/// Apply name mapping to Arrow schema for Parquet files lacking field IDs. -/// -/// Assigns Iceberg field IDs based on column names using the name mapping, -/// enabling correct projection on migrated files (e.g., from Hive/Spark via add_files). -/// -/// Per Iceberg spec Column Projection rule #2: -/// "Use schema.name-mapping.default metadata to map field id to columns without field id" -/// https://iceberg.apache.org/spec/#column-projection -/// -/// Corresponds to Java's ParquetSchemaUtil.applyNameMapping() and ApplyNameMapping visitor. -/// The key difference is Java operates on Parquet MessageType, while we operate on Arrow Schema. -/// -/// # Arguments -/// * `arrow_schema` - Arrow schema from Parquet file (without field IDs) -/// * `name_mapping` - Name mapping from table metadata (TableProperties.DEFAULT_NAME_MAPPING) -/// -/// # Returns -/// Arrow schema with field IDs assigned based on name mapping -fn apply_name_mapping_to_arrow_schema( - arrow_schema: ArrowSchemaRef, - name_mapping: &NameMapping, -) -> Result<Arc<ArrowSchema>> { - debug_assert!( - arrow_schema - .fields() - .iter() - .next() - .is_none_or(|f| f.metadata().get(PARQUET_FIELD_ID_META_KEY).is_none()), - "Schema already has field IDs - name mapping should not be applied" - ); - - use arrow_schema::Field; - - let fields_with_mapped_ids: Vec<_> = arrow_schema - .fields() - .iter() - .map(|field| { - // Look up this column name in name mapping to get the Iceberg field ID. - // Corresponds to Java's ApplyNameMapping visitor which calls - // nameMapping.find(currentPath()) and returns field.withId() if found. - // - // If the field isn't in the mapping, leave it WITHOUT assigning an ID - // (matching Java's behavior of returning the field unchanged). - // Later, during projection, fields without IDs are filtered out.
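- // (Hypothetical example of a `schema.name-mapping.default` entry: - // `[{"field-id": 1, "names": ["id", "record_id"]}]` assigns field ID 1 to a Parquet - // column named either "id" or "record_id".)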
- let mapped_field_opt = name_mapping - .fields() - .iter() - .find(|f| f.names().contains(&field.name().to_string())); - - let mut metadata = field.metadata().clone(); - - if let Some(mapped_field) = mapped_field_opt - && let Some(field_id) = mapped_field.field_id() - { - // Field found in mapping with a field_id → assign it - metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field_id.to_string()); - } - // If field_id is None, leave the field without an ID (will be filtered by projection) - - Field::new(field.name(), field.data_type().clone(), field.is_nullable()) - .with_metadata(metadata) - }) - .collect(); - - Ok(Arc::new(ArrowSchema::new_with_metadata( - fields_with_mapped_ids, - arrow_schema.metadata().clone(), - ))) -} - -/// Add position-based fallback field IDs to Arrow schema for Parquet files lacking them. -/// Enables projection on migrated files (e.g., from Hive/Spark). -/// -/// Why at schema level (not per-batch): Efficiency - avoids repeated schema modification. -/// Why only top-level: Nested projection uses leaf column indices, not parent struct IDs. -/// Why 1-indexed: Compatibility with iceberg-java's ParquetSchemaUtil.addFallbackIds(). -fn add_fallback_field_ids_to_arrow_schema(arrow_schema: &ArrowSchemaRef) -> Arc<ArrowSchema> { - debug_assert!( - arrow_schema - .fields() - .iter() - .next() - .is_none_or(|f| f.metadata().get(PARQUET_FIELD_ID_META_KEY).is_none()), - "Schema already has field IDs" - ); - - use arrow_schema::Field; - - let fields_with_fallback_ids: Vec<_> = arrow_schema - .fields() - .iter() - .enumerate() - .map(|(pos, field)| { - let mut metadata = field.metadata().clone(); - let field_id = (pos + 1) as i32; // 1-indexed for Java compatibility - metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field_id.to_string()); - - Field::new(field.name(), field.data_type().clone(), field.is_nullable()) - .with_metadata(metadata) - }) - .collect(); - - Arc::new(ArrowSchema::new_with_metadata( - fields_with_fallback_ids, - arrow_schema.metadata().clone(), - )) -} - -/// A visitor to collect field ids from bound predicates.
-struct CollectFieldIdVisitor { - field_ids: HashSet, -} - -impl CollectFieldIdVisitor { - fn field_ids(self) -> HashSet { - self.field_ids - } -} - -impl BoundPredicateVisitor for CollectFieldIdVisitor { - type T = (); - - fn always_true(&mut self) -> Result<()> { - Ok(()) - } - - fn always_false(&mut self) -> Result<()> { - Ok(()) - } - - fn and(&mut self, _lhs: (), _rhs: ()) -> Result<()> { - Ok(()) - } - - fn or(&mut self, _lhs: (), _rhs: ()) -> Result<()> { - Ok(()) - } - - fn not(&mut self, _inner: ()) -> Result<()> { - Ok(()) - } - - fn is_null(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> { - self.field_ids.insert(reference.field().id); - Ok(()) - } - - fn not_null(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> { - self.field_ids.insert(reference.field().id); - Ok(()) - } - - fn is_nan(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> { - self.field_ids.insert(reference.field().id); - Ok(()) - } - - fn not_nan(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> { - self.field_ids.insert(reference.field().id); - Ok(()) - } - - fn less_than( - &mut self, - reference: &BoundReference, - _literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result<()> { - self.field_ids.insert(reference.field().id); - Ok(()) - } - - fn less_than_or_eq( - &mut self, - reference: &BoundReference, - _literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result<()> { - self.field_ids.insert(reference.field().id); - Ok(()) - } - - fn greater_than( - &mut self, - reference: &BoundReference, - _literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result<()> { - self.field_ids.insert(reference.field().id); - Ok(()) - } - - fn greater_than_or_eq( - &mut self, - reference: &BoundReference, - _literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result<()> { - self.field_ids.insert(reference.field().id); - Ok(()) - } - - fn eq( - &mut self, - reference: &BoundReference, - _literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result<()> { - self.field_ids.insert(reference.field().id); - Ok(()) - } - - fn not_eq( - &mut self, - reference: &BoundReference, - _literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result<()> { - self.field_ids.insert(reference.field().id); - Ok(()) - } - - fn starts_with( - &mut self, - reference: &BoundReference, - _literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result<()> { - self.field_ids.insert(reference.field().id); - Ok(()) - } - - fn not_starts_with( - &mut self, - reference: &BoundReference, - _literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result<()> { - self.field_ids.insert(reference.field().id); - Ok(()) - } - - fn r#in( - &mut self, - reference: &BoundReference, - _literals: &FnvHashSet, - _predicate: &BoundPredicate, - ) -> Result<()> { - self.field_ids.insert(reference.field().id); - Ok(()) - } - - fn not_in( - &mut self, - reference: &BoundReference, - _literals: &FnvHashSet, - _predicate: &BoundPredicate, - ) -> Result<()> { - self.field_ids.insert(reference.field().id); - Ok(()) - } -} - -/// A visitor to convert Iceberg bound predicates to Arrow predicates. -struct PredicateConverter<'a> { - /// The Parquet schema descriptor. - pub parquet_schema: &'a SchemaDescriptor, - /// The map between field id and leaf column index in Parquet schema. - pub column_map: &'a HashMap, - /// The required column indices in Parquet schema for the predicates. 
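- /// Sorted in ascending order; `bound_reference` maps a leaf column to its position in this list.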
- pub column_indices: &'a Vec<usize>, -} - -impl PredicateConverter<'_> { - /// When visiting a bound reference, we return index of the leaf column in the - /// required column indices which is used to project the column in the record batch. - /// Return None if the field id is not found in the column map, which is possible - /// due to schema evolution. - fn bound_reference(&mut self, reference: &BoundReference) -> Result<Option<usize>> { - // The leaf column's index in Parquet schema. - if let Some(column_idx) = self.column_map.get(&reference.field().id) { - if self.parquet_schema.get_column_root(*column_idx).is_group() { - return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Leaf column `{}` in predicates isn't a root column in Parquet schema.", - reference.field().name - ), - )); - } - - // The leaf column's index in the required column indices. - let index = self - .column_indices - .iter() - .position(|&idx| idx == *column_idx) - .ok_or(Error::new( - ErrorKind::DataInvalid, - format!( - "Leaf column `{}` in predicates cannot be found in the required column indices.", - reference.field().name - ), - ))?; - - Ok(Some(index)) - } else { - Ok(None) - } - } - - /// Build an Arrow predicate that always returns true. - fn build_always_true(&self) -> Result<Box<PredicateResult>> { - Ok(Box::new(|batch| { - Ok(BooleanArray::from(vec![true; batch.num_rows()])) - })) - } - - /// Build an Arrow predicate that always returns false. - fn build_always_false(&self) -> Result<Box<PredicateResult>> { - Ok(Box::new(|batch| { - Ok(BooleanArray::from(vec![false; batch.num_rows()])) - })) - } -} - -/// Gets the leaf column from the record batch for the required column index. Only -/// supports top-level columns for now. -fn project_column( - batch: &RecordBatch, - column_idx: usize, -) -> std::result::Result<ArrayRef, ArrowError> { - let column = batch.column(column_idx); - - match column.data_type() { - DataType::Struct(_) => Err(ArrowError::SchemaError( - "Does not support struct column yet.".to_string(), - )), - _ => Ok(column.clone()), - } -} - -type PredicateResult = - dyn FnMut(RecordBatch) -> std::result::Result<BooleanArray, ArrowError> + Send + 'static; - -impl BoundPredicateVisitor for PredicateConverter<'_> { - type T = Box<PredicateResult>; - - fn always_true(&mut self) -> Result<Box<PredicateResult>> { - self.build_always_true() - } - - fn always_false(&mut self) -> Result<Box<PredicateResult>> { - self.build_always_false() - } - - fn and( - &mut self, - mut lhs: Box<PredicateResult>, - mut rhs: Box<PredicateResult>, - ) -> Result<Box<PredicateResult>> { - Ok(Box::new(move |batch| { - let left = lhs(batch.clone())?; - let right = rhs(batch)?; - and_kleene(&left, &right) - })) - } - - fn or( - &mut self, - mut lhs: Box<PredicateResult>, - mut rhs: Box<PredicateResult>, - ) -> Result<Box<PredicateResult>> { - Ok(Box::new(move |batch| { - let left = lhs(batch.clone())?; - let right = rhs(batch)?; - or_kleene(&left, &right) - })) - } - - fn not(&mut self, mut inner: Box<PredicateResult>) -> Result<Box<PredicateResult>> { - Ok(Box::new(move |batch| { - let pred_ret = inner(batch)?; - not(&pred_ret) - })) - } - - fn is_null( - &mut self, - reference: &BoundReference, - _predicate: &BoundPredicate, - ) -> Result<Box<PredicateResult>> { - if let Some(idx) = self.bound_reference(reference)? { - Ok(Box::new(move |batch| { - let column = project_column(&batch, idx)?; - is_null(&column) - })) - } else { - // A missing column, treating it as null. - self.build_always_true() - } - } - - fn not_null( - &mut self, - reference: &BoundReference, - _predicate: &BoundPredicate, - ) -> Result<Box<PredicateResult>> { - if let Some(idx) = self.bound_reference(reference)? { - Ok(Box::new(move |batch| { - let column = project_column(&batch, idx)?; - is_not_null(&column) - })) - } else { - // A missing column, treating it as null.
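- // (Kleene semantics: `NULL IS NOT NULL` is false, so a column that is missing from - // this file can never satisfy NOT NULL.)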
- self.build_always_false() - } - } - - fn is_nan( - &mut self, - reference: &BoundReference, - _predicate: &BoundPredicate, - ) -> Result> { - if self.bound_reference(reference)?.is_some() { - self.build_always_true() - } else { - // A missing column, treating it as null. - self.build_always_false() - } - } - - fn not_nan( - &mut self, - reference: &BoundReference, - _predicate: &BoundPredicate, - ) -> Result> { - if self.bound_reference(reference)?.is_some() { - self.build_always_false() - } else { - // A missing column, treating it as null. - self.build_always_true() - } - } - - fn less_than( - &mut self, - reference: &BoundReference, - literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result> { - if let Some(idx) = self.bound_reference(reference)? { - let literal = get_arrow_datum(literal)?; - - Ok(Box::new(move |batch| { - let left = project_column(&batch, idx)?; - let literal = try_cast_literal(&literal, left.data_type())?; - lt(&left, literal.as_ref()) - })) - } else { - // A missing column, treating it as null. - self.build_always_true() - } - } - - fn less_than_or_eq( - &mut self, - reference: &BoundReference, - literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result> { - if let Some(idx) = self.bound_reference(reference)? { - let literal = get_arrow_datum(literal)?; - - Ok(Box::new(move |batch| { - let left = project_column(&batch, idx)?; - let literal = try_cast_literal(&literal, left.data_type())?; - lt_eq(&left, literal.as_ref()) - })) - } else { - // A missing column, treating it as null. - self.build_always_true() - } - } - - fn greater_than( - &mut self, - reference: &BoundReference, - literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result> { - if let Some(idx) = self.bound_reference(reference)? { - let literal = get_arrow_datum(literal)?; - - Ok(Box::new(move |batch| { - let left = project_column(&batch, idx)?; - let literal = try_cast_literal(&literal, left.data_type())?; - gt(&left, literal.as_ref()) - })) - } else { - // A missing column, treating it as null. - self.build_always_false() - } - } - - fn greater_than_or_eq( - &mut self, - reference: &BoundReference, - literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result> { - if let Some(idx) = self.bound_reference(reference)? { - let literal = get_arrow_datum(literal)?; - - Ok(Box::new(move |batch| { - let left = project_column(&batch, idx)?; - let literal = try_cast_literal(&literal, left.data_type())?; - gt_eq(&left, literal.as_ref()) - })) - } else { - // A missing column, treating it as null. - self.build_always_false() - } - } - - fn eq( - &mut self, - reference: &BoundReference, - literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result> { - if let Some(idx) = self.bound_reference(reference)? { - let literal = get_arrow_datum(literal)?; - - Ok(Box::new(move |batch| { - let left = project_column(&batch, idx)?; - let literal = try_cast_literal(&literal, left.data_type())?; - eq(&left, literal.as_ref()) - })) - } else { - // A missing column, treating it as null. - self.build_always_false() - } - } - - fn not_eq( - &mut self, - reference: &BoundReference, - literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result> { - if let Some(idx) = self.bound_reference(reference)? { - let literal = get_arrow_datum(literal)?; - - Ok(Box::new(move |batch| { - let left = project_column(&batch, idx)?; - let literal = try_cast_literal(&literal, left.data_type())?; - neq(&left, literal.as_ref()) - })) - } else { - // A missing column, treating it as null. 
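- // (`NULL != literal` evaluates to unknown, which excludes the row, hence always-false.)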
- self.build_always_false() - } - } - - fn starts_with( - &mut self, - reference: &BoundReference, - literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result<Box<PredicateResult>> { - if let Some(idx) = self.bound_reference(reference)? { - let literal = get_arrow_datum(literal)?; - - Ok(Box::new(move |batch| { - let left = project_column(&batch, idx)?; - let literal = try_cast_literal(&literal, left.data_type())?; - starts_with(&left, literal.as_ref()) - })) - } else { - // A missing column, treating it as null. - self.build_always_false() - } - } - - fn not_starts_with( - &mut self, - reference: &BoundReference, - literal: &Datum, - _predicate: &BoundPredicate, - ) -> Result<Box<PredicateResult>> { - if let Some(idx) = self.bound_reference(reference)? { - let literal = get_arrow_datum(literal)?; - - Ok(Box::new(move |batch| { - let left = project_column(&batch, idx)?; - let literal = try_cast_literal(&literal, left.data_type())?; - // update here if arrow ever adds a native not_starts_with - not(&starts_with(&left, literal.as_ref())?) - })) - } else { - // A missing column, treating it as null. - self.build_always_true() - } - } - - fn r#in( - &mut self, - reference: &BoundReference, - literals: &FnvHashSet<Datum>, - _predicate: &BoundPredicate, - ) -> Result<Box<PredicateResult>> { - if let Some(idx) = self.bound_reference(reference)? { - let literals: Vec<_> = literals - .iter() - .map(|lit| get_arrow_datum(lit).unwrap()) - .collect(); - - Ok(Box::new(move |batch| { - // update this if arrow ever adds a native is_in kernel - let left = project_column(&batch, idx)?; - - let mut acc = BooleanArray::from(vec![false; batch.num_rows()]); - for literal in &literals { - let literal = try_cast_literal(literal, left.data_type())?; - acc = or(&acc, &eq(&left, literal.as_ref())?)? - } - - Ok(acc) - })) - } else { - // A missing column, treating it as null. - self.build_always_false() - } - } - - fn not_in( - &mut self, - reference: &BoundReference, - literals: &FnvHashSet<Datum>, - _predicate: &BoundPredicate, - ) -> Result<Box<PredicateResult>> { - if let Some(idx) = self.bound_reference(reference)? { - let literals: Vec<_> = literals - .iter() - .map(|lit| get_arrow_datum(lit).unwrap()) - .collect(); - - Ok(Box::new(move |batch| { - // update this if arrow ever adds a native not_in kernel - let left = project_column(&batch, idx)?; - let mut acc = BooleanArray::from(vec![true; batch.num_rows()]); - for literal in &literals { - let literal = try_cast_literal(literal, left.data_type())?; - acc = and(&acc, &neq(&left, literal.as_ref())?)? - } - - Ok(acc) - })) - } else { - // A missing column, treating it as null. - self.build_always_true() - } - } -} - -/// ArrowFileReader is a wrapper around a FileRead that implements parquet's AsyncFileReader. -pub struct ArrowFileReader { - meta: FileMetadata, - parquet_read_options: ParquetReadOptions, - r: Box<dyn FileRead>, -} - -impl ArrowFileReader { - /// Create a new ArrowFileReader - pub fn new(meta: FileMetadata, r: Box<dyn FileRead>) -> Self { - Self { - meta, - parquet_read_options: ParquetReadOptions::builder().build(), - r, - } - } - - /// Configure all Parquet read options. - pub(crate) fn with_parquet_read_options(mut self, options: ParquetReadOptions) -> Self { - self.parquet_read_options = options; - self - } -} - -impl AsyncFileReader for ArrowFileReader { - fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, parquet::errors::Result<Bytes>> { - Box::pin( - self.r - .read(range.start..range.end) - .map_err(|err| parquet::errors::ParquetError::External(Box::new(err))), - ) - } - - /// Override the default `get_byte_ranges` which calls `get_bytes` sequentially.
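- /// (Worked example, not from the original source: with coalesce_bytes = 1 MiB, requested - /// ranges [0..100, 200..300, 5_000_000..5_000_100] merge into fetches [0..300, - /// 5_000_000..5_000_100], and each requested slice is cut back out of the merged buffers.)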
- /// The parquet reader calls this to fetch column chunks for a row group, so - /// without this override each column chunk is a serial round-trip to object storage. - /// Adapted from object_store's `coalesce_ranges` in `util.rs`. - fn get_byte_ranges( - &mut self, - ranges: Vec<Range<u64>>, - ) -> BoxFuture<'_, parquet::errors::Result<Vec<Bytes>>> { - let coalesce_bytes = self.parquet_read_options.range_coalesce_bytes(); - let concurrency = self.parquet_read_options.range_fetch_concurrency().max(1); - - async move { - // Merge nearby ranges to reduce the number of object store requests. - let fetch_ranges = merge_ranges(&ranges, coalesce_bytes); - let r = &self.r; - - // Fetch merged ranges concurrently. - let fetched: Vec<Bytes> = futures::stream::iter(fetch_ranges.iter().cloned()) - .map(|range| async move { - r.read(range) - .await - .map_err(|e| parquet::errors::ParquetError::External(Box::new(e))) - }) - .buffered(concurrency) - .try_collect() - .await?; - - // Slice the fetched data back into the originally requested ranges. - Ok(ranges - .iter() - .map(|range| { - let idx = fetch_ranges.partition_point(|v| v.start <= range.start) - 1; - let fetch_range = &fetch_ranges[idx]; - let fetch_bytes = &fetched[idx]; - let start = (range.start - fetch_range.start) as usize; - let end = (range.end - fetch_range.start) as usize; - fetch_bytes.slice(start..end.min(fetch_bytes.len())) - }) - .collect()) - } - .boxed() - } - - // TODO: currently we don't respect `ArrowReaderOptions` because it doesn't expose any method to access the option field; - // we will fix it after `v55.1.0` is released in https://github.com/apache/arrow-rs/issues/7393 - fn get_metadata( - &mut self, - _options: Option<&'_ ArrowReaderOptions>, - ) -> BoxFuture<'_, parquet::errors::Result<Arc<ParquetMetaData>>> { - async move { - let reader = ParquetMetaDataReader::new() - .with_prefetch_hint(self.parquet_read_options.metadata_size_hint()) - // Set the page policy first because it updates both column and offset policies. - .with_page_index_policy(PageIndexPolicy::from( - self.parquet_read_options.preload_page_index(), - )) - .with_column_index_policy(PageIndexPolicy::from( - self.parquet_read_options.preload_column_index(), - )) - .with_offset_index_policy(PageIndexPolicy::from( - self.parquet_read_options.preload_offset_index(), - )); - let size = self.meta.size; - let meta = reader.load_and_finish(self, size).await?; - - Ok(Arc::new(meta)) - } - .boxed() - } -} - -/// Merge overlapping or nearby byte ranges, combining ranges with gaps <= `coalesce` bytes. -/// Adapted from object_store's `merge_ranges` in `util.rs`. -fn merge_ranges(ranges: &[Range<u64>], coalesce: u64) -> Vec<Range<u64>> { - if ranges.is_empty() { - return vec![]; - } - - let mut ranges = ranges.to_vec(); - ranges.sort_unstable_by_key(|r| r.start); - - let mut merged = Vec::with_capacity(ranges.len()); - let mut start_idx = 0; - let mut end_idx = 1; - - while start_idx != ranges.len() { - let mut range_end = ranges[start_idx].end; - - while end_idx != ranges.len() - && ranges[end_idx] - .start - .checked_sub(range_end) - .map(|delta| delta <= coalesce) - .unwrap_or(true) - { - range_end = range_end.max(ranges[end_idx].end); - end_idx += 1; - } - - merged.push(ranges[start_idx].start..range_end); - start_idx = end_idx; - end_idx += 1; - } - - merged -} - -/// The Arrow type of an array that the Parquet reader reads may not match the exact Arrow type -/// that Iceberg uses for literals - but they are effectively the same logical type, -/// i.e. LargeUtf8 and Utf8 or Utf8View and Utf8 or Utf8View and LargeUtf8.
-/// -/// The Arrow compute kernels that we use must match the type exactly, so first cast the literal -/// into the type of the batch we read from Parquet before sending it to the compute kernel. -fn try_cast_literal( - literal: &Arc, - column_type: &DataType, -) -> std::result::Result, ArrowError> { - let literal_array = literal.get().0; - - // No cast required - if literal_array.data_type() == column_type { - return Ok(Arc::clone(literal)); - } - - let literal_array = cast(literal_array, column_type)?; - Ok(Arc::new(Scalar::new(literal_array))) -} - -#[cfg(test)] -mod tests { - use std::collections::{HashMap, HashSet}; - use std::fs::File; - use std::ops::Range; - use std::sync::Arc; - - use arrow_array::cast::AsArray; - use arrow_array::{ArrayRef, LargeStringArray, RecordBatch, StringArray}; - use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit}; - use futures::TryStreamExt; - use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; - use parquet::arrow::{ArrowWriter, ProjectionMask}; - use parquet::basic::Compression; - use parquet::file::metadata::{ColumnChunkMetaData, RowGroupMetaData}; - use parquet::file::properties::WriterProperties; - use parquet::schema::parser::parse_message_type; - use parquet::schema::types::{SchemaDescPtr, SchemaDescriptor}; - use roaring::RoaringTreemap; - use tempfile::TempDir; - - use crate::ErrorKind; - use crate::arrow::reader::{CollectFieldIdVisitor, PARQUET_FIELD_ID_META_KEY}; - use crate::arrow::{ArrowReader, ArrowReaderBuilder}; - use crate::delete_vector::DeleteVector; - use crate::expr::visitors::bound_predicate_visitor::visit; - use crate::expr::{Bind, Predicate, Reference}; - use crate::io::FileIO; - use crate::scan::{FileScanTask, FileScanTaskDeleteFile, FileScanTaskStream}; - use crate::spec::{ - DataContentType, DataFileFormat, Datum, NestedField, PrimitiveType, Schema, SchemaRef, Type, - }; - - fn table_schema_simple() -> SchemaRef { - Arc::new( - Schema::builder() - .with_schema_id(1) - .with_identifier_field_ids(vec![2]) - .with_fields(vec![ - NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(), - NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(), - NestedField::optional(4, "qux", Type::Primitive(PrimitiveType::Float)).into(), - ]) - .build() - .unwrap(), - ) - } - - #[test] - fn test_collect_field_id() { - let schema = table_schema_simple(); - let expr = Reference::new("qux").is_null(); - let bound_expr = expr.bind(schema, true).unwrap(); - - let mut visitor = CollectFieldIdVisitor { - field_ids: HashSet::default(), - }; - visit(&mut visitor, &bound_expr).unwrap(); - - let mut expected = HashSet::default(); - expected.insert(4_i32); - - assert_eq!(visitor.field_ids, expected); - } - - #[test] - fn test_collect_field_id_with_and() { - let schema = table_schema_simple(); - let expr = Reference::new("qux") - .is_null() - .and(Reference::new("baz").is_null()); - let bound_expr = expr.bind(schema, true).unwrap(); - - let mut visitor = CollectFieldIdVisitor { - field_ids: HashSet::default(), - }; - visit(&mut visitor, &bound_expr).unwrap(); - - let mut expected = HashSet::default(); - expected.insert(4_i32); - expected.insert(3); - - assert_eq!(visitor.field_ids, expected); - } - - #[test] - fn test_collect_field_id_with_or() { - let schema = table_schema_simple(); - let expr = Reference::new("qux") - .is_null() - .or(Reference::new("baz").is_null()); - let bound_expr = expr.bind(schema, 
true).unwrap(); - - let mut visitor = CollectFieldIdVisitor { - field_ids: HashSet::default(), - }; - visit(&mut visitor, &bound_expr).unwrap(); - - let mut expected = HashSet::default(); - expected.insert(4_i32); - expected.insert(3); - - assert_eq!(visitor.field_ids, expected); - } - - #[test] - fn test_arrow_projection_mask() { - let schema = Arc::new( - Schema::builder() - .with_schema_id(1) - .with_identifier_field_ids(vec![1]) - .with_fields(vec![ - NestedField::required(1, "c1", Type::Primitive(PrimitiveType::String)).into(), - NestedField::optional(2, "c2", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::optional( - 3, - "c3", - Type::Primitive(PrimitiveType::Decimal { - precision: 38, - scale: 3, - }), - ) - .into(), - ]) - .build() - .unwrap(), - ); - let arrow_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("c1", DataType::Utf8, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "1".to_string(), - )])), - // Type not supported - Field::new("c2", DataType::Duration(TimeUnit::Microsecond), true).with_metadata( - HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "2".to_string())]), - ), - // Precision is beyond the supported range - Field::new("c3", DataType::Decimal128(39, 3), true).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "3".to_string(), - )])), - ])); - - let message_type = " -message schema { - required binary c1 (STRING) = 1; - optional int32 c2 (INTEGER(8,true)) = 2; - optional fixed_len_byte_array(17) c3 (DECIMAL(39,3)) = 3; -} - "; - let parquet_type = parse_message_type(message_type).expect("should parse schema"); - let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_type)); - - // Try projecting the fields c2 and c3 with the unsupported data types - let err = ArrowReader::get_arrow_projection_mask( - &[1, 2, 3], - &schema, - &parquet_schema, - &arrow_schema, - false, - ) - .unwrap_err(); - - assert_eq!(err.kind(), ErrorKind::DataInvalid); - assert_eq!( - err.to_string(), - "DataInvalid => Unsupported Arrow data type: Duration(µs)".to_string() - ); - - // Omitting field c2, we still get an error due to c3 being selected - let err = ArrowReader::get_arrow_projection_mask( - &[1, 3], - &schema, - &parquet_schema, - &arrow_schema, - false, - ) - .unwrap_err(); - - assert_eq!(err.kind(), ErrorKind::DataInvalid); - assert_eq!( - err.to_string(), - "DataInvalid => Failed to create decimal type, source: DataInvalid => Decimals with precision larger than 38 are not supported: 39".to_string() - ); - - // Finally avoid selecting fields with unsupported data types - let mask = ArrowReader::get_arrow_projection_mask( - &[1], - &schema, - &parquet_schema, - &arrow_schema, - false, - ) - .expect("Some ProjectionMask"); - assert_eq!(mask, ProjectionMask::leaves(&parquet_schema, vec![0])); - } - - #[tokio::test] - async fn test_kleene_logic_or_behaviour() { - // a IS NULL OR a = 'foo' - let predicate = Reference::new("a") - .is_null() - .or(Reference::new("a").equal_to(Datum::string("foo"))); - - // Table data: [NULL, "foo", "bar"] - let data_for_col_a = vec![None, Some("foo".to_string()), Some("bar".to_string())]; - - // Expected: [NULL, "foo"]. 
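- // ("bar" is dropped: `bar IS NULL` is false and `bar = 'foo'` is false, so the OR is - // false; the NULL row survives because `NULL IS NULL` is true.)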
- let expected = vec![None, Some("foo".to_string())]; - - let (file_io, schema, table_location, _temp_dir) = - setup_kleene_logic(data_for_col_a, DataType::Utf8); - let reader = ArrowReaderBuilder::new(file_io).build(); - - let result_data = test_perform_read(predicate, schema, table_location, reader).await; - - assert_eq!(result_data, expected); - } - - #[tokio::test] - async fn test_kleene_logic_and_behaviour() { - // a IS NOT NULL AND a != 'foo' - let predicate = Reference::new("a") - .is_not_null() - .and(Reference::new("a").not_equal_to(Datum::string("foo"))); - - // Table data: [NULL, "foo", "bar"] - let data_for_col_a = vec![None, Some("foo".to_string()), Some("bar".to_string())]; - - // Expected: ["bar"]. - let expected = vec![Some("bar".to_string())]; - - let (file_io, schema, table_location, _temp_dir) = - setup_kleene_logic(data_for_col_a, DataType::Utf8); - let reader = ArrowReaderBuilder::new(file_io).build(); - - let result_data = test_perform_read(predicate, schema, table_location, reader).await; - - assert_eq!(result_data, expected); - } - - #[tokio::test] - async fn test_predicate_cast_literal() { - let predicates = vec![ - // a == 'foo' - (Reference::new("a").equal_to(Datum::string("foo")), vec![ - Some("foo".to_string()), - ]), - // a != 'foo' - ( - Reference::new("a").not_equal_to(Datum::string("foo")), - vec![Some("bar".to_string())], - ), - // STARTS_WITH(a, 'f') - (Reference::new("a").starts_with(Datum::string("f")), vec![ - Some("foo".to_string()), - ]), - // NOT STARTS_WITH(a, 'f') - ( - Reference::new("a").not_starts_with(Datum::string("f")), - vec![Some("bar".to_string())], - ), - // a < 'foo' - (Reference::new("a").less_than(Datum::string("foo")), vec![ - Some("bar".to_string()), - ]), - // a <= 'foo' - ( - Reference::new("a").less_than_or_equal_to(Datum::string("foo")), - vec![Some("foo".to_string()), Some("bar".to_string())], - ), - // a > 'bar' - ( - Reference::new("a").greater_than(Datum::string("bar")), - vec![Some("foo".to_string())], - ), - // a >= 'foo' - ( - Reference::new("a").greater_than_or_equal_to(Datum::string("foo")), - vec![Some("foo".to_string())], - ), - // a IN ('foo', 'baz') - ( - Reference::new("a").is_in([Datum::string("foo"), Datum::string("baz")]), - vec![Some("foo".to_string())], - ), - // a NOT IN ('foo', 'baz') - ( - Reference::new("a").is_not_in([Datum::string("foo"), Datum::string("baz")]), - vec![Some("bar".to_string())], - ), - ]; - - // Table data: ["foo", "bar"] - let data_for_col_a = vec![Some("foo".to_string()), Some("bar".to_string())]; - - let (file_io, schema, table_location, _temp_dir) = - setup_kleene_logic(data_for_col_a, DataType::LargeUtf8); - let reader = ArrowReaderBuilder::new(file_io).build(); - - for (predicate, expected) in predicates { - println!("testing predicate {predicate}"); - let result_data = test_perform_read( - predicate.clone(), - schema.clone(), - table_location.clone(), - reader.clone(), - ) - .await; - - assert_eq!(result_data, expected, "predicate={predicate}"); - } - } - - async fn test_perform_read( - predicate: Predicate, - schema: SchemaRef, - table_location: String, - reader: ArrowReader, - ) -> Vec<Option<String>> { - let tasks = Box::pin(futures::stream::iter( - vec![Ok(FileScanTask { - file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) - .unwrap() - .len(), - start: 0, - length: 0, - record_count: None, - data_file_path: format!("{table_location}/1.parquet"), - data_file_format: DataFileFormat::Parquet, - schema: schema.clone(), - project_field_ids: vec![1], - predicate:
Some(predicate.bind(schema, true).unwrap()), - deletes: vec![], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - })] - .into_iter(), - )) as FileScanTaskStream; - - let result = reader - .read(tasks) - .unwrap() - .try_collect::>() - .await - .unwrap(); - - result[0].columns()[0] - .as_string_opt::() - .unwrap() - .iter() - .map(|v| v.map(ToOwned::to_owned)) - .collect::>() - } - - fn setup_kleene_logic( - data_for_col_a: Vec>, - col_a_type: DataType, - ) -> (FileIO, SchemaRef, String, TempDir) { - let schema = Arc::new( - Schema::builder() - .with_schema_id(1) - .with_fields(vec![ - NestedField::optional(1, "a", Type::Primitive(PrimitiveType::String)).into(), - ]) - .build() - .unwrap(), - ); - - let arrow_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("a", col_a_type.clone(), true).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "1".to_string(), - )])), - ])); - - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - - let file_io = FileIO::new_with_fs(); - - let col = match col_a_type { - DataType::Utf8 => Arc::new(StringArray::from(data_for_col_a)) as ArrayRef, - DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data_for_col_a)) as ArrayRef, - _ => panic!("unexpected col_a_type"), - }; - - let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![col]).unwrap(); - - // Write the Parquet files - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(); - - let file = File::create(format!("{table_location}/1.parquet")).unwrap(); - let mut writer = - ArrowWriter::try_new(file, to_write.schema(), Some(props.clone())).unwrap(); - - writer.write(&to_write).expect("Writing batch"); - - // writer must be closed to write footer - writer.close().unwrap(); - - (file_io, schema, table_location, tmp_dir) - } - - #[test] - fn test_build_deletes_row_selection() { - let schema_descr = get_test_schema_descr(); - - let mut columns = vec![]; - for ptr in schema_descr.columns() { - let column = ColumnChunkMetaData::builder(ptr.clone()).build().unwrap(); - columns.push(column); - } - - let row_groups_metadata = vec![ - build_test_row_group_meta(schema_descr.clone(), columns.clone(), 1000, 0), - build_test_row_group_meta(schema_descr.clone(), columns.clone(), 500, 1), - build_test_row_group_meta(schema_descr.clone(), columns.clone(), 500, 2), - build_test_row_group_meta(schema_descr.clone(), columns.clone(), 1000, 3), - build_test_row_group_meta(schema_descr.clone(), columns.clone(), 500, 4), - ]; - - let selected_row_groups = Some(vec![1, 3]); - - /* cases to cover: - * {skip|select} {first|intermediate|last} {one row|multiple rows} in - {first|intermediate|last} {skipped|selected} row group - * row group selection disabled - */ - - let positional_deletes = RoaringTreemap::from_iter(&[ - 1, // in skipped rg 0, should be ignored - 3, // run of three consecutive items in skipped rg0 - 4, 5, 998, // two consecutive items at end of skipped rg0 - 999, 1000, // solitary row at start of selected rg1 (1, 9) - 1010, // run of 3 rows in selected rg1 - 1011, 1012, // (3, 485) - 1498, // run of two items at end of selected rg1 - 1499, 1500, // run of two items at start of skipped rg2 - 1501, 1600, // should ignore, in skipped rg2 - 1999, // single row at end of skipped rg2 - 2000, // run of two items at start of selected rg3 - 2001, // (4, 98) - 2100, // single row in selected row group 3 (1, 99) - 2200, // run of 3 consecutive rows in selected 
row group 3 - 2201, 2202, // (3, 796) - 2999, // single item at end of selected rg3 (1) - 3000, // single item at start of skipped rg4 - ]); - - let positional_deletes = DeleteVector::new(positional_deletes); - - // using selected row groups 1 and 3 - let result = ArrowReader::build_deletes_row_selection( - &row_groups_metadata, - &selected_row_groups, - &positional_deletes, - ) - .unwrap(); - - let expected = RowSelection::from(vec![ - RowSelector::skip(1), - RowSelector::select(9), - RowSelector::skip(3), - RowSelector::select(485), - RowSelector::skip(4), - RowSelector::select(98), - RowSelector::skip(1), - RowSelector::select(99), - RowSelector::skip(3), - RowSelector::select(796), - RowSelector::skip(1), - ]); - - assert_eq!(result, expected); - - // selecting all row groups - let result = ArrowReader::build_deletes_row_selection( - &row_groups_metadata, - &None, - &positional_deletes, - ) - .unwrap(); - - let expected = RowSelection::from(vec![ - RowSelector::select(1), - RowSelector::skip(1), - RowSelector::select(1), - RowSelector::skip(3), - RowSelector::select(992), - RowSelector::skip(3), - RowSelector::select(9), - RowSelector::skip(3), - RowSelector::select(485), - RowSelector::skip(4), - RowSelector::select(98), - RowSelector::skip(1), - RowSelector::select(398), - RowSelector::skip(3), - RowSelector::select(98), - RowSelector::skip(1), - RowSelector::select(99), - RowSelector::skip(3), - RowSelector::select(796), - RowSelector::skip(2), - RowSelector::select(499), - ]); - - assert_eq!(result, expected); - } - - fn build_test_row_group_meta( - schema_descr: SchemaDescPtr, - columns: Vec, - num_rows: i64, - ordinal: i16, - ) -> RowGroupMetaData { - RowGroupMetaData::builder(schema_descr.clone()) - .set_num_rows(num_rows) - .set_total_byte_size(2000) - .set_column_metadata(columns) - .set_ordinal(ordinal) - .build() - .unwrap() - } - - fn get_test_schema_descr() -> SchemaDescPtr { - use parquet::schema::types::Type as SchemaType; - - let schema = SchemaType::group_type_builder("schema") - .with_fields(vec![ - Arc::new( - SchemaType::primitive_type_builder("a", parquet::basic::Type::INT32) - .build() - .unwrap(), - ), - Arc::new( - SchemaType::primitive_type_builder("b", parquet::basic::Type::INT32) - .build() - .unwrap(), - ), - ]) - .build() - .unwrap(); - - Arc::new(SchemaDescriptor::new(Arc::new(schema))) - } - - /// Verifies that file splits respect byte ranges and only read specific row groups. - #[tokio::test] - async fn test_file_splits_respect_byte_ranges() { - use arrow_array::Int32Array; - use parquet::file::reader::{FileReader, SerializedFileReader}; - - let schema = Arc::new( - Schema::builder() - .with_schema_id(1) - .with_fields(vec![ - NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), - ]) - .build() - .unwrap(), - ); - - let arrow_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "1".to_string(), - )])), - ])); - - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_path = format!("{table_location}/multi_row_group.parquet"); - - // Force each batch into its own row group for testing byte range filtering. 
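- // (Three 100-row batches with the writer capped at 100 rows per row group yield three - // row groups at predictable byte offsets.)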
- let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from( - (0..100).collect::>(), - ))]) - .unwrap(); - let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from( - (100..200).collect::>(), - ))]) - .unwrap(); - let batch3 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from( - (200..300).collect::>(), - ))]) - .unwrap(); - - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .set_max_row_group_row_count(Some(100)) - .build(); - - let file = File::create(&file_path).unwrap(); - let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); - writer.write(&batch1).expect("Writing batch 1"); - writer.write(&batch2).expect("Writing batch 2"); - writer.write(&batch3).expect("Writing batch 3"); - writer.close().unwrap(); - - // Read the file metadata to get row group byte positions - let file = File::open(&file_path).unwrap(); - let reader = SerializedFileReader::new(file).unwrap(); - let metadata = reader.metadata(); - - println!("File has {} row groups", metadata.num_row_groups()); - assert_eq!(metadata.num_row_groups(), 3, "Expected 3 row groups"); - - // Get byte positions for each row group - let row_group_0 = metadata.row_group(0); - let row_group_1 = metadata.row_group(1); - let row_group_2 = metadata.row_group(2); - - let rg0_start = 4u64; // Parquet files start with 4-byte magic "PAR1" - let rg1_start = rg0_start + row_group_0.compressed_size() as u64; - let rg2_start = rg1_start + row_group_1.compressed_size() as u64; - let file_end = rg2_start + row_group_2.compressed_size() as u64; - - println!( - "Row group 0: {} rows, starts at byte {}, {} bytes compressed", - row_group_0.num_rows(), - rg0_start, - row_group_0.compressed_size() - ); - println!( - "Row group 1: {} rows, starts at byte {}, {} bytes compressed", - row_group_1.num_rows(), - rg1_start, - row_group_1.compressed_size() - ); - println!( - "Row group 2: {} rows, starts at byte {}, {} bytes compressed", - row_group_2.num_rows(), - rg2_start, - row_group_2.compressed_size() - ); - - let file_io = FileIO::new_with_fs(); - let reader = ArrowReaderBuilder::new(file_io).build(); - - // Task 1: read only the first row group - let task1 = FileScanTask { - file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(), - start: rg0_start, - length: row_group_0.compressed_size() as u64, - record_count: Some(100), - data_file_path: file_path.clone(), - data_file_format: DataFileFormat::Parquet, - schema: schema.clone(), - project_field_ids: vec![1], - predicate: None, - deletes: vec![], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - }; - - // Task 2: read the second and third row groups - let task2 = FileScanTask { - file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(), - start: rg1_start, - length: file_end - rg1_start, - record_count: Some(200), - data_file_path: file_path.clone(), - data_file_format: DataFileFormat::Parquet, - schema: schema.clone(), - project_field_ids: vec![1], - predicate: None, - deletes: vec![], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - }; - - let tasks1 = Box::pin(futures::stream::iter(vec![Ok(task1)])) as FileScanTaskStream; - let result1 = reader - .clone() - .read(tasks1) - .unwrap() - .try_collect::>() - .await - .unwrap(); - - let total_rows_task1: usize = result1.iter().map(|b| b.num_rows()).sum(); - println!( - "Task 1 (bytes {}-{}) returned {} rows", - 
rg0_start, - rg0_start + row_group_0.compressed_size() as u64, - total_rows_task1 - ); - - let tasks2 = Box::pin(futures::stream::iter(vec![Ok(task2)])) as FileScanTaskStream; - let result2 = reader - .read(tasks2) - .unwrap() - .try_collect::>() - .await - .unwrap(); - - let total_rows_task2: usize = result2.iter().map(|b| b.num_rows()).sum(); - println!("Task 2 (bytes {rg1_start}-{file_end}) returned {total_rows_task2} rows"); - - assert_eq!( - total_rows_task1, 100, - "Task 1 should read only the first row group (100 rows), but got {total_rows_task1} rows" - ); - - assert_eq!( - total_rows_task2, 200, - "Task 2 should read only the second+third row groups (200 rows), but got {total_rows_task2} rows" - ); - - // Verify the actual data values are correct (not just the row count) - if total_rows_task1 > 0 { - let first_batch = &result1[0]; - let id_col = first_batch - .column(0) - .as_primitive::(); - let first_val = id_col.value(0); - let last_val = id_col.value(id_col.len() - 1); - println!("Task 1 data range: {first_val} to {last_val}"); - - assert_eq!(first_val, 0, "Task 1 should start with id=0"); - assert_eq!(last_val, 99, "Task 1 should end with id=99"); - } - - if total_rows_task2 > 0 { - let first_batch = &result2[0]; - let id_col = first_batch - .column(0) - .as_primitive::(); - let first_val = id_col.value(0); - println!("Task 2 first value: {first_val}"); - - assert_eq!(first_val, 100, "Task 2 should start with id=100, not id=0"); - } - } - - /// Test schema evolution: reading old Parquet file (with only column 'a') - /// using a newer table schema (with columns 'a' and 'b'). - /// This tests that: - /// 1. get_arrow_projection_mask allows missing columns - /// 2. RecordBatchTransformer adds missing column 'b' with NULL values - #[tokio::test] - async fn test_schema_evolution_add_column() { - use arrow_array::{Array, Int32Array}; - - // New table schema: columns 'a' and 'b' (b was added later, file only has 'a') - let new_schema = Arc::new( - Schema::builder() - .with_schema_id(2) - .with_fields(vec![ - NestedField::required(1, "a", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::optional(2, "b", Type::Primitive(PrimitiveType::Int)).into(), - ]) - .build() - .unwrap(), - ); - - // Create Arrow schema for old Parquet file (only has column 'a') - let arrow_schema_old = Arc::new(ArrowSchema::new(vec![ - Field::new("a", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "1".to_string(), - )])), - ])); - - // Write old Parquet file with only column 'a' - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_io = FileIO::new_with_fs(); - - let data_a = Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef; - let to_write = RecordBatch::try_new(arrow_schema_old.clone(), vec![data_a]).unwrap(); - - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(); - let file = File::create(format!("{table_location}/old_file.parquet")).unwrap(); - let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); - writer.write(&to_write).expect("Writing batch"); - writer.close().unwrap(); - - // Read the old Parquet file using the NEW schema (with column 'b') - let reader = ArrowReaderBuilder::new(file_io).build(); - let tasks = Box::pin(futures::stream::iter( - vec![Ok(FileScanTask { - file_size_in_bytes: std::fs::metadata(format!("{table_location}/old_file.parquet")) - .unwrap() - .len(), - start: 0, - length: 0, - 
record_count: None, - data_file_path: format!("{table_location}/old_file.parquet"), - data_file_format: DataFileFormat::Parquet, - schema: new_schema.clone(), - project_field_ids: vec![1, 2], // Request both columns 'a' and 'b' - predicate: None, - deletes: vec![], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - })] - .into_iter(), - )) as FileScanTaskStream; - - let result = reader - .read(tasks) - .unwrap() - .try_collect::>() - .await - .unwrap(); - - // Verify we got the correct data - assert_eq!(result.len(), 1); - let batch = &result[0]; - - // Should have 2 columns now - assert_eq!(batch.num_columns(), 2); - assert_eq!(batch.num_rows(), 3); - - // Column 'a' should have the original data - let col_a = batch - .column(0) - .as_primitive::(); - assert_eq!(col_a.values(), &[1, 2, 3]); - - // Column 'b' should be all NULLs (it didn't exist in the old file) - let col_b = batch - .column(1) - .as_primitive::(); - assert_eq!(col_b.null_count(), 3); - assert!(col_b.is_null(0)); - assert!(col_b.is_null(1)); - assert!(col_b.is_null(2)); - } - - /// Test for bug where position deletes in later row groups are not applied correctly. - /// - /// When a file has multiple row groups and a position delete targets a row in a later - /// row group, the `build_deletes_row_selection` function had a bug where it would - /// fail to increment `current_row_group_base_idx` when skipping row groups. - /// - /// This test creates: - /// - A data file with 200 rows split into 2 row groups (0-99, 100-199) - /// - A position delete file that deletes row 199 (last row in second row group) - /// - /// Expected behavior: Should return 199 rows (with id=200 deleted) - /// Bug behavior: Returns 200 rows (delete is not applied) - /// - /// This bug was discovered while running Apache Spark + Apache Iceberg integration tests - /// through DataFusion Comet. 
The following Iceberg Java tests failed due to this bug: - /// - `org.apache.iceberg.spark.extensions.TestMergeOnReadDelete::testDeleteWithMultipleRowGroupsParquet` - /// - `org.apache.iceberg.spark.extensions.TestMergeOnReadUpdate::testUpdateWithMultipleRowGroupsParquet` - #[tokio::test] - async fn test_position_delete_across_multiple_row_groups() { - use arrow_array::{Int32Array, Int64Array}; - use parquet::file::reader::{FileReader, SerializedFileReader}; - - // Field IDs for positional delete schema - const FIELD_ID_POSITIONAL_DELETE_FILE_PATH: u64 = 2147483546; - const FIELD_ID_POSITIONAL_DELETE_POS: u64 = 2147483545; - - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - - // Create table schema with a single 'id' column - let table_schema = Arc::new( - Schema::builder() - .with_schema_id(1) - .with_fields(vec![ - NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), - ]) - .build() - .unwrap(), - ); - - let arrow_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "1".to_string(), - )])), - ])); - - // Step 1: Create data file with 200 rows in 2 row groups - // Row group 0: rows 0-99 (ids 1-100) - // Row group 1: rows 100-199 (ids 101-200) - let data_file_path = format!("{table_location}/data.parquet"); - - let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( - Int32Array::from_iter_values(1..=100), - )]) - .unwrap(); - - let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( - Int32Array::from_iter_values(101..=200), - )]) - .unwrap(); - - // Force each batch into its own row group - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .set_max_row_group_row_count(Some(100)) - .build(); - - let file = File::create(&data_file_path).unwrap(); - let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); - writer.write(&batch1).expect("Writing batch 1"); - writer.write(&batch2).expect("Writing batch 2"); - writer.close().unwrap(); - - // Verify we created 2 row groups - let verify_file = File::open(&data_file_path).unwrap(); - let verify_reader = SerializedFileReader::new(verify_file).unwrap(); - assert_eq!( - verify_reader.metadata().num_row_groups(), - 2, - "Should have 2 row groups" - ); - - // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1) - let delete_file_path = format!("{table_location}/deletes.parquet"); - - let delete_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - FIELD_ID_POSITIONAL_DELETE_FILE_PATH.to_string(), - )])), - Field::new("pos", DataType::Int64, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - FIELD_ID_POSITIONAL_DELETE_POS.to_string(), - )])), - ])); - - // Delete row at position 199 (0-indexed, so it's the last row: id=200) - let delete_batch = RecordBatch::try_new(delete_schema.clone(), vec![ - Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])), - Arc::new(Int64Array::from_iter_values(vec![199i64])), - ]) - .unwrap(); - - let delete_props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(); - - let delete_file = File::create(&delete_file_path).unwrap(); - let mut delete_writer = - ArrowWriter::try_new(delete_file, delete_schema, 
Some(delete_props)).unwrap(); - delete_writer.write(&delete_batch).unwrap(); - delete_writer.close().unwrap(); - - // Step 3: Read the data file with the delete applied - let file_io = FileIO::new_with_fs(); - let reader = ArrowReaderBuilder::new(file_io).build(); - - let task = FileScanTask { - file_size_in_bytes: std::fs::metadata(&data_file_path).unwrap().len(), - start: 0, - length: 0, - record_count: Some(200), - data_file_path: data_file_path.clone(), - data_file_format: DataFileFormat::Parquet, - schema: table_schema.clone(), - project_field_ids: vec![1], - predicate: None, - deletes: vec![FileScanTaskDeleteFile { - file_size_in_bytes: std::fs::metadata(&delete_file_path).unwrap().len(), - file_path: delete_file_path, - file_type: DataContentType::PositionDeletes, - partition_spec_id: 0, - equality_ids: None, - }], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - }; - - let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream; - let result = reader - .read(tasks) - .unwrap() - .try_collect::>() - .await - .unwrap(); - - // Step 4: Verify we got 199 rows (not 200) - let total_rows: usize = result.iter().map(|b| b.num_rows()).sum(); - - println!("Total rows read: {total_rows}"); - println!("Expected: 199 rows (deleted row 199 which had id=200)"); - - // This assertion will FAIL before the fix and PASS after the fix - assert_eq!( - total_rows, 199, - "Expected 199 rows after deleting row 199, but got {total_rows} rows. \ - The bug causes position deletes in later row groups to be ignored." - ); - - // Verify the deleted row (id=200) is not present - let all_ids: Vec = result - .iter() - .flat_map(|batch| { - batch - .column(0) - .as_primitive::() - .values() - .iter() - .copied() - }) - .collect(); - - assert!( - !all_ids.contains(&200), - "Row with id=200 should be deleted but was found in results" - ); - - // Verify we have all other ids (1-199) - let expected_ids: Vec = (1..=199).collect(); - assert_eq!( - all_ids, expected_ids, - "Should have ids 1-199 but got different values" - ); - } - - /// Test for bug where position deletes are lost when skipping unselected row groups. - /// - /// This is a variant of `test_position_delete_across_multiple_row_groups` that exercises - /// the row group selection code path (`selected_row_groups: Some([...])`). - /// - /// When a file has multiple row groups and only some are selected for reading, - /// the `build_deletes_row_selection` function must correctly skip over deletes in - /// unselected row groups WITHOUT consuming deletes that belong to selected row groups. - /// - /// This test creates: - /// - A data file with 200 rows split into 2 row groups (0-99, 100-199) - /// - A position delete file that deletes row 199 (last row in second row group) - /// - Row group selection that reads ONLY row group 1 (rows 100-199) - /// - /// Expected behavior: Should return 99 rows (with row 199 deleted) - /// Bug behavior: Returns 100 rows (delete is lost when skipping row group 0) - /// - /// The bug occurs when processing row group 0 (unselected): - /// ```rust - /// delete_vector_iter.advance_to(next_row_group_base_idx); // Position at first delete >= 100 - /// next_deleted_row_idx_opt = delete_vector_iter.next(); // BUG: Consumes delete at 199! - /// ``` - /// - /// The fix is to NOT call `next()` after `advance_to()` when skipping unselected row groups, - /// because `advance_to()` already positions the iterator correctly without consuming elements. 
- #[tokio::test] - async fn test_position_delete_with_row_group_selection() { - use arrow_array::{Int32Array, Int64Array}; - use parquet::file::reader::{FileReader, SerializedFileReader}; - - // Field IDs for positional delete schema - const FIELD_ID_POSITIONAL_DELETE_FILE_PATH: u64 = 2147483546; - const FIELD_ID_POSITIONAL_DELETE_POS: u64 = 2147483545; - - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - - // Create table schema with a single 'id' column - let table_schema = Arc::new( - Schema::builder() - .with_schema_id(1) - .with_fields(vec![ - NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), - ]) - .build() - .unwrap(), - ); - - let arrow_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "1".to_string(), - )])), - ])); - - // Step 1: Create data file with 200 rows in 2 row groups - // Row group 0: rows 0-99 (ids 1-100) - // Row group 1: rows 100-199 (ids 101-200) - let data_file_path = format!("{table_location}/data.parquet"); - - let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( - Int32Array::from_iter_values(1..=100), - )]) - .unwrap(); - - let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( - Int32Array::from_iter_values(101..=200), - )]) - .unwrap(); - - // Force each batch into its own row group - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .set_max_row_group_row_count(Some(100)) - .build(); - - let file = File::create(&data_file_path).unwrap(); - let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); - writer.write(&batch1).expect("Writing batch 1"); - writer.write(&batch2).expect("Writing batch 2"); - writer.close().unwrap(); - - // Verify we created 2 row groups - let verify_file = File::open(&data_file_path).unwrap(); - let verify_reader = SerializedFileReader::new(verify_file).unwrap(); - assert_eq!( - verify_reader.metadata().num_row_groups(), - 2, - "Should have 2 row groups" - ); - - // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1) - let delete_file_path = format!("{table_location}/deletes.parquet"); - - let delete_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - FIELD_ID_POSITIONAL_DELETE_FILE_PATH.to_string(), - )])), - Field::new("pos", DataType::Int64, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - FIELD_ID_POSITIONAL_DELETE_POS.to_string(), - )])), - ])); - - // Delete row at position 199 (0-indexed, so it's the last row: id=200) - let delete_batch = RecordBatch::try_new(delete_schema.clone(), vec![ - Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])), - Arc::new(Int64Array::from_iter_values(vec![199i64])), - ]) - .unwrap(); - - let delete_props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(); - - let delete_file = File::create(&delete_file_path).unwrap(); - let mut delete_writer = - ArrowWriter::try_new(delete_file, delete_schema, Some(delete_props)).unwrap(); - delete_writer.write(&delete_batch).unwrap(); - delete_writer.close().unwrap(); - - // Step 3: Get byte ranges to read ONLY row group 1 (rows 100-199) - // This exercises the row group selection code path where row group 0 is skipped - let metadata_file = 
File::open(&data_file_path).unwrap(); - let metadata_reader = SerializedFileReader::new(metadata_file).unwrap(); - let metadata = metadata_reader.metadata(); - - let row_group_0 = metadata.row_group(0); - let row_group_1 = metadata.row_group(1); - - let rg0_start = 4u64; // Parquet files start with 4-byte magic "PAR1" - let rg1_start = rg0_start + row_group_0.compressed_size() as u64; - let rg1_length = row_group_1.compressed_size() as u64; - - println!( - "Row group 0: starts at byte {}, {} bytes compressed", - rg0_start, - row_group_0.compressed_size() - ); - println!( - "Row group 1: starts at byte {}, {} bytes compressed", - rg1_start, - row_group_1.compressed_size() - ); - - let file_io = FileIO::new_with_fs(); - let reader = ArrowReaderBuilder::new(file_io).build(); - - // Create FileScanTask that reads ONLY row group 1 via byte range filtering - let task = FileScanTask { - file_size_in_bytes: std::fs::metadata(&data_file_path).unwrap().len(), - start: rg1_start, - length: rg1_length, - record_count: Some(100), // Row group 1 has 100 rows - data_file_path: data_file_path.clone(), - data_file_format: DataFileFormat::Parquet, - schema: table_schema.clone(), - project_field_ids: vec![1], - predicate: None, - deletes: vec![FileScanTaskDeleteFile { - file_size_in_bytes: std::fs::metadata(&delete_file_path).unwrap().len(), - file_path: delete_file_path, - file_type: DataContentType::PositionDeletes, - partition_spec_id: 0, - equality_ids: None, - }], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - }; - - let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream; - let result = reader - .read(tasks) - .unwrap() - .try_collect::<Vec<RecordBatch>>() - .await - .unwrap(); - - // Step 4: Verify we got 99 rows (not 100) - // Row group 1 has 100 rows (ids 101-200), minus 1 delete (id=200) = 99 rows - let total_rows: usize = result.iter().map(|b| b.num_rows()).sum(); - - println!("Total rows read from row group 1: {total_rows}"); - println!("Expected: 99 rows (row group 1 has 100 rows, 1 delete at position 199)"); - - // This assertion will FAIL before the fix and PASS after the fix - assert_eq!( - total_rows, 99, - "Expected 99 rows from row group 1 after deleting position 199, but got {total_rows} rows. \ - The bug causes position deletes to be lost when advance_to() is followed by next() \ - when skipping unselected row groups." - ); - - // Verify the deleted row (id=200) is not present - let all_ids: Vec<i32> = result - .iter() - .flat_map(|batch| { - batch - .column(0) - .as_primitive::<Int32Type>() - .values() - .iter() - .copied() - }) - .collect(); - - assert!( - !all_ids.contains(&200), - "Row with id=200 should be deleted but was found in results" - ); - - // Verify we have ids 101-199 (not 101-200) - let expected_ids: Vec<i32> = (101..=199).collect(); - assert_eq!( - all_ids, expected_ids, - "Should have ids 101-199 but got different values" - ); - } - /// Test for bug where stale cached delete causes infinite loop when skipping row groups.
- /// - /// This test exposes the inverse scenario of `test_position_delete_with_row_group_selection`: - /// - Position delete targets a row in the SKIPPED row group (not the selected one) - /// - After calling advance_to(), the cached delete index is stale - /// - Without updating the cache, the code enters an infinite loop - /// - /// This test creates: - /// - A data file with 200 rows split into 2 row groups (0-99, 100-199) - /// - A position delete file that deletes row 0 (first row in SKIPPED row group 0) - /// - Row group selection that reads ONLY row group 1 (rows 100-199) - /// - /// The bug occurs when skipping row group 0: - /// ```rust - /// let mut next_deleted_row_idx_opt = delete_vector_iter.next(); // Some(0) - /// // ... skip to row group 1 ... - /// delete_vector_iter.advance_to(100); // Iterator advances past delete at 0 - /// // BUG: next_deleted_row_idx_opt is still Some(0) - STALE! - /// // When processing row group 1: - /// // current_idx = 100, next_deleted_row_idx = 0, next_row_group_base_idx = 200 - /// // Loop condition: 0 < 200 (true) - /// // But: current_idx (100) > next_deleted_row_idx (0) - /// // And: current_idx (100) != next_deleted_row_idx (0) - /// // Neither branch executes -> INFINITE LOOP! - /// ``` - /// - /// Expected behavior: Should return 100 rows (delete at 0 doesn't affect row group 1) - /// Bug behavior: Infinite loop in build_deletes_row_selection - #[tokio::test] - async fn test_position_delete_in_skipped_row_group() { - use arrow_array::{Int32Array, Int64Array}; - use parquet::file::reader::{FileReader, SerializedFileReader}; - - // Field IDs for positional delete schema - const FIELD_ID_POSITIONAL_DELETE_FILE_PATH: u64 = 2147483546; - const FIELD_ID_POSITIONAL_DELETE_POS: u64 = 2147483545; - - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - - // Create table schema with a single 'id' column - let table_schema = Arc::new( - Schema::builder() - .with_schema_id(1) - .with_fields(vec![ - NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), - ]) - .build() - .unwrap(), - ); - - let arrow_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "1".to_string(), - )])), - ])); - - // Step 1: Create data file with 200 rows in 2 row groups - // Row group 0: rows 0-99 (ids 1-100) - // Row group 1: rows 100-199 (ids 101-200) - let data_file_path = format!("{table_location}/data.parquet"); - - let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( - Int32Array::from_iter_values(1..=100), - )]) - .unwrap(); - - let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( - Int32Array::from_iter_values(101..=200), - )]) - .unwrap(); - - // Force each batch into its own row group - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .set_max_row_group_row_count(Some(100)) - .build(); - - let file = File::create(&data_file_path).unwrap(); - let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); - writer.write(&batch1).expect("Writing batch 1"); - writer.write(&batch2).expect("Writing batch 2"); - writer.close().unwrap(); - - // Verify we created 2 row groups - let verify_file = File::open(&data_file_path).unwrap(); - let verify_reader = SerializedFileReader::new(verify_file).unwrap(); - assert_eq!( - verify_reader.metadata().num_row_groups(), - 2, - "Should have 2 row groups" - ); - 
- // Step 2: Create position delete file that deletes row 0 (id=1, first row in row group 0) - let delete_file_path = format!("{table_location}/deletes.parquet"); - - let delete_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - FIELD_ID_POSITIONAL_DELETE_FILE_PATH.to_string(), - )])), - Field::new("pos", DataType::Int64, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - FIELD_ID_POSITIONAL_DELETE_POS.to_string(), - )])), - ])); - - // Delete row at position 0 (0-indexed, so it's the first row: id=1) - let delete_batch = RecordBatch::try_new(delete_schema.clone(), vec![ - Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])), - Arc::new(Int64Array::from_iter_values(vec![0i64])), - ]) - .unwrap(); - - let delete_props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(); - - let delete_file = File::create(&delete_file_path).unwrap(); - let mut delete_writer = - ArrowWriter::try_new(delete_file, delete_schema, Some(delete_props)).unwrap(); - delete_writer.write(&delete_batch).unwrap(); - delete_writer.close().unwrap(); - - // Step 3: Get byte ranges to read ONLY row group 1 (rows 100-199) - // This exercises the row group selection code path where row group 0 is skipped - let metadata_file = File::open(&data_file_path).unwrap(); - let metadata_reader = SerializedFileReader::new(metadata_file).unwrap(); - let metadata = metadata_reader.metadata(); - - let row_group_0 = metadata.row_group(0); - let row_group_1 = metadata.row_group(1); - - let rg0_start = 4u64; // Parquet files start with 4-byte magic "PAR1" - let rg1_start = rg0_start + row_group_0.compressed_size() as u64; - let rg1_length = row_group_1.compressed_size() as u64; - - let file_io = FileIO::new_with_fs(); - let reader = ArrowReaderBuilder::new(file_io).build(); - - // Create FileScanTask that reads ONLY row group 1 via byte range filtering - let task = FileScanTask { - file_size_in_bytes: std::fs::metadata(&data_file_path).unwrap().len(), - start: rg1_start, - length: rg1_length, - record_count: Some(100), // Row group 1 has 100 rows - data_file_path: data_file_path.clone(), - data_file_format: DataFileFormat::Parquet, - schema: table_schema.clone(), - project_field_ids: vec![1], - predicate: None, - deletes: vec![FileScanTaskDeleteFile { - file_size_in_bytes: std::fs::metadata(&delete_file_path).unwrap().len(), - file_path: delete_file_path, - file_type: DataContentType::PositionDeletes, - partition_spec_id: 0, - equality_ids: None, - }], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - }; - - let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream; - let result = reader - .read(tasks) - .unwrap() - .try_collect::<Vec<RecordBatch>>() - .await - .unwrap(); - - // Step 4: Verify we got 100 rows (all of row group 1) - // The delete at position 0 is in row group 0, which is skipped, so it doesn't affect us - let total_rows: usize = result.iter().map(|b| b.num_rows()).sum(); - - assert_eq!( - total_rows, 100, - "Expected 100 rows from row group 1 (delete at position 0 is in skipped row group 0). \ - If this hangs or fails, it indicates the cached delete index was not updated after advance_to()." - ); - - // Verify we have all ids from row group 1 (101-200) - let all_ids: Vec<i32> = result - .iter() - .flat_map(|batch| { - batch - .column(0) - .as_primitive::<Int32Type>() - .values() - .iter() - .copied() - }) - .collect(); - - let expected_ids: Vec<i32> = (101..=200).collect(); - assert_eq!( - all_ids, expected_ids, - "Should have ids 101-200 (all of row group 1)" - ); - }
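A companion sketch to the test above, again with illustrative names rather than the crate's internals: after `advance_to()` skips past deletes in an unselected row group, the cached "next deleted index" must be refreshed from the cursor, otherwise the selection loop keeps comparing against a stale value and never makes progress:

```rust
fn main() {
    let deletes = vec![0u64]; // the only delete lives in skipped row group 0
    let mut pos = 0usize;
    let stale_cache = deletes.get(pos).copied(); // Some(0), cached before the skip

    // advance_to(100): move the cursor to the first delete >= 100.
    while pos < deletes.len() && deletes[pos] < 100 {
        pos += 1;
    }
    let refreshed_cache = deletes.get(pos).copied();

    assert_eq!(stale_cache, Some(0)); // comparing rows 100.. against this spins forever
    assert_eq!(refreshed_cache, None); // refreshed: no deletes affect row group 1
}
```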
- - /// Test reading Parquet files without field ID metadata (e.g., migrated tables). - /// This exercises the position-based fallback path. - /// - /// Corresponds to Java's ParquetSchemaUtil.addFallbackIds() + pruneColumnsFallback() - /// in /parquet/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java - #[tokio::test] - async fn test_read_parquet_file_without_field_ids() { - let schema = Arc::new( - Schema::builder() - .with_schema_id(1) - .with_fields(vec![ - NestedField::required(1, "name", Type::Primitive(PrimitiveType::String)).into(), - NestedField::required(2, "age", Type::Primitive(PrimitiveType::Int)).into(), - ]) - .build() - .unwrap(), - ); - - // Parquet file from a migrated table - no field ID metadata - let arrow_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("name", DataType::Utf8, false), - Field::new("age", DataType::Int32, false), - ])); - - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_io = FileIO::new_with_fs(); - - let name_data = vec!["Alice", "Bob", "Charlie"]; - let age_data = vec![30, 25, 35]; - - use arrow_array::Int32Array; - let name_col = Arc::new(StringArray::from(name_data.clone())) as ArrayRef; - let age_col = Arc::new(Int32Array::from(age_data.clone())) as ArrayRef; - - let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![name_col, age_col]).unwrap(); - - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(); - - let file = File::create(format!("{table_location}/1.parquet")).unwrap(); - let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); - - writer.write(&to_write).expect("Writing batch"); - writer.close().unwrap(); - - let reader = ArrowReaderBuilder::new(file_io).build(); - - let tasks = Box::pin(futures::stream::iter( - vec![Ok(FileScanTask { - file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) - .unwrap() - .len(), - start: 0, - length: 0, - record_count: None, - data_file_path: format!("{table_location}/1.parquet"), - data_file_format: DataFileFormat::Parquet, - schema: schema.clone(), - project_field_ids: vec![1, 2], - predicate: None, - deletes: vec![], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - })] - .into_iter(), - )) as FileScanTaskStream; - - let result = reader - .read(tasks) - .unwrap() - .try_collect::<Vec<RecordBatch>>() - .await - .unwrap(); - - assert_eq!(result.len(), 1); - let batch = &result[0]; - assert_eq!(batch.num_rows(), 3); - assert_eq!(batch.num_columns(), 2); - - // Verify position-based mapping: field_id 1 → position 0, field_id 2 → position 1 - let name_array = batch.column(0).as_string::<i32>(); - assert_eq!(name_array.value(0), "Alice"); - assert_eq!(name_array.value(1), "Bob"); - assert_eq!(name_array.value(2), "Charlie"); - - let age_array = batch - .column(1) - .as_primitive::<Int32Type>(); - assert_eq!(age_array.value(0), 30); - assert_eq!(age_array.value(1), 25); - assert_eq!(age_array.value(2), 35); - }
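The test above relies on the fallback-ID rule cited from Java's `addFallbackIds()`. A hypothetical helper (not the crate's API) makes the rule concrete: top-level columns are numbered 1..=N by position, so `project_field_ids: vec![1, 2]` resolves to columns 0 and 1:

```rust
// Hypothetical illustration of position-based fallback IDs.
fn fallback_field_ids(num_top_level_columns: usize) -> Vec<i32> {
    (1..=num_top_level_columns as i32).collect()
}

fn main() {
    // A two-column file with no field-id metadata gets ids [1, 2].
    assert_eq!(fallback_field_ids(2), vec![1, 2]);
}
```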
- - /// Test reading Parquet files without field IDs with partial projection. - /// Only a subset of columns are requested, verifying position-based fallback - /// handles column selection correctly. - #[tokio::test] - async fn test_read_parquet_without_field_ids_partial_projection() { - use arrow_array::Int32Array; - - let schema = Arc::new( - Schema::builder() - .with_schema_id(1) - .with_fields(vec![ - NestedField::required(1, "col1", Type::Primitive(PrimitiveType::String)).into(), - NestedField::required(2, "col2", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::required(3, "col3", Type::Primitive(PrimitiveType::String)).into(), - NestedField::required(4, "col4", Type::Primitive(PrimitiveType::Int)).into(), - ]) - .build() - .unwrap(), - ); - - let arrow_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("col1", DataType::Utf8, false), - Field::new("col2", DataType::Int32, false), - Field::new("col3", DataType::Utf8, false), - Field::new("col4", DataType::Int32, false), - ])); - - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_io = FileIO::new_with_fs(); - - let col1_data = Arc::new(StringArray::from(vec!["a", "b"])) as ArrayRef; - let col2_data = Arc::new(Int32Array::from(vec![10, 20])) as ArrayRef; - let col3_data = Arc::new(StringArray::from(vec!["c", "d"])) as ArrayRef; - let col4_data = Arc::new(Int32Array::from(vec![30, 40])) as ArrayRef; - - let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![ - col1_data, col2_data, col3_data, col4_data, - ]) - .unwrap(); - - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(); - - let file = File::create(format!("{table_location}/1.parquet")).unwrap(); - let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); - - writer.write(&to_write).expect("Writing batch"); - writer.close().unwrap(); - - let reader = ArrowReaderBuilder::new(file_io).build(); - - let tasks = Box::pin(futures::stream::iter( - vec![Ok(FileScanTask { - file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) - .unwrap() - .len(), - start: 0, - length: 0, - record_count: None, - data_file_path: format!("{table_location}/1.parquet"), - data_file_format: DataFileFormat::Parquet, - schema: schema.clone(), - project_field_ids: vec![1, 3], - predicate: None, - deletes: vec![], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - })] - .into_iter(), - )) as FileScanTaskStream; - - let result = reader - .read(tasks) - .unwrap() - .try_collect::<Vec<RecordBatch>>() - .await - .unwrap(); - - assert_eq!(result.len(), 1); - let batch = &result[0]; - assert_eq!(batch.num_rows(), 2); - assert_eq!(batch.num_columns(), 2); - - let col1_array = batch.column(0).as_string::<i32>(); - assert_eq!(col1_array.value(0), "a"); - assert_eq!(col1_array.value(1), "b"); - - let col3_array = batch.column(1).as_string::<i32>(); - assert_eq!(col3_array.value(0), "c"); - assert_eq!(col3_array.value(1), "d"); - }
- - /// Test reading Parquet files without field IDs with schema evolution. - /// The Iceberg schema has more fields than the Parquet file, testing that - /// missing columns are filled with NULLs. - #[tokio::test] - async fn test_read_parquet_without_field_ids_schema_evolution() { - use arrow_array::{Array, Int32Array}; - - // Schema with field 3 added after the file was written - let schema = Arc::new( - Schema::builder() - .with_schema_id(1) - .with_fields(vec![ - NestedField::required(1, "name", Type::Primitive(PrimitiveType::String)).into(), - NestedField::required(2, "age", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::optional(3, "city", Type::Primitive(PrimitiveType::String)).into(), - ]) - .build() - .unwrap(), - ); - - let arrow_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("name", DataType::Utf8, false), - Field::new("age", DataType::Int32, false), - ])); - - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_io = FileIO::new_with_fs(); - - let name_data = Arc::new(StringArray::from(vec!["Alice", "Bob"])) as ArrayRef; - let age_data = Arc::new(Int32Array::from(vec![30, 25])) as ArrayRef; - - let to_write = - RecordBatch::try_new(arrow_schema.clone(), vec![name_data, age_data]).unwrap(); - - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(); - - let file = File::create(format!("{table_location}/1.parquet")).unwrap(); - let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); - - writer.write(&to_write).expect("Writing batch"); - writer.close().unwrap(); - - let reader = ArrowReaderBuilder::new(file_io).build(); - - let tasks = Box::pin(futures::stream::iter( - vec![Ok(FileScanTask { - file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) - .unwrap() - .len(), - start: 0, - length: 0, - record_count: None, - data_file_path: format!("{table_location}/1.parquet"), - data_file_format: DataFileFormat::Parquet, - schema: schema.clone(), - project_field_ids: vec![1, 2, 3], - predicate: None, - deletes: vec![], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - })] - .into_iter(), - )) as FileScanTaskStream; - - let result = reader - .read(tasks) - .unwrap() - .try_collect::<Vec<RecordBatch>>() - .await - .unwrap(); - - assert_eq!(result.len(), 1); - let batch = &result[0]; - assert_eq!(batch.num_rows(), 2); - assert_eq!(batch.num_columns(), 3); - - let name_array = batch.column(0).as_string::<i32>(); - assert_eq!(name_array.value(0), "Alice"); - assert_eq!(name_array.value(1), "Bob"); - - let age_array = batch - .column(1) - .as_primitive::<Int32Type>(); - assert_eq!(age_array.value(0), 30); - assert_eq!(age_array.value(1), 25); - - // Verify missing column filled with NULLs - let city_array = batch.column(2).as_string::<i32>(); - assert_eq!(city_array.null_count(), 2); - assert!(city_array.is_null(0)); - assert!(city_array.is_null(1)); - }
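The NULL-fill behavior the test above asserts can be reproduced directly with arrow's `new_null_array`, the natural way to materialize a projected column that has no backing data in the file (a sketch of the behavior, not the reader's exact code path):

```rust
use arrow_array::cast::AsArray;
use arrow_array::{Array, new_null_array};
use arrow_schema::DataType;

fn main() {
    // An optional field added after the file was written has no Parquet
    // column behind it, so it surfaces as an all-null array.
    let city = new_null_array(&DataType::Utf8, 2);
    assert_eq!(city.as_string::<i32>().null_count(), 2);
}
```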
- - /// Test reading Parquet files without field IDs that have multiple row groups. - /// This ensures the position-based fallback works correctly across row group boundaries. - #[tokio::test] - async fn test_read_parquet_without_field_ids_multiple_row_groups() { - use arrow_array::Int32Array; - - let schema = Arc::new( - Schema::builder() - .with_schema_id(1) - .with_fields(vec![ - NestedField::required(1, "name", Type::Primitive(PrimitiveType::String)).into(), - NestedField::required(2, "value", Type::Primitive(PrimitiveType::Int)).into(), - ]) - .build() - .unwrap(), - ); - - let arrow_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("name", DataType::Utf8, false), - Field::new("value", DataType::Int32, false), - ])); - - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_io = FileIO::new_with_fs(); - - // Small row group size to create multiple row groups - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .set_write_batch_size(2) - .set_max_row_group_row_count(Some(2)) - .build(); - - let file = File::create(format!("{table_location}/1.parquet")).unwrap(); - let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); - - // Write 6 rows in 3 batches (will create 3 row groups) - for batch_num in 0..3 { - let name_data = Arc::new(StringArray::from(vec![ - format!("name_{}", batch_num * 2), - format!("name_{}", batch_num * 2 + 1), - ])) as ArrayRef; - let value_data = - Arc::new(Int32Array::from(vec![batch_num * 2, batch_num * 2 + 1])) as ArrayRef; - - let batch = - RecordBatch::try_new(arrow_schema.clone(), vec![name_data, value_data]).unwrap(); - writer.write(&batch).expect("Writing batch"); - } - writer.close().unwrap(); - - let reader = ArrowReaderBuilder::new(file_io).build(); - - let tasks = Box::pin(futures::stream::iter( - vec![Ok(FileScanTask { - file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) - .unwrap() - .len(), - start: 0, - length: 0, - record_count: None, - data_file_path: format!("{table_location}/1.parquet"), - data_file_format: DataFileFormat::Parquet, - schema: schema.clone(), - project_field_ids: vec![1, 2], - predicate: None, - deletes: vec![], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - })] - .into_iter(), - )) as FileScanTaskStream; - - let result = reader - .read(tasks) - .unwrap() - .try_collect::<Vec<RecordBatch>>() - .await - .unwrap(); - - assert!(!result.is_empty()); - - let mut all_names = Vec::new(); - let mut all_values = Vec::new(); - - for batch in &result { - let name_array = batch.column(0).as_string::<i32>(); - let value_array = batch - .column(1) - .as_primitive::<Int32Type>(); - - for i in 0..batch.num_rows() { - all_names.push(name_array.value(i).to_string()); - all_values.push(value_array.value(i)); - } - } - - assert_eq!(all_names.len(), 6); - assert_eq!(all_values.len(), 6); - - for i in 0..6 { - assert_eq!(all_names[i], format!("name_{i}")); - assert_eq!(all_values[i], i as i32); - } - }
- - /// Test reading Parquet files without field IDs with nested types (struct). - /// Java's pruneColumnsFallback() projects entire top-level columns including nested content. - /// This test verifies that a top-level struct field is projected correctly with all its nested fields. - #[tokio::test] - async fn test_read_parquet_without_field_ids_with_struct() { - use arrow_array::{Int32Array, StructArray}; - use arrow_schema::Fields; - - let schema = Arc::new( - Schema::builder() - .with_schema_id(1) - .with_fields(vec![ - NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::required( - 2, - "person", - Type::Struct(crate::spec::StructType::new(vec![ - NestedField::required( - 3, - "name", - Type::Primitive(PrimitiveType::String), - ) - .into(), - NestedField::required(4, "age", Type::Primitive(PrimitiveType::Int)) - .into(), - ])), - ) - .into(), - ]) - .build() - .unwrap(), - ); - - let arrow_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new( - "person", - DataType::Struct(Fields::from(vec![ - Field::new("name", DataType::Utf8, false), - Field::new("age", DataType::Int32, false), - ])), - false, - ), - ])); - - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_io = FileIO::new_with_fs(); - - let id_data = Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef; - let name_data = Arc::new(StringArray::from(vec!["Alice", "Bob"])) as ArrayRef; - let age_data = Arc::new(Int32Array::from(vec![30, 25])) as ArrayRef; - let person_data = Arc::new(StructArray::from(vec![ - ( - Arc::new(Field::new("name", DataType::Utf8, false)), - name_data, - ), - ( - Arc::new(Field::new("age", DataType::Int32, false)), - age_data, - ), - ])) as ArrayRef; - - let to_write = - RecordBatch::try_new(arrow_schema.clone(), vec![id_data, person_data]).unwrap(); - - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(); - - let file = File::create(format!("{table_location}/1.parquet")).unwrap(); - let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); - - writer.write(&to_write).expect("Writing batch"); - writer.close().unwrap(); - - let reader = ArrowReaderBuilder::new(file_io).build(); - - let tasks = Box::pin(futures::stream::iter( - vec![Ok(FileScanTask { - file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) - .unwrap() - .len(), - start: 0, - length: 0, - record_count: None, - data_file_path: format!("{table_location}/1.parquet"), - data_file_format: DataFileFormat::Parquet, - schema: schema.clone(), - project_field_ids: vec![1, 2], - predicate: None, - deletes: vec![], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - })] - .into_iter(), - )) as FileScanTaskStream; - - let result = reader - .read(tasks) - .unwrap() - .try_collect::<Vec<RecordBatch>>() - .await - .unwrap(); - - assert_eq!(result.len(), 1); - let batch = &result[0]; - assert_eq!(batch.num_rows(), 2); - assert_eq!(batch.num_columns(), 2); - - let id_array = batch - .column(0) - .as_primitive::<Int32Type>(); - assert_eq!(id_array.value(0), 1); - assert_eq!(id_array.value(1), 2); - - let person_array = batch.column(1).as_struct(); - assert_eq!(person_array.num_columns(), 2); - - let name_array = person_array.column(0).as_string::<i32>(); - assert_eq!(name_array.value(0), "Alice"); - assert_eq!(name_array.value(1), "Bob"); - - let age_array = person_array - .column(1) - .as_primitive::<Int32Type>(); - assert_eq!(age_array.value(0), 30); - assert_eq!(age_array.value(1), 25); - }
- - /// Test reading Parquet files without field IDs with schema evolution - column added in the middle. - /// When a new column is inserted between existing columns in the schema order, - /// the fallback projection must correctly map field IDs to output positions. - #[tokio::test] - async fn test_read_parquet_without_field_ids_schema_evolution_add_column_in_middle() { - use arrow_array::{Array, Int32Array}; - - let arrow_schema_old = Arc::new(ArrowSchema::new(vec![ - Field::new("col0", DataType::Int32, true), - Field::new("col1", DataType::Int32, true), - ])); - - // New column added between existing columns: col0 (id=1), newCol (id=5), col1 (id=2) - let schema = Arc::new( - Schema::builder() - .with_schema_id(1) - .with_fields(vec![ - NestedField::optional(1, "col0", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::optional(5, "newCol", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::optional(2, "col1", Type::Primitive(PrimitiveType::Int)).into(), - ]) - .build() - .unwrap(), - ); - - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_io = FileIO::new_with_fs(); - - let col0_data = Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef; - let col1_data = Arc::new(Int32Array::from(vec![10, 20])) as ArrayRef; - - let to_write = - RecordBatch::try_new(arrow_schema_old.clone(), vec![col0_data, col1_data]).unwrap(); - - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(); - - let file = File::create(format!("{table_location}/1.parquet")).unwrap(); - let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); - writer.write(&to_write).expect("Writing batch"); - writer.close().unwrap(); - - let reader = ArrowReaderBuilder::new(file_io).build(); - - let tasks = Box::pin(futures::stream::iter( - vec![Ok(FileScanTask { - file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) - .unwrap() - .len(), - start: 0, - length: 0, - record_count: None, - data_file_path: format!("{table_location}/1.parquet"), - data_file_format: DataFileFormat::Parquet, - schema: schema.clone(), - project_field_ids: vec![1, 5, 2], - predicate: None, - deletes: vec![], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - })] - .into_iter(), - )) as FileScanTaskStream; - - let result = reader - .read(tasks) - .unwrap() - .try_collect::<Vec<RecordBatch>>() - .await - .unwrap(); - - assert_eq!(result.len(), 1); - let batch = &result[0]; - assert_eq!(batch.num_rows(), 2); - assert_eq!(batch.num_columns(), 3); - - let result_col0 = batch - .column(0) - .as_primitive::<Int32Type>(); - assert_eq!(result_col0.value(0), 1); - assert_eq!(result_col0.value(1), 2); - - // New column should be NULL (doesn't exist in old file) - let result_newcol = batch - .column(1) - .as_primitive::<Int32Type>(); - assert_eq!(result_newcol.null_count(), 2); - assert!(result_newcol.is_null(0)); - assert!(result_newcol.is_null(1)); - - let result_col1 = batch - .column(2) - .as_primitive::<Int32Type>(); - assert_eq!(result_col1.value(0), 10); - assert_eq!(result_col1.value(1), 20); - }
- - /// Test reading Parquet files without field IDs with a filter that eliminates all row groups. - /// During development of field ID mapping, we saw a panic when row_selection_enabled=true and - /// all row groups are filtered out. - #[tokio::test] - async fn test_read_parquet_without_field_ids_filter_eliminates_all_rows() { - use arrow_array::{Float64Array, Int32Array}; - - // Schema with fields that will use fallback IDs 1, 2, 3 - let schema = Arc::new( - Schema::builder() - .with_schema_id(1) - .with_fields(vec![ - NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(), - NestedField::required(3, "value", Type::Primitive(PrimitiveType::Double)) - .into(), - ]) - .build() - .unwrap(), - ); - - let arrow_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("name", DataType::Utf8, false), - Field::new("value", DataType::Float64, false), - ])); - - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_io = FileIO::new_with_fs(); - - // Write data where all ids are >= 10 - let id_data = Arc::new(Int32Array::from(vec![10, 11, 12])) as ArrayRef; - let name_data = Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef; - let value_data = Arc::new(Float64Array::from(vec![100.0, 200.0, 300.0])) as ArrayRef; - - let to_write = - RecordBatch::try_new(arrow_schema.clone(), vec![id_data, name_data, value_data]) - .unwrap(); - - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(); - - let file = File::create(format!("{table_location}/1.parquet")).unwrap(); - let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); - writer.write(&to_write).expect("Writing batch"); - writer.close().unwrap(); - - // Filter that eliminates all row groups: id < 5 - let predicate = Reference::new("id").less_than(Datum::int(5)); - - // Enable both row_group_filtering and row_selection - triggered the panic - let reader = ArrowReaderBuilder::new(file_io) - .with_row_group_filtering_enabled(true) - .with_row_selection_enabled(true) - .build(); - - let tasks = Box::pin(futures::stream::iter( - vec![Ok(FileScanTask { - file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) - .unwrap() - .len(), - start: 0, - length: 0, - record_count: None, - data_file_path: format!("{table_location}/1.parquet"), - data_file_format: DataFileFormat::Parquet, - schema: schema.clone(), - project_field_ids: vec![1, 2, 3], - predicate: Some(predicate.bind(schema, true).unwrap()), - deletes: vec![], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - })] - .into_iter(), - )) as FileScanTaskStream; - - // Should no longer panic - let result = reader - .read(tasks) - .unwrap() - .try_collect::<Vec<RecordBatch>>() - .await - .unwrap(); - - // Should return empty results - assert!(result.is_empty() || result.iter().all(|batch| batch.num_rows() == 0)); - }
- - /// Test that concurrency=1 reads all files correctly and in deterministic order. - /// This verifies the fast-path optimization for single concurrency. - #[tokio::test] - async fn test_read_with_concurrency_one() { - use arrow_array::Int32Array; - - let schema = Arc::new( - Schema::builder() - .with_schema_id(1) - .with_fields(vec![ - NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::required(2, "file_num", Type::Primitive(PrimitiveType::Int)) - .into(), - ]) - .build() - .unwrap(), - ); - - let arrow_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "1".to_string(), - )])), - Field::new("file_num", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "2".to_string(), - )])), - ])); - - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_io = FileIO::new_with_fs(); - - // Create 3 parquet files with different data - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(); - - for file_num in 0..3 { - let id_data = Arc::new(Int32Array::from_iter_values( - file_num * 10..(file_num + 1) * 10, - )) as ArrayRef; - let file_num_data = Arc::new(Int32Array::from(vec![file_num; 10])) as ArrayRef; - - let to_write = - RecordBatch::try_new(arrow_schema.clone(), vec![id_data, file_num_data]).unwrap(); - - let file = File::create(format!("{table_location}/file_{file_num}.parquet")).unwrap(); - let mut writer = - ArrowWriter::try_new(file, to_write.schema(), Some(props.clone())).unwrap(); - writer.write(&to_write).expect("Writing batch"); - writer.close().unwrap(); - } - - // Read with concurrency=1 (fast-path) - let reader = ArrowReaderBuilder::new(file_io) - .with_data_file_concurrency_limit(1) - .build(); - - // Create tasks in a specific order: file_0, file_1, file_2 - let tasks = vec![ - Ok(FileScanTask { - file_size_in_bytes: std::fs::metadata(format!("{table_location}/file_0.parquet")) - .unwrap() - .len(), - start: 0, - length: 0, - record_count: None, - data_file_path: format!("{table_location}/file_0.parquet"), - data_file_format: DataFileFormat::Parquet, - schema: schema.clone(), - project_field_ids: vec![1, 2], - predicate: None, - deletes: vec![], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - }), - Ok(FileScanTask { - file_size_in_bytes: std::fs::metadata(format!("{table_location}/file_1.parquet")) - .unwrap() - .len(), - start: 0, - length: 0, - record_count: None, - data_file_path: format!("{table_location}/file_1.parquet"), - data_file_format: DataFileFormat::Parquet, - schema: schema.clone(), - project_field_ids: vec![1, 2], - predicate: None, - deletes: vec![], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - }), - Ok(FileScanTask { - file_size_in_bytes: std::fs::metadata(format!("{table_location}/file_2.parquet")) - .unwrap() - .len(), - start: 0, - length: 0, - record_count: None, - data_file_path: format!("{table_location}/file_2.parquet"), - data_file_format: DataFileFormat::Parquet, - schema: schema.clone(), - project_field_ids: vec![1, 2], - predicate: None, - deletes: vec![], - partition: None, - partition_spec: None, - name_mapping: None, - case_sensitive: false, - }), - ]; - - let tasks_stream = Box::pin(futures::stream::iter(tasks)) as FileScanTaskStream; - - let result = reader - .read(tasks_stream) - .unwrap() - .try_collect::<Vec<RecordBatch>>() - .await - .unwrap(); - - // Verify we got all 30 rows (10 from each file) - let total_rows: usize = result.iter().map(|b| b.num_rows()).sum(); - assert_eq!(total_rows, 30, "Should have 30 total rows"); - - // Collect all ids and file_nums to verify data - let mut all_ids = Vec::new(); - let mut all_file_nums = Vec::new(); - - for batch in &result { - let id_col = batch - .column(0) - .as_primitive::<Int32Type>(); - let file_num_col = batch - .column(1) - .as_primitive::<Int32Type>(); - - for i in 0..batch.num_rows() { - all_ids.push(id_col.value(i)); - all_file_nums.push(file_num_col.value(i)); - } - } - - assert_eq!(all_ids.len(), 30); - assert_eq!(all_file_nums.len(), 30); - - // With concurrency=1 and sequential processing, files should be processed in order - // file_0: ids 0-9, file_num=0 - // file_1: ids 10-19, file_num=1 - // file_2: ids 20-29, file_num=2 - for i in 0..10 { - assert_eq!(all_file_nums[i], 0, "First 10 rows should be from file_0"); - assert_eq!(all_ids[i], i as i32, "IDs should be 0-9"); - } - for i in 10..20 { - assert_eq!(all_file_nums[i], 1, "Next 10 rows should be from file_1"); - assert_eq!(all_ids[i], i as i32, "IDs should be 10-19"); - } - for i in 20..30 { - assert_eq!(all_file_nums[i], 2, "Last 10 rows should be from file_2"); - assert_eq!(all_ids[i], i as i32, "IDs should be 20-29"); - } - }
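The determinism asserted above follows from how a stream is driven when at most one task is in flight. A minimal sketch (assuming the `futures` and `tokio` crates) of the general pattern: `buffered(1)` polls a single future at a time, so files are read strictly sequentially and results arrive in task order:

```rust
use futures::{StreamExt, stream};

#[tokio::main]
async fn main() {
    let out: Vec<u32> = stream::iter(0u32..3)
        .map(|n| async move { n * 10 }) // stand-in for "read one file"
        .buffered(1) // at most one in-flight read
        .collect()
        .await;
    assert_eq!(out, vec![0, 10, 20]); // task order preserved
}
```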
- - /// Test bucket partitioning reads source column from data file (not partition metadata). - /// - /// This is an integration test verifying the complete ArrowReader pipeline with bucket partitioning. - /// It corresponds to TestRuntimeFiltering tests in Iceberg Java (e.g., testRenamedSourceColumnTable). - /// - /// # Iceberg Spec Requirements - /// - /// Per the Iceberg spec "Column Projection" section: - /// > "Return the value from partition metadata if an **Identity Transform** exists for the field" - /// - /// This means: - /// - Identity transforms (e.g., `identity(dept)`) use constants from partition metadata - /// - Non-identity transforms (e.g., `bucket(4, id)`) must read source columns from data files - /// - Partition metadata for bucket transforms stores bucket numbers (0-3), NOT source values - /// - /// Java's PartitionUtil.constantsMap() implements this via: - /// ```java - /// if (field.transform().isIdentity()) { - /// idToConstant.put(field.sourceId(), converted); - /// } - /// ``` - /// - /// # What This Test Verifies - /// - /// This test ensures the full ArrowReader → RecordBatchTransformer pipeline correctly handles - /// bucket partitioning when FileScanTask provides partition_spec and partition_data: - /// - /// - Parquet file has field_id=1 named "id" with actual data [1, 5, 9, 13] - /// - FileScanTask specifies partition_spec with bucket(4, id) and partition_data with bucket=1 - /// - RecordBatchTransformer.constants_map() excludes bucket-partitioned field from constants - /// - ArrowReader correctly reads [1, 5, 9, 13] from the data file - /// - Values are NOT replaced with constant 1 from partition metadata - /// - /// # Why This Matters - /// - /// Without correct handling: - /// - Runtime filtering would break (e.g., `WHERE id = 5` would fail) - /// - Query results would be incorrect (all rows would have id=1) - /// - Bucket partitioning would be unusable for query optimization - /// - /// # References - /// - Iceberg spec: format/spec.md "Column Projection" + "Partition Transforms" - /// - Java test: spark/src/test/java/.../TestRuntimeFiltering.java - /// - Java impl: core/src/main/java/org/apache/iceberg/util/PartitionUtil.java
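Before the test, a small sketch of the identity-only rule quoted above from `PartitionUtil.constantsMap()`. The types here are illustrative stand-ins, not the crate's `Transform`/`PartitionSpec`:

```rust
#[derive(PartialEq)]
enum Transform {
    Identity,
    Bucket(u32),
}

struct PartitionField {
    source_id: i32,
    transform: Transform,
}

// Only identity-transformed fields contribute (source_id, value) constants.
fn constants_map(fields: &[PartitionField], values: &[i32]) -> Vec<(i32, i32)> {
    fields
        .iter()
        .zip(values)
        .filter(|(f, _)| f.transform == Transform::Identity)
        .map(|(f, v)| (f.source_id, *v))
        .collect()
}

fn main() {
    // bucket(4, id): partition metadata stores the bucket number (1), which
    // must NOT be projected as the value of the source column `id`.
    let fields = [PartitionField { source_id: 1, transform: Transform::Bucket(4) }];
    assert!(constants_map(&fields, &[1]).is_empty());
}
```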
- #[tokio::test] - async fn test_bucket_partitioning_reads_source_column_from_file() { - use arrow_array::Int32Array; - - use crate::spec::{Literal, PartitionSpec, Struct, Transform}; - - // Iceberg schema with id and name columns - let schema = Arc::new( - Schema::builder() - .with_schema_id(0) - .with_fields(vec![ - NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), - NestedField::optional(2, "name", Type::Primitive(PrimitiveType::String)).into(), - ]) - .build() - .unwrap(), - ); - - // Partition spec: bucket(4, id) - let partition_spec = Arc::new( - PartitionSpec::builder(schema.clone()) - .with_spec_id(0) - .add_partition_field("id", "id_bucket", Transform::Bucket(4)) - .unwrap() - .build() - .unwrap(), - ); - - // Partition data: bucket value is 1 - let partition_data = Struct::from_iter(vec![Some(Literal::int(1))]); - - // Create Arrow schema with field IDs for Parquet file - let arrow_schema = Arc::new(ArrowSchema::new(vec![ - Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "1".to_string(), - )])), - Field::new("name", DataType::Utf8, true).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "2".to_string(), - )])), - ])); - - // Write Parquet file with data - let tmp_dir = TempDir::new().unwrap(); - let table_location = tmp_dir.path().to_str().unwrap().to_string(); - let file_io = FileIO::new_with_fs(); - - let id_data = Arc::new(Int32Array::from(vec![1, 5, 9, 13])) as ArrayRef; - let name_data = - Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie", "Dave"])) as ArrayRef; - - let to_write = - RecordBatch::try_new(arrow_schema.clone(), vec![id_data, name_data]).unwrap(); - - let props = WriterProperties::builder() - .set_compression(Compression::SNAPPY) - .build(); - let file = File::create(format!("{}/data.parquet", &table_location)).unwrap(); - let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); - writer.write(&to_write).expect("Writing batch"); - writer.close().unwrap(); - - // Read the Parquet file with partition spec and data - let reader = ArrowReaderBuilder::new(file_io).build(); - let tasks = Box::pin(futures::stream::iter( - vec![Ok(FileScanTask { - file_size_in_bytes: std::fs::metadata(format!("{table_location}/data.parquet")) - .unwrap() - .len(), - start: 0, - length: 0, - record_count: None, - data_file_path: format!("{table_location}/data.parquet"), - data_file_format: DataFileFormat::Parquet, - schema: schema.clone(), - project_field_ids: vec![1, 2], - predicate: None, - deletes: vec![], - partition: Some(partition_data), - partition_spec: Some(partition_spec), - name_mapping: None, - case_sensitive: false, - })] - .into_iter(), - )) as FileScanTaskStream; - - let result = reader - .read(tasks) - .unwrap() - .try_collect::<Vec<RecordBatch>>() - .await - .unwrap(); - - // Verify we got the correct data - assert_eq!(result.len(), 1); - let batch = &result[0]; - - assert_eq!(batch.num_columns(), 2); - assert_eq!(batch.num_rows(), 4); - - // The id column MUST contain actual values from the Parquet file [1, 5, 9, 13], - // NOT the constant partition value 1 - let id_col = batch - .column(0) - .as_primitive::<Int32Type>(); - assert_eq!(id_col.value(0), 1); - assert_eq!(id_col.value(1), 5); - assert_eq!(id_col.value(2), 9); - assert_eq!(id_col.value(3), 13); - - let name_col = batch.column(1).as_string::<i32>(); - assert_eq!(name_col.value(0), "Alice"); - assert_eq!(name_col.value(1), "Bob"); - assert_eq!(name_col.value(2), "Charlie"); - assert_eq!(name_col.value(3), "Dave"); - } - - #[test] - fn test_merge_ranges_empty() { - assert_eq!(super::merge_ranges(&[], 1024), Vec::<Range<u64>>::new()); - }
- - #[test] - fn test_merge_ranges_no_coalesce() { - // Ranges far apart should not be merged - let ranges = vec![0..100, 1_000_000..1_000_100]; - let merged = super::merge_ranges(&ranges, 1024); - assert_eq!(merged, vec![0..100, 1_000_000..1_000_100]); - } - - #[test] - fn test_merge_ranges_coalesce() { - // Ranges within the gap threshold should be merged - let ranges = vec![0..100, 200..300, 500..600]; - let merged = super::merge_ranges(&ranges, 1024); - assert_eq!(merged, vec![0..600]); - } - - #[test] - fn test_merge_ranges_overlapping() { - let ranges = vec![0..200, 100..300]; - let merged = super::merge_ranges(&ranges, 0); - assert_eq!(merged, vec![0..300]); - } - - #[test] - fn test_merge_ranges_unsorted() { - let ranges = vec![500..600, 0..100, 200..300]; - let merged = super::merge_ranges(&ranges, 1024); - assert_eq!(merged, vec![0..600]); - } - - /// Mock FileRead backed by a flat byte buffer. - struct MockFileRead { - data: bytes::Bytes, - } - - impl MockFileRead { - fn new(size: usize) -> Self { - // Fill with sequential byte values so slices are verifiable. - let data: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect(); - Self { - data: bytes::Bytes::from(data), - } - } - } - - #[async_trait::async_trait] - impl crate::io::FileRead for MockFileRead { - async fn read(&self, range: Range<u64>) -> crate::Result<bytes::Bytes> { - Ok(self.data.slice(range.start as usize..range.end as usize)) - } - } - - #[tokio::test] - async fn test_get_byte_ranges_no_coalesce() { - use parquet::arrow::async_reader::AsyncFileReader; - - let mock = MockFileRead::new(2048); - let expected_0 = mock.data.slice(0..100); - let expected_1 = mock.data.slice(1500..1600); - - let mut reader = - super::ArrowFileReader::new(crate::io::FileMetadata { size: 2048 }, Box::new(mock)) - .with_parquet_read_options( - super::ParquetReadOptions::builder() - .with_range_coalesce_bytes(0) - .build(), - ); - - let result = reader - .get_byte_ranges(vec![0..100, 1500..1600]) - .await - .unwrap(); - - assert_eq!(result.len(), 2); - assert_eq!(result[0], expected_0); - assert_eq!(result[1], expected_1); - } - - #[tokio::test] - async fn test_get_byte_ranges_with_coalesce() { - use parquet::arrow::async_reader::AsyncFileReader; - - let mock = MockFileRead::new(1024); - let expected_0 = mock.data.slice(0..100); - let expected_1 = mock.data.slice(200..300); - let expected_2 = mock.data.slice(500..600); - - let mut reader = - super::ArrowFileReader::new(crate::io::FileMetadata { size: 1024 }, Box::new(mock)) - .with_parquet_read_options( - super::ParquetReadOptions::builder() - .with_range_coalesce_bytes(1024) - .build(), - ); - - // All ranges within coalesce threshold — should merge into one fetch.
- let result = reader - .get_byte_ranges(vec![0..100, 200..300, 500..600]) - .await - .unwrap(); - - assert_eq!(result.len(), 3); - assert_eq!(result[0], expected_0); - assert_eq!(result[1], expected_1); - assert_eq!(result[2], expected_2); - } - - #[tokio::test] - async fn test_get_byte_ranges_empty() { - use parquet::arrow::async_reader::AsyncFileReader; - - let mock = MockFileRead::new(1024); - let mut reader = - super::ArrowFileReader::new(crate::io::FileMetadata { size: 1024 }, Box::new(mock)); - - let result = reader.get_byte_ranges(vec![]).await.unwrap(); - assert!(result.is_empty()); - } - - #[tokio::test] - async fn test_get_byte_ranges_coalesce_max() { - use parquet::arrow::async_reader::AsyncFileReader; - - let mock = MockFileRead::new(2048); - let expected_0 = mock.data.slice(0..100); - let expected_1 = mock.data.slice(1500..1600); - - let mut reader = - super::ArrowFileReader::new(crate::io::FileMetadata { size: 2048 }, Box::new(mock)) - .with_parquet_read_options( - super::ParquetReadOptions::builder() - .with_range_coalesce_bytes(u64::MAX) - .build(), - ); - - // u64::MAX coalesce — all ranges merge into a single fetch. - let result = reader - .get_byte_ranges(vec![0..100, 1500..1600]) - .await - .unwrap(); - - assert_eq!(result.len(), 2); - assert_eq!(result[0], expected_0); - assert_eq!(result[1], expected_1); - } - - #[tokio::test] - async fn test_get_byte_ranges_concurrency_zero() { - use parquet::arrow::async_reader::AsyncFileReader; - - // concurrency=0 is clamped to 1, so this should not hang. - let mock = MockFileRead::new(1024); - let expected = mock.data.slice(0..100); - - let mut reader = - super::ArrowFileReader::new(crate::io::FileMetadata { size: 1024 }, Box::new(mock)) - .with_parquet_read_options( - super::ParquetReadOptions::builder() - .with_range_fetch_concurrency(0) - .build(), - ); - - let result = reader - .get_byte_ranges(vec![0..100, 200..300]) - .await - .unwrap(); - assert_eq!(result.len(), 2); - assert_eq!(result[0], expected); - } - - #[tokio::test] - async fn test_get_byte_ranges_concurrency_one() { - use parquet::arrow::async_reader::AsyncFileReader; - - let mock = MockFileRead::new(2048); - let expected_0 = mock.data.slice(0..100); - let expected_1 = mock.data.slice(500..600); - let expected_2 = mock.data.slice(1500..1600); - - let mut reader = - super::ArrowFileReader::new(crate::io::FileMetadata { size: 2048 }, Box::new(mock)) - .with_parquet_read_options( - super::ParquetReadOptions::builder() - .with_range_coalesce_bytes(0) - .with_range_fetch_concurrency(1) - .build(), - ); - - // concurrency=1 with no coalescing — sequential fetches. - let result = reader - .get_byte_ranges(vec![0..100, 500..600, 1500..1600]) - .await - .unwrap(); - - assert_eq!(result.len(), 3); - assert_eq!(result[0], expected_0); - assert_eq!(result[1], expected_1); - assert_eq!(result[2], expected_2); - } -} diff --git a/crates/iceberg/src/arrow/reader/file_reader.rs b/crates/iceberg/src/arrow/reader/file_reader.rs new file mode 100644 index 0000000000..79fbcc7960 --- /dev/null +++ b/crates/iceberg/src/arrow/reader/file_reader.rs @@ -0,0 +1,368 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Async Parquet file reader that adapts an Iceberg `FileRead` to parquet's `AsyncFileReader`. + +use std::ops::Range; +use std::sync::Arc; + +use bytes::Bytes; +use futures::future::BoxFuture; +use futures::{FutureExt, StreamExt, TryFutureExt, TryStreamExt}; +use parquet::arrow::arrow_reader::ArrowReaderOptions; +use parquet::arrow::async_reader::AsyncFileReader; +use parquet::file::metadata::{PageIndexPolicy, ParquetMetaData, ParquetMetaDataReader}; + +use super::ParquetReadOptions; +use crate::io::{FileMetadata, FileRead}; + +/// ArrowFileReader is a wrapper around a FileRead that implements parquet's AsyncFileReader. +pub struct ArrowFileReader { + meta: FileMetadata, + parquet_read_options: ParquetReadOptions, + r: Box<dyn FileRead>, +} + +impl ArrowFileReader { + /// Create a new ArrowFileReader + pub fn new(meta: FileMetadata, r: Box<dyn FileRead>) -> Self { + Self { + meta, + parquet_read_options: ParquetReadOptions::builder().build(), + r, + } + } + + /// Configure all Parquet read options. + pub(crate) fn with_parquet_read_options(mut self, options: ParquetReadOptions) -> Self { + self.parquet_read_options = options; + self + } +} + +impl AsyncFileReader for ArrowFileReader { + fn get_bytes(&mut self, range: Range<u64>) -> BoxFuture<'_, parquet::errors::Result<Bytes>> { + Box::pin( + self.r + .read(range.start..range.end) + .map_err(|err| parquet::errors::ParquetError::External(Box::new(err))), + ) + } + + /// Override the default `get_byte_ranges` which calls `get_bytes` sequentially. + /// The parquet reader calls this to fetch column chunks for a row group, so + /// without this override each column chunk is a serial round-trip to object storage. + /// Adapted from object_store's `coalesce_ranges` in `util.rs`. + fn get_byte_ranges( + &mut self, + ranges: Vec<Range<u64>>, + ) -> BoxFuture<'_, parquet::errors::Result<Vec<Bytes>>> { + let coalesce_bytes = self.parquet_read_options.range_coalesce_bytes(); + let concurrency = self.parquet_read_options.range_fetch_concurrency().max(1); + + async move { + // Merge nearby ranges to reduce the number of object store requests. + let fetch_ranges = merge_ranges(&ranges, coalesce_bytes); + let r = &self.r; + + // Fetch merged ranges concurrently. + let fetched: Vec<Bytes> = futures::stream::iter(fetch_ranges.iter().cloned()) + .map(|range| async move { + r.read(range) + .await + .map_err(|e| parquet::errors::ParquetError::External(Box::new(e))) + }) + .buffered(concurrency) + .try_collect() + .await?; + + // Slice the fetched data back into the originally requested ranges.
+ Ok(ranges + .iter() + .map(|range| { + let idx = fetch_ranges.partition_point(|v| v.start <= range.start) - 1; + let fetch_range = &fetch_ranges[idx]; + let fetch_bytes = &fetched[idx]; + let start = (range.start - fetch_range.start) as usize; + let end = (range.end - fetch_range.start) as usize; + fetch_bytes.slice(start..end.min(fetch_bytes.len())) + }) + .collect()) + } + .boxed() + } + + // TODO: we don't currently respect `ArrowReaderOptions` because it doesn't expose any method to access its option fields; + // we will fix this once `v55.1.0` is released, see https://github.com/apache/arrow-rs/issues/7393 + fn get_metadata( + &mut self, + _options: Option<&'_ ArrowReaderOptions>, + ) -> BoxFuture<'_, parquet::errors::Result<Arc<ParquetMetaData>>> { + async move { + let reader = ParquetMetaDataReader::new() + .with_prefetch_hint(self.parquet_read_options.metadata_size_hint()) + // Set the page policy first because it updates both column and offset policies. + .with_page_index_policy(PageIndexPolicy::from( + self.parquet_read_options.preload_page_index(), + )) + .with_column_index_policy(PageIndexPolicy::from( + self.parquet_read_options.preload_column_index(), + )) + .with_offset_index_policy(PageIndexPolicy::from( + self.parquet_read_options.preload_offset_index(), + )); + let size = self.meta.size; + let meta = reader.load_and_finish(self, size).await?; + + Ok(Arc::new(meta)) + } + .boxed() + } +} + +/// Merge overlapping or nearby byte ranges, combining ranges with gaps <= `coalesce` bytes. +/// Adapted from object_store's `merge_ranges` in `util.rs`. +fn merge_ranges(ranges: &[Range<u64>], coalesce: u64) -> Vec<Range<u64>> { + if ranges.is_empty() { + return vec![]; + } + + let mut ranges = ranges.to_vec(); + ranges.sort_unstable_by_key(|r| r.start); + + let mut merged = Vec::with_capacity(ranges.len()); + let mut start_idx = 0; + let mut end_idx = 1; + + while start_idx != ranges.len() { + let mut range_end = ranges[start_idx].end; + + while end_idx != ranges.len() + && ranges[end_idx] + .start + .checked_sub(range_end) + .map(|delta| delta <= coalesce) + .unwrap_or(true) + { + range_end = range_end.max(ranges[end_idx].end); + end_idx += 1; + } + + merged.push(ranges[start_idx].start..range_end); + start_idx = end_idx; + end_idx += 1; + } + + merged +} + +#[cfg(test)] +mod tests { + use std::ops::Range; + + use parquet::arrow::async_reader::AsyncFileReader; + + use super::{ArrowFileReader, ParquetReadOptions, merge_ranges}; + use crate::io::{FileMetadata, FileRead}; + + #[test] + fn test_merge_ranges_empty() { + assert_eq!(merge_ranges(&[], 1024), Vec::<Range<u64>>::new()); + } + + #[test] + fn test_merge_ranges_no_coalesce() { + // Ranges far apart should not be merged + let ranges = vec![0..100, 1_000_000..1_000_100]; + let merged = merge_ranges(&ranges, 1024); + assert_eq!(merged, vec![0..100, 1_000_000..1_000_100]); + } + + #[test] + fn test_merge_ranges_coalesce() { + // Ranges within the gap threshold should be merged + let ranges = vec![0..100, 200..300, 500..600]; + let merged = merge_ranges(&ranges, 1024); + assert_eq!(merged, vec![0..600]); + } + + #[test] + fn test_merge_ranges_overlapping() { + let ranges = vec![0..200, 100..300]; + let merged = merge_ranges(&ranges, 0); + assert_eq!(merged, vec![0..300]); + } + + #[test] + fn test_merge_ranges_unsorted() { + let ranges = vec![500..600, 0..100, 200..300]; + let merged = merge_ranges(&ranges, 1024); + assert_eq!(merged, vec![0..600]); + }
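A worked example of the slice-back step in `get_byte_ranges` above: after the merged ranges are fetched, `partition_point` locates the merged range that contains each original request so the requested bytes can be sliced back out of the fetched buffer:

```rust
fn main() {
    // Merged from the requests [0..100, 200..300, 500..600] with a 1 KiB gap.
    let fetch_ranges = vec![0u64..600];
    let request = 200u64..300;

    // Index of the last merged range starting at or before the request.
    let idx = fetch_ranges.partition_point(|v| v.start <= request.start) - 1;
    let fetch_range = &fetch_ranges[idx];

    // Offsets of the request within the fetched buffer.
    let start = (request.start - fetch_range.start) as usize;
    let end = (request.end - fetch_range.start) as usize;
    assert_eq!((idx, start, end), (0, 200, 300));
}
```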
+    struct MockFileRead {
+        data: bytes::Bytes,
+    }
+
+    impl MockFileRead {
+        fn new(size: usize) -> Self {
+            // Fill with sequential byte values so slices are verifiable.
+            let data: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
+            Self {
+                data: bytes::Bytes::from(data),
+            }
+        }
+    }
+
+    #[async_trait::async_trait]
+    impl FileRead for MockFileRead {
+        async fn read(&self, range: Range<u64>) -> crate::Result<bytes::Bytes> {
+            Ok(self.data.slice(range.start as usize..range.end as usize))
+        }
+    }
+
+    #[tokio::test]
+    async fn test_get_byte_ranges_no_coalesce() {
+        let mock = MockFileRead::new(2048);
+        let expected_0 = mock.data.slice(0..100);
+        let expected_1 = mock.data.slice(1500..1600);
+
+        let mut reader = ArrowFileReader::new(FileMetadata { size: 2048 }, Box::new(mock))
+            .with_parquet_read_options(
+                ParquetReadOptions::builder()
+                    .with_range_coalesce_bytes(0)
+                    .build(),
+            );
+
+        let result = reader
+            .get_byte_ranges(vec![0..100, 1500..1600])
+            .await
+            .unwrap();
+
+        assert_eq!(result.len(), 2);
+        assert_eq!(result[0], expected_0);
+        assert_eq!(result[1], expected_1);
+    }
+
+    #[tokio::test]
+    async fn test_get_byte_ranges_with_coalesce() {
+        let mock = MockFileRead::new(1024);
+        let expected_0 = mock.data.slice(0..100);
+        let expected_1 = mock.data.slice(200..300);
+        let expected_2 = mock.data.slice(500..600);
+
+        let mut reader = ArrowFileReader::new(FileMetadata { size: 1024 }, Box::new(mock))
+            .with_parquet_read_options(
+                ParquetReadOptions::builder()
+                    .with_range_coalesce_bytes(1024)
+                    .build(),
+            );
+
+        // All ranges within coalesce threshold — should merge into one fetch.
+        let result = reader
+            .get_byte_ranges(vec![0..100, 200..300, 500..600])
+            .await
+            .unwrap();
+
+        assert_eq!(result.len(), 3);
+        assert_eq!(result[0], expected_0);
+        assert_eq!(result[1], expected_1);
+        assert_eq!(result[2], expected_2);
+    }
+
+    #[tokio::test]
+    async fn test_get_byte_ranges_empty() {
+        let mock = MockFileRead::new(1024);
+        let mut reader = ArrowFileReader::new(FileMetadata { size: 1024 }, Box::new(mock));
+
+        let result = reader.get_byte_ranges(vec![]).await.unwrap();
+        assert!(result.is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_get_byte_ranges_coalesce_max() {
+        let mock = MockFileRead::new(2048);
+        let expected_0 = mock.data.slice(0..100);
+        let expected_1 = mock.data.slice(1500..1600);
+
+        let mut reader = ArrowFileReader::new(FileMetadata { size: 2048 }, Box::new(mock))
+            .with_parquet_read_options(
+                ParquetReadOptions::builder()
+                    .with_range_coalesce_bytes(u64::MAX)
+                    .build(),
+            );
+
+        // u64::MAX coalesce — all ranges merge into a single fetch.
+        let result = reader
+            .get_byte_ranges(vec![0..100, 1500..1600])
+            .await
+            .unwrap();
+
+        assert_eq!(result.len(), 2);
+        assert_eq!(result[0], expected_0);
+        assert_eq!(result[1], expected_1);
+    }
+
+    #[tokio::test]
+    async fn test_get_byte_ranges_concurrency_zero() {
+        // concurrency=0 is clamped to 1, so this should not hang.
+ let mock = MockFileRead::new(1024); + let expected = mock.data.slice(0..100); + + let mut reader = ArrowFileReader::new(FileMetadata { size: 1024 }, Box::new(mock)) + .with_parquet_read_options( + ParquetReadOptions::builder() + .with_range_fetch_concurrency(0) + .build(), + ); + + let result = reader + .get_byte_ranges(vec![0..100, 200..300]) + .await + .unwrap(); + assert_eq!(result.len(), 2); + assert_eq!(result[0], expected); + } + + #[tokio::test] + async fn test_get_byte_ranges_concurrency_one() { + let mock = MockFileRead::new(2048); + let expected_0 = mock.data.slice(0..100); + let expected_1 = mock.data.slice(500..600); + let expected_2 = mock.data.slice(1500..1600); + + let mut reader = ArrowFileReader::new(FileMetadata { size: 2048 }, Box::new(mock)) + .with_parquet_read_options( + ParquetReadOptions::builder() + .with_range_coalesce_bytes(0) + .with_range_fetch_concurrency(1) + .build(), + ); + + // concurrency=1 with no coalescing — sequential fetches. + let result = reader + .get_byte_ranges(vec![0..100, 500..600, 1500..1600]) + .await + .unwrap(); + + assert_eq!(result.len(), 3); + assert_eq!(result[0], expected_0); + assert_eq!(result[1], expected_1); + assert_eq!(result[2], expected_2); + } +} diff --git a/crates/iceberg/src/arrow/reader/mod.rs b/crates/iceberg/src/arrow/reader/mod.rs new file mode 100644 index 0000000000..c6c41accb7 --- /dev/null +++ b/crates/iceberg/src/arrow/reader/mod.rs @@ -0,0 +1,154 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Parquet file data reader + +use crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader; +use crate::io::FileIO; +use crate::util::available_parallelism; + +/// Default gap between byte ranges below which they are coalesced into a +/// single request. Matches object_store's `OBJECT_STORE_COALESCE_DEFAULT`. +const DEFAULT_RANGE_COALESCE_BYTES: u64 = 1024 * 1024; + +/// Default maximum number of coalesced byte ranges fetched concurrently. +/// Matches object_store's `OBJECT_STORE_COALESCE_PARALLEL`. +const DEFAULT_RANGE_FETCH_CONCURRENCY: usize = 10; + +/// Default number of bytes to prefetch when parsing Parquet footer metadata. +/// Matches DataFusion's default `ParquetOptions::metadata_size_hint`. 
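+/// If the footer and metadata turn out to be larger than the hint, the reader
+/// falls back to additional fetches, so this is a trade-off rather than a cap.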
+const DEFAULT_METADATA_SIZE_HINT: usize = 512 * 1024;
+
+mod file_reader;
+mod options;
+mod pipeline;
+mod positional_deletes;
+mod predicate_visitor;
+mod projection;
+mod row_filter;
+pub use file_reader::ArrowFileReader;
+pub(crate) use options::ParquetReadOptions;
+use predicate_visitor::{CollectFieldIdVisitor, PredicateConverter};
+use projection::{add_fallback_field_ids_to_arrow_schema, apply_name_mapping_to_arrow_schema};
+
+/// Builder to create ArrowReader
+pub struct ArrowReaderBuilder {
+    batch_size: Option<usize>,
+    file_io: FileIO,
+    concurrency_limit_data_files: usize,
+    row_group_filtering_enabled: bool,
+    row_selection_enabled: bool,
+    parquet_read_options: ParquetReadOptions,
+}
+
+impl ArrowReaderBuilder {
+    /// Create a new ArrowReaderBuilder
+    pub fn new(file_io: FileIO) -> Self {
+        let num_cpus = available_parallelism().get();
+
+        ArrowReaderBuilder {
+            batch_size: None,
+            file_io,
+            concurrency_limit_data_files: num_cpus,
+            row_group_filtering_enabled: true,
+            row_selection_enabled: false,
+            parquet_read_options: ParquetReadOptions::builder().build(),
+        }
+    }
+
+    /// Sets the max number of in-flight data files that are being fetched.
+    pub fn with_data_file_concurrency_limit(mut self, val: usize) -> Self {
+        self.concurrency_limit_data_files = val;
+        self
+    }
+
+    /// Sets the desired size of batches in the response
+    /// to something other than the default.
+    pub fn with_batch_size(mut self, batch_size: usize) -> Self {
+        self.batch_size = Some(batch_size);
+        self
+    }
+
+    /// Determines whether to enable row group filtering.
+    pub fn with_row_group_filtering_enabled(mut self, row_group_filtering_enabled: bool) -> Self {
+        self.row_group_filtering_enabled = row_group_filtering_enabled;
+        self
+    }
+
+    /// Determines whether to enable row selection.
+    pub fn with_row_selection_enabled(mut self, row_selection_enabled: bool) -> Self {
+        self.row_selection_enabled = row_selection_enabled;
+        self
+    }
+
+    /// Provide a hint as to the number of bytes to prefetch for parsing the Parquet metadata.
+    ///
+    /// This hint can help reduce the number of fetch requests. For more details see the
+    /// [ParquetMetaDataReader documentation](https://docs.rs/parquet/latest/parquet/file/metadata/struct.ParquetMetaDataReader.html#method.with_prefetch_hint).
+    pub fn with_metadata_size_hint(mut self, metadata_size_hint: usize) -> Self {
+        self.parquet_read_options.metadata_size_hint = Some(metadata_size_hint);
+        self
+    }
+
+    /// Sets the gap threshold for merging nearby byte ranges into a single request.
+    /// Ranges with gaps smaller than this value will be coalesced.
+    ///
+    /// Defaults to 1 MiB, matching object_store's OBJECT_STORE_COALESCE_DEFAULT.
+    pub fn with_range_coalesce_bytes(mut self, range_coalesce_bytes: u64) -> Self {
+        self.parquet_read_options.range_coalesce_bytes = range_coalesce_bytes;
+        self
+    }
+
+    /// Sets the maximum number of merged byte ranges to fetch concurrently.
+    ///
+    /// Defaults to 10, matching object_store's OBJECT_STORE_COALESCE_PARALLEL.
+    pub fn with_range_fetch_concurrency(mut self, range_fetch_concurrency: usize) -> Self {
+        self.parquet_read_options.range_fetch_concurrency = range_fetch_concurrency;
+        self
+    }
+
+    /// Build the ArrowReader.
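+    ///
+    /// A minimal usage sketch (illustrative only, not compiled as a doctest;
+    /// assumes a `file_io` handle is already in scope):
+    /// ```ignore
+    /// let reader = ArrowReaderBuilder::new(file_io)
+    ///     .with_batch_size(8192)
+    ///     .with_range_coalesce_bytes(1024 * 1024)
+    ///     .with_range_fetch_concurrency(10)
+    ///     .build();
+    /// ```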
+    pub fn build(self) -> ArrowReader {
+        ArrowReader {
+            batch_size: self.batch_size,
+            file_io: self.file_io.clone(),
+            delete_file_loader: CachingDeleteFileLoader::new(
+                self.file_io.clone(),
+                self.concurrency_limit_data_files,
+            ),
+            concurrency_limit_data_files: self.concurrency_limit_data_files,
+            row_group_filtering_enabled: self.row_group_filtering_enabled,
+            row_selection_enabled: self.row_selection_enabled,
+            parquet_read_options: self.parquet_read_options,
+        }
+    }
+}
+
+/// Reads data from Parquet files
+#[derive(Clone)]
+pub struct ArrowReader {
+    batch_size: Option<usize>,
+    file_io: FileIO,
+    delete_file_loader: CachingDeleteFileLoader,
+
+    /// The maximum number of data files that can be fetched at the same time
+    concurrency_limit_data_files: usize,
+
+    row_group_filtering_enabled: bool,
+    row_selection_enabled: bool,
+    parquet_read_options: ParquetReadOptions,
+}
diff --git a/crates/iceberg/src/arrow/reader/options.rs b/crates/iceberg/src/arrow/reader/options.rs
new file mode 100644
index 0000000000..ae6a3ed18e
--- /dev/null
+++ b/crates/iceberg/src/arrow/reader/options.rs
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Tunables for Parquet file I/O used by `ArrowReader`.
+
+use typed_builder::TypedBuilder;
+
+use super::{
+    DEFAULT_METADATA_SIZE_HINT, DEFAULT_RANGE_COALESCE_BYTES, DEFAULT_RANGE_FETCH_CONCURRENCY,
+};
+
+/// Options for tuning Parquet file I/O.
+#[derive(Clone, Copy, Debug, TypedBuilder)]
+#[builder(field_defaults(setter(prefix = "with_")))]
+pub(crate) struct ParquetReadOptions {
+    /// Number of bytes to prefetch for parsing the Parquet metadata.
+    ///
+    /// This hint can help reduce the number of fetch requests. For more details see the
+    /// [ParquetMetaDataReader documentation](https://docs.rs/parquet/latest/parquet/file/metadata/struct.ParquetMetaDataReader.html#method.with_prefetch_hint).
+    ///
+    /// Defaults to 512 KiB, matching DataFusion's default `ParquetOptions::metadata_size_hint`.
+    #[builder(default = Some(DEFAULT_METADATA_SIZE_HINT))]
+    pub(crate) metadata_size_hint: Option<usize>,
+    /// Gap threshold for merging nearby byte ranges into a single request.
+    /// Ranges with gaps smaller than this value will be coalesced.
+    ///
+    /// Defaults to 1 MiB, matching object_store's `OBJECT_STORE_COALESCE_DEFAULT`.
+    #[builder(default = DEFAULT_RANGE_COALESCE_BYTES)]
+    pub(crate) range_coalesce_bytes: u64,
+    /// Maximum number of merged byte ranges to fetch concurrently.
+    ///
+    /// Defaults to 10, matching object_store's `OBJECT_STORE_COALESCE_PARALLEL`.
+    #[builder(default = DEFAULT_RANGE_FETCH_CONCURRENCY)]
+    pub(crate) range_fetch_concurrency: usize,
+    /// Whether to preload the column index when reading Parquet metadata.
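+    ///
+    /// The column index stores per-page min/max statistics, which lets the
+    /// reader prune individual pages against a predicate without decoding them.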
+    #[builder(default = true)]
+    pub(crate) preload_column_index: bool,
+    /// Whether to preload the offset index when reading Parquet metadata.
+    #[builder(default = true)]
+    pub(crate) preload_offset_index: bool,
+    /// Whether to preload the page index when reading Parquet metadata.
+    #[builder(default = false)]
+    pub(crate) preload_page_index: bool,
+}
+
+impl ParquetReadOptions {
+    pub(crate) fn metadata_size_hint(&self) -> Option<usize> {
+        self.metadata_size_hint
+    }
+
+    pub(crate) fn range_coalesce_bytes(&self) -> u64 {
+        self.range_coalesce_bytes
+    }
+
+    pub(crate) fn range_fetch_concurrency(&self) -> usize {
+        self.range_fetch_concurrency
+    }
+
+    pub(crate) fn preload_column_index(&self) -> bool {
+        self.preload_column_index
+    }
+
+    pub(crate) fn preload_offset_index(&self) -> bool {
+        self.preload_offset_index
+    }
+
+    pub(crate) fn preload_page_index(&self) -> bool {
+        self.preload_page_index
+    }
+}
diff --git a/crates/iceberg/src/arrow/reader/pipeline.rs b/crates/iceberg/src/arrow/reader/pipeline.rs
new file mode 100644
index 0000000000..94059fc62b
--- /dev/null
+++ b/crates/iceberg/src/arrow/reader/pipeline.rs
@@ -0,0 +1,1174 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! The main `ArrowReader` pipeline: reading a stream of `FileScanTask`s,
+//! opening Parquet files and resolving schemas, then wiring projection,
+//! predicates, row-group / row selection, and delete handling into a stream
+//! of transformed Arrow `RecordBatch`es.
+
+use std::sync::Arc;
+
+use futures::{StreamExt, TryStreamExt};
+use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions};
+use parquet::arrow::{PARQUET_FIELD_ID_META_KEY, ParquetRecordBatchStreamBuilder};
+
+use super::{
+    ArrowFileReader, ArrowReader, ParquetReadOptions, add_fallback_field_ids_to_arrow_schema,
+    apply_name_mapping_to_arrow_schema,
+};
+use crate::arrow::caching_delete_file_loader::CachingDeleteFileLoader;
+use crate::arrow::int96::coerce_int96_timestamps;
+use crate::arrow::record_batch_transformer::RecordBatchTransformerBuilder;
+use crate::error::Result;
+use crate::io::{FileIO, FileMetadata};
+use crate::metadata_columns::{RESERVED_FIELD_ID_FILE, is_metadata_field};
+use crate::scan::{ArrowRecordBatchStream, FileScanTask, FileScanTaskStream};
+use crate::spec::Datum;
+use crate::{Error, ErrorKind};
+
+impl ArrowReader {
+    /// Takes a stream of FileScanTasks and reads all the files.
+    /// Returns a stream of Arrow RecordBatches containing the data from the files.
+    pub fn read(self, tasks: FileScanTaskStream) -> Result<ArrowRecordBatchStream> {
+        let file_io = self.file_io.clone();
+        let batch_size = self.batch_size;
+        let concurrency_limit_data_files = self.concurrency_limit_data_files;
+        let row_group_filtering_enabled = self.row_group_filtering_enabled;
+        let row_selection_enabled = self.row_selection_enabled;
+        let parquet_read_options = self.parquet_read_options;
+
+        // Fast-path for single concurrency to avoid overhead of try_flatten_unordered
+        let stream: ArrowRecordBatchStream = if concurrency_limit_data_files == 1 {
+            Box::pin(
+                tasks
+                    .and_then(move |task| {
+                        let file_io = file_io.clone();
+
+                        Self::process_file_scan_task(
+                            task,
+                            batch_size,
+                            file_io,
+                            self.delete_file_loader.clone(),
+                            row_group_filtering_enabled,
+                            row_selection_enabled,
+                            parquet_read_options,
+                        )
+                    })
+                    .map_err(|err| {
+                        Error::new(ErrorKind::Unexpected, "file scan task generation failed")
+                            .with_source(err)
+                    })
+                    .try_flatten(),
+            )
+        } else {
+            Box::pin(
+                tasks
+                    .map_ok(move |task| {
+                        let file_io = file_io.clone();
+
+                        Self::process_file_scan_task(
+                            task,
+                            batch_size,
+                            file_io,
+                            self.delete_file_loader.clone(),
+                            row_group_filtering_enabled,
+                            row_selection_enabled,
+                            parquet_read_options,
+                        )
+                    })
+                    .map_err(|err| {
+                        Error::new(ErrorKind::Unexpected, "file scan task generation failed")
+                            .with_source(err)
+                    })
+                    .try_buffer_unordered(concurrency_limit_data_files)
+                    .try_flatten_unordered(concurrency_limit_data_files),
+            )
+        };
+
+        Ok(stream)
+    }
+
+    async fn process_file_scan_task(
+        task: FileScanTask,
+        batch_size: Option<usize>,
+        file_io: FileIO,
+        delete_file_loader: CachingDeleteFileLoader,
+        row_group_filtering_enabled: bool,
+        row_selection_enabled: bool,
+        parquet_read_options: ParquetReadOptions,
+    ) -> Result<ArrowRecordBatchStream> {
+        let should_load_page_index =
+            (row_selection_enabled && task.predicate.is_some()) || !task.deletes.is_empty();
+        let mut parquet_read_options = parquet_read_options;
+        parquet_read_options.preload_page_index = should_load_page_index;
+
+        let delete_filter_rx =
+            delete_file_loader.load_deletes(&task.deletes, Arc::clone(&task.schema));
+
+        // Open the Parquet file once, loading its metadata
+        let (parquet_file_reader, arrow_metadata) = Self::open_parquet_file(
+            &task.data_file_path,
+            &file_io,
+            task.file_size_in_bytes,
+            parquet_read_options,
+        )
+        .await?;
+
+        // Check if Parquet file has embedded field IDs
+        // Corresponds to Java's ParquetSchemaUtil.hasIds()
+        // Reference: parquet/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java:118
+        let missing_field_ids = arrow_metadata
+            .schema()
+            .fields()
+            .iter()
+            .next()
+            .is_some_and(|f| f.metadata().get(PARQUET_FIELD_ID_META_KEY).is_none());
+
+        // Three-branch schema resolution strategy matching Java's ReadConf constructor
+        //
+        // Per Iceberg spec Column Projection rules:
+        // "Columns in Iceberg data files are selected by field id. The table schema's column
+        // names and order may change after a data file is written, and projection must be done
+        // using field ids."
+        // https://iceberg.apache.org/spec/#column-projection
+        //
+        // When Parquet files lack field IDs (e.g., Hive/Spark migrations via add_files),
+        // we must assign field IDs BEFORE reading data to enable correct projection.
+ // + // Java's ReadConf determines field ID strategy: + // - Branch 1: hasIds(fileSchema) → trust embedded field IDs, use pruneColumns() + // - Branch 2: nameMapping present → applyNameMapping(), then pruneColumns() + // - Branch 3: fallback → addFallbackIds(), then pruneColumnsFallback() + let arrow_metadata = if missing_field_ids { + // Parquet file lacks field IDs - must assign them before reading + let arrow_schema = if let Some(name_mapping) = &task.name_mapping { + // Branch 2: Apply name mapping to assign correct Iceberg field IDs + // Per spec rule #2: "Use schema.name-mapping.default metadata to map field id + // to columns without field id" + // Corresponds to Java's ParquetSchemaUtil.applyNameMapping() + apply_name_mapping_to_arrow_schema( + Arc::clone(arrow_metadata.schema()), + name_mapping, + )? + } else { + // Branch 3: No name mapping - use position-based fallback IDs + // Corresponds to Java's ParquetSchemaUtil.addFallbackIds() + add_fallback_field_ids_to_arrow_schema(arrow_metadata.schema()) + }; + + let options = ArrowReaderOptions::new().with_schema(arrow_schema); + ArrowReaderMetadata::try_new(Arc::clone(arrow_metadata.metadata()), options).map_err( + |e| { + Error::new( + ErrorKind::Unexpected, + "Failed to create ArrowReaderMetadata with field ID schema", + ) + .with_source(e) + }, + )? + } else { + // Branch 1: File has embedded field IDs - trust them + arrow_metadata + }; + + // Coerce INT96 timestamp columns to the resolution specified by the Iceberg schema. + // This must happen before building the stream reader to avoid i64 overflow in arrow-rs. + let arrow_metadata = if let Some(coerced_schema) = + coerce_int96_timestamps(arrow_metadata.schema(), &task.schema) + { + let options = ArrowReaderOptions::new().with_schema(Arc::clone(&coerced_schema)); + ArrowReaderMetadata::try_new(Arc::clone(arrow_metadata.metadata()), options).map_err( + |e| { + Error::new( + ErrorKind::Unexpected, + format!( + "Failed to create ArrowReaderMetadata with INT96-coerced schema: {coerced_schema}" + ), + ) + .with_source(e) + }, + )? 
+ } else { + arrow_metadata + }; + + // Build the stream reader, reusing the already-opened file reader + let mut record_batch_stream_builder = + ParquetRecordBatchStreamBuilder::new_with_metadata(parquet_file_reader, arrow_metadata); + + // Filter out metadata fields for Parquet projection (they don't exist in files) + let project_field_ids_without_metadata: Vec = task + .project_field_ids + .iter() + .filter(|&&id| !is_metadata_field(id)) + .copied() + .collect(); + + // Create projection mask based on field IDs + // - If file has embedded IDs: field-ID-based projection (missing_field_ids=false) + // - If name mapping applied: field-ID-based projection (missing_field_ids=true but IDs now match) + // - If fallback IDs: position-based projection (missing_field_ids=true) + let projection_mask = Self::get_arrow_projection_mask( + &project_field_ids_without_metadata, + &task.schema, + record_batch_stream_builder.parquet_schema(), + record_batch_stream_builder.schema(), + missing_field_ids, // Whether to use position-based (true) or field-ID-based (false) projection + )?; + + record_batch_stream_builder = + record_batch_stream_builder.with_projection(projection_mask.clone()); + + // RecordBatchTransformer performs any transformations required on the RecordBatches + // that come back from the file, such as type promotion, default column insertion, + // column re-ordering, partition constants, and virtual field addition (like _file) + let mut record_batch_transformer_builder = + RecordBatchTransformerBuilder::new(task.schema_ref(), task.project_field_ids()); + + // Add the _file metadata column if it's in the projected fields + if task.project_field_ids().contains(&RESERVED_FIELD_ID_FILE) { + let file_datum = Datum::string(task.data_file_path.clone()); + record_batch_transformer_builder = + record_batch_transformer_builder.with_constant(RESERVED_FIELD_ID_FILE, file_datum); + } + + if let (Some(partition_spec), Some(partition_data)) = + (task.partition_spec.clone(), task.partition.clone()) + { + record_batch_transformer_builder = + record_batch_transformer_builder.with_partition(partition_spec, partition_data)?; + } + + let mut record_batch_transformer = record_batch_transformer_builder.build(); + + if let Some(batch_size) = batch_size { + record_batch_stream_builder = record_batch_stream_builder.with_batch_size(batch_size); + } + + let delete_filter = delete_filter_rx.await.unwrap()?; + let delete_predicate = delete_filter.build_equality_delete_predicate(&task).await?; + + // In addition to the optional predicate supplied in the `FileScanTask`, + // we also have an optional predicate resulting from equality delete files. + // If both are present, we logical-AND them together to form a single filter + // predicate that we can pass to the `RecordBatchStreamBuilder`. + let final_predicate = match (&task.predicate, delete_predicate) { + (None, None) => None, + (Some(predicate), None) => Some(predicate.clone()), + (None, Some(ref predicate)) => Some(predicate.clone()), + (Some(filter_predicate), Some(delete_predicate)) => { + Some(filter_predicate.clone().and(delete_predicate)) + } + }; + + // There are three possible sources for potential lists of selected RowGroup indices, + // and two for `RowSelection`s. + // Selected RowGroup index lists can come from three sources: + // * When task.start and task.length specify a byte range (file splitting); + // * When there are equality delete files that are applicable; + // * When there is a scan predicate and row_group_filtering_enabled = true. 
+ // `RowSelection`s can be created in either or both of the following cases: + // * When there are positional delete files that are applicable; + // * When there is a scan predicate and row_selection_enabled = true + // Note that row group filtering from predicates only happens when + // there is a scan predicate AND row_group_filtering_enabled = true, + // but we perform row selection filtering if there are applicable + // equality delete files OR (there is a scan predicate AND row_selection_enabled), + // since the only implemented method of applying positional deletes is + // by using a `RowSelection`. + let mut selected_row_group_indices = None; + let mut row_selection = None; + + // Filter row groups based on byte range from task.start and task.length. + // If both start and length are 0, read the entire file (backwards compatibility). + if task.start != 0 || task.length != 0 { + let byte_range_filtered_row_groups = Self::filter_row_groups_by_byte_range( + record_batch_stream_builder.metadata(), + task.start, + task.length, + )?; + selected_row_group_indices = Some(byte_range_filtered_row_groups); + } + + if let Some(predicate) = final_predicate { + let (iceberg_field_ids, field_id_map) = Self::build_field_id_set_and_map( + record_batch_stream_builder.parquet_schema(), + &predicate, + )?; + + let row_filter = Self::get_row_filter( + &predicate, + record_batch_stream_builder.parquet_schema(), + &iceberg_field_ids, + &field_id_map, + )?; + record_batch_stream_builder = record_batch_stream_builder.with_row_filter(row_filter); + + if row_group_filtering_enabled { + let predicate_filtered_row_groups = Self::get_selected_row_group_indices( + &predicate, + record_batch_stream_builder.metadata(), + &field_id_map, + &task.schema, + )?; + + // Merge predicate-based filtering with byte range filtering (if present) + // by taking the intersection of both filters + selected_row_group_indices = match selected_row_group_indices { + Some(byte_range_filtered) => { + // Keep only row groups that are in both filters + let intersection: Vec = byte_range_filtered + .into_iter() + .filter(|idx| predicate_filtered_row_groups.contains(idx)) + .collect(); + Some(intersection) + } + None => Some(predicate_filtered_row_groups), + }; + } + + if row_selection_enabled { + row_selection = Some(Self::get_row_selection_for_filter_predicate( + &predicate, + record_batch_stream_builder.metadata(), + &selected_row_group_indices, + &field_id_map, + &task.schema, + )?); + } + } + + let positional_delete_indexes = delete_filter.get_delete_vector(&task); + + if let Some(positional_delete_indexes) = positional_delete_indexes { + let delete_row_selection = { + let positional_delete_indexes = positional_delete_indexes.lock().unwrap(); + + Self::build_deletes_row_selection( + record_batch_stream_builder.metadata().row_groups(), + &selected_row_group_indices, + &positional_delete_indexes, + ) + }?; + + // merge the row selection from the delete files with the row selection + // from the filter predicate, if there is one from the filter predicate + row_selection = match row_selection { + None => Some(delete_row_selection), + Some(filter_row_selection) => { + Some(filter_row_selection.intersection(&delete_row_selection)) + } + }; + } + + if let Some(row_selection) = row_selection { + record_batch_stream_builder = + record_batch_stream_builder.with_row_selection(row_selection); + } + + if let Some(selected_row_group_indices) = selected_row_group_indices { + record_batch_stream_builder = + 
record_batch_stream_builder.with_row_groups(selected_row_group_indices); + } + + // Build the batch stream and send all the RecordBatches that it generates + // to the requester. + let record_batch_stream = + record_batch_stream_builder + .build()? + .map(move |batch| match batch { + Ok(batch) => { + // Process the record batch (type promotion, column reordering, virtual fields, etc.) + record_batch_transformer.process_record_batch(batch) + } + Err(err) => Err(err.into()), + }); + + Ok(Box::pin(record_batch_stream) as ArrowRecordBatchStream) + } + + /// Opens a Parquet file and loads its metadata, returning both the reader and metadata. + /// The reader can be reused to build a `ParquetRecordBatchStreamBuilder` without + /// reopening the file. + pub(crate) async fn open_parquet_file( + data_file_path: &str, + file_io: &FileIO, + file_size_in_bytes: u64, + parquet_read_options: ParquetReadOptions, + ) -> Result<(ArrowFileReader, ArrowReaderMetadata)> { + let parquet_file = file_io.new_input(data_file_path)?; + let parquet_reader = parquet_file.reader().await?; + let mut reader = ArrowFileReader::new( + FileMetadata { + size: file_size_in_bytes, + }, + parquet_reader, + ) + .with_parquet_read_options(parquet_read_options); + + let arrow_metadata = ArrowReaderMetadata::load_async(&mut reader, Default::default()) + .await + .map_err(|e| { + Error::new(ErrorKind::Unexpected, "Failed to load Parquet metadata").with_source(e) + })?; + + Ok((reader, arrow_metadata)) + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + use std::fs::File; + use std::sync::Arc; + + use arrow_array::cast::AsArray; + use arrow_array::{Array, ArrayRef, RecordBatch}; + use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use futures::TryStreamExt; + use parquet::arrow::{ArrowWriter, PARQUET_FIELD_ID_META_KEY}; + use parquet::basic::Compression; + use parquet::file::properties::WriterProperties; + use tempfile::TempDir; + + use crate::arrow::ArrowReaderBuilder; + use crate::io::FileIO; + use crate::scan::{FileScanTask, FileScanTaskStream}; + use crate::spec::{DataFileFormat, NestedField, PrimitiveType, Schema, SchemaRef, Type}; + + // INT96 encoding: [nanos_low_u32, nanos_high_u32, julian_day_u32] + // Julian day 2_440_588 = Unix epoch (1970-01-01) + const UNIX_EPOCH_JULIAN: i64 = 2_440_588; + const MICROS_PER_DAY: i64 = 86_400_000_000; + // Noon on 3333-01-01 (Julian day 2_953_529) — outside the i64 nanosecond range (~1677-2262). 
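+    // 12 h * 3_600 s/h * 1_000_000_000 ns/s = 43_200_000_000_000 ns into the day.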
+ const INT96_TEST_NANOS_WITHIN_DAY: u64 = 43_200_000_000_000; + const INT96_TEST_JULIAN_DAY: u32 = 2_953_529; + + fn make_int96_test_value() -> (parquet::data_type::Int96, i64) { + let mut val = parquet::data_type::Int96::new(); + val.set_data( + (INT96_TEST_NANOS_WITHIN_DAY & 0xFFFFFFFF) as u32, + (INT96_TEST_NANOS_WITHIN_DAY >> 32) as u32, + INT96_TEST_JULIAN_DAY, + ); + let expected_micros = (INT96_TEST_JULIAN_DAY as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY + + (INT96_TEST_NANOS_WITHIN_DAY / 1_000) as i64; + (val, expected_micros) + } + + async fn read_int96_batches( + file_path: &str, + schema: SchemaRef, + project_field_ids: Vec, + ) -> Vec { + let file_io = FileIO::new_with_fs(); + let reader = ArrowReaderBuilder::new(file_io).build(); + + let file_size = std::fs::metadata(file_path).unwrap().len(); + let task = FileScanTask { + file_size_in_bytes: file_size, + start: 0, + length: file_size, + record_count: None, + data_file_path: file_path.to_string(), + data_file_format: DataFileFormat::Parquet, + schema, + project_field_ids, + predicate: None, + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false, + }; + + let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream; + reader.read(tasks).unwrap().try_collect().await.unwrap() + } + + // ArrowWriter cannot write INT96, so we use SerializedFileWriter directly. + fn write_int96_parquet_file( + table_location: &str, + filename: &str, + with_field_ids: bool, + ) -> (String, Vec) { + use parquet::basic::{Repetition, Type as PhysicalType}; + use parquet::data_type::{Int32Type, Int96, Int96Type}; + use parquet::file::writer::SerializedFileWriter; + use parquet::schema::types::Type as SchemaType; + + let file_path = format!("{table_location}/{filename}"); + + let mut ts_builder = SchemaType::primitive_type_builder("ts", PhysicalType::INT96) + .with_repetition(Repetition::OPTIONAL); + let mut id_builder = SchemaType::primitive_type_builder("id", PhysicalType::INT32) + .with_repetition(Repetition::REQUIRED); + + if with_field_ids { + ts_builder = ts_builder.with_id(Some(1)); + id_builder = id_builder.with_id(Some(2)); + } + + let schema = SchemaType::group_type_builder("schema") + .with_fields(vec![ + Arc::new(ts_builder.build().unwrap()), + Arc::new(id_builder.build().unwrap()), + ]) + .build() + .unwrap(); + + // Dates outside the i64 nanosecond range (~1677-2262) overflow without coercion. 
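+        // (i64 nanoseconds since the Unix epoch cover 1677-09-21 through 2262-04-11.)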
+ const NOON_NANOS: u64 = INT96_TEST_NANOS_WITHIN_DAY; + const JULIAN_3333: u32 = INT96_TEST_JULIAN_DAY; + const JULIAN_2100: u32 = 2_488_070; + + let test_data: Vec<(u32, u32, u32, i64)> = vec![ + // 3333-01-01 00:00:00 + ( + 0, + 0, + JULIAN_3333, + (JULIAN_3333 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY, + ), + // 3333-01-01 12:00:00 + ( + (NOON_NANOS & 0xFFFFFFFF) as u32, + (NOON_NANOS >> 32) as u32, + JULIAN_3333, + (JULIAN_3333 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY + + (NOON_NANOS / 1_000) as i64, + ), + // 2100-01-01 00:00:00 + ( + 0, + 0, + JULIAN_2100, + (JULIAN_2100 as i64 - UNIX_EPOCH_JULIAN) * MICROS_PER_DAY, + ), + ]; + + let int96_values: Vec = test_data + .iter() + .map(|(lo, hi, day, _)| { + let mut v = Int96::new(); + v.set_data(*lo, *hi, *day); + v + }) + .collect(); + + let id_values: Vec = (0..test_data.len() as i32).collect(); + let expected_micros: Vec = test_data.iter().map(|(_, _, _, m)| *m).collect(); + + let file = File::create(&file_path).unwrap(); + let mut writer = + SerializedFileWriter::new(file, Arc::new(schema), Default::default()).unwrap(); + + let mut row_group = writer.next_row_group().unwrap(); + { + // def=1: ts is OPTIONAL and present. No repetition levels (top-level columns). + let mut col = row_group.next_column().unwrap().unwrap(); + col.typed::() + .write_batch(&int96_values, Some(&vec![1; test_data.len()]), None) + .unwrap(); + col.close().unwrap(); + } + { + let mut col = row_group.next_column().unwrap().unwrap(); + col.typed::() + .write_batch(&id_values, None, None) + .unwrap(); + col.close().unwrap(); + } + row_group.close().unwrap(); + writer.close().unwrap(); + + (file_path, expected_micros) + } + + async fn assert_int96_read_matches( + file_path: &str, + schema: SchemaRef, + project_field_ids: Vec, + expected_micros: &[i64], + ) { + use arrow_array::TimestampMicrosecondArray; + + let batches = read_int96_batches(file_path, schema, project_field_ids).await; + + assert_eq!(batches.len(), 1); + let ts_array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .expect("Expected TimestampMicrosecondArray"); + + for (i, expected) in expected_micros.iter().enumerate() { + assert_eq!( + ts_array.value(i), + *expected, + "Row {i}: got {}, expected {expected}", + ts_array.value(i) + ); + } + } + + /// Test that concurrency=1 reads all files correctly and in deterministic order. + /// This verifies the fast-path optimization for single concurrency. 
+ #[tokio::test] + async fn test_read_with_concurrency_one() { + use arrow_array::Int32Array; + + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::required(2, "file_num", Type::Primitive(PrimitiveType::Int)) + .into(), + ]) + .build() + .unwrap(), + ); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "1".to_string(), + )])), + Field::new("file_num", DataType::Int32, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "2".to_string(), + )])), + ])); + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_io = FileIO::new_with_fs(); + + // Create 3 parquet files with different data + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + + for file_num in 0..3 { + let id_data = Arc::new(Int32Array::from_iter_values( + file_num * 10..(file_num + 1) * 10, + )) as ArrayRef; + let file_num_data = Arc::new(Int32Array::from(vec![file_num; 10])) as ArrayRef; + + let to_write = + RecordBatch::try_new(arrow_schema.clone(), vec![id_data, file_num_data]).unwrap(); + + let file = File::create(format!("{table_location}/file_{file_num}.parquet")).unwrap(); + let mut writer = + ArrowWriter::try_new(file, to_write.schema(), Some(props.clone())).unwrap(); + writer.write(&to_write).expect("Writing batch"); + writer.close().unwrap(); + } + + // Read with concurrency=1 (fast-path) + let reader = ArrowReaderBuilder::new(file_io) + .with_data_file_concurrency_limit(1) + .build(); + + // Create tasks in a specific order: file_0, file_1, file_2 + let tasks = vec![ + Ok(FileScanTask { + file_size_in_bytes: std::fs::metadata(format!("{table_location}/file_0.parquet")) + .unwrap() + .len(), + start: 0, + length: 0, + record_count: None, + data_file_path: format!("{table_location}/file_0.parquet"), + data_file_format: DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![1, 2], + predicate: None, + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false, + }), + Ok(FileScanTask { + file_size_in_bytes: std::fs::metadata(format!("{table_location}/file_1.parquet")) + .unwrap() + .len(), + start: 0, + length: 0, + record_count: None, + data_file_path: format!("{table_location}/file_1.parquet"), + data_file_format: DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![1, 2], + predicate: None, + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false, + }), + Ok(FileScanTask { + file_size_in_bytes: std::fs::metadata(format!("{table_location}/file_2.parquet")) + .unwrap() + .len(), + start: 0, + length: 0, + record_count: None, + data_file_path: format!("{table_location}/file_2.parquet"), + data_file_format: DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![1, 2], + predicate: None, + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false, + }), + ]; + + let tasks_stream = Box::pin(futures::stream::iter(tasks)) as FileScanTaskStream; + + let result = reader + .read(tasks_stream) + .unwrap() + .try_collect::>() + .await + .unwrap(); + + // Verify we got all 30 rows (10 from each file) + let total_rows: usize = 
result.iter().map(|b| b.num_rows()).sum(); + assert_eq!(total_rows, 30, "Should have 30 total rows"); + + // Collect all ids and file_nums to verify data + let mut all_ids = Vec::new(); + let mut all_file_nums = Vec::new(); + + for batch in &result { + let id_col = batch + .column(0) + .as_primitive::(); + let file_num_col = batch + .column(1) + .as_primitive::(); + + for i in 0..batch.num_rows() { + all_ids.push(id_col.value(i)); + all_file_nums.push(file_num_col.value(i)); + } + } + + assert_eq!(all_ids.len(), 30); + assert_eq!(all_file_nums.len(), 30); + + // With concurrency=1 and sequential processing, files should be processed in order + // file_0: ids 0-9, file_num=0 + // file_1: ids 10-19, file_num=1 + // file_2: ids 20-29, file_num=2 + for i in 0..10 { + assert_eq!(all_file_nums[i], 0, "First 10 rows should be from file_0"); + assert_eq!(all_ids[i], i as i32, "IDs should be 0-9"); + } + for i in 10..20 { + assert_eq!(all_file_nums[i], 1, "Next 10 rows should be from file_1"); + assert_eq!(all_ids[i], i as i32, "IDs should be 10-19"); + } + for i in 20..30 { + assert_eq!(all_file_nums[i], 2, "Last 10 rows should be from file_2"); + assert_eq!(all_ids[i], i as i32, "IDs should be 20-29"); + } + } + + #[tokio::test] + async fn test_read_int96_timestamps_with_field_ids() { + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp)) + .into(), + NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(), + ]) + .build() + .unwrap(), + ); + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let (file_path, expected_micros) = + write_int96_parquet_file(&table_location, "with_ids.parquet", true); + + assert_int96_read_matches(&file_path, schema, vec![1, 2], &expected_micros).await; + } + + #[tokio::test] + async fn test_read_int96_timestamps_without_field_ids() { + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::optional(1, "ts", Type::Primitive(PrimitiveType::Timestamp)) + .into(), + NestedField::required(2, "id", Type::Primitive(PrimitiveType::Int)).into(), + ]) + .build() + .unwrap(), + ); + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let (file_path, expected_micros) = + write_int96_parquet_file(&table_location, "no_ids.parquet", false); + + assert_int96_read_matches(&file_path, schema, vec![1, 2], &expected_micros).await; + } + + #[tokio::test] + async fn test_read_int96_timestamps_in_struct() { + use arrow_array::{StructArray, TimestampMicrosecondArray}; + use parquet::basic::{Repetition, Type as PhysicalType}; + use parquet::data_type::Int96Type; + use parquet::file::writer::SerializedFileWriter; + use parquet::schema::types::Type as SchemaType; + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_path = format!("{table_location}/struct_int96.parquet"); + + let ts_type = SchemaType::primitive_type_builder("ts", PhysicalType::INT96) + .with_repetition(Repetition::OPTIONAL) + .with_id(Some(2)) + .build() + .unwrap(); + + let struct_type = SchemaType::group_type_builder("data") + .with_repetition(Repetition::REQUIRED) + .with_id(Some(1)) + .with_fields(vec![Arc::new(ts_type)]) + .build() + .unwrap(); + + let parquet_schema = SchemaType::group_type_builder("schema") + .with_fields(vec![Arc::new(struct_type)]) + .build() + 
.unwrap(); + + let (int96_val, expected_micros) = make_int96_test_value(); + + let file = File::create(&file_path).unwrap(); + let mut writer = + SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap(); + + // def=1: struct is REQUIRED so no level, ts is OPTIONAL and present (1). + // No repetition levels needed (no repeated groups). + let mut row_group = writer.next_row_group().unwrap(); + { + let mut col = row_group.next_column().unwrap().unwrap(); + col.typed::() + .write_batch(&[int96_val], Some(&[1]), None) + .unwrap(); + col.close().unwrap(); + } + row_group.close().unwrap(); + writer.close().unwrap(); + + let iceberg_schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required( + 1, + "data", + Type::Struct(crate::spec::StructType::new(vec![ + NestedField::optional( + 2, + "ts", + Type::Primitive(PrimitiveType::Timestamp), + ) + .into(), + ])), + ) + .into(), + ]) + .build() + .unwrap(), + ); + + let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await; + + assert_eq!(batches.len(), 1); + let struct_array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .expect("Expected StructArray"); + let ts_array = struct_array + .column(0) + .as_any() + .downcast_ref::() + .expect("Expected TimestampMicrosecondArray inside struct"); + + assert_eq!( + ts_array.value(0), + expected_micros, + "INT96 in struct: got {}, expected {expected_micros}", + ts_array.value(0) + ); + } + + #[tokio::test] + async fn test_read_int96_timestamps_in_list() { + use arrow_array::{ListArray, TimestampMicrosecondArray}; + use parquet::basic::{Repetition, Type as PhysicalType}; + use parquet::data_type::Int96Type; + use parquet::file::writer::SerializedFileWriter; + use parquet::schema::types::Type as SchemaType; + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_path = format!("{table_location}/list_int96.parquet"); + + // 3-level LIST encoding: + // optional group timestamps (LIST) { + // repeated group list { + // optional int96 element; + // } + // } + let element_type = SchemaType::primitive_type_builder("element", PhysicalType::INT96) + .with_repetition(Repetition::OPTIONAL) + .with_id(Some(2)) + .build() + .unwrap(); + + let list_group = SchemaType::group_type_builder("list") + .with_repetition(Repetition::REPEATED) + .with_fields(vec![Arc::new(element_type)]) + .build() + .unwrap(); + + let list_type = SchemaType::group_type_builder("timestamps") + .with_repetition(Repetition::OPTIONAL) + .with_id(Some(1)) + .with_logical_type(Some(parquet::basic::LogicalType::List)) + .with_fields(vec![Arc::new(list_group)]) + .build() + .unwrap(); + + let parquet_schema = SchemaType::group_type_builder("schema") + .with_fields(vec![Arc::new(list_type)]) + .build() + .unwrap(); + + let (int96_val, expected_micros) = make_int96_test_value(); + + let file = File::create(&file_path).unwrap(); + let mut writer = + SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap(); + + // Write a single row with a list containing one INT96 element. 
+ // def=3: list present (1) + repeated group (2) + element present (3) + // rep=0: start of a new list + let mut row_group = writer.next_row_group().unwrap(); + { + let mut col = row_group.next_column().unwrap().unwrap(); + col.typed::() + .write_batch(&[int96_val], Some(&[3]), Some(&[0])) + .unwrap(); + col.close().unwrap(); + } + row_group.close().unwrap(); + writer.close().unwrap(); + + let iceberg_schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::optional( + 1, + "timestamps", + Type::List(crate::spec::ListType { + element_field: NestedField::optional( + 2, + "element", + Type::Primitive(PrimitiveType::Timestamp), + ) + .into(), + }), + ) + .into(), + ]) + .build() + .unwrap(), + ); + + let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await; + + assert_eq!(batches.len(), 1); + let list_array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .expect("Expected ListArray"); + let ts_array = list_array + .values() + .as_any() + .downcast_ref::() + .expect("Expected TimestampMicrosecondArray inside list"); + + assert_eq!( + ts_array.value(0), + expected_micros, + "INT96 in list: got {}, expected {expected_micros}", + ts_array.value(0) + ); + } + + #[tokio::test] + async fn test_read_int96_timestamps_in_map() { + use arrow_array::{MapArray, TimestampMicrosecondArray}; + use parquet::basic::{Repetition, Type as PhysicalType}; + use parquet::data_type::{ByteArrayType, Int96Type}; + use parquet::file::writer::SerializedFileWriter; + use parquet::schema::types::Type as SchemaType; + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_path = format!("{table_location}/map_int96.parquet"); + + // MAP encoding: + // optional group ts_map (MAP) { + // repeated group key_value { + // required binary key (UTF8); + // optional int96 value; + // } + // } + let key_type = SchemaType::primitive_type_builder("key", PhysicalType::BYTE_ARRAY) + .with_repetition(Repetition::REQUIRED) + .with_logical_type(Some(parquet::basic::LogicalType::String)) + .with_id(Some(2)) + .build() + .unwrap(); + + let value_type = SchemaType::primitive_type_builder("value", PhysicalType::INT96) + .with_repetition(Repetition::OPTIONAL) + .with_id(Some(3)) + .build() + .unwrap(); + + let key_value_group = SchemaType::group_type_builder("key_value") + .with_repetition(Repetition::REPEATED) + .with_fields(vec![Arc::new(key_type), Arc::new(value_type)]) + .build() + .unwrap(); + + let map_type = SchemaType::group_type_builder("ts_map") + .with_repetition(Repetition::OPTIONAL) + .with_id(Some(1)) + .with_logical_type(Some(parquet::basic::LogicalType::Map)) + .with_fields(vec![Arc::new(key_value_group)]) + .build() + .unwrap(); + + let parquet_schema = SchemaType::group_type_builder("schema") + .with_fields(vec![Arc::new(map_type)]) + .build() + .unwrap(); + + let (int96_val, expected_micros) = make_int96_test_value(); + + let file = File::create(&file_path).unwrap(); + let mut writer = + SerializedFileWriter::new(file, Arc::new(parquet_schema), Default::default()).unwrap(); + + // Write a single row with a map containing one key-value pair. + // rep=0 for both columns: start of a new map. + // key def=2: map present (1) + key_value entry present (2), key is REQUIRED. + // value def=3: map present (1) + key_value entry present (2) + value present (3). 
+ let mut row_group = writer.next_row_group().unwrap(); + { + let mut col = row_group.next_column().unwrap().unwrap(); + col.typed::() + .write_batch( + &[parquet::data_type::ByteArray::from("event_time")], + Some(&[2]), + Some(&[0]), + ) + .unwrap(); + col.close().unwrap(); + } + { + let mut col = row_group.next_column().unwrap().unwrap(); + col.typed::() + .write_batch(&[int96_val], Some(&[3]), Some(&[0])) + .unwrap(); + col.close().unwrap(); + } + row_group.close().unwrap(); + writer.close().unwrap(); + + let iceberg_schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::optional( + 1, + "ts_map", + Type::Map(crate::spec::MapType { + key_field: NestedField::required( + 2, + "key", + Type::Primitive(PrimitiveType::String), + ) + .into(), + value_field: NestedField::optional( + 3, + "value", + Type::Primitive(PrimitiveType::Timestamp), + ) + .into(), + }), + ) + .into(), + ]) + .build() + .unwrap(), + ); + + let batches = read_int96_batches(&file_path, iceberg_schema, vec![1]).await; + + assert_eq!(batches.len(), 1); + let map_array = batches[0] + .column(0) + .as_any() + .downcast_ref::() + .expect("Expected MapArray"); + let ts_array = map_array + .values() + .as_any() + .downcast_ref::() + .expect("Expected TimestampMicrosecondArray as map values"); + + assert_eq!( + ts_array.value(0), + expected_micros, + "INT96 in map: got {}, expected {expected_micros}", + ts_array.value(0) + ); + } +} diff --git a/crates/iceberg/src/arrow/reader/positional_deletes.rs b/crates/iceberg/src/arrow/reader/positional_deletes.rs new file mode 100644 index 0000000000..eea031852b --- /dev/null +++ b/crates/iceberg/src/arrow/reader/positional_deletes.rs @@ -0,0 +1,931 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Positional delete handling for `ArrowReader`: converting a `DeleteVector` +//! into a Parquet `RowSelection` that skips the deleted rows, while respecting +//! any row-group selection made by the predicate evaluator. + +use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; +use parquet::file::metadata::RowGroupMetaData; + +use super::ArrowReader; +use crate::delete_vector::DeleteVector; +use crate::error::Result; + +impl ArrowReader { + /// computes a `RowSelection` from positional delete indices. 
+    ///
+    /// Using the Parquet row group metadata, we build a `RowSelection` that rejects rows that are
+    /// indicated as having been deleted by a positional delete, taking into account any row groups
+    /// that have been skipped entirely by the filter predicate.
+    pub(super) fn build_deletes_row_selection(
+        row_group_metadata_list: &[RowGroupMetaData],
+        selected_row_groups: &Option<Vec<usize>>,
+        positional_deletes: &DeleteVector,
+    ) -> Result<RowSelection> {
+        let mut results: Vec<RowSelector> = Vec::new();
+        let mut selected_row_groups_idx = 0;
+        let mut current_row_group_base_idx: u64 = 0;
+        let mut delete_vector_iter = positional_deletes.iter();
+        let mut next_deleted_row_idx_opt = delete_vector_iter.next();
+
+        for (idx, row_group_metadata) in row_group_metadata_list.iter().enumerate() {
+            let row_group_num_rows = row_group_metadata.num_rows() as u64;
+            let next_row_group_base_idx = current_row_group_base_idx + row_group_num_rows;
+
+            // if row group selection is enabled,
+            if let Some(selected_row_groups) = selected_row_groups {
+                // if we've consumed all the selected row groups, we're done
+                if selected_row_groups_idx == selected_row_groups.len() {
+                    break;
+                }
+
+                if idx == selected_row_groups[selected_row_groups_idx] {
+                    // we're in a selected row group. Increment selected_row_groups_idx
+                    // so that next time around the for loop we're looking for the next
+                    // selected row group
+                    selected_row_groups_idx += 1;
+                } else {
+                    // Advance iterator past all deletes in the skipped row group.
+                    // advance_to() positions the iterator to the first delete >= next_row_group_base_idx.
+                    // However, if our cached next_deleted_row_idx_opt is in the skipped range,
+                    // we need to call next() to update the cache with the newly positioned value.
+                    delete_vector_iter.advance_to(next_row_group_base_idx);
+                    // Only update the cache if the cached value is stale (in the skipped range)
+                    if let Some(cached_idx) = next_deleted_row_idx_opt
+                        && cached_idx < next_row_group_base_idx
+                    {
+                        next_deleted_row_idx_opt = delete_vector_iter.next();
+                    }
+
+                    // still increment the current row group base index but then skip to the next
+                    // row group in the file
+                    current_row_group_base_idx += row_group_num_rows;
+                    continue;
+                }
+            }
+
+            let mut next_deleted_row_idx = match next_deleted_row_idx_opt {
+                Some(next_deleted_row_idx) => {
+                    // if the index of the next deleted row is beyond this row group, add a selection for
+                    // the remainder of this row group and skip to the next row group
+                    if next_deleted_row_idx >= next_row_group_base_idx {
+                        results.push(RowSelector::select(row_group_num_rows as usize));
+                        current_row_group_base_idx += row_group_num_rows;
+                        continue;
+                    }
+
+                    next_deleted_row_idx
+                }
+
+                // If there are no more positional deletes, add a selector for the entirety of this row group.
+ _ => { + results.push(RowSelector::select(row_group_num_rows as usize)); + current_row_group_base_idx += row_group_num_rows; + continue; + } + }; + + let mut current_idx = current_row_group_base_idx; + 'chunks: while next_deleted_row_idx < next_row_group_base_idx { + // `select` all rows that precede the next delete index + if current_idx < next_deleted_row_idx { + let run_length = next_deleted_row_idx - current_idx; + results.push(RowSelector::select(run_length as usize)); + current_idx += run_length; + } + + // `skip` all consecutive deleted rows in the current row group + let mut run_length = 0; + while next_deleted_row_idx == current_idx + && next_deleted_row_idx < next_row_group_base_idx + { + run_length += 1; + current_idx += 1; + + next_deleted_row_idx_opt = delete_vector_iter.next(); + next_deleted_row_idx = match next_deleted_row_idx_opt { + Some(next_deleted_row_idx) => next_deleted_row_idx, + _ => { + // We've processed the final positional delete. + // Conclude the skip and then break so that we select the remaining + // rows in the row group and move on to the next row group + results.push(RowSelector::skip(run_length)); + break 'chunks; + } + }; + } + if run_length > 0 { + results.push(RowSelector::skip(run_length)); + } + } + + if current_idx < next_row_group_base_idx { + results.push(RowSelector::select( + (next_row_group_base_idx - current_idx) as usize, + )); + } + + current_row_group_base_idx += row_group_num_rows; + } + + Ok(results.into()) + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + use std::fs::File; + use std::sync::Arc; + + use arrow_array::cast::AsArray; + use arrow_array::{RecordBatch, StringArray}; + use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use futures::TryStreamExt; + use parquet::arrow::arrow_reader::{RowSelection, RowSelector}; + use parquet::arrow::{ArrowWriter, PARQUET_FIELD_ID_META_KEY}; + use parquet::basic::Compression; + use parquet::file::metadata::{ColumnChunkMetaData, RowGroupMetaData}; + use parquet::file::properties::WriterProperties; + use parquet::schema::types::{SchemaDescPtr, SchemaDescriptor}; + use roaring::RoaringTreemap; + use tempfile::TempDir; + + use crate::arrow::{ArrowReader, ArrowReaderBuilder}; + use crate::delete_vector::DeleteVector; + use crate::io::FileIO; + use crate::scan::{FileScanTask, FileScanTaskDeleteFile, FileScanTaskStream}; + use crate::spec::{DataContentType, DataFileFormat, NestedField, PrimitiveType, Schema, Type}; + + fn build_test_row_group_meta( + schema_descr: SchemaDescPtr, + columns: Vec, + num_rows: i64, + ordinal: i16, + ) -> RowGroupMetaData { + RowGroupMetaData::builder(schema_descr.clone()) + .set_num_rows(num_rows) + .set_total_byte_size(2000) + .set_column_metadata(columns) + .set_ordinal(ordinal) + .build() + .unwrap() + } + + fn get_test_schema_descr() -> SchemaDescPtr { + use parquet::schema::types::Type as SchemaType; + + let schema = SchemaType::group_type_builder("schema") + .with_fields(vec![ + Arc::new( + SchemaType::primitive_type_builder("a", parquet::basic::Type::INT32) + .build() + .unwrap(), + ), + Arc::new( + SchemaType::primitive_type_builder("b", parquet::basic::Type::INT32) + .build() + .unwrap(), + ), + ]) + .build() + .unwrap(); + + Arc::new(SchemaDescriptor::new(Arc::new(schema))) + } + + #[test] + fn test_build_deletes_row_selection() { + let schema_descr = get_test_schema_descr(); + + let mut columns = vec![]; + for ptr in schema_descr.columns() { + let column = ColumnChunkMetaData::builder(ptr.clone()).build().unwrap(); + 
columns.push(column); + } + + let row_groups_metadata = vec![ + build_test_row_group_meta(schema_descr.clone(), columns.clone(), 1000, 0), + build_test_row_group_meta(schema_descr.clone(), columns.clone(), 500, 1), + build_test_row_group_meta(schema_descr.clone(), columns.clone(), 500, 2), + build_test_row_group_meta(schema_descr.clone(), columns.clone(), 1000, 3), + build_test_row_group_meta(schema_descr.clone(), columns.clone(), 500, 4), + ]; + + let selected_row_groups = Some(vec![1, 3]); + + /* cases to cover: + * {skip|select} {first|intermediate|last} {one row|multiple rows} in + {first|intermediate|last} {skipped|selected} row group + * row group selection disabled + */ + + let positional_deletes = RoaringTreemap::from_iter(&[ + 1, // in skipped rg 0, should be ignored + 3, // run of three consecutive items in skipped rg0 + 4, 5, 998, // two consecutive items at end of skipped rg0 + 999, 1000, // solitary row at start of selected rg1 (1, 9) + 1010, // run of 3 rows in selected rg1 + 1011, 1012, // (3, 485) + 1498, // run of two items at end of selected rg1 + 1499, 1500, // run of two items at start of skipped rg2 + 1501, 1600, // should ignore, in skipped rg2 + 1999, // single row at end of skipped rg2 + 2000, // run of two items at start of selected rg3 + 2001, // (4, 98) + 2100, // single row in selected row group 3 (1, 99) + 2200, // run of 3 consecutive rows in selected row group 3 + 2201, 2202, // (3, 796) + 2999, // single item at end of selected rg3 (1) + 3000, // single item at start of skipped rg4 + ]); + + let positional_deletes = DeleteVector::new(positional_deletes); + + // using selected row groups 1 and 3 + let result = ArrowReader::build_deletes_row_selection( + &row_groups_metadata, + &selected_row_groups, + &positional_deletes, + ) + .unwrap(); + + let expected = RowSelection::from(vec![ + RowSelector::skip(1), + RowSelector::select(9), + RowSelector::skip(3), + RowSelector::select(485), + RowSelector::skip(4), + RowSelector::select(98), + RowSelector::skip(1), + RowSelector::select(99), + RowSelector::skip(3), + RowSelector::select(796), + RowSelector::skip(1), + ]); + + assert_eq!(result, expected); + + // selecting all row groups + let result = ArrowReader::build_deletes_row_selection( + &row_groups_metadata, + &None, + &positional_deletes, + ) + .unwrap(); + + let expected = RowSelection::from(vec![ + RowSelector::select(1), + RowSelector::skip(1), + RowSelector::select(1), + RowSelector::skip(3), + RowSelector::select(992), + RowSelector::skip(3), + RowSelector::select(9), + RowSelector::skip(3), + RowSelector::select(485), + RowSelector::skip(4), + RowSelector::select(98), + RowSelector::skip(1), + RowSelector::select(398), + RowSelector::skip(3), + RowSelector::select(98), + RowSelector::skip(1), + RowSelector::select(99), + RowSelector::skip(3), + RowSelector::select(796), + RowSelector::skip(2), + RowSelector::select(499), + ]); + + assert_eq!(result, expected); + } + + /// Test for bug where position deletes in later row groups are not applied correctly. + /// + /// When a file has multiple row groups and a position delete targets a row in a later + /// row group, the `build_deletes_row_selection` function had a bug where it would + /// fail to increment `current_row_group_base_idx` when skipping row groups. 
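+    /// (Without that increment, the file-relative positions of deletes in later row groups
+    /// are compared against a base index that was never advanced: row group 1's range is
+    /// computed as [0, 100) instead of [100, 200), so the delete at 199 never matches.)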
+ /// + /// This test creates: + /// - A data file with 200 rows split into 2 row groups (0-99, 100-199) + /// - A position delete file that deletes row 199 (last row in second row group) + /// + /// Expected behavior: Should return 199 rows (with id=200 deleted) + /// Bug behavior: Returns 200 rows (delete is not applied) + /// + /// This bug was discovered while running Apache Spark + Apache Iceberg integration tests + /// through DataFusion Comet. The following Iceberg Java tests failed due to this bug: + /// - `org.apache.iceberg.spark.extensions.TestMergeOnReadDelete::testDeleteWithMultipleRowGroupsParquet` + /// - `org.apache.iceberg.spark.extensions.TestMergeOnReadUpdate::testUpdateWithMultipleRowGroupsParquet` + #[tokio::test] + async fn test_position_delete_across_multiple_row_groups() { + use arrow_array::{Int32Array, Int64Array}; + use parquet::file::reader::{FileReader, SerializedFileReader}; + + // Field IDs for positional delete schema + const FIELD_ID_POSITIONAL_DELETE_FILE_PATH: u64 = 2147483546; + const FIELD_ID_POSITIONAL_DELETE_POS: u64 = 2147483545; + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + + // Create table schema with a single 'id' column + let table_schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + ]) + .build() + .unwrap(), + ); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "1".to_string(), + )])), + ])); + + // Step 1: Create data file with 200 rows in 2 row groups + // Row group 0: rows 0-99 (ids 1-100) + // Row group 1: rows 100-199 (ids 101-200) + let data_file_path = format!("{table_location}/data.parquet"); + + let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( + Int32Array::from_iter_values(1..=100), + )]) + .unwrap(); + + let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( + Int32Array::from_iter_values(101..=200), + )]) + .unwrap(); + + // Force each batch into its own row group + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .set_max_row_group_row_count(Some(100)) + .build(); + + let file = File::create(&data_file_path).unwrap(); + let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); + writer.write(&batch1).expect("Writing batch 1"); + writer.write(&batch2).expect("Writing batch 2"); + writer.close().unwrap(); + + // Verify we created 2 row groups + let verify_file = File::open(&data_file_path).unwrap(); + let verify_reader = SerializedFileReader::new(verify_file).unwrap(); + assert_eq!( + verify_reader.metadata().num_row_groups(), + 2, + "Should have 2 row groups" + ); + + // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1) + let delete_file_path = format!("{table_location}/deletes.parquet"); + + let delete_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + FIELD_ID_POSITIONAL_DELETE_FILE_PATH.to_string(), + )])), + Field::new("pos", DataType::Int64, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + FIELD_ID_POSITIONAL_DELETE_POS.to_string(), + )])), + ])); + + // Delete row at position 199 (0-indexed, so it's the last row: id=200) + let delete_batch = 
RecordBatch::try_new(delete_schema.clone(), vec![
+            Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])),
+            Arc::new(Int64Array::from_iter_values(vec![199i64])),
+        ])
+        .unwrap();
+
+        let delete_props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+
+        let delete_file = File::create(&delete_file_path).unwrap();
+        let mut delete_writer =
+            ArrowWriter::try_new(delete_file, delete_schema, Some(delete_props)).unwrap();
+        delete_writer.write(&delete_batch).unwrap();
+        delete_writer.close().unwrap();
+
+        // Step 3: Read the data file with the delete applied
+        let file_io = FileIO::new_with_fs();
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        let task = FileScanTask {
+            file_size_in_bytes: std::fs::metadata(&data_file_path).unwrap().len(),
+            start: 0,
+            length: 0,
+            record_count: Some(200),
+            data_file_path: data_file_path.clone(),
+            data_file_format: DataFileFormat::Parquet,
+            schema: table_schema.clone(),
+            project_field_ids: vec![1],
+            predicate: None,
+            deletes: vec![FileScanTaskDeleteFile {
+                file_size_in_bytes: std::fs::metadata(&delete_file_path).unwrap().len(),
+                file_path: delete_file_path,
+                file_type: DataContentType::PositionDeletes,
+                partition_spec_id: 0,
+                equality_ids: None,
+            }],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+
+        // Step 4: Verify we got 199 rows (not 200)
+        let total_rows: usize = result.iter().map(|b| b.num_rows()).sum();
+
+        println!("Total rows read: {total_rows}");
+        println!("Expected: 199 rows (deleted row 199 which had id=200)");
+
+        // This assertion will FAIL before the fix and PASS after the fix
+        assert_eq!(
+            total_rows, 199,
+            "Expected 199 rows after deleting row 199, but got {total_rows} rows. \
+            The bug causes position deletes in later row groups to be ignored."
+        );
+
+        // Verify the deleted row (id=200) is not present
+        let all_ids: Vec<i32> = result
+            .iter()
+            .flat_map(|batch| {
+                batch
+                    .column(0)
+                    .as_primitive::<arrow_array::types::Int32Type>()
+                    .values()
+                    .iter()
+                    .copied()
+            })
+            .collect();
+
+        assert!(
+            !all_ids.contains(&200),
+            "Row with id=200 should be deleted but was found in results"
+        );
+
+        // Verify we have all other ids (1-199)
+        let expected_ids: Vec<i32> = (1..=199).collect();
+        assert_eq!(
+            all_ids, expected_ids,
+            "Should have ids 1-199 but got different values"
+        );
+    }
+
+    /// Test for bug where position deletes are lost when skipping unselected row groups.
+    ///
+    /// This is a variant of `test_position_delete_across_multiple_row_groups` that exercises
+    /// the row group selection code path (`selected_row_groups: Some([...])`).
+    ///
+    /// When a file has multiple row groups and only some are selected for reading,
+    /// the `build_deletes_row_selection` function must correctly skip over deletes in
+    /// unselected row groups WITHOUT consuming deletes that belong to selected row groups.
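+    /// (`advance_to(n)` already leaves the iterator positioned at the first delete >= n;
+    /// an unconditional extra `next()` would consume the first delete of a selected group.)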
+ /// + /// This test creates: + /// - A data file with 200 rows split into 2 row groups (0-99, 100-199) + /// - A position delete file that deletes row 199 (last row in second row group) + /// - Row group selection that reads ONLY row group 1 (rows 100-199) + /// + /// Expected behavior: Should return 99 rows (with row 199 deleted) + /// Bug behavior: Returns 100 rows (delete is lost when skipping row group 0) + /// + /// The bug occurs when processing row group 0 (unselected): + /// ```rust + /// delete_vector_iter.advance_to(next_row_group_base_idx); // Position at first delete >= 100 + /// next_deleted_row_idx_opt = delete_vector_iter.next(); // BUG: Consumes delete at 199! + /// ``` + /// + /// The fix is to NOT call `next()` after `advance_to()` when skipping unselected row groups, + /// because `advance_to()` already positions the iterator correctly without consuming elements. + #[tokio::test] + async fn test_position_delete_with_row_group_selection() { + use arrow_array::{Int32Array, Int64Array}; + use parquet::file::reader::{FileReader, SerializedFileReader}; + + // Field IDs for positional delete schema + const FIELD_ID_POSITIONAL_DELETE_FILE_PATH: u64 = 2147483546; + const FIELD_ID_POSITIONAL_DELETE_POS: u64 = 2147483545; + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + + // Create table schema with a single 'id' column + let table_schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + ]) + .build() + .unwrap(), + ); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "1".to_string(), + )])), + ])); + + // Step 1: Create data file with 200 rows in 2 row groups + // Row group 0: rows 0-99 (ids 1-100) + // Row group 1: rows 100-199 (ids 101-200) + let data_file_path = format!("{table_location}/data.parquet"); + + let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( + Int32Array::from_iter_values(1..=100), + )]) + .unwrap(); + + let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( + Int32Array::from_iter_values(101..=200), + )]) + .unwrap(); + + // Force each batch into its own row group + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .set_max_row_group_row_count(Some(100)) + .build(); + + let file = File::create(&data_file_path).unwrap(); + let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); + writer.write(&batch1).expect("Writing batch 1"); + writer.write(&batch2).expect("Writing batch 2"); + writer.close().unwrap(); + + // Verify we created 2 row groups + let verify_file = File::open(&data_file_path).unwrap(); + let verify_reader = SerializedFileReader::new(verify_file).unwrap(); + assert_eq!( + verify_reader.metadata().num_row_groups(), + 2, + "Should have 2 row groups" + ); + + // Step 2: Create position delete file that deletes row 199 (id=200, last row in row group 1) + let delete_file_path = format!("{table_location}/deletes.parquet"); + + let delete_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + FIELD_ID_POSITIONAL_DELETE_FILE_PATH.to_string(), + )])), + Field::new("pos", DataType::Int64, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), 
+                FIELD_ID_POSITIONAL_DELETE_POS.to_string(),
+            )])),
+        ]));
+
+        // Delete row at position 199 (0-indexed, so it's the last row: id=200)
+        let delete_batch = RecordBatch::try_new(delete_schema.clone(), vec![
+            Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])),
+            Arc::new(Int64Array::from_iter_values(vec![199i64])),
+        ])
+        .unwrap();
+
+        let delete_props = WriterProperties::builder()
+            .set_compression(Compression::SNAPPY)
+            .build();
+
+        let delete_file = File::create(&delete_file_path).unwrap();
+        let mut delete_writer =
+            ArrowWriter::try_new(delete_file, delete_schema, Some(delete_props)).unwrap();
+        delete_writer.write(&delete_batch).unwrap();
+        delete_writer.close().unwrap();
+
+        // Step 3: Get byte ranges to read ONLY row group 1 (rows 100-199)
+        // This exercises the row group selection code path where row group 0 is skipped
+        let metadata_file = File::open(&data_file_path).unwrap();
+        let metadata_reader = SerializedFileReader::new(metadata_file).unwrap();
+        let metadata = metadata_reader.metadata();
+
+        let row_group_0 = metadata.row_group(0);
+        let row_group_1 = metadata.row_group(1);
+
+        let rg0_start = 4u64; // Parquet files start with 4-byte magic "PAR1"
+        let rg1_start = rg0_start + row_group_0.compressed_size() as u64;
+        let rg1_length = row_group_1.compressed_size() as u64;
+
+        println!(
+            "Row group 0: starts at byte {}, {} bytes compressed",
+            rg0_start,
+            row_group_0.compressed_size()
+        );
+        println!(
+            "Row group 1: starts at byte {}, {} bytes compressed",
+            rg1_start,
+            row_group_1.compressed_size()
+        );
+
+        let file_io = FileIO::new_with_fs();
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        // Create FileScanTask that reads ONLY row group 1 via byte range filtering
+        let task = FileScanTask {
+            file_size_in_bytes: std::fs::metadata(&data_file_path).unwrap().len(),
+            start: rg1_start,
+            length: rg1_length,
+            record_count: Some(100), // Row group 1 has 100 rows
+            data_file_path: data_file_path.clone(),
+            data_file_format: DataFileFormat::Parquet,
+            schema: table_schema.clone(),
+            project_field_ids: vec![1],
+            predicate: None,
+            deletes: vec![FileScanTaskDeleteFile {
+                file_size_in_bytes: std::fs::metadata(&delete_file_path).unwrap().len(),
+                file_path: delete_file_path,
+                file_type: DataContentType::PositionDeletes,
+                partition_spec_id: 0,
+                equality_ids: None,
+            }],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+
+        // Step 4: Verify we got 99 rows (not 100)
+        // Row group 1 has 100 rows (ids 101-200), minus 1 delete (id=200) = 99 rows
+        let total_rows: usize = result.iter().map(|b| b.num_rows()).sum();
+
+        println!("Total rows read from row group 1: {total_rows}");
+        println!("Expected: 99 rows (row group 1 has 100 rows, 1 delete at position 199)");
+
+        // This assertion will FAIL before the fix and PASS after the fix
+        assert_eq!(
+            total_rows, 99,
+            "Expected 99 rows from row group 1 after deleting position 199, but got {total_rows} rows. \
+            The bug causes position deletes to be lost when advance_to() is followed by next() \
+            when skipping unselected row groups."
+        );
+
+        // Verify the deleted row (id=200) is not present
+        let all_ids: Vec<i32> = result
+            .iter()
+            .flat_map(|batch| {
+                batch
+                    .column(0)
+                    .as_primitive::<arrow_array::types::Int32Type>()
+                    .values()
+                    .iter()
+                    .copied()
+            })
+            .collect();
+
+        assert!(
+            !all_ids.contains(&200),
+            "Row with id=200 should be deleted but was found in results"
+        );
+
+        // Verify we have ids 101-199 (not 101-200)
+        let expected_ids: Vec<i32> = (101..=199).collect();
+        assert_eq!(
+            all_ids, expected_ids,
+            "Should have ids 101-199 but got different values"
+        );
+    }
+
+    /// Test for bug where a stale cached delete causes an infinite loop when skipping row groups.
+    ///
+    /// This test exposes the inverse scenario of `test_position_delete_with_row_group_selection`:
+    /// - Position delete targets a row in the SKIPPED row group (not the selected one)
+    /// - After calling advance_to(), the cached delete index is stale
+    /// - Without updating the cache, the code enters an infinite loop
+    ///
+    /// This test creates:
+    /// - A data file with 200 rows split into 2 row groups (0-99, 100-199)
+    /// - A position delete file that deletes row 0 (first row in SKIPPED row group 0)
+    /// - Row group selection that reads ONLY row group 1 (rows 100-199)
+    ///
+    /// The bug occurs when skipping row group 0:
+    /// ```rust
+    /// let mut next_deleted_row_idx_opt = delete_vector_iter.next(); // Some(0)
+    /// // ... skip to row group 1 ...
+    /// delete_vector_iter.advance_to(100); // Iterator advances past delete at 0
+    /// // BUG: next_deleted_row_idx_opt is still Some(0) - STALE!
+    /// // When processing row group 1:
+    /// //   current_idx = 100, next_deleted_row_idx = 0, next_row_group_base_idx = 200
+    /// //   Loop condition: 0 < 200 (true)
+    /// //   But: current_idx (100) > next_deleted_row_idx (0)
+    /// //   And: current_idx (100) != next_deleted_row_idx (0)
+    /// //   Neither branch executes -> INFINITE LOOP!
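+    /// // The fix: after advance_to(), refresh the cached next_deleted_row_idx_opt
+    /// // whenever the cached value is stale (i.e. still inside the skipped range).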
+ /// ``` + /// + /// Expected behavior: Should return 100 rows (delete at 0 doesn't affect row group 1) + /// Bug behavior: Infinite loop in build_deletes_row_selection + #[tokio::test] + async fn test_position_delete_in_skipped_row_group() { + use arrow_array::{Int32Array, Int64Array}; + use parquet::file::reader::{FileReader, SerializedFileReader}; + + // Field IDs for positional delete schema + const FIELD_ID_POSITIONAL_DELETE_FILE_PATH: u64 = 2147483546; + const FIELD_ID_POSITIONAL_DELETE_POS: u64 = 2147483545; + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + + // Create table schema with a single 'id' column + let table_schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + ]) + .build() + .unwrap(), + ); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "1".to_string(), + )])), + ])); + + // Step 1: Create data file with 200 rows in 2 row groups + // Row group 0: rows 0-99 (ids 1-100) + // Row group 1: rows 100-199 (ids 101-200) + let data_file_path = format!("{table_location}/data.parquet"); + + let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( + Int32Array::from_iter_values(1..=100), + )]) + .unwrap(); + + let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new( + Int32Array::from_iter_values(101..=200), + )]) + .unwrap(); + + // Force each batch into its own row group + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .set_max_row_group_row_count(Some(100)) + .build(); + + let file = File::create(&data_file_path).unwrap(); + let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); + writer.write(&batch1).expect("Writing batch 1"); + writer.write(&batch2).expect("Writing batch 2"); + writer.close().unwrap(); + + // Verify we created 2 row groups + let verify_file = File::open(&data_file_path).unwrap(); + let verify_reader = SerializedFileReader::new(verify_file).unwrap(); + assert_eq!( + verify_reader.metadata().num_row_groups(), + 2, + "Should have 2 row groups" + ); + + // Step 2: Create position delete file that deletes row 0 (id=1, first row in row group 0) + let delete_file_path = format!("{table_location}/deletes.parquet"); + + let delete_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("file_path", DataType::Utf8, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + FIELD_ID_POSITIONAL_DELETE_FILE_PATH.to_string(), + )])), + Field::new("pos", DataType::Int64, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + FIELD_ID_POSITIONAL_DELETE_POS.to_string(), + )])), + ])); + + // Delete row at position 0 (0-indexed, so it's the first row: id=1) + let delete_batch = RecordBatch::try_new(delete_schema.clone(), vec![ + Arc::new(StringArray::from_iter_values(vec![data_file_path.clone()])), + Arc::new(Int64Array::from_iter_values(vec![0i64])), + ]) + .unwrap(); + + let delete_props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + + let delete_file = File::create(&delete_file_path).unwrap(); + let mut delete_writer = + ArrowWriter::try_new(delete_file, delete_schema, Some(delete_props)).unwrap(); + delete_writer.write(&delete_batch).unwrap(); + delete_writer.close().unwrap(); + + // Step 3: Get byte 
ranges to read ONLY row group 1 (rows 100-199)
+        // This exercises the row group selection code path where row group 0 is skipped
+        let metadata_file = File::open(&data_file_path).unwrap();
+        let metadata_reader = SerializedFileReader::new(metadata_file).unwrap();
+        let metadata = metadata_reader.metadata();
+
+        let row_group_0 = metadata.row_group(0);
+        let row_group_1 = metadata.row_group(1);
+
+        let rg0_start = 4u64; // Parquet files start with 4-byte magic "PAR1"
+        let rg1_start = rg0_start + row_group_0.compressed_size() as u64;
+        let rg1_length = row_group_1.compressed_size() as u64;
+
+        let file_io = FileIO::new_with_fs();
+        let reader = ArrowReaderBuilder::new(file_io).build();
+
+        // Create FileScanTask that reads ONLY row group 1 via byte range filtering
+        let task = FileScanTask {
+            file_size_in_bytes: std::fs::metadata(&data_file_path).unwrap().len(),
+            start: rg1_start,
+            length: rg1_length,
+            record_count: Some(100), // Row group 1 has 100 rows
+            data_file_path: data_file_path.clone(),
+            data_file_format: DataFileFormat::Parquet,
+            schema: table_schema.clone(),
+            project_field_ids: vec![1],
+            predicate: None,
+            deletes: vec![FileScanTaskDeleteFile {
+                file_size_in_bytes: std::fs::metadata(&delete_file_path).unwrap().len(),
+                file_path: delete_file_path,
+                file_type: DataContentType::PositionDeletes,
+                partition_spec_id: 0,
+                equality_ids: None,
+            }],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let tasks = Box::pin(futures::stream::iter(vec![Ok(task)])) as FileScanTaskStream;
+        let result = reader
+            .read(tasks)
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+
+        // Step 4: Verify we got 100 rows (all of row group 1)
+        // The delete at position 0 is in row group 0, which is skipped, so it doesn't affect us
+        let total_rows: usize = result.iter().map(|b| b.num_rows()).sum();
+
+        assert_eq!(
+            total_rows, 100,
+            "Expected 100 rows from row group 1 (delete at position 0 is in skipped row group 0). \
+            If this hangs or fails, it indicates the cached delete index was not updated after advance_to()."
+        );
+
+        // Verify we have all ids from row group 1 (101-200)
+        let all_ids: Vec<i32> = result
+            .iter()
+            .flat_map(|batch| {
+                batch
+                    .column(0)
+                    .as_primitive::<arrow_array::types::Int32Type>()
+                    .values()
+                    .iter()
+                    .copied()
+            })
+            .collect();
+
+        let expected_ids: Vec<i32> = (101..=200).collect();
+        assert_eq!(
+            all_ids, expected_ids,
+            "Should have ids 101-200 (all of row group 1)"
+        );
+    }
+}
diff --git a/crates/iceberg/src/arrow/reader/predicate_visitor.rs b/crates/iceberg/src/arrow/reader/predicate_visitor.rs
new file mode 100644
index 0000000000..272de49390
--- /dev/null
+++ b/crates/iceberg/src/arrow/reader/predicate_visitor.rs
@@ -0,0 +1,820 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Visitors that translate Iceberg bound predicates into the pieces needed for
+//! Arrow-level evaluation: collecting referenced field IDs and producing
+//! per-record-batch predicate closures.
+
+use std::collections::{HashMap, HashSet};
+use std::sync::Arc;
+
+use arrow_arith::boolean::{and, and_kleene, is_not_null, is_null, not, or, or_kleene};
+use arrow_array::cast::AsArray;
+use arrow_array::types::{Float32Type, Float64Type};
+use arrow_array::{Array, ArrayRef, BooleanArray, Datum as ArrowDatum, RecordBatch, Scalar};
+use arrow_buffer::BooleanBuffer;
+use arrow_cast::cast::cast;
+use arrow_ord::cmp::{eq, gt, gt_eq, lt, lt_eq, neq};
+use arrow_schema::{ArrowError, DataType};
+use arrow_string::like::starts_with;
+use fnv::FnvHashSet;
+use parquet::schema::types::SchemaDescriptor;
+
+use crate::arrow::get_arrow_datum;
+use crate::error::Result;
+use crate::expr::visitors::bound_predicate_visitor::BoundPredicateVisitor;
+use crate::expr::{BoundPredicate, BoundReference};
+use crate::spec::Datum;
+use crate::{Error, ErrorKind};
+
+/// A visitor to collect field ids from bound predicates.
+pub(super) struct CollectFieldIdVisitor {
+    pub(super) field_ids: HashSet<i32>,
+}
+
+impl CollectFieldIdVisitor {
+    pub(super) fn field_ids(self) -> HashSet<i32> {
+        self.field_ids
+    }
+}
+
+impl BoundPredicateVisitor for CollectFieldIdVisitor {
+    type T = ();
+
+    fn always_true(&mut self) -> Result<()> {
+        Ok(())
+    }
+
+    fn always_false(&mut self) -> Result<()> {
+        Ok(())
+    }
+
+    fn and(&mut self, _lhs: (), _rhs: ()) -> Result<()> {
+        Ok(())
+    }
+
+    fn or(&mut self, _lhs: (), _rhs: ()) -> Result<()> {
+        Ok(())
+    }
+
+    fn not(&mut self, _inner: ()) -> Result<()> {
+        Ok(())
+    }
+
+    fn is_null(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+
+    fn not_null(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+
+    fn is_nan(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+
+    fn not_nan(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+
+    fn less_than(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+
+    fn less_than_or_eq(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+
+    fn greater_than(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+
+    fn greater_than_or_eq(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+
+    fn eq(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+
+    fn not_eq(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+
+    fn starts_with(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+
+    fn not_starts_with(
+        &mut self,
+        reference: &BoundReference,
+        _literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+
+    fn r#in(
+        &mut self,
+        reference: &BoundReference,
+        _literals: &FnvHashSet<Datum>,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+
+    fn not_in(
+        &mut self,
+        reference: &BoundReference,
+        _literals: &FnvHashSet<Datum>,
+        _predicate: &BoundPredicate,
+    ) -> Result<()> {
+        self.field_ids.insert(reference.field().id);
+        Ok(())
+    }
+}
+
+/// A visitor to convert Iceberg bound predicates to Arrow predicates.
+pub(super) struct PredicateConverter<'a> {
+    /// The Parquet schema descriptor.
+    pub(super) parquet_schema: &'a SchemaDescriptor,
+    /// The map between field id and leaf column index in the Parquet schema.
+    pub(super) column_map: &'a HashMap<i32, usize>,
+    /// The required column indices in the Parquet schema for the predicates.
+    pub(super) column_indices: &'a Vec<usize>,
+}
+
+impl PredicateConverter<'_> {
+    /// When visiting a bound reference, we return the index of the leaf column in the
+    /// required column indices, which is used to project the column in the record batch.
+    /// Returns None if the field id is not found in the column map, which is possible
+    /// due to schema evolution.
+    fn bound_reference(&mut self, reference: &BoundReference) -> Result<Option<usize>> {
+        // The leaf column's index in the Parquet schema.
+        if let Some(column_idx) = self.column_map.get(&reference.field().id) {
+            if self.parquet_schema.get_column_root(*column_idx).is_group() {
+                return Err(Error::new(
+                    ErrorKind::DataInvalid,
+                    format!(
+                        "Leaf column `{}` in predicates isn't a root column in the Parquet schema.",
+                        reference.field().name
+                    ),
+                ));
+            }
+
+            // The leaf column's index in the required column indices.
+            let index = self
+                .column_indices
+                .iter()
+                .position(|&idx| idx == *column_idx)
+                .ok_or(Error::new(
+                    ErrorKind::DataInvalid,
+                    format!(
+                        "Leaf column `{}` in predicates cannot be found in the required column indices.",
+                        reference.field().name
+                    ),
+                ))?;
+
+            Ok(Some(index))
+        } else {
+            Ok(None)
+        }
+    }
+
+    /// Build an Arrow predicate that always returns true.
+    fn build_always_true(&self) -> Result<Box<PredicateResult>> {
+        Ok(Box::new(|batch| {
+            Ok(BooleanArray::from(vec![true; batch.num_rows()]))
+        }))
+    }
+
+    /// Build an Arrow predicate that always returns false.
+    fn build_always_false(&self) -> Result<Box<PredicateResult>> {
+        Ok(Box::new(|batch| {
+            Ok(BooleanArray::from(vec![false; batch.num_rows()]))
+        }))
+    }
+}
+
+/// Gets the leaf column from the record batch for the required column index. Only
+/// supports top-level columns for now.
+fn project_column(
+    batch: &RecordBatch,
+    column_idx: usize,
+) -> std::result::Result<ArrayRef, ArrowError> {
+    let column = batch.column(column_idx);
+
+    match column.data_type() {
+        DataType::Struct(_) => Err(ArrowError::SchemaError(
+            "Does not support struct column yet.".to_string(),
+        )),
+        _ => Ok(column.clone()),
+    }
+}
+
+fn compute_is_nan(array: &ArrayRef) -> std::result::Result<BooleanArray, ArrowError> {
+    // Compute NaN over the contiguous values slice, then fold the null bitmap
+    // in with a single bitwise AND so that null slots become false.
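+    // Note: values() exposes the raw buffer, so slots masked out by the null bitmap
+    // still hold arbitrary bytes; ANDing with the bitmap forces those slots to false.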
+    let (is_nan, nulls) = match array.data_type() {
+        DataType::Float32 => {
+            let arr = array.as_primitive::<Float32Type>();
+            (
+                BooleanBuffer::from_iter(arr.values().iter().map(|v| v.is_nan())),
+                arr.nulls(),
+            )
+        }
+        DataType::Float64 => {
+            let arr = array.as_primitive::<Float64Type>();
+            (
+                BooleanBuffer::from_iter(arr.values().iter().map(|v| v.is_nan())),
+                arr.nulls(),
+            )
+        }
+        _ => unreachable!("is_nan is only valid for float types"),
+    };
+
+    let values = match nulls {
+        Some(nulls) => &is_nan & nulls.inner(),
+        None => is_nan,
+    };
+
+    Ok(BooleanArray::new(values, None))
+}
+
+pub(super) type PredicateResult =
+    dyn FnMut(RecordBatch) -> std::result::Result<BooleanArray, ArrowError> + Send + 'static;
+
+impl BoundPredicateVisitor for PredicateConverter<'_> {
+    type T = Box<PredicateResult>;
+
+    fn always_true(&mut self) -> Result<Box<PredicateResult>> {
+        self.build_always_true()
+    }
+
+    fn always_false(&mut self) -> Result<Box<PredicateResult>> {
+        self.build_always_false()
+    }
+
+    fn and(
+        &mut self,
+        mut lhs: Box<PredicateResult>,
+        mut rhs: Box<PredicateResult>,
+    ) -> Result<Box<PredicateResult>> {
+        Ok(Box::new(move |batch| {
+            let left = lhs(batch.clone())?;
+            let right = rhs(batch)?;
+            and_kleene(&left, &right)
+        }))
+    }
+
+    fn or(
+        &mut self,
+        mut lhs: Box<PredicateResult>,
+        mut rhs: Box<PredicateResult>,
+    ) -> Result<Box<PredicateResult>> {
+        Ok(Box::new(move |batch| {
+            let left = lhs(batch.clone())?;
+            let right = rhs(batch)?;
+            or_kleene(&left, &right)
+        }))
+    }
+
+    fn not(&mut self, mut inner: Box<PredicateResult>) -> Result<Box<PredicateResult>> {
+        Ok(Box::new(move |batch| {
+            let pred_ret = inner(batch)?;
+            not(&pred_ret)
+        }))
+    }
+
+    fn is_null(
+        &mut self,
+        reference: &BoundReference,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            Ok(Box::new(move |batch| {
+                let column = project_column(&batch, idx)?;
+                is_null(&column)
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_true()
+        }
+    }
+
+    fn not_null(
+        &mut self,
+        reference: &BoundReference,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            Ok(Box::new(move |batch| {
+                let column = project_column(&batch, idx)?;
+                is_not_null(&column)
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn is_nan(
+        &mut self,
+        reference: &BoundReference,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            Ok(Box::new(move |batch| {
+                let column = project_column(&batch, idx)?;
+                compute_is_nan(&column)
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn not_nan(
+        &mut self,
+        reference: &BoundReference,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            Ok(Box::new(move |batch| {
+                let column = project_column(&batch, idx)?;
+                let is_nan = compute_is_nan(&column)?;
+                not(&is_nan)
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_true()
+        }
+    }
+
+    fn less_than(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                lt(&left, literal.as_ref())
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_true()
+        }
+    }
+
+    fn less_than_or_eq(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                lt_eq(&left, literal.as_ref())
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_true()
+        }
+    }
+
+    fn greater_than(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                gt(&left, literal.as_ref())
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn greater_than_or_eq(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                gt_eq(&left, literal.as_ref())
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn eq(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                eq(&left, literal.as_ref())
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn not_eq(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                neq(&left, literal.as_ref())
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn starts_with(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                starts_with(&left, literal.as_ref())
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn not_starts_with(
+        &mut self,
+        reference: &BoundReference,
+        literal: &Datum,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literal = get_arrow_datum(literal)?;
+
+            Ok(Box::new(move |batch| {
+                let left = project_column(&batch, idx)?;
+                let literal = try_cast_literal(&literal, left.data_type())?;
+                // update here if arrow ever adds a native not_starts_with
+                not(&starts_with(&left, literal.as_ref())?)
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_true()
+        }
+    }
+
+    fn r#in(
+        &mut self,
+        reference: &BoundReference,
+        literals: &FnvHashSet<Datum>,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literals: Vec<_> = literals
+                .iter()
+                .map(|lit| get_arrow_datum(lit).unwrap())
+                .collect();
+
+            Ok(Box::new(move |batch| {
+                // update this if arrow ever adds a native is_in kernel
+                let left = project_column(&batch, idx)?;
+
+                let mut acc = BooleanArray::from(vec![false; batch.num_rows()]);
+                for literal in &literals {
+                    let literal = try_cast_literal(literal, left.data_type())?;
+                    acc = or(&acc, &eq(&left, literal.as_ref())?)?;
+                }
+
+                Ok(acc)
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_false()
+        }
+    }
+
+    fn not_in(
+        &mut self,
+        reference: &BoundReference,
+        literals: &FnvHashSet<Datum>,
+        _predicate: &BoundPredicate,
+    ) -> Result<Box<PredicateResult>> {
+        if let Some(idx) = self.bound_reference(reference)? {
+            let literals: Vec<_> = literals
+                .iter()
+                .map(|lit| get_arrow_datum(lit).unwrap())
+                .collect();
+
+            Ok(Box::new(move |batch| {
+                // update this if arrow ever adds a native not_in kernel
+                let left = project_column(&batch, idx)?;
+                let mut acc = BooleanArray::from(vec![true; batch.num_rows()]);
+                for literal in &literals {
+                    let literal = try_cast_literal(literal, left.data_type())?;
+                    acc = and(&acc, &neq(&left, literal.as_ref())?)?;
+                }
+
+                Ok(acc)
+            }))
+        } else {
+            // A missing column, treating it as null.
+            self.build_always_true()
+        }
+    }
+}
+
+/// The Arrow type of an array that the Parquet reader reads may not match the exact Arrow type
+/// that Iceberg uses for literals - but they are effectively the same logical type,
+/// i.e. LargeUtf8 and Utf8, or Utf8View and Utf8, or Utf8View and LargeUtf8.
+///
+/// The Arrow compute kernels that we use must match the type exactly, so first cast the literal
+/// into the type of the batch we read from Parquet before sending it to the compute kernel.
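+///
+/// For example (illustrative): a literal materialized as `Utf8` is cast to `LargeUtf8`
+/// before being compared against a `LargeUtf8` column read from the file.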
+fn try_cast_literal(
+    literal: &Arc<dyn ArrowDatum + Send + Sync>,
+    column_type: &DataType,
+) -> std::result::Result<Arc<dyn ArrowDatum + Send + Sync>, ArrowError> {
+    let literal_array = literal.get().0;
+
+    // No cast required
+    if literal_array.data_type() == column_type {
+        return Ok(Arc::clone(literal));
+    }
+
+    let literal_array = cast(literal_array, column_type)?;
+    Ok(Arc::new(Scalar::new(literal_array)))
+}
+
+#[cfg(test)]
+mod tests {
+    use std::collections::{HashMap, HashSet};
+    use std::sync::Arc;
+
+    use arrow_array::{Array, BooleanArray, RecordBatch};
+    use arrow_schema::{DataType, Field, Schema as ArrowSchema};
+    use parquet::schema::parser::parse_message_type;
+    use parquet::schema::types::SchemaDescriptor;
+
+    use super::{CollectFieldIdVisitor, PredicateConverter};
+    use crate::expr::visitors::bound_predicate_visitor::visit;
+    use crate::expr::{Bind, Predicate, Reference};
+    use crate::spec::{NestedField, PrimitiveType, Schema, SchemaRef, Type};
+
+    fn table_schema_simple() -> SchemaRef {
+        Arc::new(
+            Schema::builder()
+                .with_schema_id(1)
+                .with_identifier_field_ids(vec![2])
+                .with_fields(vec![
+                    NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(),
+                    NestedField::required(2, "bar", Type::Primitive(PrimitiveType::Int)).into(),
+                    NestedField::optional(3, "baz", Type::Primitive(PrimitiveType::Boolean)).into(),
+                    NestedField::optional(4, "qux", Type::Primitive(PrimitiveType::Float)).into(),
+                ])
+                .build()
+                .unwrap(),
+        )
+    }
+
+    #[test]
+    fn test_collect_field_id() {
+        let schema = table_schema_simple();
+        let expr = Reference::new("qux").is_null();
+        let bound_expr = expr.bind(schema, true).unwrap();
+
+        let mut visitor = CollectFieldIdVisitor {
+            field_ids: HashSet::default(),
+        };
+        visit(&mut visitor, &bound_expr).unwrap();
+
+        let mut expected = HashSet::default();
+        expected.insert(4_i32);
+
+        assert_eq!(visitor.field_ids, expected);
+    }
+
+    #[test]
+    fn test_collect_field_id_with_and() {
+        let schema = table_schema_simple();
+        let expr = Reference::new("qux")
+            .is_null()
+            .and(Reference::new("baz").is_null());
+        let bound_expr = expr.bind(schema, true).unwrap();
+
+        let mut visitor = CollectFieldIdVisitor {
+            field_ids: HashSet::default(),
+        };
+        visit(&mut visitor, &bound_expr).unwrap();
+
+        let mut expected = HashSet::default();
+        expected.insert(4_i32);
+        expected.insert(3);
+
+        assert_eq!(visitor.field_ids, expected);
+    }
+
+    #[test]
+    fn test_collect_field_id_with_or() {
+        let schema = table_schema_simple();
+        let expr = Reference::new("qux")
+            .is_null()
+            .or(Reference::new("baz").is_null());
+        let bound_expr = expr.bind(schema, true).unwrap();
+
+        let mut visitor = CollectFieldIdVisitor {
+            field_ids: HashSet::default(),
+        };
+        visit(&mut visitor, &bound_expr).unwrap();
+
+        let mut expected = HashSet::default();
+        expected.insert(4_i32);
+        expected.insert(3);
+
+        assert_eq!(visitor.field_ids, expected);
+    }
+
+    fn apply_predicate_to_batch(
+        predicate: Predicate,
+        schema: SchemaRef,
+        batch: RecordBatch,
+    ) -> BooleanArray {
+        let bound = predicate.bind(schema, true).unwrap();
+
+        // Build a trivial Parquet schema with one float column at field id 4
+        let message_type = "
+        message schema {
+            optional float qux = 4;
+        }
+        ";
+        let parquet_type = parse_message_type(message_type).expect("parse schema");
+        let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_type));
+
+        let column_map = HashMap::from([(4i32, 0usize)]);
+        let column_indices = vec![0usize];
+
+        let mut converter = PredicateConverter {
+            parquet_schema: &parquet_schema,
+            column_map: &column_map,
+            column_indices:
&column_indices, + }; + + let mut predicate_fn = visit(&mut converter, &bound).unwrap(); + predicate_fn(batch).unwrap() + } + + #[test] + fn test_predicate_converter_nan() { + use arrow_array::Float32Array; + + let schema = table_schema_simple(); + let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new( + "qux", + DataType::Float32, + true, + )])); + let values = vec![Some(1.0f32), Some(f32::NAN), None, Some(0.0f32)]; + + // is_nan: non-null-propagating per Java's implementation - NULL → false + let batch = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Float32Array::from( + values.clone(), + ))]) + .unwrap(); + let result = + apply_predicate_to_batch(Reference::new("qux").is_nan(), schema.clone(), batch); + assert_eq!( + [ + result.value(0), + result.value(1), + result.value(2), + result.value(3) + ], + [false, true, false, false] + ); + assert!(!result.is_null(2)); + + // not_nan: non-null-propagating per Java's implementation - NULL → true + let batch = + RecordBatch::try_new(arrow_schema, vec![Arc::new(Float32Array::from(values))]).unwrap(); + let result = apply_predicate_to_batch(Reference::new("qux").is_not_nan(), schema, batch); + assert_eq!( + [ + result.value(0), + result.value(1), + result.value(2), + result.value(3) + ], + [true, false, true, true] + ); + assert!(!result.is_null(2)); + } +} diff --git a/crates/iceberg/src/arrow/reader/projection.rs b/crates/iceberg/src/arrow/reader/projection.rs new file mode 100644 index 0000000000..d3fa00b84b --- /dev/null +++ b/crates/iceberg/src/arrow/reader/projection.rs @@ -0,0 +1,1718 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Column projection for `ArrowReader`: building the Parquet projection mask +//! from Iceberg field IDs, and mapping field IDs between Iceberg and Parquet +//! (including fallback handling for files without embedded IDs). 
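+//!
+//! For example (illustrative): a file whose top-level columns are `(name, age)` and which
+//! carries no embedded field IDs receives position-based fallback IDs 1 and 2, unless a
+//! name mapping supplies explicit IDs first (see `apply_name_mapping_to_arrow_schema`).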
+
+use std::collections::{HashMap, HashSet};
+use std::str::FromStr;
+use std::sync::Arc;
+
+use arrow_schema::{Field, FieldRef, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef};
+use parquet::arrow::{PARQUET_FIELD_ID_META_KEY, ProjectionMask};
+use parquet::schema::types::{SchemaDescriptor, Type as ParquetType};
+
+use super::{ArrowReader, CollectFieldIdVisitor};
+use crate::arrow::arrow_schema_to_schema;
+use crate::error::Result;
+use crate::expr::BoundPredicate;
+use crate::expr::visitors::bound_predicate_visitor::visit;
+use crate::spec::{NameMapping, NestedField, PrimitiveType, Schema, Type};
+use crate::{Error, ErrorKind};
+
+impl ArrowReader {
+    pub(super) fn build_field_id_set_and_map(
+        parquet_schema: &SchemaDescriptor,
+        predicate: &BoundPredicate,
+    ) -> Result<(HashSet<i32>, HashMap<i32, usize>)> {
+        // Collect all Iceberg field IDs referenced in the filter predicate
+        let mut collector = CollectFieldIdVisitor {
+            field_ids: HashSet::default(),
+        };
+        visit(&mut collector, predicate)?;
+
+        let iceberg_field_ids = collector.field_ids();
+
+        // Without embedded field IDs, we fall back to position-based mapping for compatibility
+        let field_id_map = match build_field_id_map(parquet_schema)? {
+            Some(map) => map,
+            None => build_fallback_field_id_map(parquet_schema),
+        };
+
+        Ok((iceberg_field_ids, field_id_map))
+    }
+
+    /// Recursively extract leaf field IDs, because Parquet projection works at the leaf column
+    /// level: nested types (struct/list/map) are flattened in Parquet's columnar format.
+    fn include_leaf_field_id(field: &NestedField, field_ids: &mut Vec<i32>) {
+        match field.field_type.as_ref() {
+            Type::Primitive(_) => {
+                field_ids.push(field.id);
+            }
+            Type::Struct(struct_type) => {
+                for nested_field in struct_type.fields() {
+                    Self::include_leaf_field_id(nested_field, field_ids);
+                }
+            }
+            Type::List(list_type) => {
+                Self::include_leaf_field_id(&list_type.element_field, field_ids);
+            }
+            Type::Map(map_type) => {
+                Self::include_leaf_field_id(&map_type.key_field, field_ids);
+                Self::include_leaf_field_id(&map_type.value_field, field_ids);
+            }
+        }
+    }
+
+    pub(super) fn get_arrow_projection_mask(
+        field_ids: &[i32],
+        iceberg_schema_of_task: &Schema,
+        parquet_schema: &SchemaDescriptor,
+        arrow_schema: &ArrowSchemaRef,
+        use_fallback: bool, // Whether the file lacks embedded field IDs (e.g., migrated from Hive/Spark)
+    ) -> Result<ProjectionMask> {
+        fn type_promotion_is_valid(
+            file_type: Option<&PrimitiveType>,
+            projected_type: Option<&PrimitiveType>,
+        ) -> bool {
+            match (file_type, projected_type) {
+                (Some(lhs), Some(rhs)) if lhs == rhs => true,
+                (Some(PrimitiveType::Int), Some(PrimitiveType::Long)) => true,
+                (Some(PrimitiveType::Float), Some(PrimitiveType::Double)) => true,
+                (
+                    Some(PrimitiveType::Decimal {
+                        precision: file_precision,
+                        scale: file_scale,
+                    }),
+                    Some(PrimitiveType::Decimal {
+                        precision: requested_precision,
+                        scale: requested_scale,
+                    }),
+                ) if requested_precision >= file_precision && file_scale == requested_scale => true,
+                // Uuid is stored as Fixed(16) in the Parquet file, so the read-back type will be Fixed(16).
+                (Some(PrimitiveType::Fixed(16)), Some(PrimitiveType::Uuid)) => true,
+                _ => false,
+            }
+        }
+
+        if field_ids.is_empty() {
+            return Ok(ProjectionMask::all());
+        }
+
+        if use_fallback {
+            // Position-based projection is necessary because the file lacks embedded field IDs
+            Self::get_arrow_projection_mask_fallback(field_ids, parquet_schema)
+        } else {
+            // Field-ID-based projection using embedded field IDs from Parquet metadata
+
+            // Parquet's columnar format requires leaf-level (not top-level struct/list/map) projection
+            let mut leaf_field_ids = vec![];
+            for field_id in field_ids {
+                let field = iceberg_schema_of_task.field_by_id(*field_id);
+                if let Some(field) = field {
+                    Self::include_leaf_field_id(field, &mut leaf_field_ids);
+                }
+            }
+
+            Self::get_arrow_projection_mask_with_field_ids(
+                &leaf_field_ids,
+                iceberg_schema_of_task,
+                parquet_schema,
+                arrow_schema,
+                type_promotion_is_valid,
+            )
+        }
+    }
+
+    /// Standard projection using embedded field IDs from Parquet metadata.
+    /// For iceberg-java compatibility with ParquetSchemaUtil.pruneColumns().
+    fn get_arrow_projection_mask_with_field_ids(
+        leaf_field_ids: &[i32],
+        iceberg_schema_of_task: &Schema,
+        parquet_schema: &SchemaDescriptor,
+        arrow_schema: &ArrowSchemaRef,
+        type_promotion_is_valid: fn(Option<&PrimitiveType>, Option<&PrimitiveType>) -> bool,
+    ) -> Result<ProjectionMask> {
+        let mut column_map = HashMap::new();
+        let fields = arrow_schema.fields();
+
+        // Pre-project only the fields that have been selected, possibly avoiding converting
+        // some Arrow types that are not yet supported.
+        let mut projected_fields: HashMap<FieldRef, i32> = HashMap::new();
+        let projected_arrow_schema = ArrowSchema::new_with_metadata(
+            fields.filter_leaves(|_, f| {
+                f.metadata()
+                    .get(PARQUET_FIELD_ID_META_KEY)
+                    .and_then(|field_id| i32::from_str(field_id).ok())
+                    .is_some_and(|field_id| {
+                        projected_fields.insert((*f).clone(), field_id);
+                        leaf_field_ids.contains(&field_id)
+                    })
+            }),
+            arrow_schema.metadata().clone(),
+        );
+        let iceberg_schema = arrow_schema_to_schema(&projected_arrow_schema)?;
+
+        fields.filter_leaves(|idx, field| {
+            let Some(field_id) = projected_fields.get(field).cloned() else {
+                return false;
+            };
+
+            let iceberg_field = iceberg_schema_of_task.field_by_id(field_id);
+            let parquet_iceberg_field = iceberg_schema.field_by_id(field_id);
+
+            if iceberg_field.is_none() || parquet_iceberg_field.is_none() {
+                return false;
+            }
+
+            if !type_promotion_is_valid(
+                parquet_iceberg_field
+                    .unwrap()
+                    .field_type
+                    .as_primitive_type(),
+                iceberg_field.unwrap().field_type.as_primitive_type(),
+            ) {
+                return false;
+            }
+
+            column_map.insert(field_id, idx);
+            true
+        });
+
+        // Schema evolution: new columns may not exist in old Parquet files.
+        // We only project existing columns; RecordBatchTransformer adds default/NULL values.
+        let mut indices = vec![];
+        for field_id in leaf_field_ids {
+            if let Some(col_idx) = column_map.get(field_id) {
+                indices.push(*col_idx);
+            }
+        }
+
+        if indices.is_empty() {
+            // Edge case: all requested columns are new (don't exist in the file).
+            // Project all columns so RecordBatchTransformer has a batch to transform.
+            Ok(ProjectionMask::all())
+        } else {
+            Ok(ProjectionMask::leaves(parquet_schema, indices))
+        }
+    }
+
+    /// Fallback projection for Parquet files without field IDs.
+    /// Uses position-based matching: field ID N → column position N-1.
+    /// Projects entire top-level columns (including nested content) for iceberg-java compatibility.
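+    ///
+    /// For example (illustrative): requesting field IDs `[2, 3]` against a file whose
+    /// top-level columns are `[a, b, c]` projects positions 1 and 2, i.e. columns `b` and `c`.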
+    fn get_arrow_projection_mask_fallback(
+        field_ids: &[i32],
+        parquet_schema: &SchemaDescriptor,
+    ) -> Result<ProjectionMask> {
+        // Position-based: field_id N → column N-1 (field IDs are 1-indexed)
+        let parquet_root_fields = parquet_schema.root_schema().get_fields();
+        let mut root_indices = vec![];
+
+        for field_id in field_ids.iter() {
+            let parquet_pos = (*field_id - 1) as usize;
+
+            if parquet_pos < parquet_root_fields.len() {
+                root_indices.push(parquet_pos);
+            }
+            // RecordBatchTransformer adds missing columns with NULL values
+        }
+
+        if root_indices.is_empty() {
+            Ok(ProjectionMask::all())
+        } else {
+            Ok(ProjectionMask::roots(parquet_schema, root_indices))
+        }
+    }
+}
+
+/// Build the map of Parquet field ID to Parquet column index in the schema.
+/// Returns None if the Parquet file doesn't have field IDs embedded (e.g., migrated tables).
+pub(super) fn build_field_id_map(
+    parquet_schema: &SchemaDescriptor,
+) -> Result<Option<HashMap<i32, usize>>> {
+    let mut column_map = HashMap::new();
+
+    for (idx, field) in parquet_schema.columns().iter().enumerate() {
+        let field_type = field.self_type();
+        match field_type {
+            ParquetType::PrimitiveType { basic_info, .. } => {
+                if !basic_info.has_id() {
+                    return Ok(None);
+                }
+                column_map.insert(basic_info.id(), idx);
+            }
+            ParquetType::GroupType { .. } => {
+                return Err(Error::new(
+                    ErrorKind::DataInvalid,
+                    format!(
+                        "Leaf column in schema should be primitive type but got {field_type:?}"
+                    ),
+                ));
+            }
+        };
+    }
+
+    Ok(Some(column_map))
+}
+
+/// Returns the number of primitive (leaf) columns in a Parquet type, recursing into groups.
+fn leaf_count(ty: &parquet::schema::types::Type) -> usize {
+    if ty.is_primitive() {
+        1
+    } else {
+        ty.get_fields().iter().map(|f| leaf_count(f)).sum()
+    }
+}
+
+/// Builds a mapping from fallback field IDs to leaf column indices for Parquet files
+/// without embedded field IDs. Returns entries only for primitive top-level fields.
+///
+/// Must use top-level field positions (not leaf column positions) to stay consistent
+/// with `add_fallback_field_ids_to_arrow_schema`, which assigns ordinal IDs to
+/// top-level Arrow fields. Using leaf positions instead would produce wrong indices
+/// when nested types (struct/list/map) expand into multiple leaf columns.
+///
+/// Mirrors iceberg-java's ParquetSchemaUtil.addFallbackIds(), which iterates
+/// fileSchema.getFields() assigning ordinal IDs to top-level fields.
+pub(super) fn build_fallback_field_id_map(
+    parquet_schema: &SchemaDescriptor,
+) -> HashMap<i32, usize> {
+    let mut column_map = HashMap::new();
+    let mut leaf_idx = 0;
+
+    for (top_pos, field) in parquet_schema.root_schema().get_fields().iter().enumerate() {
+        let field_id = (top_pos + 1) as i32;
+        if field.is_primitive() {
+            column_map.insert(field_id, leaf_idx);
+        }
+        leaf_idx += leaf_count(field);
+    }
+
+    column_map
+}
+
+/// Apply name mapping to an Arrow schema for Parquet files lacking field IDs.
+///
+/// Assigns Iceberg field IDs based on column names using the name mapping,
+/// enabling correct projection on migrated files (e.g., from Hive/Spark via add_files).
+///
+/// Per Iceberg spec Column Projection rule #2:
+/// "Use schema.name-mapping.default metadata to map field id to columns without field id"
+/// https://iceberg.apache.org/spec/#column-projection
+///
+/// Corresponds to Java's ParquetSchemaUtil.applyNameMapping() and ApplyNameMapping visitor.
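+/// A mapping like `[{"field-id": 1, "names": ["id"]}]` (illustrative) tags the Arrow
+/// column named `id` with field ID 1 via its `PARQUET:field_id` metadata entry.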
+ +/// Apply name mapping to Arrow schema for Parquet files lacking field IDs. +/// +/// Assigns Iceberg field IDs based on column names using the name mapping, +/// enabling correct projection on migrated files (e.g., from Hive/Spark via add_files). +/// +/// Per Iceberg spec Column Projection rule #2: +/// "Use schema.name-mapping.default metadata to map field id to columns without field id" +/// https://iceberg.apache.org/spec/#column-projection +/// +/// Corresponds to Java's ParquetSchemaUtil.applyNameMapping() and ApplyNameMapping visitor. +/// The key difference is that Java operates on the Parquet MessageType, while we operate on the Arrow Schema. +/// +/// # Arguments +/// * `arrow_schema` - Arrow schema from Parquet file (without field IDs) +/// * `name_mapping` - Name mapping from table metadata (TableProperties.DEFAULT_NAME_MAPPING) +/// +/// # Returns +/// Arrow schema with field IDs assigned based on name mapping +pub(super) fn apply_name_mapping_to_arrow_schema( + arrow_schema: ArrowSchemaRef, + name_mapping: &NameMapping, +) -> Result<ArrowSchemaRef> { + debug_assert!( + arrow_schema + .fields() + .iter() + .next() + .is_none_or(|f| f.metadata().get(PARQUET_FIELD_ID_META_KEY).is_none()), + "Schema already has field IDs - name mapping should not be applied" + ); + + let fields_with_mapped_ids: Vec<_> = arrow_schema + .fields() + .iter() + .map(|field| { + // Look up this column name in name mapping to get the Iceberg field ID. + // Corresponds to Java's ApplyNameMapping visitor which calls + // nameMapping.find(currentPath()) and returns field.withId() if found. + // + // If the field isn't in the mapping, leave it WITHOUT assigning an ID + // (matching Java's behavior of returning the field unchanged). + // Later, during projection, fields without IDs are filtered out. + let mapped_field_opt = name_mapping + .fields() + .iter() + .find(|f| f.names().contains(&field.name().to_string())); + + let mut metadata = field.metadata().clone(); + + if let Some(mapped_field) = mapped_field_opt + && let Some(field_id) = mapped_field.field_id() + { + // Field found in mapping with a field_id → assign it + metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field_id.to_string()); + } + // If field_id is None, leave the field without an ID (will be filtered by projection) + + Field::new(field.name(), field.data_type().clone(), field.is_nullable()) + .with_metadata(metadata) + }) + .collect(); + + Ok(Arc::new(ArrowSchema::new_with_metadata( + fields_with_mapped_ids, + arrow_schema.metadata().clone(), + ))) +}
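A minimal sketch (not part of the patch) of the metadata shape both of these helpers produce: the Iceberg field ID travels on the Arrow field under the standard `PARQUET:field_id` metadata key, which is where projection later recovers it.

```rust
use std::collections::HashMap;

use arrow_schema::{DataType, Field};
use parquet::arrow::PARQUET_FIELD_ID_META_KEY;

fn main() {
    // What `apply_name_mapping_to_arrow_schema` (or the fallback helper below)
    // leaves behind for a column mapped to field ID 1.
    let field = Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([(
        PARQUET_FIELD_ID_META_KEY.to_string(),
        "1".to_string(),
    )]));

    // Projection parses the metadata entry back into an i32 field ID.
    let id: i32 = field.metadata()[PARQUET_FIELD_ID_META_KEY].parse().unwrap();
    assert_eq!(id, 1);
}
```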
+ +/// Add position-based fallback field IDs to Arrow schema for Parquet files lacking them. +/// Enables projection on migrated files (e.g., from Hive/Spark). +/// +/// Why at schema level (not per-batch): Efficiency - avoids repeated schema modification. +/// Why only top-level: Nested projection uses leaf column indices, not parent struct IDs. +/// Why 1-indexed: Compatibility with iceberg-java's ParquetSchemaUtil.addFallbackIds(). +pub(super) fn add_fallback_field_ids_to_arrow_schema( + arrow_schema: &ArrowSchemaRef, +) -> Arc<ArrowSchema> { + debug_assert!( + arrow_schema + .fields() + .iter() + .next() + .is_none_or(|f| f.metadata().get(PARQUET_FIELD_ID_META_KEY).is_none()), + "Schema already has field IDs" + ); + + let fields_with_fallback_ids: Vec<_> = arrow_schema + .fields() + .iter() + .enumerate() + .map(|(pos, field)| { + let mut metadata = field.metadata().clone(); + let field_id = (pos + 1) as i32; // 1-indexed for Java compatibility + metadata.insert(PARQUET_FIELD_ID_META_KEY.to_string(), field_id.to_string()); + + Field::new(field.name(), field.data_type().clone(), field.is_nullable()) + .with_metadata(metadata) + }) + .collect(); + + Arc::new(ArrowSchema::new_with_metadata( + fields_with_fallback_ids, + arrow_schema.metadata().clone(), + )) +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + use std::fs::File; + use std::sync::Arc; + + use arrow_array::cast::AsArray; + use arrow_array::{ArrayRef, RecordBatch, StringArray}; + use arrow_schema::{DataType, Field, Schema as ArrowSchema, TimeUnit}; + use futures::TryStreamExt; + use parquet::arrow::{ArrowWriter, PARQUET_FIELD_ID_META_KEY, ProjectionMask}; + use parquet::basic::Compression; + use parquet::file::properties::WriterProperties; + use parquet::schema::parser::parse_message_type; + use parquet::schema::types::SchemaDescriptor; + use tempfile::TempDir; + + use crate::ErrorKind; + use crate::arrow::{ArrowReader, ArrowReaderBuilder}; + use crate::expr::{Bind, Reference}; + use crate::io::FileIO; + use crate::scan::{FileScanTask, FileScanTaskStream}; + use crate::spec::{DataFileFormat, Datum, NestedField, PrimitiveType, Schema, Type}; + + #[test] + fn test_arrow_projection_mask() { + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_identifier_field_ids(vec![1]) + .with_fields(vec![ + NestedField::required(1, "c1", Type::Primitive(PrimitiveType::String)).into(), + NestedField::optional(2, "c2", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional( + 3, + "c3", + Type::Primitive(PrimitiveType::Decimal { + precision: 38, + scale: 3, + }), + ) + .into(), + ]) + .build() + .unwrap(), + ); + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("c1", DataType::Utf8, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "1".to_string(), + )])), + // Type not supported + Field::new("c2", DataType::Duration(TimeUnit::Microsecond), true).with_metadata( + HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "2".to_string())]), + ), + // Precision is beyond the supported range + Field::new("c3", DataType::Decimal128(39, 3), true).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "3".to_string(), + )])), + ])); + + let message_type = " +message schema { + required binary c1 (STRING) = 1; + optional int32 c2 (INTEGER(8,true)) = 2; + optional fixed_len_byte_array(17) c3 (DECIMAL(39,3)) = 3; +} + "; + let parquet_type = parse_message_type(message_type).expect("should parse schema"); + let parquet_schema = SchemaDescriptor::new(Arc::new(parquet_type)); + + // Try projecting the fields c2 and c3 with the unsupported data types + let err = ArrowReader::get_arrow_projection_mask( + &[1, 2, 3], + &schema, + &parquet_schema, + &arrow_schema, + false, + ) + .unwrap_err(); + + assert_eq!(err.kind(), ErrorKind::DataInvalid); + assert_eq!( + err.to_string(), + "DataInvalid => Unsupported Arrow data type: Duration(µs)".to_string() + ); + + // Omitting field c2, we still get
an error due to c3 being selected + let err = ArrowReader::get_arrow_projection_mask( + &[1, 3], + &schema, + &parquet_schema, + &arrow_schema, + false, + ) + .unwrap_err(); + + assert_eq!(err.kind(), ErrorKind::DataInvalid); + assert_eq!( + err.to_string(), + "DataInvalid => Failed to create decimal type, source: DataInvalid => Decimals with precision larger than 38 are not supported: 39".to_string() + ); + + // Finally avoid selecting fields with unsupported data types + let mask = ArrowReader::get_arrow_projection_mask( + &[1], + &schema, + &parquet_schema, + &arrow_schema, + false, + ) + .expect("Some ProjectionMask"); + assert_eq!(mask, ProjectionMask::leaves(&parquet_schema, vec![0])); + } + + /// Test schema evolution: reading old Parquet file (with only column 'a') + /// using a newer table schema (with columns 'a' and 'b'). + /// This tests that: + /// 1. get_arrow_projection_mask allows missing columns + /// 2. RecordBatchTransformer adds missing column 'b' with NULL values + #[tokio::test] + async fn test_schema_evolution_add_column() { + use arrow_array::{Array, Int32Array}; + + // New table schema: columns 'a' and 'b' (b was added later, file only has 'a') + let new_schema = Arc::new( + Schema::builder() + .with_schema_id(2) + .with_fields(vec![ + NestedField::required(1, "a", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(2, "b", Type::Primitive(PrimitiveType::Int)).into(), + ]) + .build() + .unwrap(), + ); + + // Create Arrow schema for old Parquet file (only has column 'a') + let arrow_schema_old = Arc::new(ArrowSchema::new(vec![ + Field::new("a", DataType::Int32, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "1".to_string(), + )])), + ])); + + // Write old Parquet file with only column 'a' + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_io = FileIO::new_with_fs(); + + let data_a = Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef; + let to_write = RecordBatch::try_new(arrow_schema_old.clone(), vec![data_a]).unwrap(); + + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + let file = File::create(format!("{table_location}/old_file.parquet")).unwrap(); + let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); + writer.write(&to_write).expect("Writing batch"); + writer.close().unwrap(); + + // Read the old Parquet file using the NEW schema (with column 'b') + let reader = ArrowReaderBuilder::new(file_io).build(); + let tasks = Box::pin(futures::stream::iter( + vec![Ok(FileScanTask { + file_size_in_bytes: std::fs::metadata(format!("{table_location}/old_file.parquet")) + .unwrap() + .len(), + start: 0, + length: 0, + record_count: None, + data_file_path: format!("{table_location}/old_file.parquet"), + data_file_format: DataFileFormat::Parquet, + schema: new_schema.clone(), + project_field_ids: vec![1, 2], // Request both columns 'a' and 'b' + predicate: None, + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false, + })] + .into_iter(), + )) as FileScanTaskStream; + + let result = reader + .read(tasks) + .unwrap() + .try_collect::<Vec<_>>() + .await + .unwrap(); + + // Verify we got the correct data + assert_eq!(result.len(), 1); + let batch = &result[0]; + + // Should have 2 columns now + assert_eq!(batch.num_columns(), 2); + assert_eq!(batch.num_rows(), 3); + + // Column 'a' should have the original data + let col_a = batch
.column(0) + .as_primitive::<arrow_array::types::Int32Type>(); + assert_eq!(col_a.values(), &[1, 2, 3]); + + // Column 'b' should be all NULLs (it didn't exist in the old file) + let col_b = batch + .column(1) + .as_primitive::<arrow_array::types::Int32Type>(); + assert_eq!(col_b.null_count(), 3); + assert!(col_b.is_null(0)); + assert!(col_b.is_null(1)); + assert!(col_b.is_null(2)); + } + + /// Test reading Parquet files without field ID metadata (e.g., migrated tables). + /// This exercises the position-based fallback path. + /// + /// Corresponds to Java's ParquetSchemaUtil.addFallbackIds() + pruneColumnsFallback() + /// in /parquet/src/main/java/org/apache/iceberg/parquet/ParquetSchemaUtil.java + #[tokio::test] + async fn test_read_parquet_file_without_field_ids() { + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required(1, "name", Type::Primitive(PrimitiveType::String)).into(), + NestedField::required(2, "age", Type::Primitive(PrimitiveType::Int)).into(), + ]) + .build() + .unwrap(), + ); + + // Parquet file from a migrated table - no field ID metadata + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("name", DataType::Utf8, false), + Field::new("age", DataType::Int32, false), + ])); + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_io = FileIO::new_with_fs(); + + let name_data = vec!["Alice", "Bob", "Charlie"]; + let age_data = vec![30, 25, 35]; + + use arrow_array::Int32Array; + let name_col = Arc::new(StringArray::from(name_data.clone())) as ArrayRef; + let age_col = Arc::new(Int32Array::from(age_data.clone())) as ArrayRef; + + let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![name_col, age_col]).unwrap(); + + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); + let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); + + writer.write(&to_write).expect("Writing batch"); + writer.close().unwrap(); + + let reader = ArrowReaderBuilder::new(file_io).build(); + + let tasks = Box::pin(futures::stream::iter( + vec![Ok(FileScanTask { + file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) + .unwrap() + .len(), + start: 0, + length: 0, + record_count: None, + data_file_path: format!("{table_location}/1.parquet"), + data_file_format: DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![1, 2], + predicate: None, + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false, + })] + .into_iter(), + )) as FileScanTaskStream; + + let result = reader + .read(tasks) + .unwrap() + .try_collect::<Vec<_>>() + .await + .unwrap(); + + assert_eq!(result.len(), 1); + let batch = &result[0]; + assert_eq!(batch.num_rows(), 3); + assert_eq!(batch.num_columns(), 2); + + // Verify position-based mapping: field_id 1 → position 0, field_id 2 → position 1 + let name_array = batch.column(0).as_string::<i32>(); + assert_eq!(name_array.value(0), "Alice"); + assert_eq!(name_array.value(1), "Bob"); + assert_eq!(name_array.value(2), "Charlie"); + + let age_array = batch + .column(1) + .as_primitive::<arrow_array::types::Int32Type>(); + assert_eq!(age_array.value(0), 30); + assert_eq!(age_array.value(1), 25); + assert_eq!(age_array.value(2), 35); + } + + /// Test reading Parquet files without field IDs with partial projection.
+ /// Only a subset of columns are requested, verifying position-based fallback + /// handles column selection correctly. + #[tokio::test] + async fn test_read_parquet_without_field_ids_partial_projection() { + use arrow_array::Int32Array; + + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required(1, "col1", Type::Primitive(PrimitiveType::String)).into(), + NestedField::required(2, "col2", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::required(3, "col3", Type::Primitive(PrimitiveType::String)).into(), + NestedField::required(4, "col4", Type::Primitive(PrimitiveType::Int)).into(), + ]) + .build() + .unwrap(), + ); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("col1", DataType::Utf8, false), + Field::new("col2", DataType::Int32, false), + Field::new("col3", DataType::Utf8, false), + Field::new("col4", DataType::Int32, false), + ])); + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_io = FileIO::new_with_fs(); + + let col1_data = Arc::new(StringArray::from(vec!["a", "b"])) as ArrayRef; + let col2_data = Arc::new(Int32Array::from(vec![10, 20])) as ArrayRef; + let col3_data = Arc::new(StringArray::from(vec!["c", "d"])) as ArrayRef; + let col4_data = Arc::new(Int32Array::from(vec![30, 40])) as ArrayRef; + + let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![ + col1_data, col2_data, col3_data, col4_data, + ]) + .unwrap(); + + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); + let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); + + writer.write(&to_write).expect("Writing batch"); + writer.close().unwrap(); + + let reader = ArrowReaderBuilder::new(file_io).build(); + + let tasks = Box::pin(futures::stream::iter( + vec![Ok(FileScanTask { + file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) + .unwrap() + .len(), + start: 0, + length: 0, + record_count: None, + data_file_path: format!("{table_location}/1.parquet"), + data_file_format: DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![1, 3], + predicate: None, + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false, + })] + .into_iter(), + )) as FileScanTaskStream; + + let result = reader + .read(tasks) + .unwrap() + .try_collect::<Vec<_>>() + .await + .unwrap(); + + assert_eq!(result.len(), 1); + let batch = &result[0]; + assert_eq!(batch.num_rows(), 2); + assert_eq!(batch.num_columns(), 2); + + let col1_array = batch.column(0).as_string::<i32>(); + assert_eq!(col1_array.value(0), "a"); + assert_eq!(col1_array.value(1), "b"); + + let col3_array = batch.column(1).as_string::<i32>(); + assert_eq!(col3_array.value(0), "c"); + assert_eq!(col3_array.value(1), "d"); + } + + /// Test reading Parquet files without field IDs with schema evolution. + /// The Iceberg schema has more fields than the Parquet file, testing that + /// missing columns are filled with NULLs.
+ #[tokio::test] + async fn test_read_parquet_without_field_ids_schema_evolution() { + use arrow_array::{Array, Int32Array}; + + // Schema with field 3 added after the file was written + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required(1, "name", Type::Primitive(PrimitiveType::String)).into(), + NestedField::required(2, "age", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(3, "city", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build() + .unwrap(), + ); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("name", DataType::Utf8, false), + Field::new("age", DataType::Int32, false), + ])); + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_io = FileIO::new_with_fs(); + + let name_data = Arc::new(StringArray::from(vec!["Alice", "Bob"])) as ArrayRef; + let age_data = Arc::new(Int32Array::from(vec![30, 25])) as ArrayRef; + + let to_write = + RecordBatch::try_new(arrow_schema.clone(), vec![name_data, age_data]).unwrap(); + + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); + let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); + + writer.write(&to_write).expect("Writing batch"); + writer.close().unwrap(); + + let reader = ArrowReaderBuilder::new(file_io).build(); + + let tasks = Box::pin(futures::stream::iter( + vec![Ok(FileScanTask { + file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) + .unwrap() + .len(), + start: 0, + length: 0, + record_count: None, + data_file_path: format!("{table_location}/1.parquet"), + data_file_format: DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![1, 2, 3], + predicate: None, + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false, + })] + .into_iter(), + )) as FileScanTaskStream; + + let result = reader + .read(tasks) + .unwrap() + .try_collect::<Vec<_>>() + .await + .unwrap(); + + assert_eq!(result.len(), 1); + let batch = &result[0]; + assert_eq!(batch.num_rows(), 2); + assert_eq!(batch.num_columns(), 3); + + let name_array = batch.column(0).as_string::<i32>(); + assert_eq!(name_array.value(0), "Alice"); + assert_eq!(name_array.value(1), "Bob"); + + let age_array = batch + .column(1) + .as_primitive::<arrow_array::types::Int32Type>(); + assert_eq!(age_array.value(0), 30); + assert_eq!(age_array.value(1), 25); + + // Verify missing column filled with NULLs + let city_array = batch.column(2).as_string::<i32>(); + assert_eq!(city_array.null_count(), 2); + assert!(city_array.is_null(0)); + assert!(city_array.is_null(1)); + } + + /// Test reading Parquet files without field IDs that have multiple row groups. + /// This ensures the position-based fallback works correctly across row group boundaries.
+ #[tokio::test] + async fn test_read_parquet_without_field_ids_multiple_row_groups() { + use arrow_array::Int32Array; + + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required(1, "name", Type::Primitive(PrimitiveType::String)).into(), + NestedField::required(2, "value", Type::Primitive(PrimitiveType::Int)).into(), + ]) + .build() + .unwrap(), + ); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("name", DataType::Utf8, false), + Field::new("value", DataType::Int32, false), + ])); + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_io = FileIO::new_with_fs(); + + // Small row group size to create multiple row groups + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .set_write_batch_size(2) + .set_max_row_group_row_count(Some(2)) + .build(); + + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); + let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); + + // Write 6 rows in 3 batches (will create 3 row groups) + for batch_num in 0..3 { + let name_data = Arc::new(StringArray::from(vec![ + format!("name_{}", batch_num * 2), + format!("name_{}", batch_num * 2 + 1), + ])) as ArrayRef; + let value_data = + Arc::new(Int32Array::from(vec![batch_num * 2, batch_num * 2 + 1])) as ArrayRef; + + let batch = + RecordBatch::try_new(arrow_schema.clone(), vec![name_data, value_data]).unwrap(); + writer.write(&batch).expect("Writing batch"); + } + writer.close().unwrap(); + + let reader = ArrowReaderBuilder::new(file_io).build(); + + let tasks = Box::pin(futures::stream::iter( + vec![Ok(FileScanTask { + file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) + .unwrap() + .len(), + start: 0, + length: 0, + record_count: None, + data_file_path: format!("{table_location}/1.parquet"), + data_file_format: DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![1, 2], + predicate: None, + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false, + })] + .into_iter(), + )) as FileScanTaskStream; + + let result = reader + .read(tasks) + .unwrap() + .try_collect::<Vec<_>>() + .await + .unwrap(); + + assert!(!result.is_empty()); + + let mut all_names = Vec::new(); + let mut all_values = Vec::new(); + + for batch in &result { + let name_array = batch.column(0).as_string::<i32>(); + let value_array = batch + .column(1) + .as_primitive::<arrow_array::types::Int32Type>(); + + for i in 0..batch.num_rows() { + all_names.push(name_array.value(i).to_string()); + all_values.push(value_array.value(i)); + } + } + + assert_eq!(all_names.len(), 6); + assert_eq!(all_values.len(), 6); + + for i in 0..6 { + assert_eq!(all_names[i], format!("name_{i}")); + assert_eq!(all_values[i], i as i32); + } + } + + /// Test reading Parquet files without field IDs with nested types (struct). + /// Java's pruneColumnsFallback() projects entire top-level columns including nested content. + /// This test verifies that a top-level struct field is projected correctly with all its nested fields.
+ #[tokio::test] + async fn test_read_parquet_without_field_ids_with_struct() { + use arrow_array::{Int32Array, StructArray}; + use arrow_schema::Fields; + + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::required( + 2, + "person", + Type::Struct(crate::spec::StructType::new(vec![ + NestedField::required( + 3, + "name", + Type::Primitive(PrimitiveType::String), + ) + .into(), + NestedField::required(4, "age", Type::Primitive(PrimitiveType::Int)) + .into(), + ])), + ) + .into(), + ]) + .build() + .unwrap(), + ); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new( + "person", + DataType::Struct(Fields::from(vec![ + Field::new("name", DataType::Utf8, false), + Field::new("age", DataType::Int32, false), + ])), + false, + ), + ])); + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_io = FileIO::new_with_fs(); + + let id_data = Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef; + let name_data = Arc::new(StringArray::from(vec!["Alice", "Bob"])) as ArrayRef; + let age_data = Arc::new(Int32Array::from(vec![30, 25])) as ArrayRef; + let person_data = Arc::new(StructArray::from(vec![ + ( + Arc::new(Field::new("name", DataType::Utf8, false)), + name_data, + ), + ( + Arc::new(Field::new("age", DataType::Int32, false)), + age_data, + ), + ])) as ArrayRef; + + let to_write = + RecordBatch::try_new(arrow_schema.clone(), vec![id_data, person_data]).unwrap(); + + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); + let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); + + writer.write(&to_write).expect("Writing batch"); + writer.close().unwrap(); + + let reader = ArrowReaderBuilder::new(file_io).build(); + + let tasks = Box::pin(futures::stream::iter( + vec![Ok(FileScanTask { + file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) + .unwrap() + .len(), + start: 0, + length: 0, + record_count: None, + data_file_path: format!("{table_location}/1.parquet"), + data_file_format: DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![1, 2], + predicate: None, + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false, + })] + .into_iter(), + )) as FileScanTaskStream; + + let result = reader + .read(tasks) + .unwrap() + .try_collect::<Vec<_>>() + .await + .unwrap(); + + assert_eq!(result.len(), 1); + let batch = &result[0]; + assert_eq!(batch.num_rows(), 2); + assert_eq!(batch.num_columns(), 2); + + let id_array = batch + .column(0) + .as_primitive::<arrow_array::types::Int32Type>(); + assert_eq!(id_array.value(0), 1); + assert_eq!(id_array.value(1), 2); + + let person_array = batch.column(1).as_struct(); + assert_eq!(person_array.num_columns(), 2); + + let name_array = person_array.column(0).as_string::<i32>(); + assert_eq!(name_array.value(0), "Alice"); + assert_eq!(name_array.value(1), "Bob"); + + let age_array = person_array + .column(1) + .as_primitive::<arrow_array::types::Int32Type>(); + assert_eq!(age_array.value(0), 30); + assert_eq!(age_array.value(1), 25); + } + + /// Test reading Parquet files without field IDs with schema evolution - column added in the middle.
+ /// When a new column is inserted between existing columns in the schema order, + /// the fallback projection must correctly map field IDs to output positions. + #[tokio::test] + async fn test_read_parquet_without_field_ids_schema_evolution_add_column_in_middle() { + use arrow_array::{Array, Int32Array}; + + let arrow_schema_old = Arc::new(ArrowSchema::new(vec![ + Field::new("col0", DataType::Int32, true), + Field::new("col1", DataType::Int32, true), + ])); + + // New column added between existing columns: col0 (id=1), newCol (id=5), col1 (id=2) + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::optional(1, "col0", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(5, "newCol", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(2, "col1", Type::Primitive(PrimitiveType::Int)).into(), + ]) + .build() + .unwrap(), + ); + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_io = FileIO::new_with_fs(); + + let col0_data = Arc::new(Int32Array::from(vec![1, 2])) as ArrayRef; + let col1_data = Arc::new(Int32Array::from(vec![10, 20])) as ArrayRef; + + let to_write = + RecordBatch::try_new(arrow_schema_old.clone(), vec![col0_data, col1_data]).unwrap(); + + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); + let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); + writer.write(&to_write).expect("Writing batch"); + writer.close().unwrap(); + + let reader = ArrowReaderBuilder::new(file_io).build(); + + let tasks = Box::pin(futures::stream::iter( + vec![Ok(FileScanTask { + file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) + .unwrap() + .len(), + start: 0, + length: 0, + record_count: None, + data_file_path: format!("{table_location}/1.parquet"), + data_file_format: DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![1, 5, 2], + predicate: None, + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false, + })] + .into_iter(), + )) as FileScanTaskStream; + + let result = reader + .read(tasks) + .unwrap() + .try_collect::<Vec<_>>() + .await + .unwrap(); + + assert_eq!(result.len(), 1); + let batch = &result[0]; + assert_eq!(batch.num_rows(), 2); + assert_eq!(batch.num_columns(), 3); + + let result_col0 = batch + .column(0) + .as_primitive::<arrow_array::types::Int32Type>(); + assert_eq!(result_col0.value(0), 1); + assert_eq!(result_col0.value(1), 2); + + // New column should be NULL (doesn't exist in old file) + let result_newcol = batch + .column(1) + .as_primitive::<arrow_array::types::Int32Type>(); + assert_eq!(result_newcol.null_count(), 2); + assert!(result_newcol.is_null(0)); + assert!(result_newcol.is_null(1)); + + let result_col1 = batch + .column(2) + .as_primitive::<arrow_array::types::Int32Type>(); + assert_eq!(result_col1.value(0), 10); + assert_eq!(result_col1.value(1), 20); + } + + /// Test reading Parquet files without field IDs with a filter that eliminates all row groups. + /// During development of field ID mapping, we saw a panic when row_selection_enabled=true and + /// all row groups are filtered out.
+ #[tokio::test] + async fn test_read_parquet_without_field_ids_filter_eliminates_all_rows() { + use arrow_array::{Float64Array, Int32Array}; + + // Schema with fields that will use fallback IDs 1, 2, 3 + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(), + NestedField::required(3, "value", Type::Primitive(PrimitiveType::Double)) + .into(), + ]) + .build() + .unwrap(), + ); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false), + Field::new("name", DataType::Utf8, false), + Field::new("value", DataType::Float64, false), + ])); + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_io = FileIO::new_with_fs(); + + // Write data where all ids are >= 10 + let id_data = Arc::new(Int32Array::from(vec![10, 11, 12])) as ArrayRef; + let name_data = Arc::new(StringArray::from(vec!["a", "b", "c"])) as ArrayRef; + let value_data = Arc::new(Float64Array::from(vec![100.0, 200.0, 300.0])) as ArrayRef; + + let to_write = + RecordBatch::try_new(arrow_schema.clone(), vec![id_data, name_data, value_data]) + .unwrap(); + + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); + let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); + writer.write(&to_write).expect("Writing batch"); + writer.close().unwrap(); + + // Filter that eliminates all row groups: id < 5 + let predicate = Reference::new("id").less_than(Datum::int(5)); + + // Enable both row_group_filtering and row_selection: this combination previously triggered the panic + let reader = ArrowReaderBuilder::new(file_io) + .with_row_group_filtering_enabled(true) + .with_row_selection_enabled(true) + .build(); + + let tasks = Box::pin(futures::stream::iter( + vec![Ok(FileScanTask { + file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) + .unwrap() + .len(), + start: 0, + length: 0, + record_count: None, + data_file_path: format!("{table_location}/1.parquet"), + data_file_format: DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![1, 2, 3], + predicate: Some(predicate.bind(schema, true).unwrap()), + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false, + })] + .into_iter(), + )) as FileScanTaskStream; + + // Should no longer panic + let result = reader + .read(tasks) + .unwrap() + .try_collect::<Vec<_>>() + .await + .unwrap(); + + // Should return empty results + assert!(result.is_empty() || result.iter().all(|batch| batch.num_rows() == 0)); + } + + /// Test bucket partitioning reads source column from data file (not partition metadata). + /// + /// This is an integration test verifying the complete ArrowReader pipeline with bucket partitioning. + /// It corresponds to TestRuntimeFiltering tests in Iceberg Java (e.g., testRenamedSourceColumnTable).
+ /// + /// # Iceberg Spec Requirements + /// + /// Per the Iceberg spec "Column Projection" section: + /// > "Return the value from partition metadata if an **Identity Transform** exists for the field" + /// + /// This means: + /// - Identity transforms (e.g., `identity(dept)`) use constants from partition metadata + /// - Non-identity transforms (e.g., `bucket(4, id)`) must read source columns from data files + /// - Partition metadata for bucket transforms stores bucket numbers (0-3), NOT source values + /// + /// Java's PartitionUtil.constantsMap() implements this via: + /// ```java + /// if (field.transform().isIdentity()) { + /// idToConstant.put(field.sourceId(), converted); + /// } + /// ``` + /// + /// # What This Test Verifies + /// + /// This test ensures the full ArrowReader → RecordBatchTransformer pipeline correctly handles + /// bucket partitioning when FileScanTask provides partition_spec and partition_data: + /// + /// - Parquet file has field_id=1 named "id" with actual data [1, 5, 9, 13] + /// - FileScanTask specifies partition_spec with bucket(4, id) and partition_data with bucket=1 + /// - RecordBatchTransformer.constants_map() excludes bucket-partitioned field from constants + /// - ArrowReader correctly reads [1, 5, 9, 13] from the data file + /// - Values are NOT replaced with constant 1 from partition metadata + /// + /// # Why This Matters + /// + /// Without correct handling: + /// - Runtime filtering would break (e.g., `WHERE id = 5` would fail) + /// - Query results would be incorrect (all rows would have id=1) + /// - Bucket partitioning would be unusable for query optimization + /// + /// # References + /// - Iceberg spec: format/spec.md "Column Projection" + "Partition Transforms" + /// - Java test: spark/src/test/java/.../TestRuntimeFiltering.java + /// - Java impl: core/src/main/java/org/apache/iceberg/util/PartitionUtil.java + #[tokio::test] + async fn test_bucket_partitioning_reads_source_column_from_file() { + use arrow_array::Int32Array; + + use crate::spec::{Literal, PartitionSpec, Struct, Transform}; + + // Iceberg schema with id and name columns + let schema = Arc::new( + Schema::builder() + .with_schema_id(0) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + NestedField::optional(2, "name", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build() + .unwrap(), + ); + + // Partition spec: bucket(4, id) + let partition_spec = Arc::new( + PartitionSpec::builder(schema.clone()) + .with_spec_id(0) + .add_partition_field("id", "id_bucket", Transform::Bucket(4)) + .unwrap() + .build() + .unwrap(), + ); + + // Partition data: bucket value is 1 + let partition_data = Struct::from_iter(vec![Some(Literal::int(1))]); + + // Create Arrow schema with field IDs for Parquet file + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "1".to_string(), + )])), + Field::new("name", DataType::Utf8, true).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "2".to_string(), + )])), + ])); + + // Write Parquet file with data + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_io = FileIO::new_with_fs(); + + let id_data = Arc::new(Int32Array::from(vec![1, 5, 9, 13])) as ArrayRef; + let name_data = + Arc::new(StringArray::from(vec!["Alice", "Bob", "Charlie", "Dave"])) as ArrayRef; + + let to_write = + 
RecordBatch::try_new(arrow_schema.clone(), vec![id_data, name_data]).unwrap(); + + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + let file = File::create(format!("{}/data.parquet", &table_location)).unwrap(); + let mut writer = ArrowWriter::try_new(file, to_write.schema(), Some(props)).unwrap(); + writer.write(&to_write).expect("Writing batch"); + writer.close().unwrap(); + + // Read the Parquet file with partition spec and data + let reader = ArrowReaderBuilder::new(file_io).build(); + let tasks = Box::pin(futures::stream::iter( + vec![Ok(FileScanTask { + file_size_in_bytes: std::fs::metadata(format!("{table_location}/data.parquet")) + .unwrap() + .len(), + start: 0, + length: 0, + record_count: None, + data_file_path: format!("{table_location}/data.parquet"), + data_file_format: DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![1, 2], + predicate: None, + deletes: vec![], + partition: Some(partition_data), + partition_spec: Some(partition_spec), + name_mapping: None, + case_sensitive: false, + })] + .into_iter(), + )) as FileScanTaskStream; + + let result = reader + .read(tasks) + .unwrap() + .try_collect::<Vec<_>>() + .await + .unwrap(); + + // Verify we got the correct data + assert_eq!(result.len(), 1); + let batch = &result[0]; + + assert_eq!(batch.num_columns(), 2); + assert_eq!(batch.num_rows(), 4); + + // The id column MUST contain actual values from the Parquet file [1, 5, 9, 13], + // NOT the constant partition value 1 + let id_col = batch + .column(0) + .as_primitive::<arrow_array::types::Int32Type>(); + assert_eq!(id_col.value(0), 1); + assert_eq!(id_col.value(1), 5); + assert_eq!(id_col.value(2), 9); + assert_eq!(id_col.value(3), 13); + + let name_col = batch.column(1).as_string::<i32>(); + assert_eq!(name_col.value(0), "Alice"); + assert_eq!(name_col.value(1), "Bob"); + assert_eq!(name_col.value(2), "Charlie"); + assert_eq!(name_col.value(3), "Dave"); + }
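The spec rule exercised above reduces to a small predicate on the transform type. A sketch (not part of the patch; the names and tuple shape are invented) of the constants-map construction it implies:

```rust
use std::collections::HashMap;

// Hypothetical stand-in for a partition field: (source column ID, transform, stored value).
enum Transform {
    Identity,
    Bucket(u32),
}

// Only identity-transformed sources may be served from partition metadata as
// constants; bucket(..) stores the bucket number, not the source value.
fn constants_map(partition: &[(i32, Transform, i64)]) -> HashMap<i32, i64> {
    partition
        .iter()
        .filter(|(_, transform, _)| matches!(transform, Transform::Identity))
        .map(|(source_id, _, value)| (*source_id, *value))
        .collect()
}

fn main() {
    // partition = [identity(dept) = 42, bucket(4, id) = 1]
    let constants = constants_map(&[
        (1, Transform::Identity, 42),
        (2, Transform::Bucket(4), 1),
    ]);
    assert_eq!(constants.get(&1), Some(&42));
    // `id` has no constant: it must be read from the data file.
    assert_eq!(constants.get(&2), None);
}
```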
+ + /// Regression test: predicate on a column after nested types in a migrated file (no field IDs). + /// Schema has struct, list, and map columns before the predicate target (`id`), + /// exercising the fallback field ID mapping across all nested type variants. + #[tokio::test] + async fn test_predicate_on_migrated_file_with_nested_types() { + use serde::{Deserialize, Serialize}; + use serde_arrow::schema::{SchemaLike, TracingOptions}; + + #[derive(Serialize, Deserialize)] + struct Person { + name: String, + age: i32, + } + + #[derive(Serialize, Deserialize)] + struct Row { + person: Person, + people: Vec<Person>, + props: std::collections::BTreeMap<String, String>, + id: i32, + } + + let rows = vec![ + Row { + person: Person { + name: "Alice".into(), + age: 30, + }, + people: vec![Person { + name: "Alice".into(), + age: 30, + }], + props: [("k1".into(), "v1".into())].into(), + id: 1, + }, + Row { + person: Person { + name: "Bob".into(), + age: 25, + }, + people: vec![Person { + name: "Bob".into(), + age: 25, + }], + props: [("k2".into(), "v2".into())].into(), + id: 2, + }, + Row { + person: Person { + name: "Carol".into(), + age: 40, + }, + people: vec![Person { + name: "Carol".into(), + age: 40, + }], + props: [("k3".into(), "v3".into())].into(), + id: 3, + }, + ]; + + let tracing_options = TracingOptions::default() + .map_as_struct(false) + .strings_as_large_utf8(false) + .sequence_as_large_list(false); + let fields = Vec::<arrow_schema::FieldRef>::from_type::<Row>(tracing_options).unwrap(); + let arrow_schema = Arc::new(ArrowSchema::new(fields.clone())); + let batch = serde_arrow::to_record_batch(&fields, &rows).unwrap(); + + // Fallback field IDs: person=1, people=2, props=3, id=4 + let iceberg_schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required( + 1, + "person", + Type::Struct(crate::spec::StructType::new(vec![ + NestedField::required( + 5, + "name", + Type::Primitive(PrimitiveType::String), + ) + .into(), + NestedField::required(6, "age", Type::Primitive(PrimitiveType::Int)) + .into(), + ])), + ) + .into(), + NestedField::required( + 2, + "people", + Type::List(crate::spec::ListType { + element_field: NestedField::required( + 7, + "element", + Type::Struct(crate::spec::StructType::new(vec![ + NestedField::required( + 8, + "name", + Type::Primitive(PrimitiveType::String), + ) + .into(), + NestedField::required( + 9, + "age", + Type::Primitive(PrimitiveType::Int), + ) + .into(), + ])), + ) + .into(), + }), + ) + .into(), + NestedField::required( + 3, + "props", + Type::Map(crate::spec::MapType { + key_field: NestedField::required( + 10, + "key", + Type::Primitive(PrimitiveType::String), + ) + .into(), + value_field: NestedField::required( + 11, + "value", + Type::Primitive(PrimitiveType::String), + ) + .into(), + }), + ) + .into(), + NestedField::required(4, "id", Type::Primitive(PrimitiveType::Int)).into(), + ]) + .build() + .unwrap(), + ); + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_path = format!("{table_location}/1.parquet"); + + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + let file = File::create(&file_path).unwrap(); + let mut writer = ArrowWriter::try_new(file, arrow_schema, Some(props)).unwrap(); + writer.write(&batch).expect("Writing batch"); + writer.close().unwrap(); + + let predicate = Reference::new("id").greater_than(Datum::int(1)); + + let reader = ArrowReaderBuilder::new(FileIO::new_with_fs()) + .with_row_group_filtering_enabled(true) + .with_row_selection_enabled(true) + .build(); + + let tasks = Box::pin(futures::stream::iter( + vec![Ok(FileScanTask { + file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(), + start: 0, + length: 0, + record_count: None, + data_file_path: file_path,
data_file_format: DataFileFormat::Parquet, + schema: iceberg_schema.clone(), + project_field_ids: vec![4], + predicate: Some(predicate.bind(iceberg_schema, true).unwrap()), + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false, + })] + .into_iter(), + )) as FileScanTaskStream; + + let result = reader + .read(tasks) + .unwrap() + .try_collect::<Vec<_>>() + .await + .unwrap(); + + let ids: Vec<i32> = result + .iter() + .flat_map(|b| { + b.column(0) + .as_primitive::<arrow_array::types::Int32Type>() + .values() + .iter() + .copied() + }) + .collect(); + assert_eq!(ids, vec![2, 3]); + } +} diff --git a/crates/iceberg/src/arrow/reader/row_filter.rs b/crates/iceberg/src/arrow/reader/row_filter.rs new file mode 100644 index 0000000000..52f7260cc6 --- /dev/null +++ b/crates/iceberg/src/arrow/reader/row_filter.rs @@ -0,0 +1,616 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Predicate-driven row filtering for `ArrowReader`: constructing Arrow `RowFilter`s +//! from Iceberg predicates, row-group selection based on column statistics, and +//! row-selection via the Parquet page index. Also includes byte-range row-group +//! filtering used for file splitting. + +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; + +use parquet::arrow::ProjectionMask; +use parquet::arrow::arrow_reader::{ArrowPredicateFn, RowFilter, RowSelection}; +use parquet::file::metadata::ParquetMetaData; +use parquet::schema::types::SchemaDescriptor; + +use super::{ArrowReader, PredicateConverter}; +use crate::error::Result; +use crate::expr::BoundPredicate; +use crate::expr::visitors::bound_predicate_visitor::visit; +use crate::expr::visitors::page_index_evaluator::PageIndexEvaluator; +use crate::expr::visitors::row_group_metrics_evaluator::RowGroupMetricsEvaluator; +use crate::spec::Schema; +use crate::{Error, ErrorKind};
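Before the implementation, a sketch (not part of the patch; the predicate is invented and assumes an Int32 first projected column) of the arrow-rs machinery this module drives: an `ArrowPredicateFn` pairs a `ProjectionMask` with a closure mapping each batch of the projected columns to a `BooleanArray` of keep-flags, and a `RowFilter` chains such predicates.

```rust
use arrow_array::BooleanArray;
use arrow_array::cast::AsArray;
use arrow_array::types::Int32Type;
use parquet::arrow::ProjectionMask;
use parquet::arrow::arrow_reader::{ArrowPredicateFn, RowFilter};

// Keep only rows whose first projected column is greater than 1; NULL inputs
// stay NULL in the output, matching SQL semantics.
fn example_filter(mask: ProjectionMask) -> RowFilter {
    let predicate = ArrowPredicateFn::new(mask, |batch| {
        let col = batch.column(0).as_primitive::<Int32Type>();
        Ok(BooleanArray::from_iter(
            col.iter().map(|v| v.map(|v| v > 1)),
        ))
    });
    RowFilter::new(vec![Box::new(predicate)])
}
```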
+ +impl ArrowReader { + pub(super) fn get_row_filter( + predicates: &BoundPredicate, + parquet_schema: &SchemaDescriptor, + iceberg_field_ids: &HashSet<i32>, + field_id_map: &HashMap<i32, usize>, + ) -> Result<RowFilter> { + // Collect Parquet column indices from field ids. + // If the field id is not found in Parquet schema, it will be ignored due to schema evolution. + let mut column_indices = iceberg_field_ids + .iter() + .filter_map(|field_id| field_id_map.get(field_id).cloned()) + .collect::<Vec<_>>(); + column_indices.sort(); + + // The converter that converts `BoundPredicates` to `ArrowPredicates` + let mut converter = PredicateConverter { + parquet_schema, + column_map: field_id_map, + column_indices: &column_indices, + }; + + // After collecting the leaf column indices used in the predicate, + // create the projection mask for the Arrow predicates. + let projection_mask = ProjectionMask::leaves(parquet_schema, column_indices.clone()); + let predicate_func = visit(&mut converter, predicates)?; + let arrow_predicate = ArrowPredicateFn::new(projection_mask, predicate_func); + Ok(RowFilter::new(vec![Box::new(arrow_predicate)])) + } + + pub(super) fn get_selected_row_group_indices( + predicate: &BoundPredicate, + parquet_metadata: &Arc<ParquetMetaData>, + field_id_map: &HashMap<i32, usize>, + snapshot_schema: &Schema, + ) -> Result<Vec<usize>> { + let row_groups_metadata = parquet_metadata.row_groups(); + let mut results = Vec::with_capacity(row_groups_metadata.len()); + + for (idx, row_group_metadata) in row_groups_metadata.iter().enumerate() { + if RowGroupMetricsEvaluator::eval( + predicate, + row_group_metadata, + field_id_map, + snapshot_schema, + )? { + results.push(idx); + } + } + + Ok(results) + } + + pub(super) fn get_row_selection_for_filter_predicate( + predicate: &BoundPredicate, + parquet_metadata: &Arc<ParquetMetaData>, + selected_row_groups: &Option<Vec<usize>>, + field_id_map: &HashMap<i32, usize>, + snapshot_schema: &Schema, + ) -> Result<RowSelection> { + let Some(column_index) = parquet_metadata.column_index() else { + return Err(Error::new( + ErrorKind::Unexpected, + "Parquet file metadata does not contain a column index", + )); + }; + + let Some(offset_index) = parquet_metadata.offset_index() else { + return Err(Error::new( + ErrorKind::Unexpected, + "Parquet file metadata does not contain an offset index", + )); + }; + + // If all row groups were filtered out, return an empty RowSelection (select no rows) + if let Some(selected_row_groups) = selected_row_groups + && selected_row_groups.is_empty() + { + return Ok(RowSelection::from(Vec::new())); + } + + let mut selected_row_groups_idx = 0; + + let page_index = column_index + .iter() + .enumerate() + .zip(offset_index) + .zip(parquet_metadata.row_groups()); + + let mut results = Vec::new(); + for (((idx, column_index), offset_index), row_group_metadata) in page_index { + if let Some(selected_row_groups) = selected_row_groups { + // skip row groups that aren't present in selected_row_groups + if idx == selected_row_groups[selected_row_groups_idx] { + selected_row_groups_idx += 1; + } else { + continue; + } + } + + let selections_for_page = PageIndexEvaluator::eval( + predicate, + column_index, + offset_index, + row_group_metadata, + field_id_map, + snapshot_schema, + )?; + + results.push(selections_for_page); + + if let Some(selected_row_groups) = selected_row_groups + && selected_row_groups_idx == selected_row_groups.len() + { + break; + } + } + + Ok(results.into_iter().flatten().collect::<Vec<_>>().into()) + }
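The byte-range check in the next function is a plain half-open interval intersection; a tiny sketch (not part of the patch) of the invariant it relies on:

```rust
// [start, end) intersects [rg_start, rg_end) iff rg_start < end && start < rg_end.
fn overlaps(start: u64, end: u64, rg_start: u64, rg_end: u64) -> bool {
    rg_start < end && start < rg_end
}

fn main() {
    // A split covering bytes [4, 100) picks up a row group spanning [4, 50)...
    assert!(overlaps(4, 100, 4, 50));
    // ...but not one starting exactly at the split end: half-open ranges let
    // adjacent splits partition the file with no row group read twice.
    assert!(!overlaps(4, 100, 100, 180));
}
```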
+ + /// Filters row groups by byte range to support Iceberg's file splitting. + /// + /// Iceberg splits large files at row group boundaries, so we only read row groups + /// whose byte ranges overlap with [start, start+length). + pub(super) fn filter_row_groups_by_byte_range( + parquet_metadata: &Arc<ParquetMetaData>, + start: u64, + length: u64, + ) -> Result<Vec<usize>> { + let row_groups = parquet_metadata.row_groups(); + let mut selected = Vec::new(); + let end = start + length; + + // Row groups are stored sequentially after the 4-byte magic header. + let mut current_byte_offset = 4u64; + + for (idx, row_group) in row_groups.iter().enumerate() { + let row_group_size = row_group.compressed_size() as u64; + let row_group_end = current_byte_offset + row_group_size; + + if current_byte_offset < end && start < row_group_end { + selected.push(idx); + } + + current_byte_offset = row_group_end; + } + + Ok(selected) + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + use std::fs::File; + use std::sync::Arc; + + use arrow_array::cast::AsArray; + use arrow_array::{ArrayRef, LargeStringArray, RecordBatch, StringArray}; + use arrow_schema::{DataType, Field, Schema as ArrowSchema}; + use futures::TryStreamExt; + use parquet::arrow::{ArrowWriter, PARQUET_FIELD_ID_META_KEY}; + use parquet::basic::Compression; + use parquet::file::properties::WriterProperties; + use tempfile::TempDir; + + use crate::arrow::{ArrowReader, ArrowReaderBuilder}; + use crate::expr::{Bind, Predicate, Reference}; + use crate::io::FileIO; + use crate::scan::{FileScanTask, FileScanTaskStream}; + use crate::spec::{DataFileFormat, Datum, NestedField, PrimitiveType, Schema, SchemaRef, Type}; + + async fn test_perform_read( + predicate: Predicate, + schema: SchemaRef, + table_location: String, + reader: ArrowReader, + ) -> Vec<Option<String>> { + let tasks = Box::pin(futures::stream::iter( + vec![Ok(FileScanTask { + file_size_in_bytes: std::fs::metadata(format!("{table_location}/1.parquet")) + .unwrap() + .len(), + start: 0, + length: 0, + record_count: None, + data_file_path: format!("{table_location}/1.parquet"), + data_file_format: DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![1], + predicate: Some(predicate.bind(schema, true).unwrap()), + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false, + })] + .into_iter(), + )) as FileScanTaskStream; + + let result = reader + .read(tasks) + .unwrap() + .try_collect::<Vec<_>>() + .await + .unwrap(); + + result[0].columns()[0] + .as_string_opt::<i32>() + .unwrap() + .iter() + .map(|v| v.map(ToOwned::to_owned)) + .collect::<Vec<_>>() + } + + fn setup_kleene_logic( + data_for_col_a: Vec<Option<String>>, + col_a_type: DataType, + ) -> (FileIO, SchemaRef, String, TempDir) { + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::optional(1, "a", Type::Primitive(PrimitiveType::String)).into(), + ]) + .build() + .unwrap(), + ); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("a", col_a_type.clone(), true).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "1".to_string(), + )])), + ])); + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + + let file_io = FileIO::new_with_fs(); + + let col = match col_a_type { + DataType::Utf8 => Arc::new(StringArray::from(data_for_col_a)) as ArrayRef, + DataType::LargeUtf8 => Arc::new(LargeStringArray::from(data_for_col_a)) as ArrayRef, + _ => panic!("unexpected col_a_type"), + }; + + let to_write = RecordBatch::try_new(arrow_schema.clone(), vec![col]).unwrap(); + + // Write the Parquet files + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .build(); + + let file = File::create(format!("{table_location}/1.parquet")).unwrap(); + let mut writer = + ArrowWriter::try_new(file, to_write.schema(), Some(props.clone())).unwrap(); + + writer.write(&to_write).expect("Writing batch"); + + // writer must be closed to write footer + writer.close().unwrap(); + + (file_io, schema, table_location, tmp_dir) + }
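The two tests that follow pin down SQL's three-valued (Kleene) logic; for reference, a sketch (not part of the patch) with `None` standing in for NULL:

```rust
fn kleene_or(a: Option<bool>, b: Option<bool>) -> Option<bool> {
    match (a, b) {
        (Some(true), _) | (_, Some(true)) => Some(true),
        (Some(false), Some(false)) => Some(false),
        _ => None, // NULL OR false, NULL OR NULL, ...
    }
}

fn kleene_and(a: Option<bool>, b: Option<bool>) -> Option<bool> {
    match (a, b) {
        (Some(false), _) | (_, Some(false)) => Some(false),
        (Some(true), Some(true)) => Some(true),
        _ => None, // NULL AND true, NULL AND NULL, ...
    }
}

fn main() {
    // For the NULL row, `a IS NULL OR a = 'foo'` is OR(true, NULL) = true,
    // so the row is kept...
    assert_eq!(kleene_or(Some(true), None), Some(true));
    // ...while `a IS NOT NULL AND a != 'foo'` is AND(false, NULL) = false,
    // and only rows evaluating to true survive the filter.
    assert_eq!(kleene_and(Some(false), None), Some(false));
}
```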
+ + #[tokio::test] + async fn test_kleene_logic_or_behaviour() { + // a IS NULL OR a = 'foo' + let predicate = Reference::new("a") + .is_null() + .or(Reference::new("a").equal_to(Datum::string("foo"))); + + // Table data: [NULL, "foo", "bar"] + let data_for_col_a = vec![None, Some("foo".to_string()), Some("bar".to_string())]; + + // Expected: [NULL, "foo"]. + let expected = vec![None, Some("foo".to_string())]; + + let (file_io, schema, table_location, _temp_dir) = + setup_kleene_logic(data_for_col_a, DataType::Utf8); + let reader = ArrowReaderBuilder::new(file_io).build(); + + let result_data = test_perform_read(predicate, schema, table_location, reader).await; + + assert_eq!(result_data, expected); + } + + #[tokio::test] + async fn test_kleene_logic_and_behaviour() { + // a IS NOT NULL AND a != 'foo' + let predicate = Reference::new("a") + .is_not_null() + .and(Reference::new("a").not_equal_to(Datum::string("foo"))); + + // Table data: [NULL, "foo", "bar"] + let data_for_col_a = vec![None, Some("foo".to_string()), Some("bar".to_string())]; + + // Expected: ["bar"]. + let expected = vec![Some("bar".to_string())]; + + let (file_io, schema, table_location, _temp_dir) = + setup_kleene_logic(data_for_col_a, DataType::Utf8); + let reader = ArrowReaderBuilder::new(file_io).build(); + + let result_data = test_perform_read(predicate, schema, table_location, reader).await; + + assert_eq!(result_data, expected); + } + + #[tokio::test] + async fn test_predicate_cast_literal() { + let predicates = vec![ + // a == 'foo' + (Reference::new("a").equal_to(Datum::string("foo")), vec![ + Some("foo".to_string()), + ]), + // a != 'foo' + ( + Reference::new("a").not_equal_to(Datum::string("foo")), + vec![Some("bar".to_string())], + ), + // STARTS_WITH(a, 'f') + (Reference::new("a").starts_with(Datum::string("f")), vec![ + Some("foo".to_string()), + ]), + // NOT STARTS_WITH(a, 'f') + ( + Reference::new("a").not_starts_with(Datum::string("f")), + vec![Some("bar".to_string())], + ), + // a < 'foo' + (Reference::new("a").less_than(Datum::string("foo")), vec![ + Some("bar".to_string()), + ]), + // a <= 'foo' + ( + Reference::new("a").less_than_or_equal_to(Datum::string("foo")), + vec![Some("foo".to_string()), Some("bar".to_string())], + ), + // a > 'bar' + ( + Reference::new("a").greater_than(Datum::string("bar")), + vec![Some("foo".to_string())], + ), + // a >= 'foo' + ( + Reference::new("a").greater_than_or_equal_to(Datum::string("foo")), + vec![Some("foo".to_string())], + ), + // a IN ('foo', 'baz') + ( + Reference::new("a").is_in([Datum::string("foo"), Datum::string("baz")]), + vec![Some("foo".to_string())], + ), + // a NOT IN ('foo', 'baz') + ( + Reference::new("a").is_not_in([Datum::string("foo"), Datum::string("baz")]), + vec![Some("bar".to_string())], + ), + ]; + + // Table data: ["foo", "bar"] + let data_for_col_a = vec![Some("foo".to_string()), Some("bar".to_string())]; + + let (file_io, schema, table_location, _temp_dir) = + setup_kleene_logic(data_for_col_a, DataType::LargeUtf8); + let reader = ArrowReaderBuilder::new(file_io).build(); + + for (predicate, expected) in predicates { + println!("testing predicate {predicate}"); + let result_data = test_perform_read( + predicate.clone(), + schema.clone(), + table_location.clone(), + reader.clone(), + ) + .await; + + assert_eq!(result_data, expected, "predicate={predicate}"); + } + } + + /// Verifies that file splits respect byte ranges and only read specific row groups.
+ #[tokio::test] + async fn test_file_splits_respect_byte_ranges() { + use arrow_array::Int32Array; + use parquet::file::reader::{FileReader, SerializedFileReader}; + + let schema = Arc::new( + Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(PrimitiveType::Int)).into(), + ]) + .build() + .unwrap(), + ); + + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("id", DataType::Int32, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "1".to_string(), + )])), + ])); + + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().to_str().unwrap().to_string(); + let file_path = format!("{table_location}/multi_row_group.parquet"); + + // Force each batch into its own row group for testing byte range filtering. + let batch1 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from( + (0..100).collect::<Vec<_>>(), + ))]) + .unwrap(); + let batch2 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from( + (100..200).collect::<Vec<_>>(), + ))]) + .unwrap(); + let batch3 = RecordBatch::try_new(arrow_schema.clone(), vec![Arc::new(Int32Array::from( + (200..300).collect::<Vec<_>>(), + ))]) + .unwrap(); + + let props = WriterProperties::builder() + .set_compression(Compression::SNAPPY) + .set_max_row_group_row_count(Some(100)) + .build(); + + let file = File::create(&file_path).unwrap(); + let mut writer = ArrowWriter::try_new(file, arrow_schema.clone(), Some(props)).unwrap(); + writer.write(&batch1).expect("Writing batch 1"); + writer.write(&batch2).expect("Writing batch 2"); + writer.write(&batch3).expect("Writing batch 3"); + writer.close().unwrap(); + + // Read the file metadata to get row group byte positions + let file = File::open(&file_path).unwrap(); + let reader = SerializedFileReader::new(file).unwrap(); + let metadata = reader.metadata(); + + println!("File has {} row groups", metadata.num_row_groups()); + assert_eq!(metadata.num_row_groups(), 3, "Expected 3 row groups"); + + // Get byte positions for each row group + let row_group_0 = metadata.row_group(0); + let row_group_1 = metadata.row_group(1); + let row_group_2 = metadata.row_group(2); + + let rg0_start = 4u64; // Parquet files start with 4-byte magic "PAR1" + let rg1_start = rg0_start + row_group_0.compressed_size() as u64; + let rg2_start = rg1_start + row_group_1.compressed_size() as u64; + let file_end = rg2_start + row_group_2.compressed_size() as u64; + + println!( + "Row group 0: {} rows, starts at byte {}, {} bytes compressed", + row_group_0.num_rows(), + rg0_start, + row_group_0.compressed_size() + ); + println!( + "Row group 1: {} rows, starts at byte {}, {} bytes compressed", + row_group_1.num_rows(), + rg1_start, + row_group_1.compressed_size() + ); + println!( + "Row group 2: {} rows, starts at byte {}, {} bytes compressed", + row_group_2.num_rows(), + rg2_start, + row_group_2.compressed_size() + ); + + let file_io = FileIO::new_with_fs(); + let reader = ArrowReaderBuilder::new(file_io).build(); + + // Task 1: read only the first row group + let task1 = FileScanTask { + file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(), + start: rg0_start, + length: row_group_0.compressed_size() as u64, + record_count: Some(100), + data_file_path: file_path.clone(), + data_file_format: DataFileFormat::Parquet, + schema: schema.clone(), + project_field_ids: vec![1], + predicate: None, + deletes: vec![], + partition: None, + partition_spec: None, + name_mapping: None, + case_sensitive: false,
+        };
+
+        // Task 2: read the second and third row groups
+        let task2 = FileScanTask {
+            file_size_in_bytes: std::fs::metadata(&file_path).unwrap().len(),
+            start: rg1_start,
+            length: file_end - rg1_start,
+            record_count: Some(200),
+            data_file_path: file_path.clone(),
+            data_file_format: DataFileFormat::Parquet,
+            schema: schema.clone(),
+            project_field_ids: vec![1],
+            predicate: None,
+            deletes: vec![],
+            partition: None,
+            partition_spec: None,
+            name_mapping: None,
+            case_sensitive: false,
+        };
+
+        let tasks1 = Box::pin(futures::stream::iter(vec![Ok(task1)])) as FileScanTaskStream;
+        let result1 = reader
+            .clone()
+            .read(tasks1)
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+
+        let total_rows_task1: usize = result1.iter().map(|b| b.num_rows()).sum();
+        println!(
+            "Task 1 (bytes {}-{}) returned {} rows",
+            rg0_start,
+            rg0_start + row_group_0.compressed_size() as u64,
+            total_rows_task1
+        );
+
+        let tasks2 = Box::pin(futures::stream::iter(vec![Ok(task2)])) as FileScanTaskStream;
+        let result2 = reader
+            .read(tasks2)
+            .unwrap()
+            .try_collect::<Vec<_>>()
+            .await
+            .unwrap();
+
+        let total_rows_task2: usize = result2.iter().map(|b| b.num_rows()).sum();
+        println!("Task 2 (bytes {rg1_start}-{file_end}) returned {total_rows_task2} rows");
+
+        assert_eq!(
+            total_rows_task1, 100,
+            "Task 1 should read only the first row group (100 rows), but got {total_rows_task1} rows"
+        );
+
+        assert_eq!(
+            total_rows_task2, 200,
+            "Task 2 should read only the second+third row groups (200 rows), but got {total_rows_task2} rows"
+        );
+
+        // Verify the actual data values are correct (not just the row count)
+        if total_rows_task1 > 0 {
+            let first_batch = &result1[0];
+            let id_col = first_batch
+                .column(0)
+                .as_primitive::<Int32Type>();
+            let first_val = id_col.value(0);
+            let last_val = id_col.value(id_col.len() - 1);
+            println!("Task 1 data range: {first_val} to {last_val}");
+
+            assert_eq!(first_val, 0, "Task 1 should start with id=0");
+            assert_eq!(last_val, 99, "Task 1 should end with id=99");
+        }
+
+        if total_rows_task2 > 0 {
+            let first_batch = &result2[0];
+            let id_col = first_batch
+                .column(0)
+                .as_primitive::<Int32Type>();
+            let first_val = id_col.value(0);
+            println!("Task 2 first value: {first_val}");
+
+            assert_eq!(first_val, 100, "Task 2 should start with id=100, not id=0");
+        }
+    }
+}
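A note on the contract this test pins down: a task owns exactly the row groups whose first byte falls inside `[start, start + length)`. The sketch below is hypothetical (not part of this patch); it assumes `(start, compressed_size)` pairs collected from parquet metadata the same way the test computes `rg0_start`/`rg1_start`/`rg2_start`:

```rust
use std::ops::Range;

/// Hypothetical helper: one byte-range split per row group.
/// `row_groups` holds (start_offset, compressed_size) pairs, with the first
/// offset at byte 4 (just past the "PAR1" magic).
fn row_group_splits(row_groups: &[(u64, u64)]) -> Vec<Range<u64>> {
    row_groups
        .iter()
        .map(|&(start, len)| start..start + len)
        .collect()
}
```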
diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs
index bd9e249f48..9b504421ae 100644
--- a/crates/iceberg/src/arrow/schema.rs
+++ b/crates/iceberg/src/arrow/schema.rs
@@ -199,7 +199,10 @@ fn visit_struct<V: ArrowSchemaVisitor>(fields: &Fields, visitor: &mut V) -> Resu
 }
 
 /// Visit schema in post order.
-fn visit_schema<V: ArrowSchemaVisitor>(schema: &ArrowSchema, visitor: &mut V) -> Result<V::U> {
+pub(crate) fn visit_schema<V: ArrowSchemaVisitor>(
+    schema: &ArrowSchema,
+    visitor: &mut V,
+) -> Result<V::U> {
     let mut results = Vec::with_capacity(schema.fields().len());
     for field in schema.fields() {
         visitor.before_field(field)?;
@@ -759,6 +762,11 @@ pub(crate) fn get_arrow_datum(datum: &Datum) -> Result<Box<dyn ArrowDatum + Send>> {
+        (PrimitiveType::Fixed(_), PrimitiveLiteral::Binary(value)) => {
+            let array = FixedSizeBinaryArray::try_from_iter(std::iter::once(value.as_slice()))
+                .map_err(|e| Error::new(ErrorKind::DataInvalid, e.to_string()))?;
+            Ok(Arc::new(Scalar::new(array)))
+        }
         (primitive_type, _) => Err(Error::new(
             ErrorKind::FeatureUnsupported,
@@ -2151,6 +2159,18 @@ mod tests {
         assert!(is_scalar);
         assert_eq!(array.value(0), [66u8; 16]);
     }
+    {
+        let datum = Datum::fixed(vec![1u8, 2, 3, 4, 5, 6, 7, 8]);
+        let arrow_datum = get_arrow_datum(&datum).unwrap();
+        let (array, is_scalar) = arrow_datum.get();
+        let array = array
+            .as_any()
+            .downcast_ref::<FixedSizeBinaryArray>()
+            .unwrap();
+        assert!(is_scalar);
+        assert_eq!(array.value_length(), 8);
+        assert_eq!(array.value(0), &[1u8, 2, 3, 4, 5, 6, 7, 8]);
+    }
 }
 
 #[test]
diff --git a/crates/iceberg/src/catalog/metadata_location.rs b/crates/iceberg/src/catalog/metadata_location.rs
index ed28118879..acd041d5e1 100644
--- a/crates/iceberg/src/catalog/metadata_location.rs
+++ b/crates/iceberg/src/catalog/metadata_location.rs
@@ -114,9 +114,9 @@ impl MetadataLocation {
         ))?;
 
         // Check for compression suffix (e.g., .gz)
-        let gzip_suffix = CompressionCodec::Gzip.suffix()?;
+        let gzip_suffix = CompressionCodec::gzip_default().suffix()?;
         let (stripped, compression_codec) = if let Some(s) = stripped.strip_suffix(gzip_suffix) {
-            (s, CompressionCodec::Gzip)
+            (s, CompressionCodec::gzip_default())
         } else {
             (stripped, CompressionCodec::None)
         };
@@ -261,7 +261,7 @@ mod test {
                 table_location: "/abc".to_string(),
                 version: 1234567,
                 id: Uuid::from_str("2cd22b57-5127-4198-92ba-e4e67c79821b").unwrap(),
-                compression_codec: CompressionCodec::Gzip,
+                compression_codec: CompressionCodec::gzip_default(),
             }),
         ),
         // Negative version
@@ -345,10 +345,16 @@ mod test {
             "/test/table/metadata/00005-81056704-ce5b-41c4-bb83-eb6408081af6.gz.metadata.json",
         )
         .unwrap();
-        assert_eq!(location_gzip.compression_codec, CompressionCodec::Gzip);
+        assert_eq!(
+            location_gzip.compression_codec,
+            CompressionCodec::gzip_default()
+        );
 
         let next_gzip = location_gzip.with_next_version();
-        assert_eq!(next_gzip.compression_codec, CompressionCodec::Gzip);
+        assert_eq!(
+            next_gzip.compression_codec,
+            CompressionCodec::gzip_default()
+        );
         assert_eq!(next_gzip.version, 6);
     }
 
@@ -369,7 +375,10 @@ mod test {
         );
         let metadata_gzip = create_test_metadata(props_gzip);
         let updated_gzip = location.with_new_metadata(&metadata_gzip);
-        assert_eq!(updated_gzip.compression_codec, CompressionCodec::Gzip);
+        assert_eq!(
+            updated_gzip.compression_codec,
+            CompressionCodec::gzip_default()
+        );
         assert_eq!(updated_gzip.version, 0);
         assert_eq!(
             updated_gzip.to_string(),
diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs
index f296cf2260..43102adec9 100644
--- a/crates/iceberg/src/catalog/mod.rs
+++ b/crates/iceberg/src/catalog/mod.rs
@@ -144,7 +144,6 @@ pub trait CatalogBuilder: Default + Debug + Send + Sync {
     ///
     /// let catalog = MyCatalogBuilder::default()
     ///     .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 {
-    ///         configured_scheme: "s3a".to_string(),
     ///         customized_credential_load: None,
     ///     }))
     ///     .load("my_catalog", props)
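Background for the new `Fixed` arm in `get_arrow_datum`: arrow comparison kernels accept literals as a `Scalar` wrapping a one-row array, so a fixed-length value becomes a one-element `FixedSizeBinaryArray`. A minimal sketch using only public arrow-array APIs (the helper name is hypothetical, not part of this patch):

```rust
use arrow_array::{FixedSizeBinaryArray, Scalar};

/// Hypothetical helper: wrap one fixed-length byte value in the one-row
/// Scalar form that arrow comparison kernels take as a literal operand.
fn fixed_literal(bytes: &[u8]) -> Scalar<FixedSizeBinaryArray> {
    let array = FixedSizeBinaryArray::try_from_iter(std::iter::once(bytes))
        .expect("a single value is always a valid fixed-size binary array");
    Scalar::new(array)
}
```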
diff --git a/crates/iceberg/src/compression.rs b/crates/iceberg/src/compression.rs
index 42f5298437..929d9226e7 100644
--- a/crates/iceberg/src/compression.rs
+++ b/crates/iceberg/src/compression.rs
@@ -17,28 +17,101 @@
 
 //! Compression codec support for data compression and decompression.
 
+use std::fmt;
 use std::io::{Read, Write};
 
 use flate2::Compression;
 use flate2::read::GzDecoder;
 use flate2::write::GzEncoder;
-use serde::{Deserialize, Serialize};
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
 
 use crate::{Error, ErrorKind, Result};
 
+/// Default compression level for Zstandard (zstd).
+const ZSTD_DEFAULT_LEVEL: u8 = 3;
+/// Default compression level for Gzip.
+const GZIP_DEFAULT_LEVEL: u8 = 6;
+/// Maximum compression level for Gzip.
+const GZIP_MAX_LEVEL: u8 = 9;
+
 /// Data compression formats
-#[derive(Debug, PartialEq, Eq, Clone, Copy, Default, Serialize, Deserialize)]
-#[serde(rename_all = "lowercase")]
+#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)]
 pub enum CompressionCodec {
     #[default]
     /// No compression
     None,
     /// LZ4 single compression frame with content size present
     Lz4,
-    /// Zstandard single compression frame with content size present
-    Zstd,
-    /// Gzip compression
-    Gzip,
+    /// Zstandard single compression frame with content size present.
+    /// Level range is 0–22, where 0 means default compression level (not no compression).
+    /// Use [`CompressionCodec::zstd_default`] to construct with the default level.
+    Zstd(u8),
+    /// Gzip compression. Level range is 0–9, where 0 means no compression.
+    /// Use [`CompressionCodec::gzip_default`] to construct with the default level.
+    Gzip(u8),
+    /// Snappy compression
+    Snappy,
+}
+
+impl CompressionCodec {
+    /// Returns a Zstd codec with the default compression level.
+    pub const fn zstd_default() -> Self {
+        CompressionCodec::Zstd(ZSTD_DEFAULT_LEVEL)
+    }
+
+    /// Returns a Gzip codec with the default compression level.
+    pub const fn gzip_default() -> Self {
+        CompressionCodec::Gzip(GZIP_DEFAULT_LEVEL)
+    }
+
+    /// Returns the codec name as used in serialization and error messages.
+    pub fn name(&self) -> &'static str {
+        match self {
+            CompressionCodec::None => "none",
+            CompressionCodec::Lz4 => "lz4",
+            CompressionCodec::Zstd(_) => "zstd",
+            CompressionCodec::Gzip(_) => "gzip",
+            CompressionCodec::Snappy => "snappy",
+        }
+    }
+}
+
+// Note: serialize/deserialize do not round-trip the compression level. Iceberg configuration
+// stores only the codec name (e.g. "zstd"), not the level, so deserialization always produces
+// the default level. A `Zstd(5)` written to metadata will be read back as `Zstd(3)`. Some
+// compression configuration (e.g. Avro metadata) has a separate level field alongside the codec name.
+impl Serialize for CompressionCodec {
+    fn serialize<S: Serializer>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error> {
+        serializer.serialize_str(self.name())
+    }
+}
+
+impl<'de> Deserialize<'de> for CompressionCodec {
+    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> std::result::Result<Self, D::Error> {
+        let s = String::deserialize(deserializer)?;
+        match s.to_lowercase().as_str() {
+            "none" => Ok(CompressionCodec::None),
+            "lz4" => Ok(CompressionCodec::Lz4),
+            "zstd" => Ok(CompressionCodec::zstd_default()),
+            "gzip" => Ok(CompressionCodec::gzip_default()),
+            "snappy" => Ok(CompressionCodec::Snappy),
+            other => Err(serde::de::Error::unknown_variant(other, &[
+                "none", "lz4", "zstd", "gzip", "snappy",
+            ])),
+        }
+    }
+}
+
+impl fmt::Display for CompressionCodec {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            CompressionCodec::None => write!(f, "None"),
+            CompressionCodec::Lz4 => write!(f, "Lz4"),
+            CompressionCodec::Zstd(level) => write!(f, "Zstd(level={level})"),
+            CompressionCodec::Gzip(level) => write!(f, "Gzip(level={level})"),
+            CompressionCodec::Snappy => write!(f, "Snappy"),
+        }
+    }
 }
 
 impl CompressionCodec {
@@ -49,13 +122,17 @@ impl CompressionCodec {
                 ErrorKind::FeatureUnsupported,
                 "LZ4 decompression is not supported currently",
             )),
-            CompressionCodec::Zstd => Ok(zstd::stream::decode_all(&bytes[..])?),
-            CompressionCodec::Gzip => {
+            CompressionCodec::Zstd(_) => Ok(zstd::stream::decode_all(&bytes[..])?),
+            CompressionCodec::Gzip(_) => {
                 let mut decoder = GzDecoder::new(&bytes[..]);
                 let mut decompressed = Vec::new();
                 decoder.read_to_end(&mut decompressed)?;
                 Ok(decompressed)
             }
+            CompressionCodec::Snappy => Err(Error::new(
+                ErrorKind::FeatureUnsupported,
+                "Snappy decompression is not supported currently",
+            )),
         }
     }
 
@@ -66,19 +143,24 @@ impl CompressionCodec {
                 ErrorKind::FeatureUnsupported,
                 "LZ4 compression is not supported currently",
             )),
-            CompressionCodec::Zstd => {
+            CompressionCodec::Zstd(level) => {
                 let writer = Vec::<u8>::new();
-                let mut encoder = zstd::stream::Encoder::new(writer, 3)?;
+                let mut encoder = zstd::stream::Encoder::new(writer, *level as i32)?;
                 encoder.include_checksum(true)?;
                 encoder.set_pledged_src_size(Some(bytes.len().try_into()?))?;
                 std::io::copy(&mut &bytes[..], &mut encoder)?;
                 Ok(encoder.finish()?)
             }
-            CompressionCodec::Gzip => {
-                let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
+            CompressionCodec::Gzip(level) => {
+                let compression = Compression::new((*level).min(GZIP_MAX_LEVEL) as u32);
+                let mut encoder = GzEncoder::new(Vec::new(), compression);
                 encoder.write_all(&bytes)?;
                 Ok(encoder.finish()?)
             }
+            CompressionCodec::Snappy => Err(Error::new(
+                ErrorKind::FeatureUnsupported,
+                "Snappy compression is not supported currently",
+            )),
         }
     }
 
@@ -95,8 +177,10 @@ impl CompressionCodec {
     pub fn suffix(&self) -> Result<&'static str> {
         match self {
             CompressionCodec::None => Ok(""),
-            CompressionCodec::Gzip => Ok(".gz"),
-            codec @ (CompressionCodec::Lz4 | CompressionCodec::Zstd) => Err(Error::new(
+            CompressionCodec::Gzip(_) => Ok(".gz"),
+            codec @ (CompressionCodec::Lz4
+            | CompressionCodec::Zstd(_)
+            | CompressionCodec::Snappy) => Err(Error::new(
                 ErrorKind::FeatureUnsupported,
                 format!("suffix not defined for {codec:?}"),
             )),
@@ -123,7 +207,10 @@ mod tests {
     async fn test_compression_codec_compress() {
         let bytes_vec = [0_u8; 100].to_vec();
 
-        let compression_codecs = [CompressionCodec::Zstd, CompressionCodec::Gzip];
+        let compression_codecs = [
+            CompressionCodec::zstd_default(),
+            CompressionCodec::gzip_default(),
+        ];
 
         for codec in compression_codecs {
             let compressed = codec.compress(bytes_vec.clone()).unwrap();
@@ -135,7 +222,10 @@ mod tests {
 
     #[tokio::test]
     async fn test_compression_codec_unsupported() {
-        let unsupported_codecs = [(CompressionCodec::Lz4, "LZ4")];
+        let unsupported_codecs = [
+            (CompressionCodec::Lz4, "LZ4"),
+            (CompressionCodec::Snappy, "Snappy"),
+        ];
         let bytes_vec = [0_u8; 100].to_vec();
 
         for (codec, name) in unsupported_codecs {
@@ -153,18 +243,34 @@ mod tests {
 
     #[test]
     fn test_suffix() {
-        // Test supported codecs
         assert_eq!(CompressionCodec::None.suffix().unwrap(), "");
-        assert_eq!(CompressionCodec::Gzip.suffix().unwrap(), ".gz");
+        assert_eq!(CompressionCodec::gzip_default().suffix().unwrap(), ".gz");
 
-        // Test unsupported codecs return errors
         assert!(CompressionCodec::Lz4.suffix().is_err());
-        assert!(CompressionCodec::Zstd.suffix().is_err());
+        assert!(CompressionCodec::zstd_default().suffix().is_err());
+        assert!(CompressionCodec::Snappy.suffix().is_err());
 
         let lz4_err = CompressionCodec::Lz4.suffix().unwrap_err();
         assert!(lz4_err.to_string().contains("suffix not defined for Lz4"));
 
-        let zstd_err = CompressionCodec::Zstd.suffix().unwrap_err();
+        let zstd_err = CompressionCodec::zstd_default().suffix().unwrap_err();
         assert!(zstd_err.to_string().contains("suffix not defined for Zstd"));
     }
+
+    #[test]
+    fn test_display() {
+        assert_eq!(CompressionCodec::None.to_string(), "None");
+        assert_eq!(CompressionCodec::Lz4.to_string(), "Lz4");
+        assert_eq!(
+            CompressionCodec::zstd_default().to_string(),
+            "Zstd(level=3)"
+        );
+        assert_eq!(CompressionCodec::Zstd(5).to_string(), "Zstd(level=5)");
+        assert_eq!(
+            CompressionCodec::gzip_default().to_string(),
+            "Gzip(level=6)"
+        );
+        assert_eq!(CompressionCodec::Gzip(9).to_string(), "Gzip(level=9)");
+        assert_eq!(CompressionCodec::Snappy.to_string(), "Snappy");
+    }
 }
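The level-lossiness documented in the note above can be pinned down in a few lines. A sketch, assuming `serde_json` is available as the serializer (the test itself is illustrative, not part of this patch):

```rust
#[test]
fn compression_level_is_not_round_tripped() {
    // Serialization emits only the codec name...
    let json = serde_json::to_string(&CompressionCodec::Zstd(5)).unwrap();
    assert_eq!(json, "\"zstd\"");

    // ...so deserialization restores the default level, not the original one.
    let back: CompressionCodec = serde_json::from_str(&json).unwrap();
    assert_eq!(back, CompressionCodec::zstd_default()); // Zstd(3), not Zstd(5)
}
```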
diff --git a/crates/iceberg/src/encryption/file_decryptor.rs b/crates/iceberg/src/encryption/file_decryptor.rs
new file mode 100644
index 0000000000..e44c0e1d78
--- /dev/null
+++ b/crates/iceberg/src/encryption/file_decryptor.rs
@@ -0,0 +1,156 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! File-level decryption helper for AGS1 stream-encrypted files.
+
+use std::fmt;
+use std::sync::Arc;
+
+use super::crypto::{AesGcmCipher, SecureKey};
+use super::stream::AesGcmFileRead;
+use crate::Result;
+use crate::io::FileRead;
+
+/// Holds the decryption material for a single encrypted file.
+///
+/// Created from a plaintext DEK and AAD prefix, then used to wrap
+/// an encrypted file reader for transparent decryption on read.
+pub struct AesGcmFileDecryptor {
+    cipher: Arc<AesGcmCipher>,
+    aad_prefix: Box<[u8]>,
+}
+
+impl AesGcmFileDecryptor {
+    /// Creates a new `AesGcmFileDecryptor` from a plaintext DEK and AAD prefix.
+    pub fn new(dek: &[u8], aad_prefix: impl Into<Box<[u8]>>) -> Result<Self> {
+        let key = SecureKey::new(dek)?;
+        let cipher = Arc::new(AesGcmCipher::new(key));
+        Ok(Self {
+            cipher,
+            aad_prefix: aad_prefix.into(),
+        })
+    }
+
+    /// Wraps a raw encrypted-file reader in a decrypting [`AesGcmFileRead`].
+    pub fn wrap_reader(
+        &self,
+        reader: Box<dyn FileRead>,
+        encrypted_file_length: u64,
+    ) -> Result<Box<dyn FileRead>> {
+        let decrypting = AesGcmFileRead::new(
+            reader,
+            Arc::clone(&self.cipher),
+            self.aad_prefix.clone(),
+            encrypted_file_length,
+        )?;
+        Ok(Box::new(decrypting))
+    }
+
+    /// Calculates the plaintext length from an encrypted file's total length.
+    pub fn plaintext_length(&self, encrypted_file_length: u64) -> Result<u64> {
+        AesGcmFileRead::calculate_plaintext_length(encrypted_file_length)
+    }
+}
+
+impl fmt::Debug for AesGcmFileDecryptor {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("AesGcmFileDecryptor")
+            .field("aad_prefix_len", &self.aad_prefix.len())
+            .finish_non_exhaustive()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ops::Range;
+
+    use bytes::Bytes;
+
+    use super::*;
+    use crate::encryption::AesGcmFileEncryptor;
+    use crate::io::FileWrite;
+
+    struct MemoryFileRead(Bytes);
+
+    #[async_trait::async_trait]
+    impl FileRead for MemoryFileRead {
+        async fn read(&self, range: Range<u64>) -> Result<Bytes> {
+            Ok(self.0.slice(range.start as usize..range.end as usize))
+        }
+    }
+
+    struct MemoryFileWrite {
+        buffer: std::sync::Arc<std::sync::Mutex<Vec<u8>>>,
+    }
+
+    #[async_trait::async_trait]
+    impl FileWrite for MemoryFileWrite {
+        async fn write(&mut self, bs: Bytes) -> Result<()> {
+            self.buffer.lock().unwrap().extend_from_slice(&bs);
+            Ok(())
+        }
+
+        async fn close(&mut self) -> Result<()> {
+            Ok(())
+        }
+    }
+
+    #[tokio::test]
+    async fn test_wrap_reader_roundtrip() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"test-aad-prefix!";
+        let plaintext = b"Hello from file decryptor!";
+
+        // Encrypt via the encryptor wrapper
+        let encryptor = AesGcmFileEncryptor::new(key.as_slice(), aad_prefix.as_slice()).unwrap();
+        let buffer = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
+        let mut writer = encryptor.wrap_writer(Box::new(MemoryFileWrite {
+            buffer: buffer.clone(),
+        }));
+        writer.write(Bytes::from(plaintext.to_vec())).await.unwrap();
+        writer.close().await.unwrap();
+        let encrypted = buffer.lock().unwrap().clone();
+        let encrypted_len = encrypted.len() as u64;
+
+        // Decrypt via the decryptor wrapper
+        let decryptor = AesGcmFileDecryptor::new(key.as_slice(), aad_prefix.as_slice()).unwrap();
+        let reader = decryptor
+            .wrap_reader(
+                Box::new(MemoryFileRead(Bytes::from(encrypted))),
+                encrypted_len,
+            )
+            .unwrap();
+
+        let result = reader.read(0..plaintext.len() as u64).await.unwrap();
+        assert_eq!(&result[..], plaintext);
+    }
+
+    #[tokio::test]
+    async fn test_invalid_key_length() {
+        let result = AesGcmFileDecryptor::new(b"too-short", b"aad".as_slice());
+        assert!(result.is_err());
+    }
+
+    #[tokio::test]
+    async fn test_plaintext_length() {
+        let decryptor = AesGcmFileDecryptor::new(b"0123456789abcdef", b"aad".as_slice()).unwrap();
+        // header(8) + nonce(12) + 10 bytes ciphertext + tag(16) = 46
+        let encrypted_len = 8 + 12 + 10 + 16;
+        let plain_len = decryptor.plaintext_length(encrypted_len).unwrap();
+        assert_eq!(plain_len, 10);
+    }
+}
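Taken together, the decryptor's three methods support a simple whole-file read. A sketch under the same crate-internal types (`FileRead` and `Result` from this crate, `Bytes` from the `bytes` crate); the function is hypothetical and assumes an async caller:

```rust
/// Hypothetical helper: read an entire AGS1-encrypted file back as plaintext.
async fn read_all_decrypted(
    decryptor: &AesGcmFileDecryptor,
    encrypted: Box<dyn FileRead>,
    encrypted_len: u64,
) -> Result<Bytes> {
    let plain_len = decryptor.plaintext_length(encrypted_len)?;
    let reader = decryptor.wrap_reader(encrypted, encrypted_len)?;
    reader.read(0..plain_len).await
}
```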
diff --git a/crates/iceberg/src/encryption/file_encryptor.rs b/crates/iceberg/src/encryption/file_encryptor.rs
new file mode 100644
index 0000000000..773438ad80
--- /dev/null
+++ b/crates/iceberg/src/encryption/file_encryptor.rs
@@ -0,0 +1,138 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! File-level encryption helper for AGS1 stream-encrypted files.
+
+use std::fmt;
+use std::sync::Arc;
+
+use super::crypto::{AesGcmCipher, SecureKey};
+use super::stream::AesGcmFileWrite;
+use crate::Result;
+use crate::io::FileWrite;
+
+/// Holds the encryption material for a single encrypted file.
+///
+/// This is the write-side counterpart to
+/// [`AesGcmFileDecryptor`](super::AesGcmFileDecryptor). Created from
+/// a plaintext DEK and AAD prefix, then used to wrap an output writer
+/// for transparent encryption on write.
+pub struct AesGcmFileEncryptor {
+    cipher: Arc<AesGcmCipher>,
+    aad_prefix: Box<[u8]>,
+}
+
+impl AesGcmFileEncryptor {
+    /// Creates a new `AesGcmFileEncryptor` from a plaintext DEK and AAD prefix.
+    pub fn new(dek: &[u8], aad_prefix: impl Into<Box<[u8]>>) -> Result<Self> {
+        let key = SecureKey::new(dek)?;
+        let cipher = Arc::new(AesGcmCipher::new(key));
+        Ok(Self {
+            cipher,
+            aad_prefix: aad_prefix.into(),
+        })
+    }
+
+    /// Wraps a raw output writer in an encrypting [`AesGcmFileWrite`].
+    pub fn wrap_writer(&self, writer: Box<dyn FileWrite>) -> Box<dyn FileWrite> {
+        Box::new(AesGcmFileWrite::new(
+            writer,
+            Arc::clone(&self.cipher),
+            self.aad_prefix.clone(),
+        ))
+    }
+}
+
+impl fmt::Debug for AesGcmFileEncryptor {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("AesGcmFileEncryptor")
+            .field("aad_prefix_len", &self.aad_prefix.len())
+            .finish_non_exhaustive()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ops::Range;
+
+    use bytes::Bytes;
+
+    use super::*;
+    use crate::encryption::AesGcmFileDecryptor;
+    use crate::io::FileRead;
+
+    struct MemoryFileRead(Bytes);
+
+    #[async_trait::async_trait]
+    impl FileRead for MemoryFileRead {
+        async fn read(&self, range: Range<u64>) -> Result<Bytes> {
+            Ok(self.0.slice(range.start as usize..range.end as usize))
+        }
+    }
+
+    struct MemoryFileWrite {
+        buffer: std::sync::Arc<std::sync::Mutex<Vec<u8>>>,
+    }
+
+    #[async_trait::async_trait]
+    impl FileWrite for MemoryFileWrite {
+        async fn write(&mut self, bs: Bytes) -> Result<()> {
+            self.buffer.lock().unwrap().extend_from_slice(&bs);
+            Ok(())
+        }
+
+        async fn close(&mut self) -> Result<()> {
+            Ok(())
+        }
+    }
+
+    #[tokio::test]
+    async fn test_wrap_writer_roundtrip() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"test-aad-prefix!";
+        let plaintext = b"Hello from file encryptor!";
+
+        // Encrypt via the encryptor wrapper
+        let encryptor = AesGcmFileEncryptor::new(key.as_slice(), aad_prefix.as_slice()).unwrap();
+        let buffer = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
+        let mut writer = encryptor.wrap_writer(Box::new(MemoryFileWrite {
+            buffer: buffer.clone(),
+        }));
+        writer.write(Bytes::from(plaintext.to_vec())).await.unwrap();
+        writer.close().await.unwrap();
+        let encrypted = buffer.lock().unwrap().clone();
+        let encrypted_len = encrypted.len() as u64;
+
+        // Decrypt via the decryptor wrapper
+        let decryptor = AesGcmFileDecryptor::new(key.as_slice(), aad_prefix.as_slice()).unwrap();
+        let reader = decryptor
+            .wrap_reader(
+                Box::new(MemoryFileRead(Bytes::from(encrypted))),
+                encrypted_len,
+            )
+            .unwrap();
+
+        let result = reader.read(0..plaintext.len() as u64).await.unwrap();
+        assert_eq!(&result[..], plaintext);
+    }
+
+    #[tokio::test]
+    async fn test_invalid_key_length() {
+        let result = AesGcmFileEncryptor::new(b"bad-key", b"aad".as_slice());
+        assert!(result.is_err());
+    }
+}
diff --git a/crates/iceberg/src/encryption/mod.rs b/crates/iceberg/src/encryption/mod.rs
index 097f4f24e3..9888a153c7 100644
--- a/crates/iceberg/src/encryption/mod.rs
+++ b/crates/iceberg/src/encryption/mod.rs
@@ -21,5 +21,11 @@
 //! and decrypting data in Iceberg tables.
 
 mod crypto;
+mod file_decryptor;
+mod file_encryptor;
+mod stream;
 
 pub use crypto::{AesGcmCipher, AesKeySize, SecureKey};
+pub use file_decryptor::AesGcmFileDecryptor;
+pub use file_encryptor::AesGcmFileEncryptor;
+pub use stream::{AesGcmFileRead, AesGcmFileWrite};
diff --git a/crates/iceberg/src/encryption/stream.rs b/crates/iceberg/src/encryption/stream.rs
new file mode 100644
index 0000000000..130578f2b1
--- /dev/null
+++ b/crates/iceberg/src/encryption/stream.rs
@@ -0,0 +1,1249 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! AGS1 stream encryption/decryption for Iceberg.
+//!
+//! Implements the block-based AES-GCM stream format used by Iceberg for
+//! encrypting manifest lists and manifest files. The format is
+//! byte-compatible with Java's `AesGcmInputStream` / `AesGcmOutputStream`.
+//!
+//! # AGS1 File Format
+//!
+//! ```text
+//! ┌─────────────────────────────────────────────┐
+//! │ Header (8 bytes)                            │
+//! │   Magic: "AGS1" (4 bytes, ASCII)            │
+//! │   Plain block size: u32 LE (4 bytes)        │
+//! │   Default: 1,048,576 (1 MiB)                │
+//! ├─────────────────────────────────────────────┤
+//! │ Block 0                                     │
+//! │   Nonce (12 bytes)                          │
+//! │   Ciphertext (up to plain_block_size bytes) │
+//! │   GCM Tag (16 bytes)                        │
+//! ├─────────────────────────────────────────────┤
+//! │ Block 1..N (same structure)                 │
+//! ├─────────────────────────────────────────────┤
+//! │ Final block (may be shorter)                │
+//! └─────────────────────────────────────────────┘
+//! ```
+//!
+//! Each block's AAD is: `aad_prefix || block_index (4 bytes, LE)`.
+
+use std::ops::Range;
+use std::sync::Arc;
+
+use bytes::{Bytes, BytesMut};
+
+use super::AesGcmCipher;
+use crate::io::{FileRead, FileWrite};
+use crate::{Error, ErrorKind, Result};
+
+/// Default plaintext block size (1 MiB), matching Java's `Ciphers.PLAIN_BLOCK_SIZE`.
+pub const PLAIN_BLOCK_SIZE: u32 = 1024 * 1024;
+
+/// AES-GCM nonce length in bytes.
+pub const NONCE_LENGTH: u32 = 12;
+
+/// AES-GCM authentication tag length in bytes.
+pub const GCM_TAG_LENGTH: u32 = 16;
+
+/// Cipher block size = plaintext block size + nonce + GCM tag.
+pub const CIPHER_BLOCK_SIZE: u32 = PLAIN_BLOCK_SIZE + NONCE_LENGTH + GCM_TAG_LENGTH;
+
+/// AGS1 stream magic bytes.
+pub const GCM_STREAM_MAGIC: [u8; 4] = *b"AGS1";
+
+/// AGS1 stream header length (4-byte magic + 4-byte block size).
+pub const GCM_STREAM_HEADER_LENGTH: u32 = 8;
+
+/// Minimum valid AGS1 stream length (header + one empty block).
+#[cfg(test)]
+pub const MIN_STREAM_LENGTH: u32 = GCM_STREAM_HEADER_LENGTH + NONCE_LENGTH + GCM_TAG_LENGTH;
+
+/// Constructs the per-block AAD for AGS1 stream encryption.
+///
+/// Format: `aad_prefix || block_index (4 bytes, little-endian)`
+///
+/// This matches Java's `Ciphers.streamBlockAAD()`.
+pub(crate) fn stream_block_aad(aad_prefix: &[u8], block_index: u32) -> Vec<u8> {
+    let index_bytes = block_index.to_le_bytes();
+    if aad_prefix.is_empty() {
+        index_bytes.to_vec()
+    } else {
+        let mut aad = Vec::with_capacity(aad_prefix.len() + 4);
+        aad.extend_from_slice(aad_prefix);
+        aad.extend_from_slice(&index_bytes);
+        aad
+    }
+}
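One consequence of the layout above: the encrypted size is a pure function of the plaintext size (8-byte header, plus 28 bytes of nonce/tag overhead per block, with at least one block even for an empty file). A sketch of that arithmetic, assuming the constants defined above (the helper itself is hypothetical, not part of this patch):

```rust
/// Hypothetical helper: encrypted AGS1 file size for a given plaintext size.
fn ags1_encrypted_len(plaintext_len: u64) -> u64 {
    const BLOCK: u64 = PLAIN_BLOCK_SIZE as u64;
    const OVERHEAD: u64 = (NONCE_LENGTH + GCM_TAG_LENGTH) as u64; // 28 bytes per block
    // Even an empty file carries one (empty) block.
    let blocks = if plaintext_len == 0 {
        1
    } else {
        plaintext_len.div_ceil(BLOCK)
    };
    GCM_STREAM_HEADER_LENGTH as u64 + plaintext_len + blocks * OVERHEAD
}

// ags1_encrypted_len(0) == 36, which is MIN_STREAM_LENGTH;
// ags1_encrypted_len(PLAIN_BLOCK_SIZE as u64) == 8 + 1_048_576 + 28.
```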
+/// Transparent decryption of AGS1 stream-encrypted files.
+///
+/// Implements the [`FileRead`] trait, providing random-access reads over
+/// encrypted data. Each `read()` call determines which encrypted blocks
+/// overlap the requested plaintext range, reads and decrypts them, then
+/// returns the requested plaintext bytes.
+///
+/// # Usage
+///
+/// ```ignore
+/// // (ignored: requires async runtime and concrete FileRead/FileWrite impls)
+/// let reader = AesGcmFileRead::new(
+///     inner_reader,           // Box<dyn FileRead> for the encrypted file
+///     cipher,                 // Arc<AesGcmCipher> with the DEK
+///     aad_prefix.to_vec(),
+///     encrypted_file_length,
+/// )?;
+///
+/// // Read plaintext bytes transparently
+/// let plaintext = reader.read(0..1024).await?;
+/// ```
+pub struct AesGcmFileRead {
+    /// The underlying encrypted file reader.
+    inner: Box<dyn FileRead>,
+    /// The AES-GCM cipher holding the DEK.
+    cipher: Arc<AesGcmCipher>,
+    /// AAD prefix from the key metadata.
+    aad_prefix: Box<[u8]>,
+    /// Total plaintext stream size in bytes.
+    plain_stream_size: u64,
+    /// Total number of encrypted blocks.
+    num_blocks: u64,
+    /// Size of the last cipher block (may be smaller than `CIPHER_BLOCK_SIZE`).
+    last_cipher_block_size: u32,
+}
+
+impl AesGcmFileRead {
+    /// Creates a new `AesGcmFileRead` for decrypting an AGS1 stream.
+    ///
+    /// Computes the plaintext size and block layout from the encrypted file
+    /// length. No I/O is performed; header validation happens implicitly
+    /// when blocks are decrypted (GCM authentication will fail on corrupt data).
+    ///
+    /// # Arguments
+    ///
+    /// * `inner` - Reader for the underlying encrypted file
+    /// * `cipher` - AES-GCM cipher initialized with the file's DEK
+    /// * `aad_prefix` - AAD prefix from the file's `StandardKeyMetadata`
+    /// * `encrypted_file_length` - Total byte length of the encrypted file
+    pub fn new(
+        inner: Box<dyn FileRead>,
+        cipher: Arc<AesGcmCipher>,
+        aad_prefix: Box<[u8]>,
+        encrypted_file_length: u64,
+    ) -> Result<Self> {
+        let plain_stream_size = Self::calculate_plaintext_length(encrypted_file_length)?;
+        let stream_length = encrypted_file_length - GCM_STREAM_HEADER_LENGTH as u64;
+
+        if stream_length == 0 {
+            return Ok(Self {
+                inner,
+                cipher,
+                aad_prefix,
+                plain_stream_size: 0,
+                num_blocks: 0,
+                last_cipher_block_size: 0,
+            });
+        }
+
+        let num_full_blocks = stream_length / CIPHER_BLOCK_SIZE as u64;
+        let cipher_bytes_in_last_block = (stream_length % CIPHER_BLOCK_SIZE as u64) as u32;
+        let full_blocks_only = cipher_bytes_in_last_block == 0;
+
+        let num_blocks = if full_blocks_only {
+            num_full_blocks
+        } else {
+            num_full_blocks + 1
+        };
+
+        if num_blocks > u32::MAX as u64 {
+            return Err(Error::new(
+                ErrorKind::DataInvalid,
+                format!(
+                    "AGS1 format supports at most {} blocks (~4 TiB per file), but file requires {num_blocks} blocks",
+                    u32::MAX
+                ),
+            ));
+        }
+
+        let last_cipher_block_size = if full_blocks_only {
+            CIPHER_BLOCK_SIZE
+        } else {
+            cipher_bytes_in_last_block
+        };
+
+        Ok(Self {
+            inner,
+            cipher,
+            aad_prefix,
+            plain_stream_size,
+            num_blocks,
+            last_cipher_block_size,
+        })
+    }
+
+    /// Returns the plaintext stream size in bytes.
+    pub fn plaintext_length(&self) -> u64 {
+        self.plain_stream_size
+    }
+
+    /// Calculates the plaintext length from an encrypted file's total length.
+    ///
+    /// This is a static calculation matching Java's
+    /// `AesGcmInputStream.calculatePlaintextLength()`.
+    pub fn calculate_plaintext_length(encrypted_file_length: u64) -> Result<u64> {
+        if encrypted_file_length < GCM_STREAM_HEADER_LENGTH as u64 {
+            return Err(Error::new(
+                ErrorKind::DataInvalid,
+                format!(
+                    "Encrypted file too short: {encrypted_file_length} bytes (minimum {GCM_STREAM_HEADER_LENGTH})"
+                ),
+            ));
+        }
+
+        let stream_length = encrypted_file_length - GCM_STREAM_HEADER_LENGTH as u64;
+
+        if stream_length == 0 {
+            return Ok(0);
+        }
+
+        let num_full_blocks = stream_length / CIPHER_BLOCK_SIZE as u64;
+        let cipher_bytes_in_last_block = stream_length % CIPHER_BLOCK_SIZE as u64;
+        let full_blocks_only = cipher_bytes_in_last_block == 0;
+
+        let plain_bytes_in_last_block = if full_blocks_only {
+            0
+        } else {
+            if cipher_bytes_in_last_block < (NONCE_LENGTH + GCM_TAG_LENGTH) as u64 {
+                return Err(Error::new(
+                    ErrorKind::DataInvalid,
+                    format!(
+                        "Truncated encrypted file: last block is {} bytes (minimum {})",
+                        cipher_bytes_in_last_block,
+                        NONCE_LENGTH + GCM_TAG_LENGTH
+                    ),
+                ));
+            }
+            cipher_bytes_in_last_block - NONCE_LENGTH as u64 - GCM_TAG_LENGTH as u64
+        };
+
+        Ok(num_full_blocks * PLAIN_BLOCK_SIZE as u64 + plain_bytes_in_last_block)
+    }
+
+    /// Returns the encrypted byte offset for a given block index.
+    fn encrypted_block_offset(block_index: u64) -> u64 {
+        block_index * CIPHER_BLOCK_SIZE as u64 + GCM_STREAM_HEADER_LENGTH as u64
+    }
+
+    /// Returns the cipher block size for a given block index.
+    fn cipher_block_size(&self, block_index: u64) -> u32 {
+        if block_index == self.num_blocks - 1 {
+            self.last_cipher_block_size
+        } else {
+            CIPHER_BLOCK_SIZE
+        }
+    }
+}
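Before the `FileRead` implementation below, it may help to isolate the first step its `read()` performs: mapping plaintext offsets to block indices by integer division, with an inclusive last block. A sketch, assuming the module's `PLAIN_BLOCK_SIZE` (the helper is hypothetical, not part of this patch):

```rust
use std::ops::{Range, RangeInclusive};

/// Hypothetical helper: the encrypted blocks overlapping a plaintext range.
/// Mirrors the first_block/last_block computation in `read()`.
fn blocks_for_range(range: &Range<u64>) -> RangeInclusive<u64> {
    debug_assert!(range.start < range.end, "empty ranges are handled earlier");
    let first = range.start / PLAIN_BLOCK_SIZE as u64;
    let last = (range.end - 1) / PLAIN_BLOCK_SIZE as u64;
    first..=last
}
```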
+#[async_trait::async_trait]
+impl FileRead for AesGcmFileRead {
+    /// Reads and decrypts a plaintext byte range from the encrypted AGS1 stream.
+    ///
+    /// The caller specifies a range in **plaintext** coordinates (e.g. "bytes 0..1024
+    /// of the original file"). This method translates that into the encrypted file
+    /// layout and performs the following steps:
+    ///
+    /// 1. **Map to blocks** — divides the plaintext range by `PLAIN_BLOCK_SIZE` to
+    ///    find which encrypted blocks (`first_block..=last_block`) contain the
+    ///    requested data.
+    ///
+    /// 2. **Single I/O read** — calculates the contiguous byte range in the
+    ///    encrypted file that covers all needed blocks (including the 8-byte AGS1
+    ///    header offset, 12-byte nonces, and 16-byte GCM tags) and fetches them in
+    ///    one call to the inner `FileRead`.
+    ///
+    /// 3. **Decrypt per block** — iterates over each cipher block in the response,
+    ///    decrypts it with AES-GCM using the per-block AAD (`aad_prefix || block_index`),
+    ///    and slices out only the plaintext bytes that overlap the requested range.
+    ///
+    /// 4. **Assemble result** — concatenates the slices into a single `Bytes` buffer
+    ///    matching exactly `range.end - range.start` bytes.
+    ///
+    /// Because each block is independently encrypted with its own nonce and AAD,
+    /// arbitrary random-access reads are supported without decrypting the entire
+    /// file. GCM authentication is verified per-block, so any tampering is detected
+    /// at the granularity of individual blocks.
+    async fn read(&self, range: Range<u64>) -> Result<Bytes> {
+        if range.start == range.end {
+            return Ok(Bytes::new());
+        }
+
+        if range.start > range.end {
+            return Err(Error::new(
+                ErrorKind::DataInvalid,
+                format!(
+                    "Invalid read range: start ({}) is greater than end ({})",
+                    range.start, range.end
+                ),
+            ));
+        }
+
+        if range.end > self.plain_stream_size {
+            return Err(Error::new(
+                ErrorKind::DataInvalid,
+                format!(
+                    "Read range {}..{} exceeds plaintext size {}",
+                    range.start, range.end, self.plain_stream_size
+                ),
+            ));
+        }
+
+        if self.num_blocks == 0 {
+            return Ok(Bytes::new());
+        }
+
+        let first_block = range.start / PLAIN_BLOCK_SIZE as u64;
+        let last_block = (range.end - 1) / PLAIN_BLOCK_SIZE as u64;
+
+        // Read all needed encrypted blocks in a single I/O call
+        let encrypted_start = Self::encrypted_block_offset(first_block);
+        let encrypted_end =
+            Self::encrypted_block_offset(last_block) + self.cipher_block_size(last_block) as u64;
+
+        let all_encrypted = self.inner.read(encrypted_start..encrypted_end).await?;
+
+        // Decrypt each block and extract the requested plaintext range
+        let result_len = (range.end - range.start) as usize;
+        let mut result = BytesMut::with_capacity(result_len);
+        let mut encrypted_offset = 0usize;
+
+        for block_idx in first_block..=last_block {
+            let block_size = self.cipher_block_size(block_idx) as usize;
+            let cipher_block = &all_encrypted[encrypted_offset..encrypted_offset + block_size];
+            encrypted_offset += block_size;
+
+            let aad = stream_block_aad(&self.aad_prefix, block_idx as u32);
+            let decrypted = self.cipher.decrypt(cipher_block, Some(&aad))?;
+
+            // Calculate which slice of this decrypted block we need
+            let block_plain_start = block_idx * PLAIN_BLOCK_SIZE as u64;
+            let slice_start = if block_idx == first_block {
+                (range.start - block_plain_start) as usize
+            } else {
+                0
+            };
+            let slice_end = if block_idx == last_block {
+                (range.end - block_plain_start) as usize
+            } else {
+                decrypted.len()
+            };
+
+            result.extend_from_slice(&decrypted[slice_start..slice_end]);
+        }
+
+        Ok(result.freeze())
+    }
+}
+
+/// Transparent encryption of AGS1 stream-encrypted files.
+///
+/// Implements the [`FileWrite`] trait, buffering plaintext and emitting
+/// encrypted AGS1 blocks. This is the streaming write counterpart to
+/// [`AesGcmFileRead`].
+///
+/// # Usage
+///
+/// ```ignore
+/// // (ignored: requires async runtime and concrete FileRead/FileWrite impls)
+/// let writer = AesGcmFileWrite::new(
+///     inner_writer,        // Box<dyn FileWrite> for the output file
+///     cipher,              // Arc<AesGcmCipher> with the DEK
+///     aad_prefix.to_vec(),
+/// );
+///
+/// writer.write(plaintext_chunk).await?;
+/// writer.close().await?;
+/// ```
+pub struct AesGcmFileWrite {
+    /// The underlying output writer.
+    inner: Box<dyn FileWrite>,
+    /// The AES-GCM cipher holding the DEK.
+    cipher: Arc<AesGcmCipher>,
+    /// AAD prefix from the key metadata.
+    aad_prefix: Box<[u8]>,
+    /// Plaintext buffer accumulating data before block encryption.
+    buffer: Vec<u8>,
+    /// Current block index for AAD construction.
+    block_index: u32,
+    /// Whether the AGS1 header has been written.
+    header_written: bool,
+    /// Whether close() has been called.
+    closed: bool,
+    /// Whether the writer is in a poisoned state due to a failed inner write.
+    /// Once poisoned, all subsequent operations are rejected because the inner
+    /// writer may have received partial data.
+    poisoned: bool,
+}
+
+impl AesGcmFileWrite {
+    /// Creates a new `AesGcmFileWrite` for encrypting to AGS1 format.
+    ///
+    /// No I/O is performed until `write()` or `close()` is called.
+    pub fn new(
+        inner: Box<dyn FileWrite>,
+        cipher: Arc<AesGcmCipher>,
+        aad_prefix: impl Into<Box<[u8]>>,
+    ) -> Self {
+        Self {
+            inner,
+            cipher,
+            aad_prefix: aad_prefix.into(),
+            buffer: Vec::new(),
+            block_index: 0,
+            header_written: false,
+            closed: false,
+            poisoned: false,
+        }
+    }
+
+    /// Writes the AGS1 header (magic + plain block size) to the inner writer.
+    async fn write_header(&mut self) -> Result<()> {
+        let mut header = Vec::with_capacity(GCM_STREAM_HEADER_LENGTH as usize);
+        header.extend_from_slice(&GCM_STREAM_MAGIC);
+        header.extend_from_slice(&PLAIN_BLOCK_SIZE.to_le_bytes());
+        if let Err(e) = self.inner.write(Bytes::from(header)).await {
+            self.poisoned = true;
+            return Err(e);
+        }
+        self.header_written = true;
+        Ok(())
+    }
+
+    /// Encrypts a plaintext block and writes it to the inner writer.
+    async fn encrypt_and_write_block(&mut self, block_data: &[u8]) -> Result<()> {
+        let aad = stream_block_aad(&self.aad_prefix, self.block_index);
+        let encrypted = self.cipher.encrypt(block_data, Some(&aad))?;
+        if let Err(e) = self.inner.write(Bytes::from(encrypted)).await {
+            self.poisoned = true;
+            return Err(e);
+        }
+        self.block_index = self.block_index.checked_add(1).ok_or_else(|| {
+            Error::new(
+                ErrorKind::DataInvalid,
+                "AGS1 block index overflow: file exceeds the maximum supported size (~4 TiB)",
+            )
+        })?;
+        Ok(())
+    }
+
+    /// Encrypts the first `PLAIN_BLOCK_SIZE` bytes of the buffer in-place
+    /// and drains them, avoiding a 1 MiB temporary copy.
+    async fn encrypt_and_drain_block(&mut self) -> Result<()> {
+        let aad = stream_block_aad(&self.aad_prefix, self.block_index);
+        let encrypted = self
+            .cipher
+            .encrypt(&self.buffer[..PLAIN_BLOCK_SIZE as usize], Some(&aad))?;
+        if let Err(e) = self.inner.write(Bytes::from(encrypted)).await {
+            self.poisoned = true;
+            return Err(e);
+        }
+        self.block_index = self.block_index.checked_add(1).ok_or_else(|| {
+            Error::new(
+                ErrorKind::DataInvalid,
+                "AGS1 block index overflow: file exceeds the maximum supported size (~4 TiB)",
+            )
+        })?;
+        self.buffer.drain(..PLAIN_BLOCK_SIZE as usize);
+        Ok(())
+    }
+}
+
+#[async_trait::async_trait]
+impl FileWrite for AesGcmFileWrite {
+    async fn write(&mut self, bs: Bytes) -> Result<()> {
+        if self.closed {
+            return Err(Error::new(
+                ErrorKind::Unexpected,
+                "Cannot write to a closed AesGcmFileWrite",
+            ));
+        }
+        if self.poisoned {
+            return Err(Error::new(
+                ErrorKind::Unexpected,
+                "AesGcmFileWrite is in a poisoned state due to a previous write failure",
+            ));
+        }
+
+        if !self.header_written {
+            self.write_header().await?;
+        }
+
+        self.buffer.extend_from_slice(&bs);
+
+        // Flush full blocks
+        while self.buffer.len() >= PLAIN_BLOCK_SIZE as usize {
+            self.encrypt_and_drain_block().await?;
+        }
+
+        Ok(())
+    }
+
+    async fn close(&mut self) -> Result<()> {
+        if self.closed {
+            return Err(Error::new(
+                ErrorKind::Unexpected,
+                "AesGcmFileWrite already closed",
+            ));
+        }
+        if self.poisoned {
+            return Err(Error::new(
+                ErrorKind::Unexpected,
+                "AesGcmFileWrite is in a poisoned state due to a previous write failure",
+            ));
+        }
+
+        if !self.header_written {
+            self.write_header().await?;
+        }
+
+        // Write the final block if there's remaining data, or if this is an empty file
+        // (block_index == 0). Skip writing a spurious empty block when the plaintext was
+        // exactly block-aligned (buffer empty, blocks already written).
+        if !self.buffer.is_empty() || self.block_index == 0 {
+            let final_block = std::mem::take(&mut self.buffer);
+            self.encrypt_and_write_block(&final_block).await?;
+        }
+        self.closed = true;
+
+        self.inner.close().await
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Encrypts plaintext into AGS1 format for testing.
+    ///
+    /// Mirrors Java's `AesGcmOutputStream` behavior:
+    /// - Always writes header + at least one block (even for empty input)
+    /// - Full blocks are `PLAIN_BLOCK_SIZE` bytes; last block may be shorter
+    fn encrypt_ags1(plaintext: &[u8], cipher: &AesGcmCipher, aad_prefix: &[u8]) -> Vec<u8> {
+        let mut result = Vec::new();
+
+        // Write header: "AGS1" + PLAIN_BLOCK_SIZE (LE)
+        result.extend_from_slice(&GCM_STREAM_MAGIC);
+        result.extend_from_slice(&PLAIN_BLOCK_SIZE.to_le_bytes());
+
+        // Write blocks
+        let mut offset = 0;
+        let mut block_index = 0u32;
+
+        loop {
+            let remaining = plaintext.len() - offset;
+            let block_size = std::cmp::min(remaining, PLAIN_BLOCK_SIZE as usize);
+
+            // Block 0 is always written (even if empty); subsequent empty blocks are skipped
+            if block_size == 0 && block_index > 0 {
+                break;
+            }
+
+            let block_data = &plaintext[offset..offset + block_size];
+            let aad = stream_block_aad(aad_prefix, block_index);
+            let encrypted = cipher.encrypt(block_data, Some(&aad)).unwrap();
+            result.extend_from_slice(&encrypted);
+
+            offset += block_size;
+            block_index += 1;
+
+            // A partial block is always the last
+            if block_size < PLAIN_BLOCK_SIZE as usize {
+                break;
+            }
+        }
+
+        result
+    }
+
+    /// Helper to create an AesGcmCipher from raw key bytes.
+    fn make_cipher(key: &[u8]) -> AesGcmCipher {
+        use super::super::SecureKey;
+        let secure_key = SecureKey::new(key).unwrap();
+        AesGcmCipher::new(secure_key)
+    }
+
+    /// Helper to create an in-memory FileRead from bytes.
+    fn memory_reader(data: Vec<u8>) -> Box<dyn FileRead> {
+        Box::new(MemoryFileRead(Bytes::from(data)))
+    }
+
+    /// Simple in-memory FileRead for tests.
+    struct MemoryFileRead(Bytes);
+
+    #[async_trait::async_trait]
+    impl FileRead for MemoryFileRead {
+        async fn read(&self, range: Range<u64>) -> Result<Bytes> {
+            let start = range.start as usize;
+            let end = range.end as usize;
+            if end > self.0.len() {
+                return Err(Error::new(
+                    ErrorKind::DataInvalid,
+                    format!(
+                        "Range {}..{} out of bounds for {} bytes",
+                        start,
+                        end,
+                        self.0.len()
+                    ),
+                ));
+            }
+            Ok(self.0.slice(start..end))
+        }
+    }
+
+    #[tokio::test]
+    async fn test_empty_file_roundtrip() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"test-aad-prefix!";
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(b"", &cipher, aad_prefix);
+
+        // Verify minimum length: header(8) + nonce(12) + tag(16) = 36
+        assert_eq!(encrypted.len(), MIN_STREAM_LENGTH as usize);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), 0);
+
+        // Reading empty range should return empty bytes
+        let result = reader.read(0..0).await.unwrap();
+        assert!(result.is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_small_file_roundtrip() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"test-aad-prefix!";
+        let plaintext = b"Hello, Iceberg encryption!";
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(plaintext, &cipher, aad_prefix);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), plaintext.len() as u64);
+
+        // Read entire file
+        let result = reader.read(0..plaintext.len() as u64).await.unwrap();
+        assert_eq!(&result[..], plaintext);
+    }
+
+    #[tokio::test]
+    async fn test_partial_read() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"aad-prefix-here!";
+        let plaintext = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(plaintext, &cipher, aad_prefix);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        // Read a slice from the middle
+        let result = reader.read(10..20).await.unwrap();
+        assert_eq!(&result[..], &plaintext[10..20]);
+
+        // Read first byte
+        let result = reader.read(0..1).await.unwrap();
+        assert_eq!(&result[..], &plaintext[0..1]);
+
+        // Read last byte
+        let last = plaintext.len() as u64;
+        let result = reader.read(last - 1..last).await.unwrap();
+        assert_eq!(&result[..], &plaintext[plaintext.len() - 1..]);
+    }
+
+    #[tokio::test]
+    async fn test_multi_block_roundtrip() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"multi-block-aad!";
+
+        // 1.5 blocks of data
+        let size = PLAIN_BLOCK_SIZE as usize + PLAIN_BLOCK_SIZE as usize / 2;
+        let plaintext: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(&plaintext, &cipher, aad_prefix);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), plaintext.len() as u64);
+
+        // Read entire file
+        let result = reader.read(0..plaintext.len() as u64).await.unwrap();
+        assert_eq!(&result[..], &plaintext[..]);
+    }
+
+    #[tokio::test]
+    async fn test_cross_block_read() {
b"0123456789abcdef"; + let aad_prefix = b"cross-block-aad!"; + + // 2.5 blocks of data + let size = PLAIN_BLOCK_SIZE as usize * 2 + PLAIN_BLOCK_SIZE as usize / 2; + let plaintext: Vec = (0..size).map(|i| (i % 256) as u8).collect(); + let cipher = make_cipher(key); + + let encrypted = encrypt_ags1(&plaintext, &cipher, aad_prefix); + + let reader = AesGcmFileRead::new( + memory_reader(encrypted.clone()), + Arc::new(make_cipher(key)), + aad_prefix.as_slice().into(), + encrypted.len() as u64, + ) + .unwrap(); + + // Read across block boundary (last 100 bytes of block 0 + first 100 bytes of block 1) + let boundary = PLAIN_BLOCK_SIZE as u64; + let result = reader.read(boundary - 100..boundary + 100).await.unwrap(); + assert_eq!( + &result[..], + &plaintext[(boundary - 100) as usize..(boundary + 100) as usize] + ); + + // Read across two block boundaries (spans blocks 0, 1, and 2) + let result = reader.read(boundary - 50..boundary * 2 + 50).await.unwrap(); + assert_eq!( + &result[..], + &plaintext[(boundary - 50) as usize..(boundary * 2 + 50) as usize] + ); + } + + #[tokio::test] + async fn test_exact_block_size() { + let key = b"0123456789abcdef"; + let aad_prefix = b"exact-block-aad!"; + + // Exactly 1 block + let plaintext: Vec = (0..PLAIN_BLOCK_SIZE as usize) + .map(|i| (i % 256) as u8) + .collect(); + let cipher = make_cipher(key); + + let encrypted = encrypt_ags1(&plaintext, &cipher, aad_prefix); + + let reader = AesGcmFileRead::new( + memory_reader(encrypted.clone()), + Arc::new(make_cipher(key)), + aad_prefix.as_slice().into(), + encrypted.len() as u64, + ) + .unwrap(); + + assert_eq!(reader.plaintext_length(), PLAIN_BLOCK_SIZE as u64); + + let result = reader.read(0..PLAIN_BLOCK_SIZE as u64).await.unwrap(); + assert_eq!(&result[..], &plaintext[..]); + } + + #[tokio::test] + async fn test_block_size_plus_one() { + let key = b"0123456789abcdef"; + let aad_prefix = b"block-plus-one!!"; + + // 1 block + 1 byte + let size = PLAIN_BLOCK_SIZE as usize + 1; + let plaintext: Vec = (0..size).map(|i| (i % 256) as u8).collect(); + let cipher = make_cipher(key); + + let encrypted = encrypt_ags1(&plaintext, &cipher, aad_prefix); + + let reader = AesGcmFileRead::new( + memory_reader(encrypted.clone()), + Arc::new(make_cipher(key)), + aad_prefix.as_slice().into(), + encrypted.len() as u64, + ) + .unwrap(); + + assert_eq!(reader.plaintext_length(), size as u64); + + // Read the last byte (in block 1) + let result = reader.read(size as u64 - 1..size as u64).await.unwrap(); + assert_eq!(result[0], plaintext[size - 1]); + + // Read all + let result = reader.read(0..size as u64).await.unwrap(); + assert_eq!(&result[..], &plaintext[..]); + } + + #[tokio::test] + async fn test_block_size_minus_one() { + let key = b"0123456789abcdef"; + let aad_prefix = b"block-minus-one!"; + + // 1 block - 1 byte + let size = PLAIN_BLOCK_SIZE as usize - 1; + let plaintext: Vec = (0..size).map(|i| (i % 256) as u8).collect(); + let cipher = make_cipher(key); + + let encrypted = encrypt_ags1(&plaintext, &cipher, aad_prefix); + + let reader = AesGcmFileRead::new( + memory_reader(encrypted.clone()), + Arc::new(make_cipher(key)), + aad_prefix.as_slice().into(), + encrypted.len() as u64, + ) + .unwrap(); + + assert_eq!(reader.plaintext_length(), size as u64); + + let result = reader.read(0..size as u64).await.unwrap(); + assert_eq!(&result[..], &plaintext[..]); + } + + #[tokio::test] + async fn test_wrong_aad_fails() { + let key = b"0123456789abcdef"; + let aad_prefix = b"correct-aad-here"; + let plaintext = b"sensitive data here"; + 
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(plaintext, &cipher, aad_prefix);
+
+        // Try to decrypt with wrong AAD
+        let mut bad_aad = aad_prefix.to_vec();
+        bad_aad[0] ^= 0xFF;
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            bad_aad.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        let result = reader.read(0..plaintext.len() as u64).await;
+        assert!(result.is_err(), "Decryption with wrong AAD should fail");
+    }
+
+    #[tokio::test]
+    async fn test_wrong_key_fails() {
+        let key = b"0123456789abcdef";
+        let wrong_key = b"fedcba9876543210";
+        let aad_prefix = b"test-aad-prefix!";
+        let plaintext = b"sensitive data";
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(plaintext, &cipher, aad_prefix);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(wrong_key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        let result = reader.read(0..plaintext.len() as u64).await;
+        assert!(result.is_err(), "Decryption with wrong key should fail");
+    }
+
+    #[tokio::test]
+    async fn test_out_of_bounds_read() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"test-aad-prefix!";
+        let plaintext = b"short data";
+        let cipher = make_cipher(key);
+
+        let encrypted = encrypt_ags1(plaintext, &cipher, aad_prefix);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        let result = reader.read(0..plaintext.len() as u64 + 1).await;
+        assert!(result.is_err(), "Reading past end should fail");
+    }
+
+    #[tokio::test]
+    async fn test_calculate_plaintext_length() {
+        // Empty file: header only (not valid per Java, but handled)
+        assert_eq!(
+            AesGcmFileRead::calculate_plaintext_length(GCM_STREAM_HEADER_LENGTH as u64).unwrap(),
+            0
+        );
+
+        // Empty file with one empty block: header(8) + nonce(12) + tag(16) = 36
+        assert_eq!(
+            AesGcmFileRead::calculate_plaintext_length(MIN_STREAM_LENGTH as u64).unwrap(),
+            0
+        );
+
+        // One full block: header(8) + cipher_block(1048604) = 1048612
+        let one_full = GCM_STREAM_HEADER_LENGTH as u64 + CIPHER_BLOCK_SIZE as u64;
+        assert_eq!(
+            AesGcmFileRead::calculate_plaintext_length(one_full).unwrap(),
+            PLAIN_BLOCK_SIZE as u64
+        );
+
+        // One full block + 1 byte: need partial second block
+        // Second block = nonce(12) + 1 byte ciphertext + tag(16) = 29
+        let one_full_plus_one = one_full + NONCE_LENGTH as u64 + 1 + GCM_TAG_LENGTH as u64;
+        assert_eq!(
+            AesGcmFileRead::calculate_plaintext_length(one_full_plus_one).unwrap(),
+            PLAIN_BLOCK_SIZE as u64 + 1
+        );
+    }
+
+    #[tokio::test]
+    async fn test_stream_block_aad() {
+        // With prefix
+        let aad = stream_block_aad(b"prefix", 0);
+        assert_eq!(&aad[..6], b"prefix");
+        assert_eq!(&aad[6..], &0u32.to_le_bytes());
+
+        let aad = stream_block_aad(b"prefix", 1);
+        assert_eq!(&aad[..6], b"prefix");
+        assert_eq!(&aad[6..], &1u32.to_le_bytes());
+
+        // Without prefix
+        let aad = stream_block_aad(b"", 42);
+        assert_eq!(&aad[..], &42u32.to_le_bytes());
+    }
+
+    #[tokio::test]
+    async fn test_encrypted_file_too_short() {
+        let result = AesGcmFileRead::new(
+            memory_reader(vec![0; 4]),
+            Arc::new(make_cipher(b"0123456789abcdef")),
+            [].into(),
+            4,
+        );
+        assert!(result.is_err());
+    }
+
+    // --- AesGcmFileWrite tests ---
+
+    /// Shared-buffer FileWrite for testing AesGcmFileWrite output.
+    struct SharedMemoryWrite {
+        buffer: std::sync::Arc<std::sync::Mutex<Vec<u8>>>,
+    }
+
+    /// FileWrite that fails after a configured number of successful writes.
+    struct FailingFileWrite {
+        writes_before_failure: usize,
+        write_count: usize,
+    }
+
+    #[async_trait::async_trait]
+    impl FileWrite for FailingFileWrite {
+        async fn write(&mut self, _bs: Bytes) -> Result<()> {
+            if self.write_count >= self.writes_before_failure {
+                return Err(Error::new(ErrorKind::Unexpected, "simulated write failure"));
+            }
+            self.write_count += 1;
+            Ok(())
+        }
+
+        async fn close(&mut self) -> Result<()> {
+            Ok(())
+        }
+    }
+
+    #[async_trait::async_trait]
+    impl FileWrite for SharedMemoryWrite {
+        async fn write(&mut self, bs: Bytes) -> Result<()> {
+            self.buffer.lock().unwrap().extend_from_slice(&bs);
+            Ok(())
+        }
+
+        async fn close(&mut self) -> Result<()> {
+            Ok(())
+        }
+    }
+
+    /// Helper: one-shot encrypt through AesGcmFileWrite, return encrypted bytes.
+    async fn write_through_ags1(plaintext: &[u8], key: &[u8], aad_prefix: &[u8]) -> Vec<u8> {
+        let buffer = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
+        let inner: Box<dyn FileWrite> = Box::new(SharedMemoryWrite {
+            buffer: buffer.clone(),
+        });
+        let cipher = Arc::new(make_cipher(key));
+        let mut writer = AesGcmFileWrite::new(inner, cipher, aad_prefix.to_vec());
+
+        writer.write(Bytes::from(plaintext.to_vec())).await.unwrap();
+        writer.close().await.unwrap();
+
+        buffer.lock().unwrap().clone()
+    }
+
+    #[tokio::test]
+    async fn test_write_empty_roundtrip() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"test-aad-prefix!";
+
+        let encrypted = write_through_ags1(b"", key, aad_prefix).await;
+
+        // Should produce header + one empty encrypted block
+        assert_eq!(encrypted.len(), MIN_STREAM_LENGTH as usize);
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), 0);
+    }
+
+    #[tokio::test]
+    async fn test_write_small_roundtrip() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"test-aad-prefix!";
+        let plaintext = b"Hello, Iceberg encryption!";
+
+        let encrypted = write_through_ags1(plaintext, key, aad_prefix).await;
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), plaintext.len() as u64);
+        let result = reader.read(0..plaintext.len() as u64).await.unwrap();
+        assert_eq!(&result[..], plaintext);
+    }
+
+    #[tokio::test]
+    async fn test_write_multi_block_roundtrip() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"multi-block-aad!";
+
+        // 1.5 blocks of data
+        let size = PLAIN_BLOCK_SIZE as usize + PLAIN_BLOCK_SIZE as usize / 2;
+        let plaintext: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect();
+
+        let encrypted = write_through_ags1(&plaintext, key, aad_prefix).await;
+
+        let reader = AesGcmFileRead::new(
+            memory_reader(encrypted.clone()),
+            Arc::new(make_cipher(key)),
+            aad_prefix.as_slice().into(),
+            encrypted.len() as u64,
+        )
+        .unwrap();
+
+        assert_eq!(reader.plaintext_length(), plaintext.len() as u64);
+        let result = reader.read(0..plaintext.len() as u64).await.unwrap();
+        assert_eq!(&result[..], &plaintext[..]);
+    }
+
+    #[tokio::test]
+    async fn test_write_cross_block_accumulation() {
+        let key = b"0123456789abcdef";
+        let aad_prefix = b"cross-block-aad!";
+
+        let buffer = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
inner: Box<dyn FileWrite> = Box::new(SharedMemoryWrite { + buffer: buffer.clone(), + }); + let cipher = Arc::new(make_cipher(key)); + let mut writer = AesGcmFileWrite::new(inner, cipher, aad_prefix.to_vec()); + + // Write 1.5 blocks in 1000-byte chunks + let total_size = PLAIN_BLOCK_SIZE as usize + PLAIN_BLOCK_SIZE as usize / 2; + let plaintext: Vec<u8> = (0..total_size).map(|i| (i % 256) as u8).collect(); + let chunk_size = 1000; + for chunk in plaintext.chunks(chunk_size) { + writer.write(Bytes::from(chunk.to_vec())).await.unwrap(); + } + writer.close().await.unwrap(); + + let encrypted = buffer.lock().unwrap().clone(); + + let reader = AesGcmFileRead::new( + memory_reader(encrypted.clone()), + Arc::new(make_cipher(key)), + aad_prefix.as_slice().into(), + encrypted.len() as u64, + ) + .unwrap(); + + assert_eq!(reader.plaintext_length(), plaintext.len() as u64); + let result = reader.read(0..plaintext.len() as u64).await.unwrap(); + assert_eq!(&result[..], &plaintext[..]); + } + + #[tokio::test] + async fn test_write_exact_block_size() { + let key = b"0123456789abcdef"; + let aad_prefix = b"exact-block-aad!"; + + // Exactly 1 block + let plaintext: Vec<u8> = (0..PLAIN_BLOCK_SIZE as usize) + .map(|i| (i % 256) as u8) + .collect(); + + let encrypted = write_through_ags1(&plaintext, key, aad_prefix).await; + + let reader = AesGcmFileRead::new( + memory_reader(encrypted.clone()), + Arc::new(make_cipher(key)), + aad_prefix.as_slice().into(), + encrypted.len() as u64, + ) + .unwrap(); + + assert_eq!(reader.plaintext_length(), PLAIN_BLOCK_SIZE as u64); + let result = reader.read(0..PLAIN_BLOCK_SIZE as u64).await.unwrap(); + assert_eq!(&result[..], &plaintext[..]); + } + + #[tokio::test] + async fn test_write_block_aligned_no_spurious_empty_block() { + let key = b"0123456789abcdef"; + let aad_prefix = b"block-align-aad!"; + + // Write exactly one block of plaintext — close() should NOT add + // a trailing empty encrypted block (28 bytes: 12-byte nonce + 16-byte tag). 
+ let plaintext: Vec<u8> = (0..PLAIN_BLOCK_SIZE as usize) + .map(|i| (i % 256) as u8) + .collect(); + + let encrypted_via_writer = write_through_ags1(&plaintext, key, aad_prefix).await; + let encrypted_via_reference = encrypt_ags1(&plaintext, &make_cipher(key), aad_prefix); + + // Both should be the same length — no extra 28-byte empty block + assert_eq!( + encrypted_via_writer.len(), + encrypted_via_reference.len(), + "Writer output should match reference encryption length (no spurious trailing block)" + ); + + // Verify roundtrip + let reader = AesGcmFileRead::new( + memory_reader(encrypted_via_writer.clone()), + Arc::new(make_cipher(key)), + aad_prefix.as_slice().into(), + encrypted_via_writer.len() as u64, + ) + .unwrap(); + + assert_eq!(reader.plaintext_length(), PLAIN_BLOCK_SIZE as u64); + let result = reader.read(0..PLAIN_BLOCK_SIZE as u64).await.unwrap(); + assert_eq!(&result[..], &plaintext[..]); + } + + #[tokio::test] + async fn test_write_two_blocks_aligned_no_spurious_empty_block() { + let key = b"0123456789abcdef"; + let aad_prefix = b"2blk-align-aad!!"; + + // Exactly 2 blocks + let size = PLAIN_BLOCK_SIZE as usize * 2; + let plaintext: Vec<u8> = (0..size).map(|i| (i % 256) as u8).collect(); + + let encrypted_via_writer = write_through_ags1(&plaintext, key, aad_prefix).await; + let encrypted_via_reference = encrypt_ags1(&plaintext, &make_cipher(key), aad_prefix); + + assert_eq!( + encrypted_via_writer.len(), + encrypted_via_reference.len(), + "Writer output should match reference encryption length (no spurious trailing block)" + ); + + let reader = AesGcmFileRead::new( + memory_reader(encrypted_via_writer.clone()), + Arc::new(make_cipher(key)), + aad_prefix.as_slice().into(), + encrypted_via_writer.len() as u64, + ) + .unwrap(); + + assert_eq!(reader.plaintext_length(), size as u64); + let result = reader.read(0..size as u64).await.unwrap(); + assert_eq!(&result[..], &plaintext[..]); + } + + #[tokio::test] + async fn test_write_poisoned_after_inner_write_failure() { + let cipher = Arc::new(make_cipher(b"0123456789abcdef")); + // Fail on the second write (first write is the header, second is block data) + let inner: Box<dyn FileWrite> = Box::new(FailingFileWrite { + writes_before_failure: 1, + write_count: 0, + }); + let mut writer = AesGcmFileWrite::new(inner, cipher, b"aad-prefix-here!".to_vec()); + + // First write triggers header (succeeds) + block encrypt+write (fails) + let data = vec![0u8; PLAIN_BLOCK_SIZE as usize]; + let result = writer.write(Bytes::from(data)).await; + assert!(result.is_err()); + + // Subsequent write should be rejected as poisoned + let result = writer.write(Bytes::from(b"more data".to_vec())).await; + assert!(result.is_err()); + assert!( + result.unwrap_err().to_string().contains("poisoned"), + "expected poisoned error" + ); + + // Close should also be rejected + let result = writer.close().await; + assert!(result.is_err()); + assert!( + result.unwrap_err().to_string().contains("poisoned"), + "expected poisoned error on close" + ); + } +}
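The magic numbers in `test_calculate_plaintext_length` all fall out of the AGS1 layout the test comments describe: an 8-byte stream header, one encrypted block per 1 MiB of plaintext (each carrying a 12-byte nonce and a 16-byte GCM tag), and an empty file still written as one empty block. A minimal sketch of that arithmetic, with the constant values taken from the comments above (the crate's own constant names may differ):

```rust
// Illustrative values only, mirroring the test comments above.
const HEADER: u64 = 8; // GCM_STREAM_HEADER_LENGTH
const NONCE: u64 = 12; // NONCE_LENGTH
const TAG: u64 = 16; // GCM_TAG_LENGTH
const PLAIN_BLOCK: u64 = 1024 * 1024; // PLAIN_BLOCK_SIZE

/// Expected AGS1 stream length for a given plaintext length.
fn stream_len(plain_len: u64) -> u64 {
    // An empty file still carries one empty encrypted block, and
    // block-aligned lengths get no trailing empty block.
    let blocks = plain_len.div_ceil(PLAIN_BLOCK).max(1);
    HEADER + plain_len + blocks * (NONCE + TAG)
}

fn main() {
    assert_eq!(stream_len(0), 36); // MIN_STREAM_LENGTH: header(8) + nonce(12) + tag(16)
    assert_eq!(stream_len(PLAIN_BLOCK), 1_048_612); // header(8) + cipher_block(1048604)
    assert_eq!(stream_len(PLAIN_BLOCK + 1), 1_048_612 + 29); // + nonce(12) + 1 + tag(16)
}
```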
diff --git a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs index 96d1c651cd..4cd676dab1 100644 --- a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs @@ -793,7 +793,7 @@ mod tests { }; use parquet::file::metadata::{PageIndexPolicy, ParquetMetaData}; use parquet::file::properties::WriterProperties; - use rand::{Rng, thread_rng}; + use rand::Rng; use tempfile::NamedTempFile; use super::PageIndexEvaluator; @@ -1284,13 +1284,13 @@ mod tests { #[test] fn eval_in_length_of_set_above_limit_all_rows() -> Result<()> { - let mut rng = thread_rng(); + let mut rng = rand::rng(); let (metadata, _temp_file) = create_test_parquet_file()?; let (column_index, offset_index, row_group_metadata) = get_test_metadata(&metadata); let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") - .is_in(std::iter::repeat_with(|| Datum::float(rng.gen_range(0.0..10.0))).take(1000)) + .is_in(std::iter::repeat_with(|| Datum::float(rng.random_range(0.0..10.0))).take(1000)) .bind(iceberg_schema_ref.clone(), false)?; let result = PageIndexEvaluator::eval( diff --git a/crates/iceberg/src/expr/visitors/row_group_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/row_group_metrics_evaluator.rs index 0506b33af0..ad7e19f548 100644 --- a/crates/iceberg/src/expr/visitors/row_group_metrics_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/row_group_metrics_evaluator.rs @@ -528,7 +528,7 @@ mod tests { use parquet::schema::types::{ ColumnDescriptor, ColumnPath, SchemaDescriptor, Type as parquetSchemaType, }; - use rand::{Rng, thread_rng}; + use rand::Rng; use super::RowGroupMetricsEvaluator; use crate::Result; @@ -1617,7 +1617,7 @@ mod tests { #[test] fn eval_true_for_too_many_literals_filter_is_in() -> Result<()> { - let mut rng = thread_rng(); + let mut rng = rand::rng(); let row_group_metadata = create_row_group_metadata( 1, @@ -1636,7 +1636,7 @@ let (iceberg_schema_ref, field_id_map) = build_iceberg_schema_and_field_map()?; let filter = Reference::new("col_float") - .is_in(std::iter::repeat_with(|| Datum::float(rng.gen_range(0.0..10.0))).take(1000)) + .is_in(std::iter::repeat_with(|| Datum::float(rng.random_range(0.0..10.0))).take(1000)) .bind(iceberg_schema_ref.clone(), false)?; let result = RowGroupMetricsEvaluator::eval( diff --git a/crates/iceberg/src/io/storage/config/s3.rs b/crates/iceberg/src/io/storage/config/s3.rs index fae3a14757..64db47084e 100644 --- a/crates/iceberg/src/io/storage/config/s3.rs +++ b/crates/iceberg/src/io/storage/config/s3.rs @@ -69,8 +69,14 @@ pub const S3_DISABLE_CONFIG_LOAD: &str = "s3.disable-config-load"; /// /// This struct contains all the configuration options for connecting to Amazon S3. /// Use the builder pattern via `S3Config::builder()` to construct instances. -/// ``` -#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize, TypedBuilder)] +/// +/// Defaults follow the Iceberg `S3FileIOProperties` spec (see +/// [`PATH_STYLE_ACCESS_DEFAULT = false`](https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIOProperties.java)), +/// i.e. virtual-host-style addressing is enabled unless +/// `s3.path-style-access=true` is explicitly set. This matches what +/// Java clients do out of the box and is required for a number of +/// S3-compatible stores that do not support path-style URLs. +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, TypedBuilder)] pub struct S3Config { /// S3 endpoint URL. #[builder(default, setter(strip_option, into))] pub endpoint: Option<String>, @@ -88,7 +94,9 @@ pub struct S3Config { #[builder(default, setter(strip_option, into))] pub region: Option<String>, /// Enable virtual host style (opposite of path style access). - #[builder(default)] + /// + /// Defaults to `true` to match Iceberg `S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false`. 
+ #[builder(default = true)] pub enable_virtual_host_style: bool, /// Server side encryption type. #[builder(default, setter(strip_option, into))] @@ -125,6 +133,12 @@ pub struct S3Config { pub disable_config_load: bool, } +impl Default for S3Config { + fn default() -> Self { + Self::builder().build() + } +} + impl TryFrom<&StorageConfig> for S3Config { type Error = crate::Error; @@ -267,6 +281,17 @@ assert_eq!(s3_config.region.as_deref(), Some("eu-west-1")); } + #[test] + fn test_s3_config_default_is_virtual_host_style() { + // Matches Iceberg S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false. + assert!(S3Config::default().enable_virtual_host_style); + assert!( + S3Config::try_from(&StorageConfig::new()) + .unwrap() + .enable_virtual_host_style + ); + } + #[test] fn test_s3_config_path_style_access() { let storage_config = StorageConfig::new().with_prop(S3_PATH_STYLE_ACCESS, "true");
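Since `Default` now routes through the builder, callers get the new behavior from both entry points. A minimal sketch of what that looks like (the `iceberg::io::S3Config` import path is assumed here for illustration; the field and builder names are the ones in the diff):

```rust
// Sketch only: exercises the `#[builder(default = true)]` change above.
use iceberg::io::S3Config; // assumed re-export path, for illustration

fn main() {
    // `S3Config::default()` now builds via the builder, so virtual-host-style
    // addressing is on by default, matching Java's
    // S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false.
    assert!(S3Config::default().enable_virtual_host_style);

    // Opting back into path-style access (e.g., for MinIO):
    let cfg = S3Config::builder().enable_virtual_host_style(false).build();
    assert!(!cfg.enable_virtual_host_style);
}
```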
Only {}, {}, and {} are supported.", + other.name(), + CompressionCodec::None.name(), + CompressionCodec::Lz4.name(), + CompressionCodec::zstd_default().name() ), - )); + )), } - Ok(()) } mod metadata; @@ -70,12 +62,13 @@ mod tests { #[test] fn test_puffin_codec_validation() { - // All codecs in SUPPORTED_PUFFIN_CODECS should be valid - for codec in SUPPORTED_PUFFIN_CODECS { - assert!(validate_puffin_compression(*codec).is_ok()); - } + // Supported codecs + assert!(validate_puffin_compression(CompressionCodec::None).is_ok()); + assert!(validate_puffin_compression(CompressionCodec::Lz4).is_ok()); + assert!(validate_puffin_compression(CompressionCodec::zstd_default()).is_ok()); + assert!(validate_puffin_compression(CompressionCodec::Zstd(5)).is_ok()); - // Gzip should not be supported for Puffin files - assert!(validate_puffin_compression(CompressionCodec::Gzip).is_err()); + // Unsupported codecs + assert!(validate_puffin_compression(CompressionCodec::gzip_default()).is_err()); } } diff --git a/crates/iceberg/src/puffin/reader.rs b/crates/iceberg/src/puffin/reader.rs index d272f02d41..0aced4186f 100644 --- a/crates/iceberg/src/puffin/reader.rs +++ b/crates/iceberg/src/puffin/reader.rs @@ -144,7 +144,7 @@ mod tests { sequence_number: 1, offset: 4, length: 10, - compression_codec: CompressionCodec::Gzip, + compression_codec: CompressionCodec::gzip_default(), properties: HashMap::new(), }; @@ -153,7 +153,7 @@ mod tests { assert!(result.is_err()); let err = result.unwrap_err(); assert_eq!(err.kind(), ErrorKind::DataInvalid); - assert!(err.to_string().contains("Gzip")); + assert!(err.to_string().contains("gzip")); assert!( err.to_string() .contains("is not supported for Puffin files") diff --git a/crates/iceberg/src/puffin/test_utils.rs b/crates/iceberg/src/puffin/test_utils.rs index 39fecc6f80..e0844e2002 100644 --- a/crates/iceberg/src/puffin/test_utils.rs +++ b/crates/iceberg/src/puffin/test_utils.rs @@ -77,7 +77,7 @@ pub(crate) fn zstd_compressed_metric_blob_0_metadata() -> BlobMetadata { sequence_number: METRIC_BLOB_0_SEQUENCE_NUMBER, offset: 4, length: 22, - compression_codec: CompressionCodec::Zstd, + compression_codec: CompressionCodec::zstd_default(), properties: HashMap::new(), } } @@ -134,7 +134,7 @@ pub(crate) fn zstd_compressed_metric_blob_1_metadata() -> BlobMetadata { sequence_number: METRIC_BLOB_1_SEQUENCE_NUMBER, offset: 26, length: 77, - compression_codec: CompressionCodec::Zstd, + compression_codec: CompressionCodec::zstd_default(), properties: HashMap::new(), } } diff --git a/crates/iceberg/src/puffin/writer.rs b/crates/iceberg/src/puffin/writer.rs index 30b97f09dd..4af4970b04 100644 --- a/crates/iceberg/src/puffin/writer.rs +++ b/crates/iceberg/src/puffin/writer.rs @@ -251,7 +251,8 @@ mod tests { async fn test_write_zstd_compressed_metric_data() { let temp_dir = TempDir::new().unwrap(); let blobs = vec![blob_0(), blob_1()]; - let blobs_with_compression = blobs_with_compression(blobs.clone(), CompressionCodec::Zstd); + let blobs_with_compression = + blobs_with_compression(blobs.clone(), CompressionCodec::zstd_default()); let input_file = write_puffin_file(&temp_dir, blobs_with_compression, file_properties()) .await @@ -323,7 +324,8 @@ mod tests { async fn test_zstd_compressed_metric_data_is_bit_identical_to_java_generated_file() { let temp_dir = TempDir::new().unwrap(); let blobs = vec![blob_0(), blob_1()]; - let blobs_with_compression = blobs_with_compression(blobs, CompressionCodec::Zstd); + let blobs_with_compression = + blobs_with_compression(blobs, 
CompressionCodec::zstd_default()); assert_files_are_bit_identical( write_puffin_file(&temp_dir, blobs_with_compression, file_properties()) .await @@ -338,14 +340,15 @@ async fn test_gzip_compression_rejected() { let temp_dir = TempDir::new().unwrap(); let blobs = vec![blob_0()]; - let blobs_with_compression = blobs_with_compression(blobs, CompressionCodec::Gzip); + let blobs_with_compression = + blobs_with_compression(blobs, CompressionCodec::gzip_default()); let result = write_puffin_file(&temp_dir, blobs_with_compression, file_properties()).await; assert!(result.is_err()); let err = result.unwrap_err(); assert_eq!(err.kind(), ErrorKind::DataInvalid); - assert!(err.to_string().contains("Gzip")); + assert!(err.to_string().contains("gzip")); assert!( err.to_string() .contains("is not supported for Puffin files") diff --git a/crates/iceberg/src/scan/mod.rs b/crates/iceberg/src/scan/mod.rs index e52b3bdeae..4a1e27bdc1 100644 --- a/crates/iceberg/src/scan/mod.rs +++ b/crates/iceberg/src/scan/mod.rs @@ -40,7 +40,7 @@ use crate::metadata_columns::{get_metadata_field_id, is_metadata_column_name}; use crate::runtime::spawn; use crate::spec::{DataContentType, SnapshotRef}; use crate::table::Table; -use crate::utils::available_parallelism; +use crate::util::available_parallelism; use crate::{Error, ErrorKind, Result}; /// A stream of arrow [`RecordBatch`]es. @@ -683,6 +683,39 @@ } } + /// Creates a fixture with 5 snapshots chained as: + /// S1 (root) -> S2 -> S3 -> S4 -> S5 (current) + /// Useful for testing snapshot history traversal. + pub fn new_with_deep_history() -> Self { + let tmp_dir = TempDir::new().unwrap(); + let table_location = tmp_dir.path().join("table1"); + let table_metadata1_location = table_location.join("metadata/v1.json"); + + let file_io = FileIO::new_with_fs(); + + let table_metadata = { + let json_str = fs::read_to_string(format!( + "{}/testdata/example_table_metadata_v2_deep_history.json", + env!("CARGO_MANIFEST_DIR") + )) + .unwrap(); + serde_json::from_str::<TableMetadata>(&json_str).unwrap() + }; + + let table = Table::builder() + .metadata(table_metadata) + .identifier(TableIdent::from_strs(["db", "table1"]).unwrap()) + .file_io(file_io.clone()) + .metadata_location(table_metadata1_location.as_os_str().to_str().unwrap()) + .build() + .unwrap(); + + Self { + table_location: table_location.to_str().unwrap().to_string(), + table, + } + } + pub fn new_unpartitioned() -> Self { let tmp_dir = TempDir::new().unwrap(); let table_location = tmp_dir.path().join("table1"); diff --git a/crates/iceberg/src/spec/manifest/writer.rs b/crates/iceberg/src/spec/manifest/writer.rs index cc5ef737fb..1b3b605fd8 100644 --- a/crates/iceberg/src/spec/manifest/writer.rs +++ b/crates/iceberg/src/spec/manifest/writer.rs @@ -32,10 +32,14 @@ use crate::spec::manifest::_serde::{ManifestEntryV1, ManifestEntryV2}; use crate::spec::manifest::{manifest_schema_v1, manifest_schema_v2}; use crate::spec::{ DataContentType, DataFile, FieldSummary, ManifestEntry, ManifestFile, ManifestMetadata, - ManifestStatus, PrimitiveLiteral, SchemaRef, StructType, UNASSIGNED_SNAPSHOT_ID, + ManifestStatus, PrimitiveLiteral, SchemaRef, StructType, }; use crate::{Error, ErrorKind}; +/// Placeholder for snapshot ID. The field with this value must be replaced +/// with the actual snapshot ID before it is committed. +const UNASSIGNED_SNAPSHOT_ID: i64 = -1; + /// The builder used to create a [`ManifestWriter`]. 
pub struct ManifestWriterBuilder { output: OutputFile, diff --git a/crates/iceberg/src/spec/snapshot.rs b/crates/iceberg/src/spec/snapshot.rs index f60579e014..72b5417c47 100644 --- a/crates/iceberg/src/spec/snapshot.rs +++ b/crates/iceberg/src/spec/snapshot.rs @@ -33,8 +33,6 @@ use crate::{Error, ErrorKind}; /// The ref name of the main branch of the table. pub const MAIN_BRANCH: &str = "main"; -/// Placeholder for snapshot ID. The field with this value must be replaced with the actual snapshot ID before it is committed. -pub const UNASSIGNED_SNAPSHOT_ID: i64 = -1; /// Reference to [`Snapshot`]. pub type SnapshotRef = Arc<Snapshot>; diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index b91599b74f..607fd98350 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -47,6 +47,9 @@ use crate::{Error, ErrorKind}; static MAIN_BRANCH: &str = "main"; pub(crate) static ONE_MINUTE_MS: i64 = 60_000; +/// Sentinel value used by the Java implementation and older metadata files +/// to represent a missing/empty current snapshot ID. During deserialization, +/// this value is normalized to `None`. pub(crate) static EMPTY_SNAPSHOT_ID: i64 = -1; pub(crate) static INITIAL_SEQUENCE_NUMBER: i64 = 0; @@ -457,7 +460,7 @@ && metadata_content[0] == 0x1F && metadata_content[1] == 0x8B { - let decompressed_data = CompressionCodec::Gzip + let decompressed_data = CompressionCodec::gzip_default() .decompress(metadata_content.to_vec()) .map_err(|e| { Error::new( @@ -499,7 +502,7 @@ // Apply compression based on codec let data_to_write = match codec { - CompressionCodec::Gzip => codec.compress(json_data)?, + CompressionCodec::Gzip(_) => codec.compress(json_data)?, CompressionCodec::None => json_data, _ => { return Err(Error::new( @@ -765,8 +768,8 @@ pub(super) mod _serde { use uuid::Uuid; use super::{ - DEFAULT_PARTITION_SPEC_ID, FormatVersion, MAIN_BRANCH, MetadataLog, SnapshotLog, - TableMetadata, + DEFAULT_PARTITION_SPEC_ID, EMPTY_SNAPSHOT_ID, FormatVersion, MAIN_BRANCH, MetadataLog, + SnapshotLog, TableMetadata, }; use crate::spec::schema::_serde::{SchemaV1, SchemaV2}; use crate::spec::snapshot::_serde::{SnapshotV1, SnapshotV2, SnapshotV3}; @@ -950,7 +953,7 @@ encryption_keys, snapshots, } = value; - let current_snapshot_id = if let &Some(-1) = &value.current_snapshot_id { + let current_snapshot_id = if value.current_snapshot_id == Some(EMPTY_SNAPSHOT_ID) { None } else { value.current_snapshot_id @@ -1063,7 +1066,7 @@ fn try_from(value: TableMetadataV2) -> Result<Self, Self::Error> { let snapshots = value.snapshots; let value = value.shared; - let current_snapshot_id = if let &Some(-1) = &value.current_snapshot_id { + let current_snapshot_id = if value.current_snapshot_id == Some(EMPTY_SNAPSHOT_ID) { None } else { value.current_snapshot_id @@ -1170,7 +1173,7 @@ impl TryFrom<TableMetadataV1> for TableMetadata { type Error = Error; fn try_from(value: TableMetadataV1) -> Result<Self, Self::Error> { - let current_snapshot_id = if let &Some(-1) = &value.current_snapshot_id { + let current_snapshot_id = if value.current_snapshot_id == Some(EMPTY_SNAPSHOT_ID) { None } else { value.current_snapshot_id @@ -3300,6 +3303,18 @@ check_table_metadata_serde(&metadata, expected); } + #[test] + fn test_empty_snapshot_id_is_normalized_to_none() { + let metadata = + fs::read_to_string("testdata/table_metadata/TableMetadataV1Valid.json").unwrap(); + let deserialized: 
TableMetadata = serde_json::from_str(&metadata).unwrap(); + assert_eq!( + deserialized.current_snapshot_id(), + None, + "current_snapshot_id of -1 should be deserialized as None" + ); + } + #[test] fn test_table_metadata_v1_compat() { let metadata = @@ -3618,7 +3633,7 @@ let original_metadata: TableMetadata = get_test_table_metadata("TableMetadataV2Valid.json"); let json = serde_json::to_string(&original_metadata).unwrap(); - let compressed = CompressionCodec::Gzip + let compressed = CompressionCodec::gzip_default() .compress(json.into_bytes()) .expect("failed to compress metadata"); std::fs::write(&metadata_location, &compressed).expect("failed to write metadata"); diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs b/crates/iceberg/src/spec/table_metadata_builder.rs index 62311a15a2..65dbae1bfc 100644 --- a/crates/iceberg/src/spec/table_metadata_builder.rs +++ b/crates/iceberg/src/spec/table_metadata_builder.rs @@ -570,7 +570,7 @@ impl TableMetadataBuilder { /// Remove a reference /// - /// If `ref_name='main'` the current snapshot id is set to -1. + /// If `ref_name='main'` the current snapshot id is set to `None`. pub fn remove_ref(mut self, ref_name: &str) -> Self { if ref_name == MAIN_BRANCH { self.metadata.current_snapshot_id = None;
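The recurring `-1` checks replaced above all normalize the same legacy sentinel. As a standalone sketch of that rule (the constant value comes from the diff; everything else here is illustrative):

```rust
// EMPTY_SNAPSHOT_ID mirrors the Java implementation's sentinel value.
const EMPTY_SNAPSHOT_ID: i64 = -1;

/// What the V1/V2 serde impls above do with `current-snapshot-id`.
fn normalize_current_snapshot_id(raw: Option<i64>) -> Option<i64> {
    if raw == Some(EMPTY_SNAPSHOT_ID) { None } else { raw }
}

fn main() {
    assert_eq!(normalize_current_snapshot_id(Some(-1)), None); // legacy metadata files
    assert_eq!(normalize_current_snapshot_id(Some(42)), Some(42));
    assert_eq!(normalize_current_snapshot_id(None), None);
}
```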
Only '{}' and '{}' are supported for metadata files.", + CompressionCodec::None.name(), + CompressionCodec::gzip_default().name() ), )), } @@ -324,7 +328,7 @@ mod tests { let table_properties = TableProperties::try_from(&props).unwrap(); assert_eq!( table_properties.metadata_compression_codec, - CompressionCodec::Gzip + CompressionCodec::gzip_default() ); } @@ -351,7 +355,7 @@ mod tests { let table_properties = TableProperties::try_from(&props_upper).unwrap(); assert_eq!( table_properties.metadata_compression_codec, - CompressionCodec::Gzip + CompressionCodec::gzip_default() ); // Test mixed case @@ -362,7 +366,7 @@ mod tests { let table_properties = TableProperties::try_from(&props_mixed).unwrap(); assert_eq!( table_properties.metadata_compression_codec, - CompressionCodec::Gzip + CompressionCodec::gzip_default() ); // Test "NONE" should also be case-insensitive @@ -517,7 +521,7 @@ mod tests { )]); assert_eq!( parse_metadata_file_compression(&props).unwrap(), - CompressionCodec::Gzip + CompressionCodec::gzip_default() ); // Test case insensitivity - "NONE" @@ -537,7 +541,7 @@ mod tests { )]); assert_eq!( parse_metadata_file_compression(&props).unwrap(), - CompressionCodec::Gzip + CompressionCodec::gzip_default() ); // Test case insensitivity - "GzIp" @@ -547,7 +551,7 @@ mod tests { )]); assert_eq!( parse_metadata_file_compression(&props).unwrap(), - CompressionCodec::Gzip + CompressionCodec::gzip_default() ); // Test default when property is missing diff --git a/crates/iceberg/src/utils.rs b/crates/iceberg/src/util/mod.rs similarity index 96% rename from crates/iceberg/src/utils.rs rename to crates/iceberg/src/util/mod.rs index 00d3e69bd3..28eda66d49 100644 --- a/crates/iceberg/src/utils.rs +++ b/crates/iceberg/src/util/mod.rs @@ -17,6 +17,9 @@ use std::num::NonZeroUsize; +/// Utilities for working with snapshots. +pub mod snapshot; + // Use a default value of 1 as the safest option. // See https://doc.rust-lang.org/std/thread/fn.available_parallelism.html#limitations // for more details. diff --git a/crates/iceberg/src/util/snapshot.rs b/crates/iceberg/src/util/snapshot.rs new file mode 100644 index 0000000000..98997ae815 --- /dev/null +++ b/crates/iceberg/src/util/snapshot.rs @@ -0,0 +1,185 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::spec::{SnapshotRef, TableMetadataRef}; + +struct Ancestors { + next: Option, + get_snapshot: Box Option + Send>, +} + +impl Iterator for Ancestors { + type Item = SnapshotRef; + + fn next(&mut self) -> Option { + let snapshot = self.next.take()?; + self.next = snapshot + .parent_snapshot_id() + .and_then(|id| (self.get_snapshot)(id)); + Some(snapshot) + } +} + +/// Iterate starting from `snapshot_id` (inclusive) to the root snapshot. 
+pub fn ancestors_of( + table_metadata: &TableMetadataRef, + snapshot_id: i64, +) -> impl Iterator<Item = SnapshotRef> + Send { + let initial = table_metadata.snapshot_by_id(snapshot_id).cloned(); + let table_metadata = table_metadata.clone(); + Ancestors { + next: initial, + get_snapshot: Box::new(move |id| table_metadata.snapshot_by_id(id).cloned()), + } +} + +/// Iterate starting from `latest_snapshot_id` (inclusive) to `oldest_snapshot_id` (exclusive). +pub fn ancestors_between( + table_metadata: &TableMetadataRef, + latest_snapshot_id: i64, + oldest_snapshot_id: Option<i64>, +) -> impl Iterator<Item = SnapshotRef> + Send { + ancestors_of(table_metadata, latest_snapshot_id).take_while(move |snapshot| { + oldest_snapshot_id + .map(|id| snapshot.snapshot_id() != id) + .unwrap_or(true) + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::scan::tests::TableTestFixture; + + // Five snapshots chained as: S1 (root) -> S2 -> S3 -> S4 -> S5 (current) + const S1: i64 = 3051729675574597004; + const S2: i64 = 3055729675574597004; + const S3: i64 = 3056729675574597004; + const S4: i64 = 3057729675574597004; + const S5: i64 = 3059729675574597004; + + fn metadata() -> TableMetadataRef { + let fixture = TableTestFixture::new_with_deep_history(); + std::sync::Arc::new(fixture.table.metadata().clone()) + } + + // --- ancestors_of --- + + #[test] + fn test_ancestors_of_nonexistent_snapshot_returns_empty() { + let meta = metadata(); + let ids: Vec<i64> = ancestors_of(&meta, 999).map(|s| s.snapshot_id()).collect(); + assert!(ids.is_empty()); + } + + #[test] + fn test_ancestors_of_root_returns_only_root() { + let meta = metadata(); + let ids: Vec<i64> = ancestors_of(&meta, S1).map(|s| s.snapshot_id()).collect(); + assert_eq!(ids, vec![S1]); + } + + #[test] + fn test_ancestors_of_leaf_returns_full_chain() { + let meta = metadata(); + let ids: Vec<i64> = ancestors_of(&meta, S5).map(|s| s.snapshot_id()).collect(); + assert_eq!(ids, vec![S5, S4, S3, S2, S1]); + } + + #[test] + fn test_ancestors_of_mid_chain_returns_partial_chain() { + let meta = metadata(); + let ids: Vec<i64> = ancestors_of(&meta, S3).map(|s| s.snapshot_id()).collect(); + assert_eq!(ids, vec![S3, S2, S1]); + } + + #[test] + fn test_ancestors_of_second_snapshot() { + let meta = metadata(); + let ids: Vec<i64> = ancestors_of(&meta, S2).map(|s| s.snapshot_id()).collect(); + assert_eq!(ids, vec![S2, S1]); + } + + // --- ancestors_between --- + + #[test] + fn test_ancestors_between_same_id_returns_empty() { + let meta = metadata(); + let ids: Vec<i64> = ancestors_between(&meta, S3, Some(S3)) + .map(|s| s.snapshot_id()) + .collect(); + assert!(ids.is_empty()); + } + + #[test] + fn test_ancestors_between_no_oldest_returns_all_ancestors() { + let meta = metadata(); + let ids: Vec<i64> = ancestors_between(&meta, S5, None) + .map(|s| s.snapshot_id()) + .collect(); + assert_eq!(ids, vec![S5, S4, S3, S2, S1]); + } + + #[test] + fn test_ancestors_between_excludes_oldest_snapshot() { + let meta = metadata(); + // S5 down to (but not including) S2 + let ids: Vec<i64> = ancestors_between(&meta, S5, Some(S2)) + .map(|s| s.snapshot_id()) + .collect(); + assert_eq!(ids, vec![S5, S4, S3]); + } + + #[test] + fn test_ancestors_between_adjacent_snapshots() { + let meta = metadata(); + // S3 down to (but not including) S2 — only S3 itself + let ids: Vec<i64> = ancestors_between(&meta, S3, Some(S2)) + .map(|s| s.snapshot_id()) + .collect(); + assert_eq!(ids, vec![S3]); + } + + #[test] + fn test_ancestors_between_leaf_and_root() { + let meta = metadata(); + // S5 down to (but not including) S1 + let ids: Vec<i64> = ancestors_between(&meta, S5, 
Some(S1)) + .map(|s| s.snapshot_id()) + .collect(); + assert_eq!(ids, vec![S5, S4, S3, S2]); + } + + #[test] + fn test_ancestors_between_nonexistent_oldest_returns_full_chain() { + let meta = metadata(); + // oldest_snapshot_id doesn't exist in the chain, so take_while never stops + let ids: Vec<i64> = ancestors_between(&meta, S5, Some(999)) + .map(|s| s.snapshot_id()) + .collect(); + assert_eq!(ids, vec![S5, S4, S3, S2, S1]); + } + + #[test] + fn test_ancestors_between_nonexistent_latest_returns_empty() { + let meta = metadata(); + let ids: Vec<i64> = ancestors_between(&meta, 999, Some(S1)) + .map(|s| s.snapshot_id()) + .collect(); + assert!(ids.is_empty()); + } +}
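For callers, the two functions compose naturally with iterator adapters. A hypothetical use, collecting the IDs an incremental read would cover, newest first (the helper name and scenario are illustrative; the function signatures and import paths are the ones added above):

```rust
use iceberg::spec::TableMetadataRef;
use iceberg::util::snapshot::ancestors_between;

/// Snapshot IDs between the current snapshot and the last one already
/// consumed (exclusive), newest first.
fn snapshots_to_replay(
    meta: &TableMetadataRef,
    current: i64,
    last_consumed: Option<i64>,
) -> Vec<i64> {
    ancestors_between(meta, current, last_consumed)
        .map(|s| s.snapshot_id())
        .collect()
}
```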
"parent-snapshot-id": 3056729675574597004, + "timestamp-ms": 1595100955770, + "sequence-number": 3, + "summary": {"operation": "overwrite"}, + "manifest-list": "s3://bucket/metadata/snap-3057729675574597004.avro", + "schema-id": 1 + }, + { + "snapshot-id": 3059729675574597004, + "parent-snapshot-id": 3057729675574597004, + "timestamp-ms": 1602638573590, + "sequence-number": 4, + "summary": {"operation": "append"}, + "manifest-list": "s3://bucket/metadata/snap-3059729675574597004.avro", + "schema-id": 1 + } + ], + "snapshot-log": [ + {"snapshot-id": 3051729675574597004, "timestamp-ms": 1515100955770}, + {"snapshot-id": 3055729675574597004, "timestamp-ms": 1555100955770}, + {"snapshot-id": 3056729675574597004, "timestamp-ms": 1575100955770}, + {"snapshot-id": 3057729675574597004, "timestamp-ms": 1595100955770}, + {"snapshot-id": 3059729675574597004, "timestamp-ms": 1602638573590} + ], + "metadata-log": [], + "refs": {"main": {"snapshot-id": 3059729675574597004, "type": "branch"}} +} diff --git a/crates/integration_tests/src/lib.rs b/crates/integration_tests/src/lib.rs index 4bf8f4d19c..feafa3ae9f 100644 --- a/crates/integration_tests/src/lib.rs +++ b/crates/integration_tests/src/lib.rs @@ -18,7 +18,9 @@ use std::collections::HashMap; use std::sync::OnceLock; -use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY}; +use iceberg::io::{ + S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS, S3_REGION, S3_SECRET_ACCESS_KEY, +}; use iceberg_catalog_rest::REST_CATALOG_PROP_URI; use iceberg_test_utils::{get_minio_endpoint, get_rest_catalog_endpoint, set_up}; @@ -45,6 +47,7 @@ impl GlobalTestFixture { (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()), (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()), (S3_REGION.to_string(), "us-east-1".to_string()), + (S3_PATH_STYLE_ACCESS.to_string(), "true".to_string()), ]); GlobalTestFixture { catalog_config } diff --git a/crates/integration_tests/tests/common/mod.rs b/crates/integration_tests/tests/common/mod.rs index e49a57465c..b7197a3a46 100644 --- a/crates/integration_tests/tests/common/mod.rs +++ b/crates/integration_tests/tests/common/mod.rs @@ -28,7 +28,6 @@ pub async fn random_ns() -> Namespace { let fixture = get_test_fixture(); let rest_catalog = RestCatalogBuilder::default() .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 { - configured_scheme: "s3".to_string(), customized_credential_load: None, })) .load("rest", fixture.catalog_config.clone()) diff --git a/crates/integration_tests/tests/conflict_commit_test.rs b/crates/integration_tests/tests/conflict_commit_test.rs index 3b1362b95d..af2c7a7779 100644 --- a/crates/integration_tests/tests/conflict_commit_test.rs +++ b/crates/integration_tests/tests/conflict_commit_test.rs @@ -43,7 +43,6 @@ async fn test_append_data_file_conflict() { let fixture = get_test_fixture(); let rest_catalog = RestCatalogBuilder::default() .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 { - configured_scheme: "s3".to_string(), customized_credential_load: None, })) .load("rest", fixture.catalog_config.clone()) diff --git a/crates/integration_tests/tests/read_evolved_schema.rs b/crates/integration_tests/tests/read_evolved_schema.rs index ae25a08987..f7416be2d4 100644 --- a/crates/integration_tests/tests/read_evolved_schema.rs +++ b/crates/integration_tests/tests/read_evolved_schema.rs @@ -34,7 +34,6 @@ async fn test_evolved_schema() { let fixture = get_test_fixture(); let rest_catalog = RestCatalogBuilder::default() .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 
{ - configured_scheme: "s3".to_string(), customized_credential_load: None, })) .load("rest", fixture.catalog_config.clone()) diff --git a/crates/integration_tests/tests/read_positional_deletes.rs b/crates/integration_tests/tests/read_positional_deletes.rs index d4c4afeaf3..0f79596a12 100644 --- a/crates/integration_tests/tests/read_positional_deletes.rs +++ b/crates/integration_tests/tests/read_positional_deletes.rs @@ -30,7 +30,6 @@ async fn test_read_table_with_positional_deletes() { let fixture = get_test_fixture(); let rest_catalog = RestCatalogBuilder::default() .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 { - configured_scheme: "s3".to_string(), customized_credential_load: None, })) .load("rest", fixture.catalog_config.clone()) diff --git a/crates/integrations/datafusion/src/physical_plan/mod.rs b/crates/integrations/datafusion/src/physical_plan/mod.rs index 5a9845cde0..aeac30de32 100644 --- a/crates/integrations/datafusion/src/physical_plan/mod.rs +++ b/crates/integrations/datafusion/src/physical_plan/mod.rs @@ -26,5 +26,6 @@ pub(crate) mod write; pub(crate) const DATA_FILES_COL_NAME: &str = "data_files"; +pub use expr_to_predicate::convert_filters_to_predicate; pub use project::project_with_partition; pub use scan::IcebergTableScan; diff --git a/crates/storage/opendal/README.md b/crates/storage/opendal/README.md index c5092eb97a..a4ad512e17 100644 --- a/crates/storage/opendal/README.md +++ b/crates/storage/opendal/README.md @@ -61,7 +61,6 @@ use iceberg_storage_opendal::OpenDalStorageFactory; async fn main() -> iceberg::Result<()> { let catalog = RestCatalogBuilder::default() .with_storage_factory(Arc::new(OpenDalStorageFactory::S3 { - configured_scheme: "s3".to_string(), customized_credential_load: None, })) .load( diff --git a/crates/storage/opendal/src/azdls.rs b/crates/storage/opendal/src/azdls.rs index 6251f8cdaa..b47c55d9e7 100644 --- a/crates/storage/opendal/src/azdls.rs +++ b/crates/storage/opendal/src/azdls.rs @@ -91,10 +91,9 @@ pub(crate) fn azdls_config_parse(mut properties: HashMap) -> Res pub(crate) fn azdls_create_operator<'a>( absolute_path: &'a str, config: &AzdlsConfig, - configured_scheme: &AzureStorageScheme, ) -> Result<(opendal::Operator, &'a str)> { let path = absolute_path.parse::()?; - match_path_with_config(&path, config, configured_scheme)?; + match_path_with_config(&path, config)?; let op = azdls_config_build(config, &path)?; @@ -160,18 +159,7 @@ impl FromStr for AzureStorageScheme { } /// Validates whether the given path matches what's configured for the backend. 
-pub(crate) fn match_path_with_config( - path: &AzureStoragePath, - config: &AzdlsConfig, - configured_scheme: &AzureStorageScheme, -) -> Result<()> { - ensure_data_valid!( - &path.scheme == configured_scheme, - "Storage::Azdls: Scheme mismatch: configured {}, passed {}", - configured_scheme, - path.scheme - ); - +pub(crate) fn match_path_with_config(path: &AzureStoragePath, config: &AzdlsConfig) -> Result<()> { if let Some(ref configured_account_name) = config.account_name { ensure_data_valid!( &path.account_name == configured_account_name, @@ -408,7 +396,6 @@ mod tests { endpoint: Some("https://myaccount.dfs.core.windows.net".to_string()), ..Default::default() }, - AzureStorageScheme::Abfss, ), Some(("myfs", "/path/to/file.parquet")), ), @@ -421,33 +408,19 @@ mod tests { endpoint: Some("https://myaccount.dfs.core.windows.net".to_string()), ..Default::default() }, - AzureStorageScheme::Abfss, - ), - None, - ), - ( - "different scheme", - ( - "wasbs://myfs@myaccount.dfs.core.windows.net/path/to/file.parquet", - AzdlsConfig { - account_name: Some("myaccount".to_string()), - endpoint: Some("https://myaccount.dfs.core.windows.net".to_string()), - ..Default::default() - }, - AzureStorageScheme::Abfss, ), None, ), ( "incompatible scheme for endpoint", ( - "abfs://myfs@myaccount.dfs.core.windows.net/path/to/file.parquet", + // `abfss` implies https; configured endpoint is plain http. + "abfss://myfs@myaccount.dfs.core.windows.net/path/to/file.parquet", AzdlsConfig { account_name: Some("myaccount".to_string()), endpoint: Some("http://myaccount.dfs.core.windows.net".to_string()), ..Default::default() }, - AzureStorageScheme::Abfss, ), None, ), @@ -460,7 +433,6 @@ mod tests { endpoint: Some("https://myaccount.dfs.core.chinacloudapi.cn".to_string()), ..Default::default() }, - AzureStorageScheme::Abfss, ), None, ), @@ -474,14 +446,27 @@ mod tests { endpoint: None, ..Default::default() }, - AzureStorageScheme::Abfs, + ), + Some(("myfs", "/path/to/file.parquet")), + ), + ( + "scheme differs from a previously-configured one is accepted", + ( + // No configured scheme exists anymore; both abfss and wasbs + // should be accepted by the same storage. + "wasbs://myfs@myaccount.blob.core.windows.net/path/to/file.parquet", + AzdlsConfig { + account_name: Some("myaccount".to_string()), + endpoint: Some("https://myaccount.blob.core.windows.net".to_string()), + ..Default::default() + }, ), Some(("myfs", "/path/to/file.parquet")), ), ]; for (name, input, expected) in test_cases { - let result = azdls_create_operator(input.0, &input.1, &input.2); + let result = azdls_create_operator(input.0, &input.1); match expected { Some((expected_filesystem, expected_path)) => { assert!(result.is_ok(), "Test case {name} failed: {result:?}"); diff --git a/crates/storage/opendal/src/lib.rs b/crates/storage/opendal/src/lib.rs index 8160680523..a0336868e3 100644 --- a/crates/storage/opendal/src/lib.rs +++ b/crates/storage/opendal/src/lib.rs @@ -46,7 +46,6 @@ use utils::from_opendal_error; cfg_if! { if #[cfg(feature = "opendal-azdls")] { mod azdls; - use azdls::AzureStorageScheme; use azdls::*; use opendal::services::AzdlsConfig; } @@ -108,9 +107,6 @@ pub enum OpenDalStorageFactory { /// S3 storage factory. #[cfg(feature = "opendal-s3")] S3 { - /// s3 storage could have `s3://` and `s3a://`. - /// Storing the scheme string here to return the correct path. - configured_scheme: String, /// Custom AWS credential loader. 
#[serde(skip)] customized_credential_load: Option<CustomAwsCredentialLoader>, @@ -123,10 +119,7 @@ Oss, /// Azure Data Lake Storage factory. #[cfg(feature = "opendal-azdls")] - Azdls { - /// The configured Azure storage scheme. - configured_scheme: AzureStorageScheme, - }, + Azdls, } #[typetag::serde(name = "OpenDalStorageFactory")] @@ -142,10 +135,8 @@ impl StorageFactory for OpenDalStorageFactory { OpenDalStorageFactory::Fs => Ok(Arc::new(OpenDalStorage::LocalFs)), #[cfg(feature = "opendal-s3")] OpenDalStorageFactory::S3 { - configured_scheme, customized_credential_load, } => Ok(Arc::new(OpenDalStorage::S3 { - configured_scheme: configured_scheme.clone(), config: s3_config_parse(config.props().clone())?.into(), customized_credential_load: customized_credential_load.clone(), })), @@ -158,12 +149,9 @@ config: oss_config_parse(config.props().clone())?.into(), })), #[cfg(feature = "opendal-azdls")] - OpenDalStorageFactory::Azdls { configured_scheme } => { - Ok(Arc::new(OpenDalStorage::Azdls { - configured_scheme: configured_scheme.clone(), - config: azdls_config_parse(config.props().clone())?.into(), - })) - } + OpenDalStorageFactory::Azdls => Ok(Arc::new(OpenDalStorage::Azdls { + config: azdls_config_parse(config.props().clone())?.into(), + })), #[cfg(all( not(feature = "opendal-memory"), not(feature = "opendal-fs"), @@ -196,11 +184,11 @@ pub enum OpenDalStorage { #[cfg(feature = "opendal-fs")] LocalFs, /// S3 storage variant. + /// + /// Accepts any S3-family URL (`s3://`, `s3a://`, `s3n://`); the scheme is + /// derived from the path at call time. #[cfg(feature = "opendal-s3")] S3 { - /// s3 storage could have `s3://` and `s3a://`. - /// Storing the scheme string here to return the correct path. - configured_scheme: String, /// S3 configuration. config: Arc<S3Config>, /// Custom AWS credential loader. @@ -220,16 +208,13 @@ config: Arc<OssConfig>, }, /// Azure Data Lake Storage variant. - /// Expects paths of the form + /// + /// Accepts paths of the form /// `abfs[s]://<filesystem>@<account>.dfs.<endpoint>/<path>` or /// `wasb[s]://<container>@<account>.blob.<endpoint>/<path>`. + /// The scheme is derived from the path at call time. #[cfg(feature = "opendal-azdls")] - #[allow(private_interfaces)] Azdls { - /// The configured Azure storage scheme. - /// Because Azdls accepts multiple possible schemes, we store the full - /// passed scheme here to later validate schemes passed via paths. config: Arc<AzdlsConfig>, }, @@ -274,15 +259,21 @@ impl OpenDalStorage { } #[cfg(feature = "opendal-s3")] OpenDalStorage::S3 { - configured_scheme, config, customized_credential_load, } => { let op = s3_config_build(config, customized_credential_load, path)?; let op_info = op.info(); - // Check prefix of s3 path. - let prefix = format!("{}://{}/", configured_scheme, op_info.name()); + // Use the URL scheme in the path for prefix matching. This enables + // use of S3-compatible storage backends using custom schemes (e.g., `minio://`, `r2://`). 
+ let url = url::Url::parse(path).map_err(|e| { + Error::new( + ErrorKind::DataInvalid, + format!("Invalid s3 url: {path}: {e}"), + ) + })?; + let prefix = format!("{}://{}/", url.scheme(), op_info.name()); if path.starts_with(&prefix) { (op, &path[prefix.len()..]) } else { @@ -319,10 +310,7 @@ } } #[cfg(feature = "opendal-azdls")] - OpenDalStorage::Azdls { - configured_scheme, - config, - } => azdls_create_operator(path, config, configured_scheme)?, + OpenDalStorage::Azdls { config } => azdls_create_operator(path, config)?, #[cfg(all( not(feature = "opendal-s3"), not(feature = "opendal-fs"), @@ -357,9 +345,7 @@ #[cfg(feature = "opendal-fs")] OpenDalStorage::LocalFs => Ok(path.strip_prefix("file:/").unwrap_or(&path[1..])), #[cfg(feature = "opendal-s3")] - OpenDalStorage::S3 { - configured_scheme, .. - } => { + OpenDalStorage::S3 { .. } => { let url = url::Url::parse(path)?; let bucket = url.host_str().ok_or_else(|| { Error::new( @@ -367,7 +353,7 @@ format!("Invalid s3 url: {path}, missing bucket"), ) })?; - let prefix = format!("{}://{}/", configured_scheme, bucket); + let prefix = format!("{}://{}/", url.scheme(), bucket); if path.starts_with(&prefix) { Ok(&path[prefix.len()..]) } else { @@ -416,12 +402,9 @@ } } #[cfg(feature = "opendal-azdls")] - OpenDalStorage::Azdls { - configured_scheme, - config, - } => { + OpenDalStorage::Azdls { config } => { let azure_path = path.parse::<AzureStoragePath>()?; - match_path_with_config(&azure_path, config, configured_scheme)?; + match_path_with_config(&azure_path, config)?; let relative_path_len = azure_path.path.len(); Ok(&path[path.len() - relative_path_len..]) } @@ -631,47 +614,21 @@ mod tests { #[test] fn test_relativize_path_s3() { let storage = OpenDalStorage::S3 { - configured_scheme: "s3".to_string(), config: Arc::new(S3Config::default()), customized_credential_load: None, }; - assert_eq!( - storage - .relativize_path("s3://my-bucket/path/to/file.parquet") - .unwrap(), - "path/to/file.parquet" - ); - - // s3a scheme - let storage_s3a = OpenDalStorage::S3 { - configured_scheme: "s3a".to_string(), - config: Arc::new(S3Config::default()), - customized_credential_load: None, - }; - assert_eq!( - storage_s3a - .relativize_path("s3a://my-bucket/path/to/file.parquet") - .unwrap(), - "path/to/file.parquet" - ); - } - - #[cfg(feature = "opendal-s3")] - #[test] - fn test_relativize_path_s3_scheme_mismatch() { - let storage = OpenDalStorage::S3 { - configured_scheme: "s3".to_string(), - config: Arc::new(S3Config::default()), - customized_credential_load: None, - }; - - // Scheme mismatch should error - assert!( - storage - .relativize_path("s3a://my-bucket/path/to/file.parquet") - .is_err() - ); + // All S3-family schemes are accepted by the same storage instance. + // Custom schemes for S3-compatible stores (e.g., `minio://`) are also + // accepted because the path's scheme is used as-is for prefix matching. 
+ for scheme in ["s3", "s3a", "s3n", "minio"] { + assert_eq!( + storage + .relativize_path(&format!("{scheme}://my-bucket/path/to/file.parquet")) + .unwrap(), + "path/to/file.parquet" + ); + } } #[cfg(feature = "opendal-gcs")] @@ -736,7 +693,6 @@ mod tests { #[test] fn test_relativize_path_azdls() { let storage = OpenDalStorage::Azdls { - configured_scheme: AzureStorageScheme::Abfss, config: Arc::new(AzdlsConfig { account_name: Some("myaccount".to_string()), endpoint: Some("https://myaccount.dfs.core.windows.net".to_string()), @@ -751,24 +707,4 @@ mod tests { "/path/to/file.parquet" ); } - - #[cfg(feature = "opendal-azdls")] - #[test] - fn test_relativize_path_azdls_scheme_mismatch() { - let storage = OpenDalStorage::Azdls { - configured_scheme: AzureStorageScheme::Abfss, - config: Arc::new(AzdlsConfig { - account_name: Some("myaccount".to_string()), - endpoint: Some("https://myaccount.dfs.core.windows.net".to_string()), - ..Default::default() - }), - }; - - // wasbs scheme doesn't match configured abfss - assert!( - storage - .relativize_path("wasbs://myfs@myaccount.dfs.core.windows.net/path/to/file.parquet") - .is_err() - ); - } } diff --git a/crates/storage/opendal/src/resolving.rs b/crates/storage/opendal/src/resolving.rs index 7c06cf96a5..64a16b18d2 100644 --- a/crates/storage/opendal/src/resolving.rs +++ b/crates/storage/opendal/src/resolving.rs @@ -70,29 +70,28 @@ fn parse_scheme(scheme: &str) -> Result { } } -/// Extract the scheme string from a path URL. -fn extract_scheme(path: &str) -> Result { +/// Extract the [`Scheme`] family from a path URL. +fn extract_scheme(path: &str) -> Result { let url = Url::parse(path).map_err(|e| { Error::new( ErrorKind::DataInvalid, format!("Invalid path: {path}, failed to parse URL: {e}"), ) })?; - Ok(url.scheme().to_string()) + parse_scheme(url.scheme()) } /// Build an [`OpenDalStorage`] variant for the given scheme and config properties. fn build_storage_for_scheme( - scheme: &str, + scheme: Scheme, props: &HashMap, #[cfg(feature = "opendal-s3")] customized_credential_load: &Option, ) -> Result { - match parse_scheme(scheme)? { + match scheme { #[cfg(feature = "opendal-s3")] Scheme::S3 => { let config = crate::s3::s3_config_parse(props.clone())?; Ok(OpenDalStorage::S3 { - configured_scheme: scheme.to_string(), config: Arc::new(config), customized_credential_load: customized_credential_load.clone(), }) @@ -113,10 +112,8 @@ fn build_storage_for_scheme( } #[cfg(feature = "opendal-azdls")] Scheme::Azdls => { - let configured_scheme: crate::azdls::AzureStorageScheme = scheme.parse()?; let config = crate::azdls::azdls_config_parse(props.clone())?; Ok(OpenDalStorage::Azdls { - configured_scheme, config: Arc::new(config), }) } @@ -196,14 +193,15 @@ impl StorageFactory for OpenDalResolvingStorageFactory { /// to the appropriate [`OpenDalStorage`] variant. /// /// Sub-storages are lazily created on first use for each scheme and cached -/// for subsequent operations. +/// for subsequent operations. Scheme aliases like `s3`/`s3a`/`s3n` map to +/// the same [`Scheme`] variant, so they share a storage instance. #[derive(Debug, Serialize, Deserialize)] pub struct OpenDalResolvingStorage { /// Configuration properties shared across all backends. props: HashMap, - /// Cache of scheme → storage mappings. + /// Cache of scheme to storage mappings. #[serde(skip, default)] - storages: RwLock>>, + storages: RwLock>>, /// Custom AWS credential loader for S3 storage. 
#[cfg(feature = "opendal-s3")] #[serde(skip)] @@ -239,7 +237,7 @@ impl OpenDalResolvingStorage { } let storage = build_storage_for_scheme( - &scheme, + scheme, &self.props, #[cfg(feature = "opendal-s3")] &self.customized_credential_load, @@ -288,7 +286,7 @@ impl Storage for OpenDalResolvingStorage { async fn delete_stream(&self, mut paths: BoxStream<'static, String>) -> Result<()> { // Group paths by scheme so each resolved storage receives a batch, // avoiding repeated operator creation per path. - let mut grouped: HashMap> = HashMap::new(); + let mut grouped: HashMap> = HashMap::new(); while let Some(path) = paths.next().await { let scheme = extract_scheme(&path)?; grouped.entry(scheme).or_default().push(path); @@ -317,3 +315,54 @@ impl Storage for OpenDalResolvingStorage { )) } } + +#[cfg(test)] +mod tests { + use super::*; + + /// Builds a resolving storage with empty props, suitable for `resolve()` + /// calls that don't actually hit any backend. + fn empty_resolving_storage() -> OpenDalResolvingStorage { + OpenDalResolvingStorage { + props: HashMap::new(), + storages: RwLock::new(HashMap::new()), + #[cfg(feature = "opendal-s3")] + customized_credential_load: None, + } + } + + #[cfg(feature = "opendal-s3")] + #[test] + fn test_resolve_s3_aliases_share_instance() { + let storage = empty_resolving_storage(); + + // All three S3-family schemes must collapse to a single cached + // `Arc` so that catalogs handing the resolver a mix + // of `s3://`, `s3a://`, `s3n://` paths don't rebuild operators. + let a = storage.resolve("s3://bucket/key").unwrap(); + let b = storage.resolve("s3a://bucket/key").unwrap(); + let c = storage.resolve("s3n://bucket/key").unwrap(); + + assert!(Arc::ptr_eq(&a, &b), "s3 and s3a should share one instance"); + assert!(Arc::ptr_eq(&a, &c), "s3 and s3n should share one instance"); + } + + #[cfg(feature = "opendal-azdls")] + #[test] + fn test_resolve_azdls_aliases_share_instance() { + let storage = empty_resolving_storage(); + + let path_for = |scheme: &str| { + format!("{scheme}://myfs@myaccount.dfs.core.windows.net/path/to/file.parquet") + }; + + // All Azure schemes collapse onto one cached instance. + let abfss = storage.resolve(&path_for("abfss")).unwrap(); + let abfs = storage.resolve(&path_for("abfs")).unwrap(); + + assert!( + Arc::ptr_eq(&abfss, &abfs), + "abfss and abfs should share one instance" + ); + } +} diff --git a/crates/storage/opendal/src/s3.rs b/crates/storage/opendal/src/s3.rs index 7db88d273f..2e21418606 100644 --- a/crates/storage/opendal/src/s3.rs +++ b/crates/storage/opendal/src/s3.rs @@ -37,6 +37,12 @@ use crate::utils::{from_opendal_error, is_truthy}; /// Parse iceberg props to s3 config. pub(crate) fn s3_config_parse(mut m: HashMap) -> Result { let mut cfg = S3Config::default(); + // Match Iceberg `S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false`: + // virtual-host-style addressing is the spec default. opendal's own + // default is path-style, which disagrees with the Java SDK and breaks + // S3-compatible stores that only accept virtual-hosted-style URLs. + // Any explicit `s3.path-style-access` property below overrides this. 
+    cfg.enable_virtual_host_style = true;
     if let Some(endpoint) = m.remove(S3_ENDPOINT) {
         cfg.endpoint = Some(endpoint);
     };
@@ -177,3 +183,28 @@ impl AwsCredentialLoad for CustomAwsCredentialLoader {
         self.0.load_credential(client).await
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashMap;
+
+    use iceberg::io::S3_PATH_STYLE_ACCESS;
+
+    use super::s3_config_parse;
+
+    fn parse_with(prop: Option<&str>) -> bool {
+        let mut props = HashMap::new();
+        if let Some(v) = prop {
+            props.insert(S3_PATH_STYLE_ACCESS.to_string(), v.to_string());
+        }
+        s3_config_parse(props).unwrap().enable_virtual_host_style
+    }
+
+    #[test]
+    fn s3_config_parse_path_style_access() {
+        // Match Iceberg S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false.
+        assert!(parse_with(None));
+        assert!(parse_with(Some("false")));
+        assert!(!parse_with(Some("true")));
+    }
+}
diff --git a/crates/storage/opendal/tests/file_io_s3_test.rs b/crates/storage/opendal/tests/file_io_s3_test.rs
index 207a4454d7..d6dd8a3b45 100644
--- a/crates/storage/opendal/tests/file_io_s3_test.rs
+++ b/crates/storage/opendal/tests/file_io_s3_test.rs
@@ -26,7 +26,8 @@ mod tests {
     use async_trait::async_trait;
     use futures::StreamExt;
     use iceberg::io::{
-        FileIO, FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY,
+        FileIO, FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS, S3_REGION,
+        S3_SECRET_ACCESS_KEY,
     };
     use iceberg_storage_opendal::{CustomAwsCredentialLoader, OpenDalStorageFactory};
     use iceberg_test_utils::{get_minio_endpoint, normalize_test_name_with_parts, set_up};
@@ -39,7 +40,6 @@ mod tests {
         let minio_endpoint = get_minio_endpoint();
 
         FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 {
-            configured_scheme: "s3".to_string(),
             customized_credential_load: None,
         }))
         .with_props(vec![
             (S3_ENDPOINT, minio_endpoint),
             (S3_ACCESS_KEY_ID, "admin".to_string()),
             (S3_SECRET_ACCESS_KEY, "password".to_string()),
             (S3_REGION, "us-east-1".to_string()),
+            (S3_PATH_STYLE_ACCESS, "true".to_string()),
         ])
         .build()
     }
@@ -132,13 +133,13 @@ mod tests {
         // Test that the loader can be used in FileIOBuilder with OpenDalStorageFactory
         let _builder = FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 {
-            configured_scheme: "s3".to_string(),
             customized_credential_load: Some(custom_loader),
         }))
         .with_props(vec![
             (S3_ENDPOINT, "http://localhost:9000".to_string()),
             ("bucket", "test-bucket".to_string()),
             (S3_REGION, "us-east-1".to_string()),
+            (S3_PATH_STYLE_ACCESS, "true".to_string()),
         ]);
     }
@@ -154,12 +155,12 @@ mod tests {
         // Build FileIO with custom credential loader via OpenDalStorageFactory
         let file_io_with_custom_creds =
             FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 {
-                configured_scheme: "s3".to_string(),
                 customized_credential_load: Some(custom_loader),
             }))
             .with_props(vec![
                 (S3_ENDPOINT, minio_endpoint),
                 (S3_REGION, "us-east-1".to_string()),
+                (S3_PATH_STYLE_ACCESS, "true".to_string()),
             ])
             .build();
@@ -182,12 +183,12 @@ mod tests {
         // Build FileIO with custom credential loader via OpenDalStorageFactory
         let file_io_with_custom_creds =
             FileIOBuilder::new(Arc::new(OpenDalStorageFactory::S3 {
-                configured_scheme: "s3".to_string(),
                 customized_credential_load: Some(custom_loader),
             }))
            .with_props(vec![
                 (S3_ENDPOINT, minio_endpoint),
                 (S3_REGION, "us-east-1".to_string()),
+                (S3_PATH_STYLE_ACCESS, "true".to_string()),
             ])
             .build();
diff --git a/crates/storage/opendal/tests/resolving_storage_test.rs b/crates/storage/opendal/tests/resolving_storage_test.rs
index 4572ad2c2d..c235089508 100644
--- a/crates/storage/opendal/tests/resolving_storage_test.rs
+++ b/crates/storage/opendal/tests/resolving_storage_test.rs
@@ -29,7 +29,8 @@ mod tests {
     use std::sync::Arc;
 
     use iceberg::io::{
-        FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY,
+        FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_PATH_STYLE_ACCESS, S3_REGION,
+        S3_SECRET_ACCESS_KEY,
     };
     use iceberg_storage_opendal::OpenDalResolvingStorageFactory;
     use iceberg_test_utils::{get_minio_endpoint, normalize_test_name_with_parts, set_up};
@@ -45,6 +46,7 @@ mod tests {
             (S3_ACCESS_KEY_ID, "admin".to_string()),
             (S3_SECRET_ACCESS_KEY, "password".to_string()),
             (S3_REGION, "us-east-1".to_string()),
+            (S3_PATH_STYLE_ACCESS, "true".to_string()),
         ])
         .build()
     }
@@ -288,6 +290,7 @@ mod tests {
         .with_props(vec![
             (S3_ENDPOINT, minio_endpoint),
             (S3_REGION, "us-east-1".to_string()),
+            (S3_PATH_STYLE_ACCESS, "true".to_string()),
        ])
        .build();
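
A note on the resolving.rs change above: the whole alias story reduces to one pattern. Normalize the URL scheme into a small enum, then key the lazy cache on that enum so `s3://`, `s3a://`, and `s3n://` all land in the same slot. The sketch below is a minimal, self-contained illustration of that pattern; `Scheme`, `Storage`, and `Resolver` are hypothetical stand-ins, not the crate's actual types, and real code parses the URL properly instead of splitting on `://`.

```rust
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

// Stand-in for the crate's scheme-family enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum Scheme {
    S3,
    Azdls,
}

fn parse_scheme(scheme: &str) -> Result<Scheme, String> {
    match scheme {
        // All S3-family aliases collapse to one variant, so they share
        // a single cache slot below.
        "s3" | "s3a" | "s3n" => Ok(Scheme::S3),
        "abfss" | "abfs" => Ok(Scheme::Azdls),
        other => Err(format!("unsupported scheme: {other}")),
    }
}

struct Storage; // placeholder for a real backend

struct Resolver {
    storages: RwLock<HashMap<Scheme, Arc<Storage>>>,
}

impl Resolver {
    fn resolve(&self, path: &str) -> Result<Arc<Storage>, String> {
        let scheme = parse_scheme(path.split("://").next().unwrap_or_default())?;

        // Fast path: reuse an already-built storage for this scheme family.
        if let Some(storage) = self.storages.read().unwrap().get(&scheme) {
            return Ok(storage.clone());
        }

        // Slow path: build once, then cache. `entry().or_insert_with` keeps
        // a concurrent racer from overwriting an existing entry.
        let mut guard = self.storages.write().unwrap();
        Ok(guard.entry(scheme).or_insert_with(|| Arc::new(Storage)).clone())
    }
}

fn main() {
    let resolver = Resolver { storages: RwLock::new(HashMap::new()) };
    let a = resolver.resolve("s3://bucket/key").unwrap();
    let b = resolver.resolve("s3a://bucket/key").unwrap();
    assert!(Arc::ptr_eq(&a, &b)); // aliases share one instance
    println!("s3 and s3a resolved to the same cached storage");
}
```

Taking the write lock only on a cache miss keeps the hot path on a shared read lock, which is the same trade-off the `storages: RwLock<HashMap<Scheme, Arc<OpenDalStorage>>>` field makes.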
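The `delete_stream` change applies the same normalization to a whole stream: drain it once, bucket each path under its scheme, and hand every backend a single batch instead of resolving per path. A rough sketch of just the grouping step, using the `futures` crate; `extract_scheme` here is a simplified stand-in that skips URL validation.

```rust
use std::collections::HashMap;

use futures::executor::block_on;
use futures::stream::{self, BoxStream, StreamExt};

// Simplified scheme extractor; the real code parses a full URL and maps
// aliases onto a scheme-family enum.
fn extract_scheme(path: &str) -> String {
    path.split("://").next().unwrap_or_default().to_string()
}

// One pass over the stream buckets every path under its scheme, so each
// backend can be resolved once and handed its whole batch.
async fn group_by_scheme(mut paths: BoxStream<'static, String>) -> HashMap<String, Vec<String>> {
    let mut grouped: HashMap<String, Vec<String>> = HashMap::new();
    while let Some(path) = paths.next().await {
        grouped.entry(extract_scheme(&path)).or_default().push(path);
    }
    grouped
}

fn main() {
    let paths = stream::iter(vec![
        "s3://bucket/a".to_string(),
        "s3://bucket/b".to_string(),
        "gs://bucket/c".to_string(),
    ])
    .boxed();

    let grouped = block_on(group_by_scheme(paths));
    assert_eq!(grouped["s3"].len(), 2);
    assert_eq!(grouped["gs"].len(), 1);
}
```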
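Finally, the s3.rs default boils down to a single rule: virtual-host-style addressing stays on unless the caller sets `s3.path-style-access` to a truthy value. Below is a sketch of that rule under the assumption that `is_truthy` accepts the usual boolean spellings; the real helper lives in `crates/storage/opendal/src/utils.rs` and may accept a different set.

```rust
use std::collections::HashMap;

// Assumed behavior of the crate's `is_truthy` helper.
fn is_truthy(v: &str) -> bool {
    matches!(v.to_ascii_lowercase().as_str(), "true" | "t" | "1" | "on")
}

/// Virtual-host-style is on unless path-style access is explicitly requested,
/// mirroring Java's `S3FileIOProperties.PATH_STYLE_ACCESS_DEFAULT = false`.
fn virtual_host_style(props: &HashMap<String, String>) -> bool {
    match props.get("s3.path-style-access") {
        Some(v) => !is_truthy(v), // explicit property wins
        None => true,             // spec default: path-style off
    }
}

fn main() {
    // The two addressing modes produce differently shaped request URLs:
    //   virtual-host-style: https://my-bucket.s3.amazonaws.com/key
    //   path-style:         https://s3.amazonaws.com/my-bucket/key
    let mut props = HashMap::new();
    assert!(virtual_host_style(&props)); // default

    props.insert("s3.path-style-access".to_string(), "true".to_string());
    assert!(!virtual_host_style(&props)); // MinIO-style endpoints often need this
}
```

This is also why the MinIO-backed integration tests in this patch now pass `S3_PATH_STYLE_ACCESS = "true"` explicitly: with the new default, a local endpoint would otherwise be addressed as a virtual host.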