diff --git a/.github/workflows/audit.yml b/.github/workflows/audit.yml index cae620baf46c..0b5c78405b10 100644 --- a/.github/workflows/audit.yml +++ b/.github/workflows/audit.yml @@ -42,7 +42,7 @@ jobs: steps: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 - name: Install cargo-audit - uses: taiki-e/install-action@ebb229c6baa68383264f2822689b07b4916d9177 # v2.62.36 + uses: taiki-e/install-action@44c6d64aa62cd779e873306675c7a58e86d6d532 # v2.62.49 with: tool: cargo-audit - name: Run audit check diff --git a/.github/workflows/extended.yml b/.github/workflows/extended.yml index 23bd66a0cf35..85e40731a959 100644 --- a/.github/workflows/extended.yml +++ b/.github/workflows/extended.yml @@ -44,6 +44,7 @@ on: - 'datafusion/physical*/**/*.rs' - 'datafusion/expr*/**/*.rs' - 'datafusion/optimizer/**/*.rs' + - 'datafusion-testing' workflow_dispatch: inputs: pr_number: @@ -123,7 +124,7 @@ jobs: --lib \ --tests \ --bins \ - --features avro,json,backtrace,extended_tests,recursive_protection + --features avro,json,backtrace,extended_tests,recursive_protection,parquet_encryption - name: Verify Working Directory Clean run: git diff --exit-code - name: Cleanup @@ -168,7 +169,7 @@ jobs: rust-version: stable - name: Run sqllogictest run: | - cargo test --features backtrace --profile release-nonlto --test sqllogictests -- --include-sqlite + cargo test --features backtrace,parquet_encryption --profile release-nonlto --test sqllogictests -- --include-sqlite cargo clean diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index e9606e15c4ec..9bdcade8eb2e 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -302,6 +302,15 @@ jobs: --features serde,avro,json,backtrace,integration-tests,parquet_encryption - name: Verify Working Directory Clean run: git diff --exit-code + # Check that no temporary directories are created during tests. + # The `false/` folder is excluded for the rust cache. + - name: Verify Working Directory Clean (No Untracked Files) + run: | + STATUS="$(git status --porcelain | sed -e '/^?? false\/$/d' -e '/^?? 
false$/d')" + if [ -n "$STATUS" ]; then + echo "$STATUS" + exit 1 + fi # datafusion-cli tests linux-test-datafusion-cli: @@ -425,7 +434,7 @@ jobs: sudo apt-get update -qq sudo apt-get install -y -qq clang - name: Setup wasm-pack - uses: taiki-e/install-action@ebb229c6baa68383264f2822689b07b4916d9177 # v2.62.36 + uses: taiki-e/install-action@44c6d64aa62cd779e873306675c7a58e86d6d532 # v2.62.49 with: tool: wasm-pack - name: Run tests with headless mode @@ -466,7 +475,7 @@ jobs: export RUST_MIN_STACK=20971520 export TPCH_DATA=`realpath datafusion/sqllogictest/test_files/tpch/data` cargo test plan_q --package datafusion-benchmarks --profile ci --features=ci -- --test-threads=1 - INCLUDE_TPCH=true cargo test --features backtrace --profile ci --package datafusion-sqllogictest --test sqllogictests + INCLUDE_TPCH=true cargo test --features backtrace,parquet_encryption --profile ci --package datafusion-sqllogictest --test sqllogictests - name: Verify Working Directory Clean run: git diff --exit-code @@ -752,7 +761,7 @@ jobs: - name: Setup Rust toolchain uses: ./.github/actions/setup-builder - name: Install cargo-msrv - uses: taiki-e/install-action@ebb229c6baa68383264f2822689b07b4916d9177 # v2.62.36 + uses: taiki-e/install-action@44c6d64aa62cd779e873306675c7a58e86d6d532 # v2.62.49 with: tool: cargo-msrv @@ -797,4 +806,4 @@ jobs: - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0 with: persist-credentials: false - - uses: crate-ci/typos@80c8a4945eec0f6d464eaf9e65ed98ef085283d1 # v1.38.1 + - uses: crate-ci/typos@07d900b8fa1097806b8adb6391b0d3e0ac2fdea7 # v1.39.0 diff --git a/Cargo.lock b/Cargo.lock index e368dcf9a91e..9cf4d96415c0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -83,9 +83,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] @@ -128,9 +128,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstream" -version = "0.6.20" +version = "0.6.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" dependencies = [ "anstyle", "anstyle-parse", @@ -143,9 +143,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.11" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "anstyle-parse" @@ -203,14 +203,23 @@ dependencies = [ "serde_bytes", "serde_json", "snap", - "strum 0.27.2", - "strum_macros 0.27.2", + "strum", + "strum_macros", "thiserror", "uuid", "xz2", "zstd", ] +[[package]] +name = "ar_archive_writer" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +dependencies = [ + "object", +] + [[package]] name = "arrayref" version = "0.3.9" @@ -225,9 +234,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" -version = "56.2.0" +version = "57.0.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" +checksum = "4df8bb5b0bd64c0b9bc61317fcc480bad0f00e56d3bc32c69a4c8dada4786bae" dependencies = [ "arrow-arith", "arrow-array", @@ -249,23 +258,23 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" +checksum = "a1a640186d3bd30a24cb42264c2dafb30e236a6f50d510e56d40b708c9582491" dependencies = [ "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", "chrono", - "num", + "num-traits", ] [[package]] name = "arrow-array" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +checksum = "219fe420e6800979744c8393b687afb0252b3f8a89b91027d27887b72aa36d31" dependencies = [ "ahash 0.8.12", "arrow-buffer", @@ -275,25 +284,28 @@ dependencies = [ "chrono-tz", "half", "hashbrown 0.16.0", - "num", + "num-complex", + "num-integer", + "num-traits", ] [[package]] name = "arrow-buffer" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" +checksum = "76885a2697a7edf6b59577f568b456afc94ce0e2edc15b784ce3685b6c3c5c27" dependencies = [ "bytes", "half", - "num", + "num-bigint", + "num-traits", ] [[package]] name = "arrow-cast" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" +checksum = "9c9ebb4c987e6b3b236fb4a14b20b34835abfdd80acead3ccf1f9bf399e1f168" dependencies = [ "arrow-array", "arrow-buffer", @@ -306,15 +318,15 @@ dependencies = [ "comfy-table", "half", "lexical-core", - "num", + "num-traits", "ryu", ] [[package]] name = "arrow-csv" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" +checksum = "92386159c8d4bce96f8bd396b0642a0d544d471bdc2ef34d631aec80db40a09c" dependencies = [ "arrow-array", "arrow-cast", @@ -327,21 +339,22 @@ dependencies = [ [[package]] name = "arrow-data" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" +checksum = "727681b95de313b600eddc2a37e736dcb21980a40f640314dcf360e2f36bc89b" dependencies = [ "arrow-buffer", "arrow-schema", "half", - "num", + "num-integer", + "num-traits", ] [[package]] name = "arrow-flight" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c8b0ba0784d56bc6266b79f5de7a24b47024e7b3a0045d2ad4df3d9b686099f" +checksum = "f70bb56412a007b0cfc116d15f24dda6adeed9611a213852a004cda20085a3b9" dependencies = [ "arrow-arith", "arrow-array", @@ -359,16 +372,17 @@ dependencies = [ "futures", "once_cell", "paste", - "prost 0.13.5", - "prost-types 0.13.5", + "prost", + "prost-types", "tonic", + "tonic-prost", ] [[package]] name = "arrow-ipc" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" +checksum = 
"da9ba92e3de170295c98a84e5af22e2b037f0c7b32449445e6c493b5fca27f27" dependencies = [ "arrow-array", "arrow-buffer", @@ -382,9 +396,9 @@ dependencies = [ [[package]] name = "arrow-json" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" +checksum = "b969b4a421ae83828591c6bf5450bd52e6d489584142845ad6a861f42fe35df8" dependencies = [ "arrow-array", "arrow-buffer", @@ -394,19 +408,21 @@ dependencies = [ "chrono", "half", "indexmap 2.12.0", + "itoa", "lexical-core", "memchr", - "num", - "serde", + "num-traits", + "ryu", + "serde_core", "serde_json", "simdutf8", ] [[package]] name = "arrow-ord" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" +checksum = "141c05298b21d03e88062317a1f1a73f5ba7b6eb041b350015b1cd6aabc0519b" dependencies = [ "arrow-array", "arrow-buffer", @@ -417,9 +433,9 @@ dependencies = [ [[package]] name = "arrow-pyarrow" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d924b32e96f8bb74d94cd82bd97b313c432fcb0ea331689ef9e7c6b8be4b258" +checksum = "cfcfb2be2e9096236f449c11f425cddde18c4cc540f516d90f066f10a29ed515" dependencies = [ "arrow-array", "arrow-data", @@ -429,9 +445,9 @@ dependencies = [ [[package]] name = "arrow-row" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" +checksum = "c5f3c06a6abad6164508ed283c7a02151515cef3de4b4ff2cebbcaeb85533db2" dependencies = [ "arrow-array", "arrow-buffer", @@ -442,34 +458,35 @@ dependencies = [ [[package]] name = "arrow-schema" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" +checksum = "9cfa7a03d1eee2a4d061476e1840ad5c9867a544ca6c4c59256496af5d0a8be5" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "serde", + "serde_core", "serde_json", ] [[package]] name = "arrow-select" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" +checksum = "bafa595babaad59f2455f4957d0f26448fb472722c186739f4fac0823a1bdb47" dependencies = [ "ahash 0.8.12", "arrow-array", "arrow-buffer", "arrow-data", "arrow-schema", - "num", + "num-traits", ] [[package]] name = "arrow-string" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" +checksum = "32f46457dbbb99f2650ff3ac23e46a929e0ab81db809b02aa5511c258348bef2" dependencies = [ "arrow-array", "arrow-buffer", @@ -477,7 +494,7 @@ dependencies = [ "arrow-schema", "arrow-select", "memchr", - "num", + "num-traits", "regex", "regex-syntax", ] @@ -494,6 +511,22 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "astral-tokio-tar" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec179a06c1769b1e42e1e2cbe74c7dcdb3d6383c838454d063eaac5bbb7ebbe5" +dependencies = [ + "filetime", + "futures-core", + "libc", + "portable-atomic", + "rustc-hash", + "tokio", + "tokio-stream", + "xattr", +] + [[package]] name = 
"async-compression" version = "0.4.19" @@ -528,7 +561,29 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", +] + +[[package]] +name = "async-stream" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.110", ] [[package]] @@ -539,7 +594,7 @@ checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -565,9 +620,9 @@ checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.8.7" +version = "1.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04b37ddf8d2e9744a0b9c19ce0b78efe4795339a90b66b7bae77987092cd2e69" +checksum = "1856b1b48b65f71a4dd940b1c0931f9a7b646d4a924b9828ffefc1454714668a" dependencies = [ "aws-credential-types", "aws-runtime", @@ -595,9 +650,9 @@ dependencies = [ [[package]] name = "aws-credential-types" -version = "1.2.7" +version = "1.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "799a1290207254984cb7c05245111bc77958b92a3c9bb449598044b36341cce6" +checksum = "86590e57ea40121d47d3f2e131bfd873dea15d78dc2f4604f4734537ad9e56c4" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -607,9 +662,9 @@ dependencies = [ [[package]] name = "aws-lc-rs" -version = "1.14.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94b8ff6c09cd57b16da53641caa860168b88c172a5ee163b0288d3d6eea12786" +checksum = "879b6c89592deb404ba4dc0ae6b58ffd1795c78991cbb5b8bc441c48a070440d" dependencies = [ "aws-lc-sys", "zeroize", @@ -617,9 +672,9 @@ dependencies = [ [[package]] name = "aws-lc-sys" -version = "0.31.0" +version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e44d16778acaf6a9ec9899b92cebd65580b83f685446bf2e1f5d3d732f99dcd" +checksum = "107a4e9d9cab9963e04e84bb8dee0e25f2a987f9a8bad5ed054abd439caa8f8c" dependencies = [ "bindgen", "cc", @@ -630,9 +685,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.11" +version = "1.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e1ed337dabcf765ad5f2fb426f13af22d576328aaf09eac8f70953530798ec0" +checksum = "8fe0fd441565b0b318c76e7206c8d1d0b0166b3e986cf30e890b61feb6192045" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -654,9 +709,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.85.0" +version = "1.89.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f2c741e2e439f07b5d1b33155e246742353d82167c785a2ff547275b7e32483" +checksum = "a9c1b1af02288f729e95b72bd17988c009aa72e26dcb59b3200f86d7aea726c9" dependencies = [ "aws-credential-types", "aws-runtime", @@ -676,9 +731,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.87.0" +version = "1.91.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6428ae5686b18c0ee99f6f3c39d94ae3f8b42894cdc35c35d8fb2470e9db2d4c" +checksum = "4e8122301558dc7c6c68e878af918880b82ff41897a60c8c4e18e4dc4d93e9f1" dependencies = [ "aws-credential-types", "aws-runtime", @@ -698,9 +753,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.87.0" +version = "1.91.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5871bec9a79a3e8d928c7788d654f135dde0e71d2dd98089388bab36b37ef607" +checksum = "8f8090151d4d1e971269957b10dbf287bba551ab812e591ce0516b1c73b75d27" dependencies = [ "aws-credential-types", "aws-runtime", @@ -721,9 +776,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.3.4" +version = "1.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "084c34162187d39e3740cb635acd73c4e3a551a36146ad6fe8883c929c9f876c" +checksum = "c35452ec3f001e1f2f6db107b6373f1f48f05ec63ba2c5c9fa91f07dad32af11" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -743,9 +798,9 @@ dependencies = [ [[package]] name = "aws-smithy-async" -version = "1.2.5" +version = "1.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e190749ea56f8c42bf15dd76c65e14f8f765233e6df9b0506d9d934ebef867c" +checksum = "127fcfad33b7dfc531141fda7e1c402ac65f88aca5511a4d31e2e3d2cd01ce9c" dependencies = [ "futures-util", "pin-project-lite", @@ -754,15 +809,16 @@ dependencies = [ [[package]] name = "aws-smithy-http" -version = "0.62.3" +version = "0.62.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c4dacf2d38996cf729f55e7a762b30918229917eca115de45dfa8dfb97796c9" +checksum = "445d5d720c99eed0b4aa674ed00d835d9b1427dd73e04adaf2f94c6b2d6f9fca" dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", "bytes-utils", "futures-core", + "futures-util", "http 0.2.12", "http 1.3.1", "http-body 0.4.6", @@ -774,9 +830,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.1.1" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147e8eea63a40315d704b97bf9bc9b8c1402ae94f89d5ad6f7550d963309da1b" +checksum = "623254723e8dfd535f566ee7b2381645f8981da086b5c4aa26c0c41582bb1d2c" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -798,27 +854,27 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.61.5" +version = "0.61.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaa31b350998e703e9826b2104dd6f63be0508666e1aba88137af060e8944047" +checksum = "2db31f727935fc63c6eeae8b37b438847639ec330a9161ece694efba257e0c54" dependencies = [ "aws-smithy-types", ] [[package]] name = "aws-smithy-observability" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9364d5989ac4dd918e5cc4c4bdcc61c9be17dcd2586ea7f69e348fc7c6cab393" +checksum = "2d1881b1ea6d313f9890710d65c158bdab6fb08c91ea825f74c1c8c357baf4cc" dependencies = [ "aws-smithy-runtime-api", ] [[package]] name = "aws-smithy-query" -version = "0.60.7" +version = "0.60.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" +checksum = "d28a63441360c477465f80c7abac3b9c4d075ca638f982e605b7dc2a2c7156c9" dependencies = [ "aws-smithy-types", "urlencoding", @@ -826,9 +882,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.9.2" +version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "4fa63ad37685ceb7762fa4d73d06f1d5493feb88e3f27259b9ed277f4c01b185" +checksum = "0bbe9d018d646b96c7be063dd07987849862b0e6d07c778aad7d93d1be6c1ef0" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -850,9 +906,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.9.0" +version = "1.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07f5e0fc8a6b3f2303f331b94504bbf754d85488f402d6f1dd7a6080f99afe56" +checksum = "ec7204f9fd94749a7c53b26da1b961b4ac36bf070ef1e0b94bb09f79d4f6c193" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -867,9 +923,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.3.2" +version = "1.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d498595448e43de7f4296b7b7a18a8a02c61ec9349128c80a368f7c3b4ab11a8" +checksum = "25f535879a207fce0db74b679cfc3e91a3159c8144d717d55f5832aea9eef46e" dependencies = [ "base64-simd", "bytes", @@ -890,18 +946,18 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.10" +version = "0.60.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3db87b96cb1b16c024980f133968d52882ca0daaee3a086c6decc500f6c99728" +checksum = "eab77cdd036b11056d2a30a7af7b775789fb024bf216acc13884c6c97752ae56" dependencies = [ "xmlparser", ] [[package]] name = "aws-types" -version = "1.3.8" +version = "1.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b069d19bf01e46298eaedd7c6f283fe565a59263e53eebec945f3e6398f42390" +checksum = "d79fb68e3d7fe5d4833ea34dc87d2e97d26d3086cb3da660bb6b1f76d98680b6" dependencies = [ "aws-credential-types", "aws-smithy-async", @@ -913,9 +969,9 @@ dependencies = [ [[package]] name = "axum" -version = "0.8.4" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "021e862c184ae977658b36c4500f7feac3221ca5da43e3f25bd04ab6c79a29b5" +checksum = "8a18ed336352031311f4e0b4dd2ff392d4fbb370777c9d18d7fc9d7359f73871" dependencies = [ "axum-core", "bytes", @@ -929,8 +985,7 @@ dependencies = [ "mime", "percent-encoding", "pin-project-lite", - "rustversion", - "serde", + "serde_core", "sync_wrapper", "tower", "tower-layer", @@ -939,9 +994,9 @@ dependencies = [ [[package]] name = "axum-core" -version = "0.5.2" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68464cd0412f486726fb3373129ef5d2993f90c34bc2bc1c1e9943b2f4fc7ca6" +checksum = "59446ce19cd142f8833f856eb31f3eb097812d1479ab224f54d72428ca21ea22" dependencies = [ "bytes", "futures-core", @@ -950,7 +1005,6 @@ dependencies = [ "http-body-util", "mime", "pin-project-lite", - "rustversion", "sync_wrapper", "tower-layer", "tower-service", @@ -980,9 +1034,9 @@ dependencies = [ [[package]] name = "bigdecimal" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +checksum = "560f42649de9fa436b73517378a147ec21f6c997a546581df4b4b31677828934" dependencies = [ "autocfg", "libm", @@ -998,7 +1052,7 @@ version = "0.72.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cexpr", "clang-sys", "itertools 0.13.0", @@ -1009,7 +1063,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -1020,9 +1074,9 @@ 
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.4" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2261d10cca569e4643e526d8dc2e62e433cc8aba21ab764233731f8d369bf394" +checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" [[package]] name = "bitvec" @@ -1069,13 +1123,17 @@ dependencies = [ [[package]] name = "bollard" -version = "0.18.1" +version = "0.19.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97ccca1260af6a459d75994ad5acc1651bcabcbdbc41467cc9786519ab854c30" +checksum = "87a52479c9237eb04047ddb94788c41ca0d26eaff8b697ecfbb4c32f7fdc3b1b" dependencies = [ + "async-stream", "base64 0.22.1", + "bitflags 2.10.0", + "bollard-buildkit-proto", "bollard-stubs", "bytes", + "chrono", "futures-core", "futures-util", "hex", @@ -1088,7 +1146,9 @@ dependencies = [ "hyper-util", "hyperlocal", "log", + "num", "pin-project-lite", + "rand 0.9.2", "rustls", "rustls-native-certs", "rustls-pemfile", @@ -1100,28 +1160,49 @@ dependencies = [ "serde_urlencoded", "thiserror", "tokio", + "tokio-stream", "tokio-util", + "tonic", "tower-service", "url", "winapi", ] +[[package]] +name = "bollard-buildkit-proto" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85a885520bf6249ab931a764ffdb87b0ceef48e6e7d807cfdb21b751e086e1ad" +dependencies = [ + "prost", + "prost-types", + "tonic", + "tonic-prost", + "ureq", +] + [[package]] name = "bollard-stubs" -version = "1.47.1-rc.27.3.1" +version = "1.49.1-rc.28.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f179cfbddb6e77a5472703d4b30436bff32929c0aa8a9008ecf23d1d3cdd0da" +checksum = "5731fe885755e92beff1950774068e0cae67ea6ec7587381536fca84f1779623" dependencies = [ + "base64 0.22.1", + "bollard-buildkit-proto", + "bytes", + "chrono", + "prost", "serde", + "serde_json", "serde_repr", "serde_with", ] [[package]] name = "bon" -version = "3.7.2" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2529c31017402be841eb45892278a6c21a000c0a17643af326c73a73f83f0fb" +checksum = "ebeb9aaf9329dff6ceb65c689ca3db33dbf15f324909c60e4e5eef5701ce31b1" dependencies = [ "bon-macros", "rustversion", @@ -1129,9 +1210,9 @@ dependencies = [ [[package]] name = "bon-macros" -version = "3.7.2" +version = "3.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82020dadcb845a345591863adb65d74fa8dc5c18a0b6d408470e13b7adc7005" +checksum = "77e9d642a7e3a318e37c2c9427b5a6a48aa1ad55dcd986f3034ab2239045a645" dependencies = [ "darling", "ident_case", @@ -1139,7 +1220,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -1162,7 +1243,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -1188,9 +1269,9 @@ dependencies = [ [[package]] name = "bstr" -version = "1.12.0" +version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4" +checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab" dependencies = [ "memchr", "serde", @@ -1282,9 +1363,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.2.38" +version = "1.2.45" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "80f41ae168f955c12fb8960b057d70d0ca153fb83182b57d86380443527be7e9" +checksum = "35900b6c8d709fb1d854671ae27aeaa9eec2f8b01b364e1619a40da3e6fe2afe" dependencies = [ "find-msvc-tools", "jobserver", @@ -1303,9 +1384,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "cfg_aliases" @@ -1324,7 +1405,7 @@ dependencies = [ "num-traits", "serde", "wasm-bindgen", - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -1388,9 +1469,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.48" +version = "4.5.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2134bb3ea021b78629caa971416385309e0131b351b25e01dc16fb54e1b5fae" +checksum = "4c26d721170e0295f191a69bd9a1f93efcdb0aff38684b61ab5750468972e5f5" dependencies = [ "clap_builder", "clap_derive", @@ -1398,9 +1479,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.48" +version = "4.5.51" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2ba64afa3c0a6df7fa517765e31314e983f51dda798ffba27b988194fb65dc9" +checksum = "75835f0c7bf681bfd05abe44e965760fea999a5286c6eb2d59883634fd02011a" dependencies = [ "anstream", "anstyle", @@ -1410,21 +1491,21 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.47" +version = "4.5.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbfd7eae0b0f1a6e63d4b13c9c478de77c2eb546fba158ad50b4203dc24b9f9c" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] name = "clap_lex" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = "clipboard-win" @@ -1452,13 +1533,12 @@ checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "comfy-table" -version = "7.1.2" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b" dependencies = [ - "strum 0.26.3", - "strum_macros 0.26.4", - "unicode-width 0.2.1", + "unicode-segmentation", + "unicode-width 0.2.2", ] [[package]] @@ -1482,8 +1562,8 @@ dependencies = [ "encode_unicode", "libc", "once_cell", - "unicode-width 0.2.1", - "windows-sys 0.61.0", + "unicode-width 0.2.2", + "windows-sys 0.61.2", ] [[package]] @@ -1582,26 +1662,23 @@ dependencies = [ [[package]] name = "criterion" -version = "0.5.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +checksum = "e1c047a62b0cc3e145fa84415a3191f628e980b194c2755aa12300a4e6cbd928" dependencies = [ "anes", "cast", "ciborium", - "clap 4.5.48", + "clap 4.5.51", "criterion-plot", "futures", - "is-terminal", - "itertools 0.10.5", + "itertools 0.13.0", "num-traits", - "once_cell", "oorandom", 
"plotters", "rayon", "regex", "serde", - "serde_derive", "serde_json", "tinytemplate", "tokio", @@ -1610,12 +1687,12 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +checksum = "9b1bcc0dc7dfae599d84ad0b1a55f80cde8af3725da8313b528da95ef783e338" dependencies = [ "cast", - "itertools 0.10.5", + "itertools 0.13.0", ] [[package]] @@ -1670,30 +1747,30 @@ dependencies = [ [[package]] name = "csv" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" +checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938" dependencies = [ "csv-core", "itoa", "ryu", - "serde", + "serde_core", ] [[package]] name = "csv-core" -version = "0.1.12" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d" +checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782" dependencies = [ "memchr", ] [[package]] name = "ctor" -version = "0.4.3" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec09e802f5081de6157da9a75701d6c713d8dc3ba52571fd4bd25f412644e8a6" +checksum = "3ffc71fcdcdb40d6f087edddf7f8f1f8f79e6cf922f555a9ee8779752d4819bd" dependencies = [ "ctor-proc-macro", "dtor", @@ -1701,9 +1778,9 @@ dependencies = [ [[package]] name = "ctor-proc-macro" -version = "0.0.6" +version = "0.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2931af7e13dc045d8e9d26afccc6fa115d64e115c9c84b1166288b46f6782c2" +checksum = "52560adf09603e58c9a7ee1fe1dcb95a16927b17c127f0ac02d6e768a0e25bc1" [[package]] name = "cty" @@ -1732,7 +1809,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -1743,7 +1820,7 @@ checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -1762,7 +1839,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-schema", @@ -1834,7 +1911,7 @@ dependencies = [ [[package]] name = "datafusion-benchmarks" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion", @@ -1859,7 +1936,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -1882,7 +1959,7 @@ dependencies = [ [[package]] name = "datafusion-catalog-listing" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -1900,19 +1977,18 @@ dependencies = [ "itertools 0.14.0", "log", "object_store", - "tokio", ] [[package]] name = "datafusion-cli" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", "aws-config", "aws-credential-types", "chrono", - "clap 4.5.48", + "clap 4.5.51", "ctor", "datafusion", "datafusion-common", @@ -1937,7 +2013,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "apache-avro", @@ -1964,7 +2040,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" -version = "50.3.0" +version = "51.0.0" dependencies = [ "futures", "log", @@ 
-1973,7 +2049,7 @@ dependencies = [ [[package]] name = "datafusion-datasource" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-compression", @@ -2008,7 +2084,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-arrow" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-ipc", @@ -2031,7 +2107,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-avro" -version = "50.3.0" +version = "51.0.0" dependencies = [ "apache-avro", "arrow", @@ -2050,7 +2126,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-csv" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2071,7 +2147,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-json" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2091,7 +2167,7 @@ dependencies = [ [[package]] name = "datafusion-datasource-parquet" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2120,11 +2196,11 @@ dependencies = [ [[package]] name = "datafusion-doc" -version = "50.3.0" +version = "51.0.0" [[package]] name = "datafusion-examples" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-flight", @@ -2143,7 +2219,7 @@ dependencies = [ "mimalloc", "nix", "object_store", - "prost 0.13.5", + "prost", "rand 0.9.2", "serde_json", "tempfile", @@ -2158,7 +2234,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2179,7 +2255,7 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2203,7 +2279,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2214,7 +2290,7 @@ dependencies = [ [[package]] name = "datafusion-ffi" -version = "50.3.0" +version = "51.0.0" dependencies = [ "abi_stable", "arrow", @@ -2229,14 +2305,14 @@ dependencies = [ "doc-comment", "futures", "log", - "prost 0.13.5", + "prost", "semver", "tokio", ] [[package]] name = "datafusion-functions" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-buffer", @@ -2257,6 +2333,7 @@ dependencies = [ "itertools 0.14.0", "log", "md-5", + "num-traits", "rand 0.9.2", "regex", "sha2", @@ -2267,7 +2344,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2288,7 +2365,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2301,7 +2378,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "arrow-ord", @@ -2324,7 +2401,7 @@ dependencies = [ [[package]] name = "datafusion-functions-table" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2338,7 +2415,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2354,7 +2431,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "datafusion-common", "datafusion-physical-expr-common", @@ -2362,16 +2439,16 @@ dependencies = [ [[package]] name = "datafusion-macros" 
-version = "50.3.0" +version = "51.0.0" dependencies = [ "datafusion-doc", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] name = "datafusion-optimizer" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", @@ -2398,7 +2475,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2423,7 +2500,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-adapter" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2436,7 +2513,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2448,7 +2525,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2468,7 +2545,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "50.3.0" +version = "51.0.0" dependencies = [ "ahash 0.8.12", "arrow", @@ -2504,7 +2581,7 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "chrono", @@ -2532,7 +2609,7 @@ dependencies = [ "object_store", "pbjson", "pretty_assertions", - "prost 0.13.5", + "prost", "serde", "serde_json", "tokio", @@ -2540,19 +2617,19 @@ dependencies = [ [[package]] name = "datafusion-proto-common" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", "doc-comment", "pbjson", - "prost 0.13.5", + "prost", "serde", ] [[package]] name = "datafusion-pruning" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "datafusion-common", @@ -2570,7 +2647,7 @@ dependencies = [ [[package]] name = "datafusion-session" -version = "50.3.0" +version = "51.0.0" dependencies = [ "async-trait", "datafusion-common", @@ -2582,7 +2659,7 @@ dependencies = [ [[package]] name = "datafusion-spark" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2602,7 +2679,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "bigdecimal", @@ -2628,14 +2705,14 @@ dependencies = [ [[package]] name = "datafusion-sqllogictest" -version = "50.3.0" +version = "51.0.0" dependencies = [ "arrow", "async-trait", "bigdecimal", "bytes", "chrono", - "clap 4.5.48", + "clap 4.5.51", "datafusion", "datafusion-spark", "datafusion-substrait", @@ -2662,7 +2739,7 @@ dependencies = [ [[package]] name = "datafusion-substrait" -version = "50.3.0" +version = "51.0.0" dependencies = [ "async-recursion", "async-trait", @@ -2674,7 +2751,7 @@ dependencies = [ "itertools 0.14.0", "object_store", "pbjson-types", - "prost 0.13.5", + "prost", "serde_json", "substrait", "tokio", @@ -2684,7 +2761,7 @@ dependencies = [ [[package]] name = "datafusion-wasmtest" -version = "50.3.0" +version = "51.0.0" dependencies = [ "chrono", "console_error_panic_hook", @@ -2705,12 +2782,12 @@ dependencies = [ [[package]] name = "deranged" -version = "0.5.3" +version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d630bccd429a5bb5a64b5e94f693bfc48c9f8566418fda4c494cc94f911f87cc" +checksum = "ececcb659e7ba858fb4f10388c250a7252eb0a27373f1a72b8748afdd248e587" dependencies = [ "powerfmt", - "serde", + "serde_core", ] [[package]] @@ -2748,7 +2825,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.61.0", 
+ "windows-sys 0.61.2", ] [[package]] @@ -2759,14 +2836,14 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] name = "doc-comment" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +checksum = "780955b8b195a21ab8e4ac6b60dd1dbdcec1dc6c51c0617964b08c81785e12c9" [[package]] name = "docker_credential" @@ -2781,18 +2858,18 @@ dependencies = [ [[package]] name = "dtor" -version = "0.0.6" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97cbdf2ad6846025e8e25df05171abfb30e3ababa12ee0a0e44b9bbe570633a8" +checksum = "404d02eeb088a82cfd873006cb713fe411306c7d182c344905e101fb1167d301" dependencies = [ "dtor-proc-macro", ] [[package]] name = "dtor-proc-macro" -version = "0.0.5" +version = "0.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7454e41ff9012c00d53cf7f475c5e3afa3b91b7c90568495495e8d9bf47a1055" +checksum = "f678cf4a922c215c63e0de95eb1ff08a958a81d47e485cf9da1e27bf6305cfa5" [[package]] name = "dunce" @@ -2815,7 +2892,7 @@ dependencies = [ "enum-ordinalize", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -2838,29 +2915,29 @@ checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" [[package]] name = "enum-ordinalize" -version = "4.3.0" +version = "4.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fea0dcfa4e54eeb516fe454635a95753ddd39acda650ce703031c6973e315dd5" +checksum = "4a1091a7bb1f8f2c4b28f1fe2cef4980ca2d410a3d727d67ecc3178c9b0800f0" dependencies = [ "enum-ordinalize-derive", ] [[package]] name = "enum-ordinalize-derive" -version = "4.3.1" +version = "4.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff" +checksum = "8ca9601fb2d62598ee17836250842873a413586e5d7ed88b356e38ddbb0ec631" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] name = "env_filter" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" +checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" dependencies = [ "log", "regex", @@ -2892,7 +2969,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.61.0", + "windows-sys 0.61.2", ] [[package]] @@ -2985,9 +3062,9 @@ dependencies = [ [[package]] name = "find-msvc-tools" -version = "0.1.2" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ced73b1dacfc750a6db6c0a0c3a3853c8b41997e2e2c563dc90804ae6867959" +checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" [[package]] name = "fixedbitset" @@ -2997,19 +3074,19 @@ checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" [[package]] name = "flatbuffers" -version = "25.2.10" +version = "25.9.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" +checksum = "09b6620799e7340ebd9968d2e0708eb82cf1971e9a16821e2091b6d6e475eed5" 
dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "rustc_version", ] [[package]] name = "flate2" -version = "1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" dependencies = [ "crc32fast", "libz-rs-sys", @@ -3028,6 +3105,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -3039,9 +3122,9 @@ dependencies = [ [[package]] name = "fs-err" -version = "3.1.2" +version = "3.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44f150ffc8782f35521cec2b23727707cb4045706ba3c854e86bef66b3a8cdbd" +checksum = "6ad492b2cf1d89d568a43508ab24f98501fe03f2f31c01e1d0fe7366a71745d2" dependencies = [ "autocfg", ] @@ -3114,7 +3197,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -3157,16 +3240,16 @@ dependencies = [ name = "gen" version = "0.1.0" dependencies = [ - "pbjson-build 0.8.0", - "prost-build 0.14.1", + "pbjson-build", + "prost-build", ] [[package]] name = "gen-common" version = "0.1.0" dependencies = [ - "pbjson-build 0.8.0", - "prost-build 0.14.1", + "pbjson-build", + "prost-build", ] [[package]] @@ -3180,9 +3263,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.7" +version = "0.14.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" dependencies = [ "typenum", "version_check", @@ -3223,9 +3306,9 @@ checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" [[package]] name = "globset" -version = "0.4.16" +version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54a1028dfc5f5df5da8a56a73e6c153c9a9708ec57232470703592a3f18e49f5" +checksum = "52dfc19153a48bde0cbd630453615c8151bce3a5adfac7a0aebfbf0a1e1f57e3" dependencies = [ "aho-corasick", "bstr", @@ -3255,9 +3338,9 @@ dependencies = [ [[package]] name = "half" -version = "2.7.0" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e54c115d4f30f52c67202f079c5f9d8b49db4691f460fdb0b4c2e838261b2ba5" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "cfg-if", "crunchy", @@ -3290,9 +3373,7 @@ version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ - "allocator-api2", - "equivalent", - "foldhash", + "foldhash 0.1.5", ] [[package]] @@ -3300,6 +3381,11 @@ name = "hashbrown" version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "heck" @@ -3316,12 +3402,6 @@ version = "0.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" - [[package]] name = "hex" version = "0.4.3" @@ -3339,11 +3419,11 @@ dependencies = [ [[package]] name = "home" -version = "0.5.11" +version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" +checksum = "cc627f471c528ff0c4a49e1d5e60450c8f6461dd6d10ba9dcd3a61d3dff7728d" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -3506,7 +3586,7 @@ dependencies = [ "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.0", + "socket2", "tokio", "tower-service", "tracing", @@ -3539,7 +3619,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.62.0", + "windows-core 0.62.2", ] [[package]] @@ -3553,9 +3633,9 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" dependencies = [ "displaydoc", "potential_utf", @@ -3566,9 +3646,9 @@ dependencies = [ [[package]] name = "icu_locale_core" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" dependencies = [ "displaydoc", "litemap", @@ -3579,11 +3659,10 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" dependencies = [ - "displaydoc", "icu_collections", "icu_normalizer_data", "icu_properties", @@ -3594,42 +3673,38 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" +checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" [[package]] name = "icu_properties" -version = "2.0.1" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99" dependencies = [ - "displaydoc", "icu_collections", "icu_locale_core", "icu_properties_data", "icu_provider", - "potential_utf", "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "2.0.1" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" +checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899" [[package]] name = "icu_provider" -version = "2.0.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" dependencies = [ "displaydoc", "icu_locale_core", - "stable_deref_trait", - "tinystr", "writeable", "yoke", "zerofrom", @@ -3689,22 +3764,25 @@ dependencies = [ [[package]] name = "indicatif" -version = "0.18.0" +version = "0.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70a646d946d06bedbbc4cac4c218acf4bbf2d87757a784857025f4d447e4e1cd" +checksum = "ade6dfcba0dfb62ad59e59e7241ec8912af34fd29e0e743e3db992bd278e8b65" dependencies = [ "console 0.16.1", "portable-atomic", - "unicode-width 0.2.1", + "unicode-width 0.2.2", "unit-prefix", "web-time", ] [[package]] name = "indoc" -version = "2.0.6" +version = "2.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] [[package]] name = "insta" @@ -3746,39 +3824,19 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" dependencies = [ "memchr", "serde", ] -[[package]] -name = "is-terminal" -version = "0.4.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys 0.59.0", -] - [[package]] name = "is_terminal_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" - -[[package]] -name = "itertools" -version = "0.10.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" -dependencies = [ - "either", -] +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "itertools" @@ -3806,26 +3864,26 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jiff" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" +checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" dependencies = [ "jiff-static", "log", "portable-atomic", "portable-atomic-util", - "serde", + "serde_core", ] [[package]] name = "jiff-static" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" +checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -3840,9 +3898,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.81" +version = "0.3.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" +checksum = 
"b011eec8cc36da2aab2d5cff675ec18454fad408585853910a202391cf9f8e65" dependencies = [ "once_cell", "wasm-bindgen", @@ -3919,9 +3977,9 @@ checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" [[package]] name = "libc" -version = "0.2.176" +version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "libloading" @@ -3940,7 +3998,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" dependencies = [ "cfg-if", - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -3966,9 +4024,9 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "libc", - "redox_syscall 0.5.17", + "redox_syscall", ] [[package]] @@ -3979,7 +4037,7 @@ checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33" dependencies = [ "anstream", "anstyle", - "clap 4.5.48", + "clap 4.5.51", "escape8259", ] @@ -4000,17 +4058,16 @@ checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" [[package]] name = "litemap" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" +checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" [[package]] name = "lock_api" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", ] @@ -4064,9 +4121,9 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.5" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "memoffset" @@ -4120,13 +4177,13 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.4" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" +checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" dependencies = [ "libc", "wasi", - "windows-sys 0.59.0", + "windows-sys 0.61.2", ] [[package]] @@ -4150,7 +4207,7 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cfg-if", "cfg_aliases", "libc", @@ -4177,11 +4234,11 @@ dependencies = [ [[package]] name = "nu-ansi-term" -version = "0.50.1" +version = "0.50.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.61.2", ] [[package]] @@ -4267,23 +4324,32 @@ dependencies = [ [[package]] name = 
"objc2-core-foundation" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c10c2894a6fed806ade6027bcd50662746363a9589d3ec9d9bef30a4e4bc166" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", ] [[package]] name = "objc2-io-kit" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71c1c64d6120e51cd86033f67176b1cb66780c2efe34dec55176f77befd93c0a" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" dependencies = [ "libc", "objc2-core-foundation", ] +[[package]] +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +dependencies = [ + "memchr", +] + [[package]] name = "object_store" version = "0.12.4" @@ -4329,9 +4395,9 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "once_cell_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "oorandom" @@ -4368,15 +4434,15 @@ checksum = "1a80800c0488c3a21695ea981a54918fbb37abf04f4d0720c453632255e2ff0e" [[package]] name = "owo-colors" -version = "4.2.2" +version = "4.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48dd4f4a2c8405440fd0462561f0e5806bd0f77e86f51c761481bdd4018b545e" +checksum = "9c6901729fa79e91a0913333229e9ca5dc725089d1c363b2f4b4760709dc4a52" [[package]] name = "parking_lot" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", "parking_lot_core", @@ -4384,22 +4450,22 @@ dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.11" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.17", + "redox_syscall", "smallvec", - "windows-targets 0.52.6", + "windows-link 0.2.1", ] [[package]] name = "parquet" -version = "56.2.0" +version = "57.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" +checksum = "7a0f31027ef1af7549f7cec603a9a21dce706d3f8d7c2060a68f43c1773be95a" dependencies = [ "ahash 0.8.12", "arrow-array", @@ -4418,8 +4484,9 @@ dependencies = [ "half", "hashbrown 0.16.0", "lz4_flex", - "num", "num-bigint", + "num-integer", + "num-traits", "object_store", "paste", "ring", @@ -4454,7 +4521,7 @@ dependencies = [ "regex", "regex-syntax", "structmeta", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -4465,26 +4532,14 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" [[package]] name = "pbjson" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"c7e6349fa080353f4a597daffd05cb81572a9c031a6d4fff7e504947496fcc68" +checksum = "898bac3fa00d0ba57a4e8289837e965baa2dee8c3749f3b11d45a64b4223d9c3" dependencies = [ - "base64 0.21.7", + "base64 0.22.1", "serde", ] -[[package]] -name = "pbjson-build" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eea3058763d6e656105d1403cb04e0a41b7bbac6362d413e7c33be0c32279c9" -dependencies = [ - "heck 0.5.0", - "itertools 0.13.0", - "prost 0.13.5", - "prost-types 0.13.5", -] - [[package]] name = "pbjson-build" version = "0.8.0" @@ -4493,22 +4548,22 @@ checksum = "af22d08a625a2213a78dbb0ffa253318c5c79ce3133d32d296655a7bdfb02095" dependencies = [ "heck 0.5.0", "itertools 0.14.0", - "prost 0.14.1", - "prost-types 0.14.1", + "prost", + "prost-types", ] [[package]] name = "pbjson-types" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e54e5e7bfb1652f95bc361d76f3c780d8e526b134b85417e774166ee941f0887" +checksum = "8e748e28374f10a330ee3bb9f29b828c0ac79831a32bab65015ad9b661ead526" dependencies = [ "bytes", "chrono", "pbjson", - "pbjson-build 0.7.0", - "prost 0.13.5", - "prost-build 0.13.5", + "pbjson-build", + "prost", + "prost-build", "serde", ] @@ -4594,7 +4649,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -4667,7 +4722,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -4690,9 +4745,9 @@ dependencies = [ [[package]] name = "postgres-types" -version = "0.2.10" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a120daaabfcb0e324d5bf6e411e9222994cb3795c79943a0ef28ed27ea76e4" +checksum = "ef4605b7c057056dd35baeb6ac0c0338e4975b1f2bef0f65da953285eb007095" dependencies = [ "bytes", "chrono", @@ -4703,9 +4758,9 @@ dependencies = [ [[package]] name = "potential_utf" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a" +checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" dependencies = [ "zerovec", ] @@ -4742,7 +4797,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" dependencies = [ "proc-macro2", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -4780,23 +4835,13 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.103" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "5ee95bc4ef87b8d5ba32e8b7714ccc834865276eab0aed5c9958d00ec45f49e8" dependencies = [ "unicode-ident", ] -[[package]] -name = "prost" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" -dependencies = [ - "bytes", - "prost-derive 0.13.5", -] - [[package]] name = "prost" version = "0.14.1" @@ -4804,27 +4849,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7231bd9b3d3d33c86b58adbac74b5ec0ad9f496b19d22801d773636feaa95f3d" dependencies = [ "bytes", - "prost-derive 0.14.1", -] - -[[package]] -name = "prost-build" -version = "0.13.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" -dependencies = [ - "heck 0.5.0", - "itertools 0.14.0", - "log", - "multimap", - "once_cell", - "petgraph 0.7.1", - "prettyplease", - "prost 0.13.5", - "prost-types 0.13.5", - "regex", - "syn 2.0.106", - "tempfile", + "prost-derive", ] [[package]] @@ -4840,26 +4865,13 @@ dependencies = [ "once_cell", "petgraph 0.7.1", "prettyplease", - "prost 0.14.1", - "prost-types 0.14.1", + "prost", + "prost-types", "regex", - "syn 2.0.106", + "syn 2.0.110", "tempfile", ] -[[package]] -name = "prost-derive" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" -dependencies = [ - "anyhow", - "itertools 0.14.0", - "proc-macro2", - "quote", - "syn 2.0.106", -] - [[package]] name = "prost-derive" version = "0.14.1" @@ -4870,16 +4882,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.106", -] - -[[package]] -name = "prost-types" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" -dependencies = [ - "prost 0.13.5", + "syn 2.0.110", ] [[package]] @@ -4888,7 +4891,7 @@ version = "0.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9b4db3d6da204ed77bb26ba83b6122a73aeb2e87e25fbf7ad2e84c4ccbf8f72" dependencies = [ - "prost 0.14.1", + "prost", ] [[package]] @@ -4902,10 +4905,11 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.26" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f" +checksum = "d11f2fedc3b7dafdc2851bc52f277377c5473d378859be234bc7ebb593144d01" dependencies = [ + "ar_archive_writer", "cc", ] @@ -4931,9 +4935,9 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8970a78afe0628a3e3430376fc5fd76b6b45c4d43360ffd6cdd40bdde72b682a" +checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383" dependencies = [ "indoc", "libc", @@ -4948,19 +4952,18 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458eb0c55e7ece017adeba38f2248ff3ac615e53660d7c71a238d7d2a01c7598" +checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f" dependencies = [ - "once_cell", "target-lexicon", ] [[package]] name = "pyo3-ffi" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7114fe5457c61b276ab77c5055f206295b812608083644a5c5b2640c3102565c" +checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105" dependencies = [ "libc", "pyo3-build-config", @@ -4968,27 +4971,27 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.25.1" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8725c0a622b374d6cb051d11a0983786448f7785336139c3c94f5aa6bef7e50" +checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded" dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] name = "pyo3-macros-backend" -version = "0.25.1" +version = 
"0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4109984c22491085343c05b0dbc54ddc405c3cf7b4374fc533f5c3313a572ccc" +checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf" dependencies = [ "heck 0.5.0", "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -5020,7 +5023,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", - "socket2 0.6.0", + "socket2", "thiserror", "tokio", "tracing", @@ -5057,16 +5060,16 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.0", + "socket2", "tracing", "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.41" +version = "1.0.42" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "a338cc41d27e6cc6dce6cefc13a0729dfbb81c262b1f519331575dd80ef3067f" dependencies = [ "proc-macro2", ] @@ -5199,25 +5202,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] name = "redox_syscall" -version = "0.3.5" +version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 1.3.2", -] - -[[package]] -name = "redox_syscall" -version = "0.5.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" -dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", ] [[package]] @@ -5233,29 +5227,29 @@ dependencies = [ [[package]] name = "ref-cast" -version = "1.0.24" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" dependencies = [ "ref-cast-impl", ] [[package]] name = "ref-cast-impl" -version = "1.0.24" +version = "1.0.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] name = "regex" -version = "1.11.3" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b5288124840bee7b386bc413c487869b360b2b4ec421ea56425128692f2a82c" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", @@ -5265,9 +5259,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.11" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "833eb9ce86d40ef33cb1306d8accf7bc8ec2bfea4355cbdebb3df68b40925cad" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", @@ -5276,23 +5270,23 @@ dependencies = [ [[package]] name = "regex-lite" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "943f41321c63ef1c92fd763bfe054d2668f7f225a5c29f0105903dc2fc04ba30" +checksum = 
"8d942b98df5e658f56f20d592c7f868833fe38115e65c33003d8cd224b0155da" [[package]] name = "regex-syntax" -version = "0.8.6" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "regress" -version = "0.10.4" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "145bb27393fe455dd64d6cbc8d059adfa392590a45eadf079c01b11857e7b010" +checksum = "2057b2325e68a893284d1538021ab90279adac1139957ca2a74426c6f118fb48" dependencies = [ - "hashbrown 0.15.5", + "hashbrown 0.16.0", "memchr", ] @@ -5322,9 +5316,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.23" +version = "0.12.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb" +checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" dependencies = [ "base64 0.22.1", "bytes", @@ -5407,21 +5401,20 @@ dependencies = [ [[package]] name = "rstest" -version = "0.25.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fc39292f8613e913f7df8fa892b8944ceb47c247b78e1b1ae2f09e019be789d" +checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49" dependencies = [ "futures-timer", "futures-util", "rstest_macros", - "rustc_version", ] [[package]] name = "rstest_macros" -version = "0.25.0" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f168d99749d307be9de54d23fd226628d99768225ef08f6ffb52e0182a27746" +checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0" dependencies = [ "cfg-if", "glob", @@ -5431,7 +5424,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.106", + "syn 2.0.110", "unicode-ident", ] @@ -5443,14 +5436,14 @@ checksum = "b3a8fb4672e840a587a66fc577a5491375df51ddb88f2a2c2a792598c326fe14" dependencies = [ "quote", "rand 0.8.5", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] name = "rust_decimal" -version = "1.38.0" +version = "1.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8975fc98059f365204d635119cf9c5a60ae67b841ed49b5422a9a7e56cdfac0" +checksum = "35affe401787a9bd846712274d97654355d21b2a2c092a3139aabe31e9022282" dependencies = [ "arrayvec", "borsh", @@ -5484,20 +5477,21 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "errno", "libc", "linux-raw-sys", - "windows-sys 0.61.0", + "windows-sys 0.61.2", ] [[package]] name = "rustls" -version = "0.23.32" +version = "0.23.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd3c25631629d034ce7cd9940adc9d45762d46de2b0f57193c4443b92c6d4d40" +checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ "aws-lc-rs", + "log", "once_cell", "ring", "rustls-pki-types", @@ -5508,9 +5502,9 @@ dependencies = [ [[package]] name = "rustls-native-certs" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3" +checksum = "9980d917ebb0c0536119ba501e90834767bffc3d60641457fd84a1f3fd337923" 
dependencies = [ "openssl-probe", "rustls-pki-types", @@ -5529,9 +5523,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.12.0" +version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" +checksum = "94182ad936a0c91c324cd46c6511b9510ed16af436d7b5bab34beab0afd55f7a" dependencies = [ "web-time", "zeroize", @@ -5539,9 +5533,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.6" +version = "0.103.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8572f3c2cb9934231157b45499fc41e1f58c589fdfb81a844ba873265e80f8eb" +checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" dependencies = [ "aws-lc-rs", "ring", @@ -5561,7 +5555,7 @@ version = "17.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e902948a25149d50edc1a8e0141aad50f54e22ba83ff988cf8f7c9ef07f50564" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "cfg-if", "clipboard-win", "fd-lock", @@ -5572,7 +5566,7 @@ dependencies = [ "nix", "radix_trie", "unicode-segmentation", - "unicode-width 0.2.1", + "unicode-width 0.2.2", "utf8parse", "windows-sys 0.60.2", ] @@ -5598,7 +5592,7 @@ version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "891d81b926048e76efe18581bf793546b4c0eaf8448d72be8de2bbee5fd166e1" dependencies = [ - "windows-sys 0.61.0", + "windows-sys 0.61.2", ] [[package]] @@ -5627,9 +5621,9 @@ dependencies = [ [[package]] name = "schemars" -version = "1.0.4" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" +checksum = "9558e172d4e8533736ba97870c4b2cd63f84b382a3d6eb063da41b91cce17289" dependencies = [ "dyn-clone", "ref-cast", @@ -5646,7 +5640,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -5663,11 +5657,11 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "security-framework" -version = "3.5.0" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc198e42d9b7510827939c9a15f5062a0c913f3371d765977e586d2fe6c16f4a" +checksum = "b3297343eaf830f66ede390ea39da1d462b6b0c1b000f420d0a83f898bbbe6ef" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", "core-foundation", "core-foundation-sys", "libc", @@ -5737,7 +5731,7 @@ checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -5748,7 +5742,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -5772,7 +5766,7 @@ checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -5784,7 +5778,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -5801,9 +5795,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.14.1" +version = "3.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c522100790450cf78eeac1507263d0a350d4d5b30df0c8e1fe051a10c22b376e" +checksum = 
"aa66c845eee442168b2c8134fec70ac50dc20e760769c8ba0ad1319ca1959b04" dependencies = [ "base64 0.22.1", "chrono", @@ -5811,9 +5805,8 @@ dependencies = [ "indexmap 1.9.3", "indexmap 2.12.0", "schemars 0.9.0", - "schemars 1.0.4", - "serde", - "serde_derive", + "schemars 1.1.0", + "serde_core", "serde_json", "serde_with_macros", "time", @@ -5821,14 +5814,14 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.14.1" +version = "3.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327ada00f7d64abaac1e55a6911e90cf665aa051b9a561c7006c157f4633135e" +checksum = "b91a903660542fced4e99881aa481bdbaec1634568ee02e0b8bd57c64cb38955" dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -5952,22 +5945,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "socket2" -version = "0.6.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233504af464074f9d066d7b5416c5f9b894a5862a6506e306f7b816cdd6f1807" +checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -6014,20 +5997,20 @@ checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] name = "stable_deref_trait" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "stacker" -version = "0.1.21" +version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b" +checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" dependencies = [ "cc", "cfg-if", @@ -6062,7 +6045,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -6073,7 +6056,7 @@ checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -6100,31 +6083,12 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" - [[package]] name = "strum" version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck 0.5.0", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.106", -] - [[package]] name = "strum_macros" version = "0.27.2" @@ -6134,7 +6098,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -6149,18 +6113,18 @@ dependencies = [ [[package]] name 
= "substrait" -version = "0.58.0" +version = "0.62.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de6d24c270c6c672a86c183c3a8439ba46c1936f93cf7296aa692de3b0ff0228" +checksum = "21f1cb6d0bcd097a39fc25f7236236be29881fe122e282e4173d6d007a929927" dependencies = [ "heck 0.5.0", "pbjson", - "pbjson-build 0.7.0", + "pbjson-build", "pbjson-types", "prettyplease", - "prost 0.13.5", - "prost-build 0.13.5", - "prost-types 0.13.5", + "prost", + "prost-build", + "prost-types", "protobuf-src", "regress", "schemars 0.8.22", @@ -6168,7 +6132,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.106", + "syn 2.0.110", "typify", "walkdir", ] @@ -6192,9 +6156,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.106" +version = "2.0.110" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "a99801b5bd34ede4cf3fc688c5919368fea4e4814a4664359503e6015b280aea" dependencies = [ "proc-macro2", "quote", @@ -6218,7 +6182,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -6257,7 +6221,7 @@ dependencies = [ "getrandom 0.3.4", "once_cell", "rustix", - "windows-sys 0.61.0", + "windows-sys 0.61.2", ] [[package]] @@ -6273,13 +6237,13 @@ dependencies = [ [[package]] name = "testcontainers" -version = "0.24.0" +version = "0.25.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23bb7577dca13ad86a78e8271ef5d322f37229ec83b8d98da6d996c588a1ddb1" +checksum = "3f3ac71069f20ecfa60c396316c283fbf35e6833a53dff551a31b5458da05edc" dependencies = [ + "astral-tokio-tar", "async-trait", "bollard", - "bollard-stubs", "bytes", "docker_credential", "either", @@ -6295,16 +6259,16 @@ dependencies = [ "thiserror", "tokio", "tokio-stream", - "tokio-tar", "tokio-util", + "ulid", "url", ] [[package]] name = "testcontainers-modules" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac95cde96549fc19c6bf19ef34cc42bd56e264c1cb97e700e21555be0ecf9e2" +checksum = "1966329d5bb3f89d33602d2db2da971fb839f9297dad16527abf4564e2ae0a6d" dependencies = [ "testcontainers", ] @@ -6335,7 +6299,7 @@ checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -6400,9 +6364,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" dependencies = [ "displaydoc", "zerovec", @@ -6445,9 +6409,9 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.0", + "socket2", "tokio-macros", - "windows-sys 0.61.0", + "windows-sys 0.61.2", ] [[package]] @@ -6458,14 +6422,14 @@ checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] name = "tokio-postgres" -version = "0.7.14" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156efe7fff213168257853e1dfde202eed5f487522cbbbf7d219941d753d853" +checksum = 
"2b40d66d9b2cfe04b628173409368e58247e8eddbbd3b0e6c6ba1d09f20f6c9e" dependencies = [ "async-trait", "byteorder", @@ -6481,7 +6445,7 @@ dependencies = [ "postgres-protocol", "postgres-types", "rand 0.9.2", - "socket2 0.6.0", + "socket2", "tokio", "tokio-util", "whoami", @@ -6489,9 +6453,9 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.26.3" +version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f63835928ca123f1bef57abbcd23bb2ba0ac9ae1235f1e65bda0d06e7786bd" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ "rustls", "tokio", @@ -6508,26 +6472,11 @@ dependencies = [ "tokio", ] -[[package]] -name = "tokio-tar" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d5714c010ca3e5c27114c1cdeb9d14641ace49874aa5626d7149e47aedace75" -dependencies = [ - "filetime", - "futures-core", - "libc", - "redox_syscall 0.3.5", - "tokio", - "tokio-stream", - "xattr", -] - [[package]] name = "tokio-util" -version = "0.7.16" +version = "0.7.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" dependencies = [ "bytes", "futures-core", @@ -6538,18 +6487,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32f1085dec27c2b6632b04c80b3bb1b4300d6495d1e129693bdda7d91e72eec1" +checksum = "f2cdb639ebbc97961c51720f858597f7f24c4fc295327923af55b74c3c724533" dependencies = [ "serde_core", ] [[package]] name = "toml_edit" -version = "0.23.6" +version = "0.23.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3effe7c0e86fdff4f69cdd2ccc1b96f933e24811c5441d44904e8683e27184b" +checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d" dependencies = [ "indexmap 2.12.0", "toml_datetime", @@ -6559,18 +6508,18 @@ dependencies = [ [[package]] name = "toml_parser" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cf893c33be71572e0e9aa6dd15e6677937abd686b066eac3f8cd3531688a627" +checksum = "c0cbe268d35bdb4bb5a56a2de88d0ad0eb70af5384a99d648cd4b3d04039800e" dependencies = [ "winnow", ] [[package]] name = "tonic" -version = "0.13.1" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e581ba15a835f4d9ea06c55ab1bd4dce26fc53752c69a04aac00703bfb49ba9" +checksum = "eb7613188ce9f7df5bfe185db26c5814347d110db17920415cf2fbcad85e7203" dependencies = [ "async-trait", "axum", @@ -6585,8 +6534,8 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "prost 0.13.5", - "socket2 0.5.10", + "socket2", + "sync_wrapper", "tokio", "tokio-stream", "tower", @@ -6595,6 +6544,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "tonic-prost" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66bd50ad6ce1252d87ef024b3d64fe4c3cf54a86fb9ef4c631fdd0ded7aeaa67" +dependencies = [ + "bytes", + "prost", + "tonic", +] + [[package]] name = "tower" version = "0.5.2" @@ -6620,7 +6580,7 @@ version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" dependencies = [ - "bitflags 2.9.4", + "bitflags 2.10.0", 
"bytes", "futures-util", "http 1.3.1", @@ -6663,7 +6623,7 @@ checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -6736,9 +6696,9 @@ checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" [[package]] name = "typenum" -version = "1.18.0" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" [[package]] name = "typewit" @@ -6748,9 +6708,9 @@ checksum = "f8c1ae7cc0fdb8b842d65d127cb981574b0d2b249b74d1c7a2986863dc134f71" [[package]] name = "typify" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7144144e97e987c94758a3017c920a027feac0799df325d6df4fc8f08d02068e" +checksum = "e6d5bcc6f62eb1fa8aa4098f39b29f93dcb914e17158b76c50360911257aa629" dependencies = [ "typify-impl", "typify-macro", @@ -6758,9 +6718,9 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "062879d46aa4c9dfe0d33b035bbaf512da192131645d05deacb7033ec8581a09" +checksum = "a1eb359f7ffa4f9ebe947fa11a1b2da054564502968db5f317b7e37693cb2240" dependencies = [ "heck 0.5.0", "log", @@ -6771,16 +6731,16 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.106", + "syn 2.0.110", "thiserror", "unicode-ident", ] [[package]] name = "typify-macro" -version = "0.4.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9708a3ceb6660ba3f8d2b8f0567e7d4b8b198e2b94d093b8a6077a751425de9e" +checksum = "911c32f3c8514b048c1b228361bebb5e6d73aeec01696e8cc0e82e2ffef8ab7a" dependencies = [ "proc-macro2", "quote", @@ -6789,10 +6749,20 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.106", + "syn 2.0.110", "typify-impl", ] +[[package]] +name = "ulid" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "470dbf6591da1b39d43c14523b2b469c86879a53e8b758c8e090a470fe7b1fbe" +dependencies = [ + "rand 0.9.2", + "web-time", +] + [[package]] name = "unicode-bidi" version = "0.3.18" @@ -6801,24 +6771,24 @@ checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5" [[package]] name = "unicode-ident" -version = "1.0.19" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" +checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" [[package]] name = "unicode-normalization" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" dependencies = [ "tinyvec", ] [[package]] name = "unicode-properties" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e70f2a8b45122e719eb623c01822704c4e0907e7e426a05927e1a1cfff5b75d0" +checksum = "7df058c713841ad818f1dc5d3fd88063241cc61f49f5fbea4b951e8cf5a8d71d" [[package]] name = "unicode-segmentation" @@ -6834,9 +6804,9 @@ checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" 
[[package]] name = "unicode-width" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" [[package]] name = "unindent" @@ -6862,6 +6832,34 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "ureq" +version = "3.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d39cb1dbab692d82a977c0392ffac19e188bd9186a9f32806f0aaa859d75585a" +dependencies = [ + "base64 0.22.1", + "log", + "percent-encoding", + "rustls", + "rustls-pki-types", + "ureq-proto", + "utf-8", + "webpki-roots", +] + +[[package]] +name = "ureq-proto" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b4531c118335662134346048ddb0e54cc86bd7e81866757873055f0e38f5d2" +dependencies = [ + "base64 0.22.1", + "http 1.3.1", + "httparse", + "log", +] + [[package]] name = "url" version = "2.5.7" @@ -6880,6 +6878,12 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -6964,9 +6968,9 @@ checksum = "b8dad83b4f25e74f184f64c43b150b91efe7647395b42289f38e50566d82855b" [[package]] name = "wasm-bindgen" -version = "0.2.104" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" +checksum = "da95793dfc411fbbd93f5be7715b0578ec61fe87cb1a42b12eb625caa5c5ea60" dependencies = [ "cfg-if", "once_cell", @@ -6975,25 +6979,11 @@ dependencies = [ "wasm-bindgen-shared", ] -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.104" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" -dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn 2.0.106", - "wasm-bindgen-shared", -] - [[package]] name = "wasm-bindgen-futures" -version = "0.4.54" +version = "0.4.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" +checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" dependencies = [ "cfg-if", "js-sys", @@ -7004,9 +6994,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.104" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" +checksum = "04264334509e04a7bf8690f2384ef5265f05143a4bff3889ab7a3269adab59c2" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -7014,31 +7004,31 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.104" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" +checksum = "420bc339d9f322e562942d52e115d57e950d12d88983a14c79b86859ee6c7ebc" dependencies = [ + 
"bumpalo", "proc-macro2", "quote", - "syn 2.0.106", - "wasm-bindgen-backend", + "syn 2.0.110", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.104" +version = "0.2.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" +checksum = "76f218a38c84bcb33c25ec7059b07847d465ce0e0a76b995e134a45adcb6af76" dependencies = [ "unicode-ident", ] [[package]] name = "wasm-bindgen-test" -version = "0.3.54" +version = "0.3.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e381134e148c1062f965a42ed1f5ee933eef2927c3f70d1812158f711d39865" +checksum = "bfc379bfb624eb59050b509c13e77b4eb53150c350db69628141abce842f2373" dependencies = [ "js-sys", "minicov", @@ -7049,13 +7039,13 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.54" +version = "0.3.55" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b673bca3298fe582aeef8352330ecbad91849f85090805582400850f8270a2e8" +checksum = "085b2df989e1e6f9620c1311df6c996e83fe16f57792b272ce1e024ac16a90f1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -7073,9 +7063,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.81" +version = "0.3.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" +checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" dependencies = [ "js-sys", "wasm-bindgen", @@ -7091,6 +7081,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-roots" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2878ef029c47c6e8cf779119f20fcf52bde7ad42a731b2a304bc221df17571e" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "whoami" version = "1.6.1" @@ -7124,7 +7123,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.61.0", + "windows-sys 0.61.2", ] [[package]] @@ -7170,15 +7169,15 @@ dependencies = [ [[package]] name = "windows-core" -version = "0.62.0" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57fe7168f7de578d2d8a05b07fd61870d2e73b4020e9f49aa00da8471723497c" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", - "windows-link 0.2.0", - "windows-result 0.4.0", - "windows-strings 0.5.0", + "windows-link 0.2.1", + "windows-result 0.4.1", + "windows-strings 0.5.1", ] [[package]] @@ -7194,24 +7193,24 @@ dependencies = [ [[package]] name = "windows-implement" -version = "0.60.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] name = "windows-interface" -version = "0.59.1" +version = "0.59.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", 
- "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -7222,9 +7221,9 @@ checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" [[package]] name = "windows-link" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-numerics" @@ -7247,11 +7246,11 @@ dependencies = [ [[package]] name = "windows-result" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -7265,11 +7264,11 @@ dependencies = [ [[package]] name = "windows-strings" -version = "0.5.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -7296,16 +7295,16 @@ version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.53.3", + "windows-targets 0.53.5", ] [[package]] name = "windows-sys" -version = "0.61.0" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e201184e40b2ede64bc2ea34968b28e33622acdbbf37104f0e4a33f7abe657aa" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link 0.2.0", + "windows-link 0.2.1", ] [[package]] @@ -7326,19 +7325,19 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.53.3" +version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ - "windows-link 0.1.3", - "windows_aarch64_gnullvm 0.53.0", - "windows_aarch64_msvc 0.53.0", - "windows_i686_gnu 0.53.0", - "windows_i686_gnullvm 0.53.0", - "windows_i686_msvc 0.53.0", - "windows_x86_64_gnu 0.53.0", - "windows_x86_64_gnullvm 0.53.0", - "windows_x86_64_msvc 0.53.0", + "windows-link 0.2.1", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] [[package]] @@ -7358,9 +7357,9 @@ checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" [[package]] name = "windows_aarch64_msvc" @@ -7370,9 +7369,9 @@ checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_aarch64_msvc" -version = "0.53.0" +version = "0.53.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" [[package]] name = "windows_i686_gnu" @@ -7382,9 +7381,9 @@ checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnu" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" [[package]] name = "windows_i686_gnullvm" @@ -7394,9 +7393,9 @@ checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" [[package]] name = "windows_i686_msvc" @@ -7406,9 +7405,9 @@ checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_i686_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" [[package]] name = "windows_x86_64_gnu" @@ -7418,9 +7417,9 @@ checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnu" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" [[package]] name = "windows_x86_64_gnullvm" @@ -7430,9 +7429,9 @@ checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" [[package]] name = "windows_x86_64_msvc" @@ -7442,9 +7441,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "windows_x86_64_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" @@ -7463,9 +7462,9 @@ checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "writeable" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" [[package]] name = "wyz" @@ -7509,11 +7508,10 @@ checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "yoke" -version = "0.8.0" +version = "0.8.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc" +checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" dependencies = [ - "serde", "stable_deref_trait", "yoke-derive", "zerofrom", @@ -7521,13 +7519,13 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", "synstructure", ] @@ -7548,7 +7546,7 @@ checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] @@ -7568,21 +7566,21 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", "synstructure", ] [[package]] name = "zeroize" -version = "1.8.1" +version = "1.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" dependencies = [ "displaydoc", "yoke", @@ -7591,9 +7589,9 @@ dependencies = [ [[package]] name = "zerovec" -version = "0.11.4" +version = "0.11.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" dependencies = [ "yoke", "zerofrom", @@ -7602,13 +7600,13 @@ dependencies = [ [[package]] name = "zerovec-derive" -version = "0.11.1" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.106", + "syn 2.0.110", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 3e0861c07ab0..36198430e40b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,9 +77,9 @@ license = "Apache-2.0" readme = "README.md" repository = "https://github.com/apache/datafusion" # Define Minimum Supported Rust Version (MSRV) -rust-version = "1.87.0" +rust-version = "1.88.0" # Define DataFusion version -version = "50.3.0" +version = "51.0.0" [workspace.dependencies] # We turn off default-features for some dependencies here so the workspaces which inherit them can @@ -91,63 +91,63 @@ ahash = { version = "0.8", default-features = false, features = [ "runtime-rng", ] } apache-avro = { version = "0.20", default-features = false } -arrow = { version = "56.2.0", features = [ +arrow = { version = "57.0.0", features = [ "prettyprint", "chrono-tz", ] } -arrow-buffer = { version = "56.2.0", default-features = false } -arrow-flight = { version = "56.2.0", features = [ +arrow-buffer = { version = "57.0.0", default-features = false } +arrow-flight = { version = "57.0.0", features = [ 
"flight-sql-experimental", ] } -arrow-ipc = { version = "56.2.0", default-features = false, features = [ +arrow-ipc = { version = "57.0.0", default-features = false, features = [ "lz4", ] } -arrow-ord = { version = "56.2.0", default-features = false } -arrow-schema = { version = "56.2.0", default-features = false } +arrow-ord = { version = "57.0.0", default-features = false } +arrow-schema = { version = "57.0.0", default-features = false } async-trait = "0.1.89" bigdecimal = "0.4.8" bytes = "1.10" chrono = { version = "0.4.42", default-features = false } -criterion = "0.5.1" -ctor = "0.4.3" +criterion = "0.7" +ctor = "0.6.1" dashmap = "6.0.1" -datafusion = { path = "datafusion/core", version = "50.3.0", default-features = false } -datafusion-catalog = { path = "datafusion/catalog", version = "50.3.0" } -datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "50.3.0" } -datafusion-common = { path = "datafusion/common", version = "50.3.0", default-features = false } -datafusion-common-runtime = { path = "datafusion/common-runtime", version = "50.3.0" } -datafusion-datasource = { path = "datafusion/datasource", version = "50.3.0", default-features = false } -datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "50.3.0", default-features = false } -datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "50.3.0", default-features = false } -datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "50.3.0", default-features = false } -datafusion-datasource-json = { path = "datafusion/datasource-json", version = "50.3.0", default-features = false } -datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "50.3.0", default-features = false } -datafusion-doc = { path = "datafusion/doc", version = "50.3.0" } -datafusion-execution = { path = "datafusion/execution", version = "50.3.0", default-features = false } -datafusion-expr = { path = "datafusion/expr", version = "50.3.0", default-features = false } -datafusion-expr-common = { path = "datafusion/expr-common", version = "50.3.0" } -datafusion-ffi = { path = "datafusion/ffi", version = "50.3.0" } -datafusion-functions = { path = "datafusion/functions", version = "50.3.0" } -datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "50.3.0" } -datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "50.3.0" } -datafusion-functions-nested = { path = "datafusion/functions-nested", version = "50.3.0", default-features = false } -datafusion-functions-table = { path = "datafusion/functions-table", version = "50.3.0" } -datafusion-functions-window = { path = "datafusion/functions-window", version = "50.3.0" } -datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "50.3.0" } -datafusion-macros = { path = "datafusion/macros", version = "50.3.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "50.3.0", default-features = false } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "50.3.0", default-features = false } -datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "50.3.0", default-features = false } -datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "50.3.0", default-features = false } -datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "50.3.0" } -datafusion-physical-plan = 
{ path = "datafusion/physical-plan", version = "50.3.0" } -datafusion-proto = { path = "datafusion/proto", version = "50.3.0" } -datafusion-proto-common = { path = "datafusion/proto-common", version = "50.3.0" } -datafusion-pruning = { path = "datafusion/pruning", version = "50.3.0" } -datafusion-session = { path = "datafusion/session", version = "50.3.0" } -datafusion-spark = { path = "datafusion/spark", version = "50.3.0" } -datafusion-sql = { path = "datafusion/sql", version = "50.3.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "50.3.0" } +datafusion = { path = "datafusion/core", version = "51.0.0", default-features = false } +datafusion-catalog = { path = "datafusion/catalog", version = "51.0.0" } +datafusion-catalog-listing = { path = "datafusion/catalog-listing", version = "51.0.0" } +datafusion-common = { path = "datafusion/common", version = "51.0.0", default-features = false } +datafusion-common-runtime = { path = "datafusion/common-runtime", version = "51.0.0" } +datafusion-datasource = { path = "datafusion/datasource", version = "51.0.0", default-features = false } +datafusion-datasource-arrow = { path = "datafusion/datasource-arrow", version = "51.0.0", default-features = false } +datafusion-datasource-avro = { path = "datafusion/datasource-avro", version = "51.0.0", default-features = false } +datafusion-datasource-csv = { path = "datafusion/datasource-csv", version = "51.0.0", default-features = false } +datafusion-datasource-json = { path = "datafusion/datasource-json", version = "51.0.0", default-features = false } +datafusion-datasource-parquet = { path = "datafusion/datasource-parquet", version = "51.0.0", default-features = false } +datafusion-doc = { path = "datafusion/doc", version = "51.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "51.0.0", default-features = false } +datafusion-expr = { path = "datafusion/expr", version = "51.0.0", default-features = false } +datafusion-expr-common = { path = "datafusion/expr-common", version = "51.0.0" } +datafusion-ffi = { path = "datafusion/ffi", version = "51.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "51.0.0" } +datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "51.0.0" } +datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "51.0.0" } +datafusion-functions-nested = { path = "datafusion/functions-nested", version = "51.0.0", default-features = false } +datafusion-functions-table = { path = "datafusion/functions-table", version = "51.0.0" } +datafusion-functions-window = { path = "datafusion/functions-window", version = "51.0.0" } +datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "51.0.0" } +datafusion-macros = { path = "datafusion/macros", version = "51.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "51.0.0", default-features = false } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "51.0.0", default-features = false } +datafusion-physical-expr-adapter = { path = "datafusion/physical-expr-adapter", version = "51.0.0", default-features = false } +datafusion-physical-expr-common = { path = "datafusion/physical-expr-common", version = "51.0.0", default-features = false } +datafusion-physical-optimizer = { path = "datafusion/physical-optimizer", version = "51.0.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "51.0.0" } +datafusion-proto = 
{ path = "datafusion/proto", version = "51.0.0" } +datafusion-proto-common = { path = "datafusion/proto-common", version = "51.0.0" } +datafusion-pruning = { path = "datafusion/pruning", version = "51.0.0" } +datafusion-session = { path = "datafusion/session", version = "51.0.0" } +datafusion-spark = { path = "datafusion/spark", version = "51.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "51.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "51.0.0" } doc-comment = "0.3" env_logger = "0.11" @@ -156,29 +156,30 @@ half = { version = "2.7.0", default-features = false } hashbrown = { version = "0.14.5", features = ["raw"] } hex = { version = "0.4.3" } indexmap = "2.12.0" +insta = { version = "1.43.2", features = ["glob", "filters"] } itertools = "0.14" log = "^0.4" +num-traits = { version = "0.2" } object_store = { version = "0.12.4", default-features = false } parking_lot = "0.12" -parquet = { version = "56.2.0", default-features = false, features = [ +parquet = { version = "57.0.0", default-features = false, features = [ "arrow", "async", "object_store", ] } -pbjson = { version = "0.7.0" } -pbjson-types = "0.7" +pbjson = { version = "0.8.0" } +pbjson-types = "0.8" # Should match arrow-flight's version of prost. -insta = { version = "1.43.2", features = ["glob", "filters"] } -prost = "0.13.1" +prost = "0.14.1" rand = "0.9" recursive = "0.1.1" -regex = "1.11" -rstest = "0.25.0" +regex = "1.12" +rstest = "0.26.1" serde_json = "1" sqlparser = { version = "0.59.0", default-features = false, features = ["std", "visitor"] } tempfile = "3" -testcontainers = { version = "0.24", features = ["default"] } -testcontainers-modules = { version = "0.12" } +testcontainers = { version = "0.25.2", features = ["default"] } +testcontainers-modules = { version = "0.13" } tokio = { version = "1.48", features = ["macros", "rt", "sync"] } url = "2.5.7" diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index b3fd520814db..870c826f5581 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -26,6 +26,9 @@ repository = { workspace = true } license = { workspace = true } rust-version = { workspace = true } +# Note: add additional linter rules in lib.rs. +# Rust does not support workspace + new linter rules in subcrates yet +# https://github.com/rust-lang/cargo/issues/13157 [lints] workspace = true diff --git a/ci/scripts/rust_clippy.sh b/ci/scripts/rust_clippy.sh index 1557bd56eab4..6a00ad810956 100755 --- a/ci/scripts/rust_clippy.sh +++ b/ci/scripts/rust_clippy.sh @@ -18,4 +18,4 @@ # under the License. 
set -ex -cargo clippy --all-targets --workspace --features avro,pyarrow,integration-tests,extended_tests -- -D warnings +cargo clippy --all-targets --workspace --features avro,pyarrow,integration-tests,extended_tests -- -D warnings \ No newline at end of file diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 53744e6c609b..f3069b492352 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -40,7 +40,7 @@ async-trait = { workspace = true } aws-config = "1.8.7" aws-credential-types = "1.2.7" chrono = { workspace = true } -clap = { version = "4.5.47", features = ["cargo", "derive"] } +clap = { version = "4.5.50", features = ["cargo", "derive"] } datafusion = { workspace = true, features = [ "avro", "compression", diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs index 3ec446c51583..d23b12469e38 100644 --- a/datafusion-cli/src/functions.rs +++ b/datafusion-cli/src/functions.rs @@ -419,7 +419,9 @@ impl TableFunctionImpl for ParquetMetadataFunc { stats_max_value_arr.push(None); }; compression_arr.push(format!("{:?}", column.compression())); - encodings_arr.push(format!("{:?}", column.encodings())); + // need to collect into Vec to format + let encodings: Vec<_> = column.encodings().collect(); + encodings_arr.push(format!("{:?}", encodings)); index_page_offset_arr.push(column.index_page_offset()); dictionary_page_offset_arr.push(column.dictionary_page_offset()); data_page_offset_arr.push(column.data_page_offset()); diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index bdb2fdf5198e..09fa8ef15af8 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -497,7 +497,7 @@ mod tests { +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ | filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size | +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ - | ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [RLE_DICTIONARY, PLAIN, RLE] | | 4 | 46 | 121 | 123 | + | ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [PLAIN, RLE, RLE_DICTIONARY] | | 4 | 46 | 121 | 123 | 
+-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ "#); @@ -510,7 +510,7 @@ mod tests { +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ | filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size | +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ - | ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [RLE_DICTIONARY, PLAIN, RLE] | | 4 | 46 | 121 | 123 | + | ../datafusion/core/tests/data/fixed_size_list_array.parquet | 0 | 2 | 1 | 123 | 0 | 125 | 4 | "f0.list.item" | INT64 | 1 | 4 | 0 | | 1 | 4 | SNAPPY | [PLAIN, RLE, RLE_DICTIONARY] | | 4 | 46 | 121 | 123 | +-------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+-------+-----------+-----------+------------------+----------------------+-----------------+-----------------+-------------+------------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ "#); @@ -532,7 +532,7 @@ mod tests { +-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ | filename | row_group_id | row_group_num_rows | row_group_num_columns | row_group_bytes | column_id | file_offset | num_values | path_in_schema | type | stats_min | stats_max | stats_null_count | stats_distinct_count | stats_min_value | stats_max_value | compression | encodings | index_page_offset | 
dictionary_page_offset | data_page_offset | total_compressed_size | total_uncompressed_size | +-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ - | ../parquet-testing/data/data_index_bloom_encoding_stats.parquet | 0 | 14 | 1 | 163 | 0 | 4 | 14 | "String" | BYTE_ARRAY | Hello | today | 0 | | Hello | today | GZIP(GzipLevel(6)) | [BIT_PACKED, RLE, PLAIN] | | | 4 | 152 | 163 | + | ../parquet-testing/data/data_index_bloom_encoding_stats.parquet | 0 | 14 | 1 | 163 | 0 | 4 | 14 | "String" | BYTE_ARRAY | Hello | today | 0 | | Hello | today | GZIP(GzipLevel(6)) | [PLAIN, RLE, BIT_PACKED] | | | 4 | 152 | 163 | +-----------------------------------------------------------------+--------------+--------------------+-----------------------+-----------------+-----------+-------------+------------+----------------+------------+-----------+-----------+------------------+----------------------+-----------------+-----------------+--------------------+--------------------------+-------------------+------------------------+------------------+-----------------------+-------------------------+ "#); @@ -592,9 +592,9 @@ mod tests { +-----------------------------------+-----------------+---------------------+------+------------------+ | filename | file_size_bytes | metadata_size_bytes | hits | extra | +-----------------------------------+-----------------+---------------------+------+------------------+ - | alltypes_plain.parquet | 1851 | 10181 | 2 | page_index=false | - | alltypes_tiny_pages.parquet | 454233 | 881418 | 2 | page_index=true | - | lz4_raw_compressed_larger.parquet | 380836 | 2939 | 2 | page_index=false | + | alltypes_plain.parquet | 1851 | 6957 | 2 | page_index=false | + | alltypes_tiny_pages.parquet | 454233 | 267014 | 2 | page_index=true | + | lz4_raw_compressed_larger.parquet | 380836 | 996 | 2 | page_index=false | +-----------------------------------+-----------------+---------------------+------+------------------+ "); @@ -623,9 +623,9 @@ mod tests { +-----------------------------------+-----------------+---------------------+------+------------------+ | filename | file_size_bytes | metadata_size_bytes | hits | extra | +-----------------------------------+-----------------+---------------------+------+------------------+ - | alltypes_plain.parquet | 1851 | 10181 | 5 | page_index=false | - | alltypes_tiny_pages.parquet | 454233 | 881418 | 2 | page_index=true | - | lz4_raw_compressed_larger.parquet | 380836 | 2939 | 3 | page_index=false | + | alltypes_plain.parquet | 1851 | 6957 | 5 | page_index=false | + | alltypes_tiny_pages.parquet | 454233 | 267014 | 2 | page_index=true | + | lz4_raw_compressed_larger.parquet | 380836 | 996 | 3 | page_index=false | +-----------------------------------+-----------------+---------------------+------+------------------+ "); diff --git a/datafusion-examples/Cargo.toml b/datafusion-examples/Cargo.toml index 68bb5376a1ac..61711f8472eb 100644 --- a/datafusion-examples/Cargo.toml +++ b/datafusion-examples/Cargo.toml @@ -29,21 +29,12 @@ license = { workspace = true } authors = { workspace = true } rust-version = { workspace = true } +# Note: add 
additional linter rules in lib.rs. +# Rust does not support workspace + new linter rules in subcrates yet +# https://github.com/rust-lang/cargo/issues/13157 [lints] workspace = true -[[example]] -name = "flight_sql_server" -path = "examples/flight/flight_sql_server.rs" - -[[example]] -name = "flight_server" -path = "examples/flight/flight_server.rs" - -[[example]] -name = "flight_client" -path = "examples/flight/flight_client.rs" - [[example]] name = "dataframe_to_s3" path = "examples/external_dependency/dataframe-to-s3.rs" @@ -52,10 +43,6 @@ path = "examples/external_dependency/dataframe-to-s3.rs" name = "query_aws_s3" path = "examples/external_dependency/query-aws-s3.rs" -[[example]] -name = "custom_file_casts" -path = "examples/custom_file_casts.rs" - [dev-dependencies] arrow = { workspace = true } # arrow_schema is required for record_batch! macro :sad: @@ -81,7 +68,7 @@ serde_json = { workspace = true } tempfile = { workspace = true } test-utils = { path = "../test-utils" } tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] } -tonic = "0.13.1" +tonic = "0.14" tracing = { version = "0.1" } tracing-subscriber = { version = "0.3" } url = { workspace = true } diff --git a/datafusion-examples/README.md b/datafusion-examples/README.md index f1bcbcce8200..62e51a790014 100644 --- a/datafusion-examples/README.md +++ b/datafusion-examples/README.md @@ -46,27 +46,28 @@ cargo run --example dataframe ## Single Process -- [`advanced_udaf.rs`](examples/advanced_udaf.rs): Define and invoke a more complicated User Defined Aggregate Function (UDAF) -- [`advanced_udf.rs`](examples/advanced_udf.rs): Define and invoke a more complicated User Defined Scalar Function (UDF) -- [`advanced_udwf.rs`](examples/advanced_udwf.rs): Define and invoke a more complicated User Defined Window Function (UDWF) +- [`examples/udf/advanced_udaf.rs`](examples/udf/advanced_udaf.rs): Define and invoke a more complicated User Defined Aggregate Function (UDAF) +- [`examples/udf/advanced_udf.rs`](examples/udf/advanced_udf.rs): Define and invoke a more complicated User Defined Scalar Function (UDF) +- [`examples/udf/advanced_udwf.rs`](examples/udf/advanced_udwf.rs): Define and invoke a more complicated User Defined Window Function (UDWF) - [`advanced_parquet_index.rs`](examples/advanced_parquet_index.rs): Creates a detailed secondary index that covers the contents of several parquet files -- [`async_udf.rs`](examples/async_udf.rs): Define and invoke an asynchronous User Defined Scalar Function (UDF) +- [`examples/udf/async_udf.rs`](examples/udf/async_udf.rs): Define and invoke an asynchronous User Defined Scalar Function (UDF) - [`analyzer_rule.rs`](examples/analyzer_rule.rs): Use a custom AnalyzerRule to change a query's semantics (row level access control) - [`catalog.rs`](examples/catalog.rs): Register the table into a custom catalog - [`composed_extension_codec`](examples/composed_extension_codec.rs): Example of using multiple extension codecs for serialization / deserialization -- [`csv_sql_streaming.rs`](examples/csv_sql_streaming.rs): Build and run a streaming query plan from a SQL statement against a local CSV file -- [`csv_json_opener.rs`](examples/csv_json_opener.rs): Use low level `FileOpener` APIs to read CSV/JSON into Arrow `RecordBatch`es -- [`custom_datasource.rs`](examples/custom_datasource.rs): Run queries against a custom datasource (TableProvider) -- [`custom_file_casts.rs`](examples/custom_file_casts.rs): Implement custom casting rules to adapt file schemas -- 
[`custom_file_format.rs`](examples/custom_file_format.rs): Write data to a custom file format +- [`examples/custom_data_source/csv_sql_streaming.rs`](examples/custom_data_source/csv_sql_streaming.rs): Build and run a streaming query plan from a SQL statement against a local CSV file +- [`examples/custom_data_source/csv_json_opener.rs`](examples/custom_data_source/csv_json_opener.rs): Use low level `FileOpener` APIs to read CSV/JSON into Arrow `RecordBatch`es +- [`examples/custom_data_source/custom_datasource.rs`](examples/custom_data_source/custom_datasource.rs): Run queries against a custom datasource (TableProvider) +- [`examples/custom_data_source/custom_file_casts.rs`](examples/custom_data_source/custom_file_casts.rs): Implement custom casting rules to adapt file schemas +- [`examples/custom_data_source/custom_file_format.rs`](examples/custom_data_source/custom_file_format.rs): Write data to a custom file format - [`dataframe-to-s3.rs`](examples/external_dependency/dataframe-to-s3.rs): Run a query using a DataFrame against a parquet file from s3 and writing back to s3 - [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame API against parquet files, csv files, and in-memory data, including multiple subqueries. Also demonstrates the various methods to write out a DataFrame to a table, parquet file, csv file, and json file. +- [`examples/builtin_functions/date_time.rs`](examples/builtin_functions/date_time.rs): Examples of date-time related functions and queries - [`default_column_values.rs`](examples/default_column_values.rs): Implement custom default value handling for missing columns using field metadata and PhysicalExprAdapter - [`deserialize_to_struct.rs`](examples/deserialize_to_struct.rs): Convert query results (Arrow ArrayRefs) into Rust structs - [`expr_api.rs`](examples/expr_api.rs): Create, execute, simplify, analyze and coerce `Expr`s -- [`file_stream_provider.rs`](examples/file_stream_provider.rs): Run a query on `FileStreamProvider` which implements `StreamProvider` for reading and writing to arbitrary stream sources / sinks. -- [`flight_sql_server.rs`](examples/flight/flight_sql_server.rs): Run DataFusion as a standalone process and execute SQL queries from JDBC clients -- [`function_factory.rs`](examples/function_factory.rs): Register `CREATE FUNCTION` handler to implement SQL macros +- [`examples/custom_data_source/file_stream_provider.rs`](examples/custom_data_source/file_stream_provider.rs): Run a query on `FileStreamProvider` which implements `StreamProvider` for reading and writing to arbitrary stream sources / sinks. +- [`flight/sql_server.rs`](examples/flight/sql_server.rs): Run DataFusion as a standalone process and execute SQL queries from Flight and FlightSQL (e.g.
JDBC) clients +- [`examples/builtin_functions/function_factory.rs`](examples/builtin_functions/function_factory.rs): Register `CREATE FUNCTION` handler to implement SQL macros - [`memory_pool_tracking.rs`](examples/memory_pool_tracking.rs): Demonstrates TrackConsumersPool for memory tracking and debugging with enhanced error messages - [`memory_pool_execution_plan.rs`](examples/memory_pool_execution_plan.rs): Shows how to implement memory-aware ExecutionPlan with memory reservation and spilling - [`optimizer_rule.rs`](examples/optimizer_rule.rs): Use a custom OptimizerRule to replace certain predicates @@ -81,17 +82,17 @@ cargo run --example dataframe - [`pruning.rs`](examples/pruning.rs): Use pruning to rule out files based on statistics - [`query-aws-s3.rs`](examples/external_dependency/query-aws-s3.rs): Configure `object_store` and run a query against files stored in AWS S3 - [`query-http-csv.rs`](examples/query-http-csv.rs): Configure `object_store` and run a query against files vi HTTP -- [`regexp.rs`](examples/regexp.rs): Examples of using regular expression functions +- [`examples/builtin_functions/regexp.rs`](examples/builtin_functions/regexp.rs): Examples of using regular expression functions - [`remote_catalog.rs`](examples/regexp.rs): Examples of interfacing with a remote catalog (e.g. over a network) -- [`simple_udaf.rs`](examples/simple_udaf.rs): Define and invoke a User Defined Aggregate Function (UDAF) -- [`simple_udf.rs`](examples/simple_udf.rs): Define and invoke a User Defined Scalar Function (UDF) -- [`simple_udfw.rs`](examples/simple_udwf.rs): Define and invoke a User Defined Window Function (UDWF) +- [`examples/udf/simple_udaf.rs`](examples/udf/simple_udaf.rs): Define and invoke a User Defined Aggregate Function (UDAF) +- [`examples/udf/simple_udf.rs`](examples/udf/simple_udf.rs): Define and invoke a User Defined Scalar Function (UDF) +- [`examples/udf/simple_udtf.rs`](examples/udf/simple_udtf.rs): Define and invoke a User Defined Table Function (UDTF) +- [`examples/udf/simple_udfw.rs`](examples/udf/simple_udwf.rs): Define and invoke a User Defined Window Function (UDWF) - [`sql_analysis.rs`](examples/sql_analysis.rs): Analyse SQL queries with DataFusion structures - [`sql_frontend.rs`](examples/sql_frontend.rs): Create LogicalPlans (only) from sql strings - [`sql_dialect.rs`](examples/sql_dialect.rs): Example of implementing a custom SQL dialect on top of `DFParser` - [`sql_query.rs`](examples/memtable.rs): Query data using SQL (in memory `RecordBatches`, local Parquet files) -- [`date_time_function.rs`](examples/date_time_function.rs): Examples of date-time related functions and queries. ## Distributed -- [`flight_client.rs`](examples/flight/flight_client.rs) and [`flight_server.rs`](examples/flight/flight_server.rs): Run DataFusion as a standalone process and execute SQL queries from a client using the Flight protocol. +- [`examples/flight/client.rs`](examples/flight/client.rs) and [`examples/flight/server.rs`](examples/flight/server.rs): Run DataFusion as a standalone process and execute SQL queries from a client using the Arrow Flight protocol. diff --git a/datafusion-examples/examples/advanced_parquet_index.rs b/datafusion-examples/examples/advanced_parquet_index.rs index 55400e219283..67bfc5b1bcf5 100644 --- a/datafusion-examples/examples/advanced_parquet_index.rs +++ b/datafusion-examples/examples/advanced_parquet_index.rs @@ -121,7 +121,6 @@ use url::Url; /// │ ╚═══════════════════╝ │ 1. 
With cached ParquetMetadata, so /// └───────────────────────┘ the ParquetSource does not re-read / /// Parquet File decode the thrift footer -/// /// ``` /// /// Within a Row Group, Column Chunks store data in DataPages. This example also @@ -492,19 +491,18 @@ impl TableProvider for IndexTableProvider { .with_file(indexed_file); let file_source = Arc::new( - ParquetSource::default() + ParquetSource::new(schema.clone()) // provide the predicate so the DataSourceExec can try and prune // row groups internally .with_predicate(predicate) // provide the factory to create parquet reader without re-reading metadata .with_parquet_file_reader_factory(Arc::new(reader_factory)), ); - let file_scan_config = - FileScanConfigBuilder::new(object_store_url, schema, file_source) - .with_limit(limit) - .with_projection(projection.cloned()) - .with_file(partitioned_file) - .build(); + let file_scan_config = FileScanConfigBuilder::new(object_store_url, file_source) + .with_limit(limit) + .with_projection_indices(projection.cloned()) + .with_file(partitioned_file) + .build(); // Finally, put it all together into a DataSourceExec Ok(DataSourceExec::from_data_source(file_scan_config)) diff --git a/datafusion-examples/examples/date_time_functions.rs b/datafusion-examples/examples/builtin_functions/date_time.rs similarity index 97% rename from datafusion-examples/examples/date_time_functions.rs rename to datafusion-examples/examples/builtin_functions/date_time.rs index 2628319ae31f..178cba979cb9 100644 --- a/datafusion-examples/examples/date_time_functions.rs +++ b/datafusion-examples/examples/builtin_functions/date_time.rs @@ -26,8 +26,20 @@ use datafusion::common::assert_contains; use datafusion::error::Result; use datafusion::prelude::*; -#[tokio::main] -async fn main() -> Result<()> { +/// Example: Working with Date and Time Functions +/// +/// This example demonstrates how to work with various date and time +/// functions in DataFusion using both the DataFrame API and SQL queries. +/// +/// It includes: +/// - `make_date`: building `DATE` values from year, month, and day columns +/// - `to_date`: converting string expressions into `DATE` values +/// - `to_timestamp`: parsing strings or numeric values into `TIMESTAMP`s +/// - `to_char`: formatting dates, timestamps, and durations as strings +/// +/// Together, these examples show how to create, convert, and format temporal +/// data using DataFusion’s built-in functions. +pub async fn date_time() -> Result<()> { query_make_date().await?; query_to_date().await?; query_to_timestamp().await?; diff --git a/datafusion-examples/examples/function_factory.rs b/datafusion-examples/examples/builtin_functions/function_factory.rs similarity index 99% rename from datafusion-examples/examples/function_factory.rs rename to datafusion-examples/examples/builtin_functions/function_factory.rs index d4312ae59409..5d41e7a26071 100644 --- a/datafusion-examples/examples/function_factory.rs +++ b/datafusion-examples/examples/builtin_functions/function_factory.rs @@ -42,8 +42,7 @@ use std::sync::Arc; /// /// This example is rather simple and does not cover all cases required for a /// real implementation. 
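A note on the recurring API migration in the hunk above (it repeats in `default_column_values.rs`, `parquet_embedded_index.rs`, and `parquet_index.rs` later in this diff): the file schema now moves into the `FileSource` itself via `ParquetSource::new(schema)`, `FileScanConfigBuilder::new` drops its schema argument, and `with_projection` becomes `with_projection_indices`. A minimal sketch of that pattern, using only builder calls that appear in this diff; the import paths are assumptions rather than part of the patch:

```rust
// Sketch only: import paths are assumed; the builder calls mirror this diff.
use std::sync::Arc;

use datafusion::arrow::datatypes::SchemaRef;
use datafusion::datasource::listing::PartitionedFile;
use datafusion::datasource::object_store::ObjectStoreUrl;
use datafusion::datasource::physical_plan::{
    FileScanConfig, FileScanConfigBuilder, ParquetSource,
};
use datafusion::error::Result;

fn build_parquet_scan(schema: SchemaRef, file: PartitionedFile) -> Result<FileScanConfig> {
    // The file schema now belongs to the source itself ...
    let source = Arc::new(ParquetSource::new(schema));
    // ... so the builder takes only the object store URL and the source,
    // and projections are supplied as column indices.
    let config = FileScanConfigBuilder::new(ObjectStoreUrl::parse("file://")?, source)
        .with_projection_indices(Some(vec![0, 1]))
        .with_file(file)
        .build();
    Ok(config)
}
```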
-#[tokio::main] -async fn main() -> Result<()> { +pub async fn function_factory() -> Result<()> { // First we must configure the SessionContext with our function factory let ctx = SessionContext::new() // register custom function factory diff --git a/datafusion-examples/examples/builtin_functions/main.rs b/datafusion-examples/examples/builtin_functions/main.rs new file mode 100644 index 000000000000..c307bc9532bf --- /dev/null +++ b/datafusion-examples/examples/builtin_functions/main.rs @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # These are miscellaneous function-related examples +//! +//! These examples demonstrate miscellaneous function-related features. +//! +//! ## Usage +//! ```bash +//! cargo run --example builtin_functions -- [date_time|function_factory|regexp] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `date_time` — examples of date-time related functions and queries +//! - `function_factory` — register `CREATE FUNCTION` handler to implement SQL macros +//! - `regexp` — examples of using regular expression functions + +mod date_time; +mod function_factory; +mod regexp; + +use std::str::FromStr; + +use datafusion::error::{DataFusionError, Result}; + +enum ExampleKind { + DateTime, + FunctionFactory, + Regexp, +} + +impl AsRef for ExampleKind { + fn as_ref(&self) -> &str { + match self { + Self::DateTime => "date_time", + Self::FunctionFactory => "function_factory", + Self::Regexp => "regexp", + } + } +} + +impl FromStr for ExampleKind { + type Err = DataFusionError; + + fn from_str(s: &str) -> Result { + match s { + "date_time" => Ok(Self::DateTime), + "function_factory" => Ok(Self::FunctionFactory), + "regexp" => Ok(Self::Regexp), + _ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))), + } + } +} + +impl ExampleKind { + const ALL: [Self; 3] = [Self::DateTime, Self::FunctionFactory, Self::Regexp]; + + const EXAMPLE_NAME: &str = "builtin_functions"; + + fn variants() -> Vec<&'static str> { + Self::ALL.iter().map(|x| x.as_ref()).collect() + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::variants().join("|") + ); + + let arg = std::env::args().nth(1).ok_or_else(|| { + eprintln!("{usage}"); + DataFusionError::Execution("Missing argument".to_string()) + })?; + + match arg.parse::()? 
{ + ExampleKind::DateTime => date_time::date_time().await?, + ExampleKind::FunctionFactory => function_factory::function_factory().await?, + ExampleKind::Regexp => regexp::regexp().await?, + } + + Ok(()) +} diff --git a/datafusion-examples/examples/regexp.rs b/datafusion-examples/examples/builtin_functions/regexp.rs similarity index 99% rename from datafusion-examples/examples/regexp.rs rename to datafusion-examples/examples/builtin_functions/regexp.rs index 12d115b9b502..13c078693028 100644 --- a/datafusion-examples/examples/regexp.rs +++ b/datafusion-examples/examples/builtin_functions/regexp.rs @@ -28,12 +28,11 @@ use datafusion::prelude::*; /// /// Supported flags can be found at /// https://docs.rs/regex/latest/regex/#grouping-and-flags -#[tokio::main] -async fn main() -> Result<()> { +pub async fn regexp() -> Result<()> { let ctx = SessionContext::new(); ctx.register_csv( "examples", - "../../datafusion/physical-expr/tests/data/regex.csv", + "datafusion/physical-expr/tests/data/regex.csv", CsvReadOptions::new(), ) .await?; diff --git a/datafusion-examples/examples/csv_json_opener.rs b/datafusion-examples/examples/custom_data_source/csv_json_opener.rs similarity index 89% rename from datafusion-examples/examples/csv_json_opener.rs rename to datafusion-examples/examples/custom_data_source/csv_json_opener.rs index 1a2c2cbff418..4205bbcdf86a 100644 --- a/datafusion-examples/examples/csv_json_opener.rs +++ b/datafusion-examples/examples/custom_data_source/csv_json_opener.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use arrow::datatypes::{DataType, Field, Schema}; +use datafusion::common::config::CsvOptions; use datafusion::{ assert_batches_eq, datasource::{ @@ -39,8 +40,7 @@ use object_store::{local::LocalFileSystem, memory::InMemory, ObjectStore}; /// read data from (CSV/JSON) into Arrow RecordBatches. 
/// /// If you want to query data in CSV or JSON files, see the [`dataframe.rs`] and [`sql_query.rs`] examples -#[tokio::main] -async fn main() -> Result<()> { +pub async fn csv_json_opener() -> Result<()> { csv_opener().await?; json_opener().await?; Ok(()) @@ -55,19 +55,25 @@ async fn csv_opener() -> Result<()> { let path = std::path::Path::new(&path).canonicalize()?; + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let scan_config = FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - Arc::clone(&schema), - Arc::new(CsvSource::default()), + Arc::new(CsvSource::new(Arc::clone(&schema)).with_csv_options(options.clone())), ) - .with_projection(Some(vec![12, 0])) + .with_projection_indices(Some(vec![12, 0])) .with_limit(Some(5)) .with_file(PartitionedFile::new(path.display().to_string(), 10)) .build(); - let config = CsvSource::new(true, b',', b'"') + let config = CsvSource::new(Arc::clone(&schema)) + .with_csv_options(options) .with_comment(Some(b'#')) - .with_schema(schema) .with_batch_size(8192) .with_projection(&scan_config); @@ -123,10 +129,9 @@ async fn json_opener() -> Result<()> { let scan_config = FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - schema, - Arc::new(JsonSource::default()), + Arc::new(JsonSource::new(schema)), ) - .with_projection(Some(vec![1, 0])) + .with_projection_indices(Some(vec![1, 0])) .with_limit(Some(5)) .with_file(PartitionedFile::new(path.to_string(), 10)) .build(); diff --git a/datafusion-examples/examples/csv_sql_streaming.rs b/datafusion-examples/examples/custom_data_source/csv_sql_streaming.rs similarity index 98% rename from datafusion-examples/examples/csv_sql_streaming.rs rename to datafusion-examples/examples/custom_data_source/csv_sql_streaming.rs index 99264bbcb486..aca63c4f35c2 100644 --- a/datafusion-examples/examples/csv_sql_streaming.rs +++ b/datafusion-examples/examples/custom_data_source/csv_sql_streaming.rs @@ -21,8 +21,7 @@ use datafusion::prelude::*; /// This example demonstrates executing a simple query against an Arrow data source (CSV) and /// fetching results with streaming aggregation and streaming window -#[tokio::main] -async fn main() -> Result<()> { +pub async fn csv_sql_streaming() -> Result<()> { // create local execution context let ctx = SessionContext::new(); diff --git a/datafusion-examples/examples/custom_datasource.rs b/datafusion-examples/examples/custom_data_source/custom_datasource.rs similarity index 99% rename from datafusion-examples/examples/custom_datasource.rs rename to datafusion-examples/examples/custom_data_source/custom_datasource.rs index bc865fac5a33..2213d50fccda 100644 --- a/datafusion-examples/examples/custom_datasource.rs +++ b/datafusion-examples/examples/custom_data_source/custom_datasource.rs @@ -42,8 +42,7 @@ use datafusion::catalog::Session; use tokio::time::timeout; /// This example demonstrates executing a simple query against a custom datasource -#[tokio::main] -async fn main() -> Result<()> { +pub async fn custom_datasource() -> Result<()> { // create our custom datasource and adding some users let db = CustomDataSource::default(); db.populate_users(); diff --git a/datafusion-examples/examples/custom_file_casts.rs b/datafusion-examples/examples/custom_data_source/custom_file_casts.rs similarity index 99% rename from datafusion-examples/examples/custom_file_casts.rs rename to datafusion-examples/examples/custom_data_source/custom_file_casts.rs index 4d97ecd91dc6..31ec2845c611 100644 --- 
a/datafusion-examples/examples/custom_file_casts.rs +++ b/datafusion-examples/examples/custom_data_source/custom_file_casts.rs @@ -44,9 +44,7 @@ use object_store::{ObjectStore, PutPayload}; // This example enforces that casts must be strictly widening: if the file type is Int64 and the table type is Int32, it will error // before even reading the data. // Without this custom cast rule DataFusion would happily do the narrowing cast, potentially erroring only if it found a row with data it could not cast. - -#[tokio::main] -async fn main() -> Result<()> { +pub async fn custom_file_casts() -> Result<()> { println!("=== Creating example data ==="); // Create a logical / table schema with an Int32 column diff --git a/datafusion-examples/examples/custom_file_format.rs b/datafusion-examples/examples/custom_data_source/custom_file_format.rs similarity index 95% rename from datafusion-examples/examples/custom_file_format.rs rename to datafusion-examples/examples/custom_data_source/custom_file_format.rs index 67fe642fd46e..510fa53c593f 100644 --- a/datafusion-examples/examples/custom_file_format.rs +++ b/datafusion-examples/examples/custom_data_source/custom_file_format.rs @@ -30,6 +30,7 @@ use datafusion::{ FileFormat, FileFormatFactory, }, physical_plan::{FileScanConfig, FileSinkConfig, FileSource}, + table_schema::TableSchema, MemTable, }, error::Result, @@ -47,6 +48,42 @@ use tempfile::tempdir; /// TSVFileFormatFactory is responsible for creating instances of TSVFileFormat. /// The former, once registered with the SessionState, will then be used /// to facilitate SQL operations on TSV files, such as `COPY TO` shown here. +pub async fn custom_file_format() -> Result<()> { + // Create a new context with the default configuration + let mut state = SessionStateBuilder::new().with_default_features().build(); + + // Register the custom file format + let file_format = Arc::new(TSVFileFactory::new()); + state.register_file_format(file_format, true)?; + + // Create a new context with the custom file format + let ctx = SessionContext::new_with_state(state); + + let mem_table = create_mem_table(); + ctx.register_table("mem_table", mem_table)?; + + let temp_dir = tempdir().unwrap(); + let table_save_path = temp_dir.path().join("mem_table.tsv"); + + let d = ctx + .sql(&format!( + "COPY mem_table TO '{}' STORED AS TSV;", + table_save_path.display(), + )) + .await?; + + let results = d.collect().await?; + println!( + "Number of inserted rows: {:?}", + (results[0] + .column_by_name("count") + .unwrap() + .as_primitive::() + .value(0)) + ); + + Ok(()) +} #[derive(Debug)] /// Custom file format that reads and writes TSV files @@ -128,8 +165,8 @@ impl FileFormat for TSVFileFormat { .await } - fn file_source(&self) -> Arc { - self.csv_file_format.file_source() + fn file_source(&self, table_schema: TableSchema) -> Arc { + self.csv_file_format.file_source(table_schema) } } @@ -180,44 +217,6 @@ impl GetExt for TSVFileFactory { } } -#[tokio::main] -async fn main() -> Result<()> { - // Create a new context with the default configuration - let mut state = SessionStateBuilder::new().with_default_features().build(); - - // Register the custom file format - let file_format = Arc::new(TSVFileFactory::new()); - state.register_file_format(file_format, true).unwrap(); - - // Create a new context with the custom file format - let ctx = SessionContext::new_with_state(state); - - let mem_table = create_mem_table(); - ctx.register_table("mem_table", mem_table).unwrap(); - - let temp_dir = tempdir().unwrap(); - let table_save_path = 
temp_dir.path().join("mem_table.tsv"); - - let d = ctx - .sql(&format!( - "COPY mem_table TO '{}' STORED AS TSV;", - table_save_path.display(), - )) - .await?; - - let results = d.collect().await?; - println!( - "Number of inserted rows: {:?}", - (results[0] - .column_by_name("count") - .unwrap() - .as_primitive::() - .value(0)) - ); - - Ok(()) -} - // create a simple mem table fn create_mem_table() -> Arc { let fields = vec![ diff --git a/datafusion-examples/examples/file_stream_provider.rs b/datafusion-examples/examples/custom_data_source/file_stream_provider.rs similarity index 91% rename from datafusion-examples/examples/file_stream_provider.rs rename to datafusion-examples/examples/custom_data_source/file_stream_provider.rs index e6c59d57e98d..55d2cc8cc0af 100644 --- a/datafusion-examples/examples/file_stream_provider.rs +++ b/datafusion-examples/examples/custom_data_source/file_stream_provider.rs @@ -15,6 +15,29 @@ // specific language governing permissions and limitations // under the License. +/// Demonstrates how to use [`FileStreamProvider`] and [`StreamTable`] to stream data +/// from a file-like source (FIFO) into DataFusion for continuous querying. +/// +/// On non-Windows systems, this example creates a named pipe (FIFO) and +/// writes rows into it asynchronously while DataFusion reads the data +/// through a `FileStreamProvider`. +/// +/// This illustrates how to integrate dynamically updated data sources +/// with DataFusion without needing to reload the entire dataset each time. +/// +/// This example does not work on Windows. +pub async fn file_stream_provider() -> datafusion::error::Result<()> { + #[cfg(target_os = "windows")] + { + println!("file_stream_provider example does not work on windows"); + Ok(()) + } + #[cfg(not(target_os = "windows"))] + { + non_windows::main().await + } +} + #[cfg(not(target_os = "windows"))] mod non_windows { use datafusion::assert_batches_eq; @@ -186,16 +209,3 @@ mod non_windows { Ok(()) } } - -#[tokio::main] -async fn main() -> datafusion::error::Result<()> { - #[cfg(target_os = "windows")] - { - println!("file_stream_provider example does not work on windows"); - Ok(()) - } - #[cfg(not(target_os = "windows"))] - { - non_windows::main().await - } -} diff --git a/datafusion-examples/examples/custom_data_source/main.rs b/datafusion-examples/examples/custom_data_source/main.rs new file mode 100644 index 000000000000..ce0585f8c3f7 --- /dev/null +++ b/datafusion-examples/examples/custom_data_source/main.rs @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # These examples are all related to extending or defining how DataFusion reads data +//! +//! These examples demonstrate how DataFusion reads data. +//! +//! ## Usage +//! ```bash +//! 
cargo run --example custom_data_source -- [csv_json_opener|csv_sql_streaming|custom_datasource|custom_file_casts|custom_file_format|file_stream_provider] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `csv_json_opener` — use low level FileOpener APIs to read CSV/JSON into Arrow RecordBatches +//! - `csv_sql_streaming` — build and run a streaming query plan from a SQL statement against a local CSV file +//! - `custom_datasource` — run queries against a custom datasource (TableProvider) +//! - `custom_file_casts` — implement custom casting rules to adapt file schemas +//! - `custom_file_format` — write data to a custom file format +//! - `file_stream_provider` — run a query on FileStreamProvider which implements StreamProvider for reading and writing to arbitrary stream sources/sinks + +mod csv_json_opener; +mod csv_sql_streaming; +mod custom_datasource; +mod custom_file_casts; +mod custom_file_format; +mod file_stream_provider; + +use std::str::FromStr; + +use datafusion::error::{DataFusionError, Result}; + +enum ExampleKind { + CsvJsonOpener, + CsvSqlStreaming, + CustomDatasource, + CustomFileCasts, + CustomFileFormat, + FileFtreamProvider, +} + +impl AsRef for ExampleKind { + fn as_ref(&self) -> &str { + match self { + Self::CsvJsonOpener => "csv_json_opener", + Self::CsvSqlStreaming => "csv_sql_streaming", + Self::CustomDatasource => "custom_datasource", + Self::CustomFileCasts => "custom_file_casts", + Self::CustomFileFormat => "custom_file_format", + Self::FileFtreamProvider => "file_stream_provider", + } + } +} + +impl FromStr for ExampleKind { + type Err = DataFusionError; + + fn from_str(s: &str) -> Result { + match s { + "csv_json_opener" => Ok(Self::CsvJsonOpener), + "csv_sql_streaming" => Ok(Self::CsvSqlStreaming), + "custom_datasource" => Ok(Self::CustomDatasource), + "custom_file_casts" => Ok(Self::CustomFileCasts), + "custom_file_format" => Ok(Self::CustomFileFormat), + "file_stream_provider" => Ok(Self::FileFtreamProvider), + _ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))), + } + } +} + +impl ExampleKind { + const ALL: [Self; 6] = [ + Self::CsvJsonOpener, + Self::CsvSqlStreaming, + Self::CustomDatasource, + Self::CustomFileCasts, + Self::CustomFileFormat, + Self::FileFtreamProvider, + ]; + + const EXAMPLE_NAME: &str = "custom_data_source"; + + fn variants() -> Vec<&'static str> { + Self::ALL.iter().map(|x| x.as_ref()).collect() + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::variants().join("|") + ); + + let arg = std::env::args().nth(1).ok_or_else(|| { + eprintln!("{usage}"); + DataFusionError::Execution("Missing argument".to_string()) + })?; + + match arg.parse::()? { + ExampleKind::CsvJsonOpener => csv_json_opener::csv_json_opener().await?, + ExampleKind::CsvSqlStreaming => csv_sql_streaming::csv_sql_streaming().await?, + ExampleKind::CustomDatasource => custom_datasource::custom_datasource().await?, + ExampleKind::CustomFileCasts => custom_file_casts::custom_file_casts().await?, + ExampleKind::CustomFileFormat => custom_file_format::custom_file_format().await?, + ExampleKind::FileFtreamProvider => { + file_stream_provider::file_stream_provider().await? 
+ } + } + + Ok(()) +} diff --git a/datafusion-examples/examples/default_column_values.rs b/datafusion-examples/examples/default_column_values.rs index 43e2d4ca0988..bfc60519f26e 100644 --- a/datafusion-examples/examples/default_column_values.rs +++ b/datafusion-examples/examples/default_column_values.rs @@ -235,7 +235,7 @@ impl TableProvider for DefaultValueTableProvider { &df_schema, )?; - let parquet_source = ParquetSource::default() + let parquet_source = ParquetSource::new(schema.clone()) .with_predicate(filter) .with_pushdown_filters(true); @@ -257,10 +257,9 @@ impl TableProvider for DefaultValueTableProvider { let file_scan_config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("memory://")?, - self.schema.clone(), Arc::new(parquet_source), ) - .with_projection(projection.cloned()) + .with_projection_indices(projection.cloned()) .with_limit(limit) .with_file_group(file_group) .with_expr_adapter(Some(Arc::new(DefaultValuePhysicalExprAdapterFactory) as _)); diff --git a/datafusion-examples/examples/external_dependency/query-aws-s3.rs b/datafusion-examples/examples/external_dependency/query-aws-s3.rs index da2d7e4879f9..cd0b4562d5f2 100644 --- a/datafusion-examples/examples/external_dependency/query-aws-s3.rs +++ b/datafusion-examples/examples/external_dependency/query-aws-s3.rs @@ -28,7 +28,6 @@ use url::Url; /// /// - AWS_ACCESS_KEY_ID /// - AWS_SECRET_ACCESS_KEY -/// #[tokio::main] async fn main() -> Result<()> { let ctx = SessionContext::new(); diff --git a/datafusion-examples/examples/flight/flight_client.rs b/datafusion-examples/examples/flight/client.rs similarity index 91% rename from datafusion-examples/examples/flight/flight_client.rs rename to datafusion-examples/examples/flight/client.rs index e3237284b430..031beea47d57 100644 --- a/datafusion-examples/examples/flight/flight_client.rs +++ b/datafusion-examples/examples/flight/client.rs @@ -17,6 +17,7 @@ use std::collections::HashMap; use std::sync::Arc; +use tonic::transport::Endpoint; use datafusion::arrow::datatypes::Schema; @@ -29,12 +30,13 @@ use datafusion::arrow::util::pretty; /// This example shows how to wrap DataFusion with `FlightService` to support looking up schema information for /// Parquet files and executing SQL queries against them on a remote server. /// This example is run along-side the example `flight_server`. -#[tokio::main] -async fn main() -> Result<(), Box> { +pub async fn client() -> Result<(), Box> { let testdata = datafusion::test_util::parquet_test_data(); // Create Flight client - let mut client = FlightServiceClient::connect("http://localhost:50051").await?; + let endpoint = Endpoint::new("http://localhost:50051")?; + let channel = endpoint.connect().await?; + let mut client = FlightServiceClient::new(channel); // Call get_schema to get the schema of a Parquet file let request = tonic::Request::new(FlightDescriptor { diff --git a/datafusion-examples/examples/flight/main.rs b/datafusion-examples/examples/flight/main.rs new file mode 100644 index 000000000000..a83b19bac42e --- /dev/null +++ b/datafusion-examples/examples/flight/main.rs @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # Arrow Flight Examples +//! +//! These examples demonstrate Arrow Flight usage. +//! +//! ## Usage +//! ```bash +//! cargo run --example flight -- [client|server|sql_server] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `client` — run DataFusion as a standalone process and execute SQL queries from a client using the Flight protocol +//! - `server` — run DataFusion as a standalone process and execute SQL queries from a client using the Flight protocol +//! - `sql_server` — run DataFusion as a standalone process and execute SQL queries from JDBC clients + +mod client; +mod server; +mod sql_server; + +use std::str::FromStr; + +use datafusion::error::{DataFusionError, Result}; + +enum ExampleKind { + Client, + Server, + SqlServer, +} + +impl AsRef for ExampleKind { + fn as_ref(&self) -> &str { + match self { + Self::Client => "client", + Self::Server => "server", + Self::SqlServer => "sql_server", + } + } +} + +impl FromStr for ExampleKind { + type Err = DataFusionError; + + fn from_str(s: &str) -> Result { + match s { + "client" => Ok(Self::Client), + "server" => Ok(Self::Server), + "sql_server" => Ok(Self::SqlServer), + _ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))), + } + } +} + +impl ExampleKind { + const ALL: [Self; 3] = [Self::Client, Self::Server, Self::SqlServer]; + + const EXAMPLE_NAME: &str = "flight"; + + fn variants() -> Vec<&'static str> { + Self::ALL.iter().map(|x| x.as_ref()).collect() + } +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::variants().join("|") + ); + + let arg = std::env::args().nth(1).ok_or_else(|| { + eprintln!("{usage}"); + DataFusionError::Execution("Missing argument".to_string()) + })?; + + match arg.parse::()? { + ExampleKind::Client => client::client().await?, + ExampleKind::Server => server::server().await?, + ExampleKind::SqlServer => sql_server::sql_server().await?, + } + + Ok(()) +} diff --git a/datafusion-examples/examples/flight/flight_server.rs b/datafusion-examples/examples/flight/server.rs similarity index 95% rename from datafusion-examples/examples/flight/flight_server.rs rename to datafusion-examples/examples/flight/server.rs index 58bfb7a341c1..dc75287cf2e2 100644 --- a/datafusion-examples/examples/flight/flight_server.rs +++ b/datafusion-examples/examples/flight/server.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
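The flight client change above (tonic 0.14) no longer calls `FlightServiceClient::connect` directly; it builds an explicit `Endpoint`, connects it to obtain a `Channel`, and hands that to the generated client. A minimal sketch of that connection step, with import paths assumed rather than taken from the patch:

```rust
// Sketch of the tonic 0.14 connection pattern used by the updated flight client.
use arrow_flight::flight_service_client::FlightServiceClient;
use tonic::transport::{Channel, Endpoint};

async fn connect_flight_client(
    url: &'static str,
) -> Result<FlightServiceClient<Channel>, Box<dyn std::error::Error>> {
    // Build the endpoint explicitly, connect to get a Channel,
    // then construct the generated client from that Channel.
    let endpoint = Endpoint::new(url)?;
    let channel = endpoint.connect().await?;
    Ok(FlightServiceClient::new(channel))
}
```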
-use arrow::ipc::writer::{DictionaryTracker, IpcDataGenerator}; +use arrow::ipc::writer::{CompressionContext, DictionaryTracker, IpcDataGenerator}; use std::sync::Arc; use arrow_flight::{PollInfo, SchemaAsIpc}; @@ -106,6 +106,7 @@ impl FlightService for FlightServiceImpl { // add an initial FlightData message that sends schema let options = arrow::ipc::writer::IpcWriteOptions::default(); + let mut compression_context = CompressionContext::default(); let schema_flight_data = SchemaAsIpc::new(&schema, &options); let mut flights = vec![FlightData::from(schema_flight_data)]; @@ -115,7 +116,7 @@ impl FlightService for FlightServiceImpl { for batch in &results { let (flight_dictionaries, flight_batch) = encoder - .encoded_batch(batch, &mut tracker, &options) + .encode(batch, &mut tracker, &options, &mut compression_context) .map_err(|e: ArrowError| Status::internal(e.to_string()))?; flights.extend(flight_dictionaries.into_iter().map(Into::into)); @@ -193,8 +194,7 @@ fn to_tonic_err(e: datafusion::error::DataFusionError) -> Status { /// This example shows how to wrap DataFusion with `FlightService` to support looking up schema information for /// Parquet files and executing SQL queries against them on a remote server. /// This example is run along-side the example `flight_client`. -#[tokio::main] -async fn main() -> Result<(), Box> { +pub async fn server() -> Result<(), Box> { let addr = "0.0.0.0:50051".parse()?; let service = FlightServiceImpl {}; diff --git a/datafusion-examples/examples/flight/flight_sql_server.rs b/datafusion-examples/examples/flight/sql_server.rs similarity index 99% rename from datafusion-examples/examples/flight/flight_sql_server.rs rename to datafusion-examples/examples/flight/sql_server.rs index c35debec7d71..d86860f9d436 100644 --- a/datafusion-examples/examples/flight/flight_sql_server.rs +++ b/datafusion-examples/examples/flight/sql_server.rs @@ -68,9 +68,7 @@ macro_rules! 
status { /// /// Based heavily on Ballista's implementation: https://github.com/apache/datafusion-ballista/blob/main/ballista/scheduler/src/flight_sql.rs /// and the example in arrow-rs: https://github.com/apache/arrow-rs/blob/master/arrow-flight/examples/flight_sql_server.rs -/// -#[tokio::main] -async fn main() -> Result<(), Box> { +pub async fn sql_server() -> Result<(), Box> { env_logger::init(); let addr = "0.0.0.0:50051".parse()?; let service = FlightSqlServiceImpl { diff --git a/datafusion-examples/examples/json_shredding.rs b/datafusion-examples/examples/json_shredding.rs index a2e83bc9510a..5ef8b59b6420 100644 --- a/datafusion-examples/examples/json_shredding.rs +++ b/datafusion-examples/examples/json_shredding.rs @@ -142,7 +142,7 @@ async fn main() -> Result<()> { .await?; let plan = format!("{}", arrow::util::pretty::pretty_format_batches(&batches)?); println!("{plan}"); - assert_contains!(&plan, "row_groups_pruned_statistics=1"); + assert_contains!(&plan, "row_groups_pruned_statistics=2 total → 1 matched"); assert_contains!(&plan, "pushdown_rows_pruned=1"); Ok(()) diff --git a/datafusion-examples/examples/parquet_embedded_index.rs b/datafusion-examples/examples/parquet_embedded_index.rs index 3cbe18914775..bc0e5a072caa 100644 --- a/datafusion-examples/examples/parquet_embedded_index.rs +++ b/datafusion-examples/examples/parquet_embedded_index.rs @@ -426,8 +426,10 @@ impl TableProvider for DistinctIndexTable { // Build ParquetSource to actually read the files let url = ObjectStoreUrl::parse("file://")?; - let source = Arc::new(ParquetSource::default().with_enable_page_index(true)); - let mut builder = FileScanConfigBuilder::new(url, self.schema.clone(), source); + let source = Arc::new( + ParquetSource::new(self.schema.clone()).with_enable_page_index(true), + ); + let mut builder = FileScanConfigBuilder::new(url, source); for file in files_to_scan { let path = self.dir.join(file); let len = std::fs::metadata(&path)?.len(); diff --git a/datafusion-examples/examples/parquet_encrypted.rs b/datafusion-examples/examples/parquet_encrypted.rs index e9e239b7a1c3..690d9f2a5f14 100644 --- a/datafusion-examples/examples/parquet_encrypted.rs +++ b/datafusion-examples/examples/parquet_encrypted.rs @@ -16,12 +16,13 @@ // under the License. 
use datafusion::common::DataFusionError; -use datafusion::config::TableParquetOptions; +use datafusion::config::{ConfigFileEncryptionProperties, TableParquetOptions}; use datafusion::dataframe::{DataFrame, DataFrameWriteOptions}; use datafusion::logical_expr::{col, lit}; use datafusion::parquet::encryption::decrypt::FileDecryptionProperties; use datafusion::parquet::encryption::encrypt::FileEncryptionProperties; use datafusion::prelude::{ParquetReadOptions, SessionContext}; +use std::sync::Arc; use tempfile::TempDir; #[tokio::main] @@ -55,7 +56,7 @@ async fn main() -> datafusion::common::Result<()> { // Write encrypted parquet let mut options = TableParquetOptions::default(); - options.crypto.file_encryption = Some((&encrypt).into()); + options.crypto.file_encryption = Some(ConfigFileEncryptionProperties::from(&encrypt)); parquet_df .write_parquet( tempfile_str.as_str(), @@ -100,7 +101,8 @@ async fn query_dataframe(df: &DataFrame) -> Result<(), DataFusionError> { // Setup encryption and decryption properties fn setup_encryption( parquet_df: &DataFrame, -) -> Result<(FileEncryptionProperties, FileDecryptionProperties), DataFusionError> { +) -> Result<(Arc, Arc), DataFusionError> +{ let schema = parquet_df.schema(); let footer_key = b"0123456789012345".to_vec(); // 128bit/16 let column_key = b"1234567890123450".to_vec(); // 128bit/16 diff --git a/datafusion-examples/examples/parquet_encrypted_with_kms.rs b/datafusion-examples/examples/parquet_encrypted_with_kms.rs index 19b0e8d0b199..45bfd183773a 100644 --- a/datafusion-examples/examples/parquet_encrypted_with_kms.rs +++ b/datafusion-examples/examples/parquet_encrypted_with_kms.rs @@ -226,7 +226,7 @@ impl EncryptionFactory for TestEncryptionFactory { options: &EncryptionFactoryOptions, schema: &SchemaRef, _file_path: &Path, - ) -> Result> { + ) -> Result>> { let config: EncryptionConfig = options.to_extension_options()?; // Generate a random encryption key for this file. 
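The `parquet_encrypted.rs` hunk above makes two related changes: the encryption and decryption properties are now `Arc`-wrapped, and the write path converts them explicitly through `ConfigFileEncryptionProperties` instead of relying on `.into()`. A minimal sketch of the updated write path, assuming an already-built `Arc<FileEncryptionProperties>` and the same imports that example uses:

```rust
// Sketch only: mirrors the calls shown in the parquet_encrypted.rs hunk above.
use std::sync::Arc;

use datafusion::config::{ConfigFileEncryptionProperties, TableParquetOptions};
use datafusion::dataframe::{DataFrame, DataFrameWriteOptions};
use datafusion::parquet::encryption::encrypt::FileEncryptionProperties;

async fn write_encrypted(
    df: DataFrame,
    path: &str,
    encrypt: Arc<FileEncryptionProperties>,
) -> datafusion::common::Result<()> {
    let mut options = TableParquetOptions::default();
    // Explicit conversion replaces the previous `(&encrypt).into()`.
    options.crypto.file_encryption =
        Some(ConfigFileEncryptionProperties::from(&encrypt));
    // Write the DataFrame as encrypted Parquet using these writer options.
    df.write_parquet(path, DataFrameWriteOptions::new(), Some(options))
        .await?;
    Ok(())
}
```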
@@ -268,7 +268,7 @@ impl EncryptionFactory for TestEncryptionFactory { &self, _options: &EncryptionFactoryOptions, _file_path: &Path, - ) -> Result> { + ) -> Result>> { let decryption_properties = FileDecryptionProperties::with_key_retriever(Arc::new(TestKeyRetriever {})) .build()?; diff --git a/datafusion-examples/examples/parquet_index.rs b/datafusion-examples/examples/parquet_index.rs index afc3b279f4a9..bc9e2a9226d0 100644 --- a/datafusion-examples/examples/parquet_index.rs +++ b/datafusion-examples/examples/parquet_index.rs @@ -99,7 +99,6 @@ use url::Url; /// Thus some parquet files are │ │ /// "pruned" and thus are not └─────────────┘ /// scanned at all Parquet Files -/// /// ``` /// /// [`ListingTable`]: datafusion::datasource::listing::ListingTable @@ -243,10 +242,11 @@ impl TableProvider for IndexTableProvider { let files = self.index.get_files(predicate.clone())?; let object_store_url = ObjectStoreUrl::parse("file://")?; - let source = Arc::new(ParquetSource::default().with_predicate(predicate)); + let source = + Arc::new(ParquetSource::new(self.schema()).with_predicate(predicate)); let mut file_scan_config_builder = - FileScanConfigBuilder::new(object_store_url, self.schema(), source) - .with_projection(projection.cloned()) + FileScanConfigBuilder::new(object_store_url, source) + .with_projection_indices(projection.cloned()) .with_limit(limit); // Transform to the format needed to pass to DataSourceExec diff --git a/datafusion-examples/examples/sql_query.rs b/datafusion-examples/examples/sql_query.rs index 0ac203cfb7e7..4da07d33d03d 100644 --- a/datafusion-examples/examples/sql_query.rs +++ b/datafusion-examples/examples/sql_query.rs @@ -32,7 +32,6 @@ use std::sync::Arc; /// /// [`query_memtable`]: a simple query against a [`MemTable`] /// [`query_parquet`]: a simple query against a directory with multiple Parquet files -/// #[tokio::main] async fn main() -> Result<()> { query_memtable().await?; diff --git a/datafusion-examples/examples/thread_pools.rs b/datafusion-examples/examples/thread_pools.rs index bba56b2932ab..9842cccfbfe8 100644 --- a/datafusion-examples/examples/thread_pools.rs +++ b/datafusion-examples/examples/thread_pools.rs @@ -342,7 +342,7 @@ impl CpuRuntime { /// message such as: /// /// ```text - ///A Tokio 1.x context was found, but IO is disabled. + /// A Tokio 1.x context was found, but IO is disabled. 
/// ``` pub fn handle(&self) -> &Handle { &self.handle diff --git a/datafusion-examples/examples/advanced_udaf.rs b/datafusion-examples/examples/udf/advanced_udaf.rs similarity index 98% rename from datafusion-examples/examples/advanced_udaf.rs rename to datafusion-examples/examples/udf/advanced_udaf.rs index 89f0a470e32e..81e227bfacee 100644 --- a/datafusion-examples/examples/advanced_udaf.rs +++ b/datafusion-examples/examples/udf/advanced_udaf.rs @@ -469,8 +469,9 @@ fn create_context() -> Result { Ok(ctx) } -#[tokio::main] -async fn main() -> Result<()> { +/// In this example we register `GeoMeanUdaf` and `SimplifiedGeoMeanUdaf` +/// as user defined aggregate functions and invoke them via the DataFrame API and SQL +pub async fn advanced_udaf() -> Result<()> { let ctx = create_context()?; let geo_mean_udf = AggregateUDF::from(GeoMeanUdaf::new()); diff --git a/datafusion-examples/examples/advanced_udf.rs b/datafusion-examples/examples/udf/advanced_udf.rs similarity index 99% rename from datafusion-examples/examples/advanced_udf.rs rename to datafusion-examples/examples/udf/advanced_udf.rs index 56ae599efa11..bb5a68e90cbb 100644 --- a/datafusion-examples/examples/advanced_udf.rs +++ b/datafusion-examples/examples/udf/advanced_udf.rs @@ -245,10 +245,35 @@ fn maybe_pow_in_place(base: f64, exp_array: ArrayRef) -> Result { } } +/// create local execution context with an in-memory table: +/// +/// ```text +/// +-----+-----+ +/// | a | b | +/// +-----+-----+ +/// | 2.1 | 1.0 | +/// | 3.1 | 2.0 | +/// | 4.1 | 3.0 | +/// | 5.1 | 4.0 | +/// +-----+-----+ +/// ``` +fn create_context() -> Result { + // define data. + let a: ArrayRef = Arc::new(Float32Array::from(vec![2.1, 3.1, 4.1, 5.1])); + let b: ArrayRef = Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0, 4.0])); + let batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b)])?; + + // declare a new context. In Spark API, this corresponds to a new SparkSession + let ctx = SessionContext::new(); + + // declare a table in memory. In Spark API, this corresponds to createDataFrame(...). + ctx.register_batch("t", batch)?; + Ok(ctx) +} + /// In this example we register `PowUdf` as a user defined function /// and invoke it via the DataFrame API and SQL -#[tokio::main] -async fn main() -> Result<()> { +pub async fn advanced_udf() -> Result<()> { let ctx = create_context()?; // create the UDF @@ -295,29 +320,3 @@ async fn main() -> Result<()> { Ok(()) } - -/// create local execution context with an in-memory table: -/// -/// ```text -/// +-----+-----+ -/// | a | b | -/// +-----+-----+ -/// | 2.1 | 1.0 | -/// | 3.1 | 2.0 | -/// | 4.1 | 3.0 | -/// | 5.1 | 4.0 | -/// +-----+-----+ -/// ``` -fn create_context() -> Result { - // define data. - let a: ArrayRef = Arc::new(Float32Array::from(vec![2.1, 3.1, 4.1, 5.1])); - let b: ArrayRef = Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0, 4.0])); - let batch = RecordBatch::try_from_iter(vec![("a", a), ("b", b)])?; - - // declare a new context. In Spark API, this corresponds to a new SparkSession - let ctx = SessionContext::new(); - - // declare a table in memory. In Spark API, this corresponds to createDataFrame(...). 
- ctx.register_batch("t", batch)?; - Ok(ctx) -} diff --git a/datafusion-examples/examples/advanced_udwf.rs b/datafusion-examples/examples/udf/advanced_udwf.rs similarity index 98% rename from datafusion-examples/examples/advanced_udwf.rs rename to datafusion-examples/examples/udf/advanced_udwf.rs index ba4c377fd676..86f215e019c7 100644 --- a/datafusion-examples/examples/advanced_udwf.rs +++ b/datafusion-examples/examples/udf/advanced_udwf.rs @@ -236,8 +236,9 @@ async fn create_context() -> Result { Ok(ctx) } -#[tokio::main] -async fn main() -> Result<()> { +/// In this example we register `SmoothItUdf` as user defined window function +/// and invoke it via the DataFrame API and SQL +pub async fn advanced_udwf() -> Result<()> { let ctx = create_context().await?; let smooth_it = WindowUDF::from(SmoothItUdf::new()); ctx.register_udwf(smooth_it.clone()); diff --git a/datafusion-examples/examples/async_udf.rs b/datafusion-examples/examples/udf/async_udf.rs similarity index 98% rename from datafusion-examples/examples/async_udf.rs rename to datafusion-examples/examples/udf/async_udf.rs index b52ec68ea442..475775a599f6 100644 --- a/datafusion-examples/examples/async_udf.rs +++ b/datafusion-examples/examples/udf/async_udf.rs @@ -38,8 +38,9 @@ use datafusion::prelude::{SessionConfig, SessionContext}; use std::any::Any; use std::sync::Arc; -#[tokio::main] -async fn main() -> Result<()> { +/// In this example we register `AskLLM` as an asynchronous user defined function +/// and invoke it via the DataFrame API and SQL +pub async fn async_udf() -> Result<()> { // Use a hard coded parallelism level of 4 so the explain plan // is consistent across machines. let config = SessionConfig::new().with_target_partitions(4); diff --git a/datafusion-examples/examples/udf/main.rs b/datafusion-examples/examples/udf/main.rs new file mode 100644 index 000000000000..104d37393780 --- /dev/null +++ b/datafusion-examples/examples/udf/main.rs @@ -0,0 +1,138 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! # User-Defined Functions Examples +//! +//! These examples demonstrate user-defined functions in DataFusion. +//! +//! ## Usage +//! ```bash +//! cargo run --example udf -- [adv_udaf|adv_udf|adv_udwf|async_udf|udaf|udf|udtf|udwf] +//! ``` +//! +//! Each subcommand runs a corresponding example: +//! - `adv_udaf` — user defined aggregate function example +//! - `adv_udf` — user defined scalar function example +//! - `adv_udwf` — user defined window function example +//! - `async_udf` — asynchronous user defined function example +//! - `udaf` — simple user defined aggregate function example +//! - `udf` — simple user defined scalar function example +//! - `udtf` — simple user defined table function example +//! 
- `udwf` — simple user defined window function example + +mod advanced_udaf; +mod advanced_udf; +mod advanced_udwf; +mod async_udf; +mod simple_udaf; +mod simple_udf; +mod simple_udtf; +mod simple_udwf; + +use std::str::FromStr; + +use datafusion::error::{DataFusionError, Result}; + +enum ExampleKind { + AdvUdaf, + AdvUdf, + AdvUdwf, + AsyncUdf, + Udf, + Udaf, + Udwf, + Udtf, +} + +impl AsRef for ExampleKind { + fn as_ref(&self) -> &str { + match self { + Self::AdvUdaf => "adv_udaf", + Self::AdvUdf => "adv_udf", + Self::AdvUdwf => "adv_udwf", + Self::AsyncUdf => "async_udf", + Self::Udf => "udf", + Self::Udaf => "udaf", + Self::Udwf => "udwt", + Self::Udtf => "udtf", + } + } +} + +impl FromStr for ExampleKind { + type Err = DataFusionError; + + fn from_str(s: &str) -> Result { + match s { + "adv_udaf" => Ok(Self::AdvUdaf), + "adv_udf" => Ok(Self::AdvUdf), + "adv_udwf" => Ok(Self::AdvUdwf), + "async_udf" => Ok(Self::AsyncUdf), + "udaf" => Ok(Self::Udaf), + "udf" => Ok(Self::Udf), + "udtf" => Ok(Self::Udtf), + "udwf" => Ok(Self::Udwf), + _ => Err(DataFusionError::Execution(format!("Unknown example: {s}"))), + } + } +} + +impl ExampleKind { + const ALL: [Self; 8] = [ + Self::AdvUdaf, + Self::AdvUdf, + Self::AdvUdwf, + Self::AsyncUdf, + Self::Udaf, + Self::Udf, + Self::Udtf, + Self::Udwf, + ]; + + const EXAMPLE_NAME: &str = "udf"; + + fn variants() -> Vec<&'static str> { + Self::ALL.iter().map(|x| x.as_ref()).collect() + } +} + +#[tokio::main] +async fn main() -> Result<()> { + let usage = format!( + "Usage: cargo run --example {} -- [{}]", + ExampleKind::EXAMPLE_NAME, + ExampleKind::variants().join("|") + ); + + let arg = std::env::args().nth(1).ok_or_else(|| { + eprintln!("{usage}"); + DataFusionError::Execution("Missing argument".to_string()) + })?; + + match arg.parse::()? { + ExampleKind::AdvUdaf => advanced_udaf::advanced_udaf().await?, + ExampleKind::AdvUdf => advanced_udf::advanced_udf().await?, + ExampleKind::AdvUdwf => advanced_udwf::advanced_udwf().await?, + ExampleKind::AsyncUdf => async_udf::async_udf().await?, + ExampleKind::Udaf => simple_udaf::simple_udaf().await?, + ExampleKind::Udf => simple_udf::simple_udf().await?, + ExampleKind::Udtf => simple_udtf::simple_udtf().await?, + ExampleKind::Udwf => simple_udwf::simple_udwf().await?, + } + + Ok(()) +} diff --git a/datafusion-examples/examples/simple_udaf.rs b/datafusion-examples/examples/udf/simple_udaf.rs similarity index 97% rename from datafusion-examples/examples/simple_udaf.rs rename to datafusion-examples/examples/udf/simple_udaf.rs index 82bde7c034a5..e9f905e72099 100644 --- a/datafusion-examples/examples/simple_udaf.rs +++ b/datafusion-examples/examples/udf/simple_udaf.rs @@ -135,8 +135,9 @@ impl Accumulator for GeometricMean { } } -#[tokio::main] -async fn main() -> Result<()> { +/// In this example we register `GeometricMean` +/// as user defined aggregate function and invoke it via the DataFrame API and SQL +pub async fn simple_udaf() -> Result<()> { let ctx = create_context()?; // here is where we define the UDAF. 
We also declare its signature: diff --git a/datafusion-examples/examples/simple_udf.rs b/datafusion-examples/examples/udf/simple_udf.rs similarity index 99% rename from datafusion-examples/examples/simple_udf.rs rename to datafusion-examples/examples/udf/simple_udf.rs index 5612e0939f70..7d4f3588e313 100644 --- a/datafusion-examples/examples/simple_udf.rs +++ b/datafusion-examples/examples/udf/simple_udf.rs @@ -57,8 +57,7 @@ fn create_context() -> Result { } /// In this example we will declare a single-type, single return type UDF that exponentiates f64, a^b -#[tokio::main] -async fn main() -> Result<()> { +pub async fn simple_udf() -> Result<()> { let ctx = create_context()?; // First, declare the actual implementation of the calculation diff --git a/datafusion-examples/examples/simple_udtf.rs b/datafusion-examples/examples/udf/simple_udtf.rs similarity index 99% rename from datafusion-examples/examples/simple_udtf.rs rename to datafusion-examples/examples/udf/simple_udtf.rs index b65ffb8d7174..a03b157134ae 100644 --- a/datafusion-examples/examples/simple_udtf.rs +++ b/datafusion-examples/examples/udf/simple_udtf.rs @@ -42,8 +42,7 @@ use std::sync::Arc; // 3. Register the function using [`SessionContext::register_udtf`] /// This example demonstrates how to register a TableFunction -#[tokio::main] -async fn main() -> Result<()> { +pub async fn simple_udtf() -> Result<()> { // create local execution context let ctx = SessionContext::new(); diff --git a/datafusion-examples/examples/simple_udwf.rs b/datafusion-examples/examples/udf/simple_udwf.rs similarity index 99% rename from datafusion-examples/examples/simple_udwf.rs rename to datafusion-examples/examples/udf/simple_udwf.rs index 1736ff00bd70..2cf1df8d8ed8 100644 --- a/datafusion-examples/examples/simple_udwf.rs +++ b/datafusion-examples/examples/udf/simple_udwf.rs @@ -42,8 +42,7 @@ async fn create_context() -> Result { } /// In this example we will declare a user defined window function that computes a moving average and then run it using SQL -#[tokio::main] -async fn main() -> Result<()> { +pub async fn simple_udwf() -> Result<()> { let ctx = create_context().await?; // here is where we define the UDWF. We also declare its signature: diff --git a/datafusion/catalog-listing/Cargo.toml b/datafusion/catalog-listing/Cargo.toml index 4eaeed675a20..be1374b37148 100644 --- a/datafusion/catalog-listing/Cargo.toml +++ b/datafusion/catalog-listing/Cargo.toml @@ -46,11 +46,13 @@ futures = { workspace = true } itertools = { workspace = true } log = { workspace = true } object_store = { workspace = true } -tokio = { workspace = true } [dev-dependencies] datafusion-datasource-parquet = { workspace = true } +# Note: add additional linter rules in lib.rs. +# Rust does not support workspace + new linter rules in subcrates yet +# https://github.com/rust-lang/cargo/issues/13157 [lints] workspace = true diff --git a/datafusion/catalog-listing/src/config.rs b/datafusion/catalog-listing/src/config.rs index 90f44de4fdbc..3370d2ea7553 100644 --- a/datafusion/catalog-listing/src/config.rs +++ b/datafusion/catalog-listing/src/config.rs @@ -53,7 +53,6 @@ pub enum SchemaSource { /// /// If not specified, a [`datafusion_datasource::schema_adapter::DefaultSchemaAdapterFactory`] /// will be used, which handles basic schema compatibility cases. -/// #[derive(Debug, Clone, Default)] pub struct ListingTableConfig { /// Paths on the `ObjectStore` for creating [`crate::ListingTable`]. 
@@ -160,8 +159,8 @@ impl ListingTableConfig { /// .with_file_extension(".parquet") /// .with_collect_stat(true); /// - /// let config = ListingTableConfig::new(table_paths) - /// .with_listing_options(options); // Configure file format and options + /// let config = ListingTableConfig::new(table_paths).with_listing_options(options); + /// // Configure file format and options /// ``` pub fn with_listing_options(self, listing_options: ListingOptions) -> Self { // Note: This method properly sets options, but be aware that downstream diff --git a/datafusion/catalog-listing/src/helpers.rs b/datafusion/catalog-listing/src/helpers.rs index 82cc36867939..089457648d21 100644 --- a/datafusion/catalog-listing/src/helpers.rs +++ b/datafusion/catalog-listing/src/helpers.rs @@ -25,12 +25,11 @@ use datafusion_common::internal_err; use datafusion_common::{HashMap, Result, ScalarValue}; use datafusion_datasource::ListingTableUrl; use datafusion_datasource::PartitionedFile; -use datafusion_expr::{BinaryExpr, Operator}; +use datafusion_expr::{lit, utils, BinaryExpr, Operator}; use arrow::{ - array::{Array, ArrayRef, AsArray, StringBuilder}, - compute::{and, cast, prep_null_mask_filter}, - datatypes::{DataType, Field, Fields, Schema}, + array::AsArray, + datatypes::{DataType, Field}, record_batch::RecordBatch, }; use datafusion_expr::execution_props::ExecutionProps; @@ -39,7 +38,7 @@ use futures::{stream::BoxStream, StreamExt, TryStreamExt}; use log::{debug, trace}; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; -use datafusion_common::{Column, DFSchema, DataFusionError}; +use datafusion_common::{Column, DFSchema}; use datafusion_expr::{Expr, Volatility}; use datafusion_physical_expr::create_physical_expr; use object_store::path::Path; @@ -239,105 +238,6 @@ pub async fn list_partitions( Ok(out) } -async fn prune_partitions( - table_path: &ListingTableUrl, - partitions: Vec, - filters: &[Expr], - partition_cols: &[(String, DataType)], -) -> Result> { - if filters.is_empty() { - // prune partitions which don't contain the partition columns - return Ok(partitions - .into_iter() - .filter(|p| { - let cols = partition_cols.iter().map(|x| x.0.as_str()); - !parse_partitions_for_path(table_path, &p.path, cols) - .unwrap_or_default() - .is_empty() - }) - .collect()); - } - - let mut builders: Vec<_> = (0..partition_cols.len()) - .map(|_| StringBuilder::with_capacity(partitions.len(), partitions.len() * 10)) - .collect(); - - for partition in &partitions { - let cols = partition_cols.iter().map(|x| x.0.as_str()); - let parsed = parse_partitions_for_path(table_path, &partition.path, cols) - .unwrap_or_default(); - - let mut builders = builders.iter_mut(); - for (p, b) in parsed.iter().zip(&mut builders) { - b.append_value(p); - } - builders.for_each(|b| b.append_null()); - } - - let arrays = partition_cols - .iter() - .zip(builders) - .map(|((_, d), mut builder)| { - let array = builder.finish(); - cast(&array, d) - }) - .collect::>()?; - - let fields: Fields = partition_cols - .iter() - .map(|(n, d)| Field::new(n, d.clone(), true)) - .collect(); - let schema = Arc::new(Schema::new(fields)); - - let df_schema = DFSchema::from_unqualified_fields( - partition_cols - .iter() - .map(|(n, d)| Field::new(n, d.clone(), true)) - .collect(), - Default::default(), - )?; - - let batch = RecordBatch::try_new(schema, arrays)?; - - // TODO: Plumb this down - let props = ExecutionProps::new(); - - // Applies `filter` to `batch` returning `None` on error - let do_filter = |filter| -> Result { - let expr = 
create_physical_expr(filter, &df_schema, &props)?; - expr.evaluate(&batch)?.into_array(partitions.len()) - }; - - //.Compute the conjunction of the filters - let mask = filters - .iter() - .map(|f| do_filter(f).map(|a| a.as_boolean().clone())) - .reduce(|a, b| Ok(and(&a?, &b?)?)); - - let mask = match mask { - Some(Ok(mask)) => mask, - Some(Err(err)) => return Err(err), - None => return Ok(partitions), - }; - - // Don't retain partitions that evaluated to null - let prepared = match mask.null_count() { - 0 => mask, - _ => prep_null_mask_filter(&mask), - }; - - // Sanity check - assert_eq!(prepared.len(), partitions.len()); - - let filtered = partitions - .into_iter() - .zip(prepared.values()) - .filter_map(|(p, f)| f.then_some(p)) - .collect(); - - Ok(filtered) -} - #[derive(Debug)] enum PartitionValue { Single(String), @@ -412,6 +312,62 @@ pub fn evaluate_partition_prefix<'a>( } } +fn filter_partitions( + pf: PartitionedFile, + filters: &[Expr], + df_schema: &DFSchema, +) -> Result> { + if pf.partition_values.is_empty() && !filters.is_empty() { + return Ok(None); + } else if filters.is_empty() { + return Ok(Some(pf)); + } + + let arrays = pf + .partition_values + .iter() + .map(|v| v.to_array()) + .collect::>()?; + + let batch = RecordBatch::try_new(Arc::clone(df_schema.inner()), arrays)?; + + let filter = utils::conjunction(filters.iter().cloned()).unwrap_or_else(|| lit(true)); + let props = ExecutionProps::new(); + let expr = create_physical_expr(&filter, df_schema, &props)?; + + // Since we're only operating on a single file, our batch and resulting "array" holds only one + // value indicating if the input file matches the provided filters + let matches = expr.evaluate(&batch)?.into_array(1)?; + if matches.as_boolean().value(0) { + return Ok(Some(pf)); + } + + Ok(None) +} + +fn try_into_partitioned_file( + object_meta: ObjectMeta, + partition_cols: &[(String, DataType)], + table_path: &ListingTableUrl, +) -> Result { + let cols = partition_cols.iter().map(|(name, _)| name.as_str()); + let parsed = parse_partitions_for_path(table_path, &object_meta.location, cols); + + let partition_values = parsed + .into_iter() + .flatten() + .zip(partition_cols) + .map(|(parsed, (_, datatype))| { + ScalarValue::try_from_string(parsed.to_string(), datatype) + }) + .collect::>>()?; + + let mut pf: PartitionedFile = object_meta.into(); + pf.partition_values = partition_values; + + Ok(pf) +} + /// Discover the partitions on the given path and prune out files /// that belong to irrelevant partitions using `filters` expressions. /// `filters` should only contain expressions that can be evaluated @@ -424,7 +380,11 @@ pub async fn pruned_partition_list<'a>( file_extension: &'a str, partition_cols: &'a [(String, DataType)], ) -> Result>> { - // if no partition col => simply list all the files + let objects = table_path + .list_all_files(ctx, store, file_extension) + .await? + .try_filter(|object_meta| futures::future::ready(object_meta.size > 0)); + if partition_cols.is_empty() { if !filters.is_empty() { return internal_err!( @@ -432,72 +392,29 @@ pub async fn pruned_partition_list<'a>( table_path ); } - return Ok(Box::pin( - table_path - .list_all_files(ctx, store, file_extension) - .await? 
- .try_filter(|object_meta| futures::future::ready(object_meta.size > 0)) - .map_ok(|object_meta| object_meta.into()), - )); - } - - let partition_prefix = evaluate_partition_prefix(partition_cols, filters); - - let partitions = - list_partitions(store, table_path, partition_cols.len(), partition_prefix) - .await?; - debug!("Listed {} partitions", partitions.len()); - let pruned = - prune_partitions(table_path, partitions, filters, partition_cols).await?; - - debug!("Pruning yielded {} partitions", pruned.len()); - - let stream = futures::stream::iter(pruned) - .map(move |partition: Partition| async move { - let cols = partition_cols.iter().map(|x| x.0.as_str()); - let parsed = parse_partitions_for_path(table_path, &partition.path, cols); + // if no partition col => simply list all the files + Ok(objects.map_ok(|object_meta| object_meta.into()).boxed()) + } else { + let df_schema = DFSchema::from_unqualified_fields( + partition_cols + .iter() + .map(|(n, d)| Field::new(n, d.clone(), true)) + .collect(), + Default::default(), + )?; - let partition_values = parsed - .into_iter() - .flatten() - .zip(partition_cols) - .map(|(parsed, (_, datatype))| { - ScalarValue::try_from_string(parsed.to_string(), datatype) - }) - .collect::>>()?; - - let files = match partition.files { - Some(files) => files, - None => { - trace!("Recursively listing partition {}", partition.path); - store.list(Some(&partition.path)).try_collect().await? - } - }; - let files = files.into_iter().filter(move |o| { - let extension_match = o.location.as_ref().ends_with(file_extension); - // here need to scan subdirectories(`listing_table_ignore_subdirectory` = false) - let glob_match = table_path.contains(&o.location, false); - extension_match && glob_match - }); - - let stream = futures::stream::iter(files.map(move |object_meta| { - Ok(PartitionedFile { - object_meta, - partition_values: partition_values.clone(), - range: None, - statistics: None, - extensions: None, - metadata_size_hint: None, - }) - })); - - Ok::<_, DataFusionError>(stream) - }) - .buffer_unordered(CONCURRENCY_LIMIT) - .try_flatten() - .boxed(); - Ok(stream) + Ok(objects + .map_ok(|object_meta| { + try_into_partitioned_file(object_meta, partition_cols, table_path) + }) + .try_filter_map(move |pf| { + futures::future::ready( + pf.and_then(|pf| filter_partitions(pf, filters, &df_schema)), + ) + }) + .boxed()) + } } /// Extract the partition values for the given `file_path` (in the given `table_path`) @@ -541,22 +458,11 @@ pub fn describe_partition(partition: &Partition) -> (&str, usize, Vec<&str>) { #[cfg(test)] mod tests { - use async_trait::async_trait; - use datafusion_common::config::TableOptions; use datafusion_datasource::file_groups::FileGroup; - use datafusion_execution::config::SessionConfig; - use datafusion_execution::runtime_env::RuntimeEnv; - use futures::FutureExt; - use object_store::memory::InMemory; - use std::any::Any; use std::ops::Not; use super::*; - use datafusion_expr::{ - case, col, lit, AggregateUDF, Expr, LogicalPlan, ScalarUDF, WindowUDF, - }; - use datafusion_physical_expr_common::physical_expr::PhysicalExpr; - use datafusion_physical_plan::ExecutionPlan; + use datafusion_expr::{case, col, lit, Expr}; #[test] fn test_split_files() { @@ -599,209 +505,6 @@ mod tests { assert_eq!(0, chunks.len()); } - #[tokio::test] - async fn test_pruned_partition_list_empty() { - let (store, state) = make_test_store_and_state(&[ - ("tablepath/mypartition=val1/notparquetfile", 100), - ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), - 
("tablepath/file.parquet", 100), - ("tablepath/notapartition/file.parquet", 100), - ("tablepath/notmypartition=val1/file.parquet", 100), - ]); - let filter = Expr::eq(col("mypartition"), lit("val1")); - let pruned = pruned_partition_list( - state.as_ref(), - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - &[filter], - ".parquet", - &[(String::from("mypartition"), DataType::Utf8)], - ) - .await - .expect("partition pruning failed") - .collect::>() - .await; - - assert_eq!(pruned.len(), 0); - } - - #[tokio::test] - async fn test_pruned_partition_list() { - let (store, state) = make_test_store_and_state(&[ - ("tablepath/mypartition=val1/file.parquet", 100), - ("tablepath/mypartition=val2/file.parquet", 100), - ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), - ("tablepath/mypartition=val1/other=val3/file.parquet", 100), - ("tablepath/notapartition/file.parquet", 100), - ("tablepath/notmypartition=val1/file.parquet", 100), - ]); - let filter = Expr::eq(col("mypartition"), lit("val1")); - let pruned = pruned_partition_list( - state.as_ref(), - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - &[filter], - ".parquet", - &[(String::from("mypartition"), DataType::Utf8)], - ) - .await - .expect("partition pruning failed") - .try_collect::>() - .await - .unwrap(); - - assert_eq!(pruned.len(), 2); - let f1 = &pruned[0]; - assert_eq!( - f1.object_meta.location.as_ref(), - "tablepath/mypartition=val1/file.parquet" - ); - assert_eq!(&f1.partition_values, &[ScalarValue::from("val1")]); - let f2 = &pruned[1]; - assert_eq!( - f2.object_meta.location.as_ref(), - "tablepath/mypartition=val1/other=val3/file.parquet" - ); - assert_eq!(f2.partition_values, &[ScalarValue::from("val1"),]); - } - - #[tokio::test] - async fn test_pruned_partition_list_multi() { - let (store, state) = make_test_store_and_state(&[ - ("tablepath/part1=p1v1/file.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v1/file1.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v1/file2.parquet", 100), - ("tablepath/part1=p1v3/part2=p2v1/file2.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v2/file2.parquet", 100), - ]); - let filter1 = Expr::eq(col("part1"), lit("p1v2")); - let filter2 = Expr::eq(col("part2"), lit("p2v1")); - let pruned = pruned_partition_list( - state.as_ref(), - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - &[filter1, filter2], - ".parquet", - &[ - (String::from("part1"), DataType::Utf8), - (String::from("part2"), DataType::Utf8), - ], - ) - .await - .expect("partition pruning failed") - .try_collect::>() - .await - .unwrap(); - - assert_eq!(pruned.len(), 2); - let f1 = &pruned[0]; - assert_eq!( - f1.object_meta.location.as_ref(), - "tablepath/part1=p1v2/part2=p2v1/file1.parquet" - ); - assert_eq!( - &f1.partition_values, - &[ScalarValue::from("p1v2"), ScalarValue::from("p2v1"),] - ); - let f2 = &pruned[1]; - assert_eq!( - f2.object_meta.location.as_ref(), - "tablepath/part1=p1v2/part2=p2v1/file2.parquet" - ); - assert_eq!( - &f2.partition_values, - &[ScalarValue::from("p1v2"), ScalarValue::from("p2v1")] - ); - } - - #[tokio::test] - async fn test_list_partition() { - let (store, _) = make_test_store_and_state(&[ - ("tablepath/part1=p1v1/file.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v1/file1.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v1/file2.parquet", 100), - ("tablepath/part1=p1v3/part2=p2v1/file3.parquet", 100), - ("tablepath/part1=p1v2/part2=p2v2/file4.parquet", 100), - 
("tablepath/part1=p1v2/part2=p2v2/empty.parquet", 0), - ]); - - let partitions = list_partitions( - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - 0, - None, - ) - .await - .expect("listing partitions failed"); - - assert_eq!( - &partitions - .iter() - .map(describe_partition) - .collect::>(), - &vec![ - ("tablepath", 0, vec![]), - ("tablepath/part1=p1v1", 1, vec![]), - ("tablepath/part1=p1v2", 1, vec![]), - ("tablepath/part1=p1v3", 1, vec![]), - ] - ); - - let partitions = list_partitions( - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - 1, - None, - ) - .await - .expect("listing partitions failed"); - - assert_eq!( - &partitions - .iter() - .map(describe_partition) - .collect::>(), - &vec![ - ("tablepath", 0, vec![]), - ("tablepath/part1=p1v1", 1, vec!["file.parquet"]), - ("tablepath/part1=p1v2", 1, vec![]), - ("tablepath/part1=p1v2/part2=p2v1", 2, vec![]), - ("tablepath/part1=p1v2/part2=p2v2", 2, vec![]), - ("tablepath/part1=p1v3", 1, vec![]), - ("tablepath/part1=p1v3/part2=p2v1", 2, vec![]), - ] - ); - - let partitions = list_partitions( - store.as_ref(), - &ListingTableUrl::parse("file:///tablepath/").unwrap(), - 2, - None, - ) - .await - .expect("listing partitions failed"); - - assert_eq!( - &partitions - .iter() - .map(describe_partition) - .collect::>(), - &vec![ - ("tablepath", 0, vec![]), - ("tablepath/part1=p1v1", 1, vec!["file.parquet"]), - ("tablepath/part1=p1v2", 1, vec![]), - ("tablepath/part1=p1v3", 1, vec![]), - ( - "tablepath/part1=p1v2/part2=p2v1", - 2, - vec!["file1.parquet", "file2.parquet"] - ), - ("tablepath/part1=p1v2/part2=p2v2", 2, vec!["file4.parquet"]), - ("tablepath/part1=p1v3/part2=p2v1", 2, vec!["file3.parquet"]), - ] - ); - } - #[test] fn test_parse_partitions_for_path() { assert_eq!( @@ -1016,86 +719,4 @@ mod tests { Some(Path::from("a=1970-01-05")), ); } - - pub fn make_test_store_and_state( - files: &[(&str, u64)], - ) -> (Arc, Arc) { - let memory = InMemory::new(); - - for (name, size) in files { - memory - .put(&Path::from(*name), vec![0; *size as usize].into()) - .now_or_never() - .unwrap() - .unwrap(); - } - - (Arc::new(memory), Arc::new(MockSession {})) - } - - struct MockSession {} - - #[async_trait] - impl Session for MockSession { - fn session_id(&self) -> &str { - unimplemented!() - } - - fn config(&self) -> &SessionConfig { - unimplemented!() - } - - async fn create_physical_plan( - &self, - _logical_plan: &LogicalPlan, - ) -> Result> { - unimplemented!() - } - - fn create_physical_expr( - &self, - _expr: Expr, - _df_schema: &DFSchema, - ) -> Result> { - unimplemented!() - } - - fn scalar_functions(&self) -> &std::collections::HashMap> { - unimplemented!() - } - - fn aggregate_functions( - &self, - ) -> &std::collections::HashMap> { - unimplemented!() - } - - fn window_functions(&self) -> &std::collections::HashMap> { - unimplemented!() - } - - fn runtime_env(&self) -> &Arc { - unimplemented!() - } - - fn execution_props(&self) -> &ExecutionProps { - unimplemented!() - } - - fn as_any(&self) -> &dyn Any { - unimplemented!() - } - - fn table_options(&self) -> &TableOptions { - unimplemented!() - } - - fn table_options_mut(&mut self) -> &mut TableOptions { - unimplemented!() - } - - fn task_ctx(&self) -> Arc { - unimplemented!() - } - } } diff --git a/datafusion/catalog-listing/src/mod.rs b/datafusion/catalog-listing/src/mod.rs index 90d04b46b806..1e06483261d2 100644 --- a/datafusion/catalog-listing/src/mod.rs +++ b/datafusion/catalog-listing/src/mod.rs @@ -15,6 +15,9 @@ // specific 
language governing permissions and limitations // under the License. +// https://github.com/apache/datafusion/issues/18503 +#![deny(clippy::needless_pass_by_value)] +#![cfg_attr(test, allow(clippy::needless_pass_by_value))] #![doc( html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg", html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg" diff --git a/datafusion/catalog-listing/src/options.rs b/datafusion/catalog-listing/src/options.rs index 3cbf3573e951..7da8005f90ec 100644 --- a/datafusion/catalog-listing/src/options.rs +++ b/datafusion/catalog-listing/src/options.rs @@ -100,10 +100,8 @@ impl ListingOptions { /// # use datafusion_catalog_listing::ListingOptions; /// # use datafusion_datasource_parquet::file_format::ParquetFormat; /// - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_file_extension(".parquet"); + /// let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())) + /// .with_file_extension(".parquet"); /// /// assert_eq!(listing_options.file_extension, ".parquet"); /// ``` @@ -123,10 +121,8 @@ impl ListingOptions { /// # use datafusion_datasource_parquet::file_format::ParquetFormat; /// /// let extension = Some(".parquet"); - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_file_extension_opt(extension); + /// let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())) + /// .with_file_extension_opt(extension); /// /// assert_eq!(listing_options.file_extension, ".parquet"); /// ``` @@ -216,10 +212,8 @@ impl ListingOptions { /// # use datafusion_catalog_listing::ListingOptions; /// # use datafusion_datasource_parquet::file_format::ParquetFormat; /// - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_collect_stat(true); + /// let listing_options = + /// ListingOptions::new(Arc::new(ParquetFormat::default())).with_collect_stat(true); /// /// assert_eq!(listing_options.collect_stat, true); /// ``` @@ -235,10 +229,8 @@ impl ListingOptions { /// # use datafusion_catalog_listing::ListingOptions; /// # use datafusion_datasource_parquet::file_format::ParquetFormat; /// - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_target_partitions(8); + /// let listing_options = + /// ListingOptions::new(Arc::new(ParquetFormat::default())).with_target_partitions(8); /// /// assert_eq!(listing_options.target_partitions, 8); /// ``` @@ -255,15 +247,11 @@ impl ListingOptions { /// # use datafusion_catalog_listing::ListingOptions; /// # use datafusion_datasource_parquet::file_format::ParquetFormat; /// - /// // Tell datafusion that the files are sorted by column "a" - /// let file_sort_order = vec![vec![ - /// col("a").sort(true, true) - /// ]]; + /// // Tell datafusion that the files are sorted by column "a" + /// let file_sort_order = vec![vec![col("a").sort(true, true)]]; /// - /// let listing_options = ListingOptions::new(Arc::new( - /// ParquetFormat::default() - /// )) - /// .with_file_sort_order(file_sort_order.clone()); + /// let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())) + /// .with_file_sort_order(file_sort_order.clone()); /// /// assert_eq!(listing_options.file_sort_order, 
file_sort_order); /// ``` diff --git a/datafusion/catalog-listing/src/table.rs b/datafusion/catalog-listing/src/table.rs index e9ac1bf097a2..33d5c86bf88d 100644 --- a/datafusion/catalog-listing/src/table.rs +++ b/datafusion/catalog-listing/src/table.rs @@ -34,7 +34,7 @@ use datafusion_datasource::schema_adapter::{ DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory, }; use datafusion_datasource::{ - compute_all_files_statistics, ListingTableUrl, PartitionedFile, + compute_all_files_statistics, ListingTableUrl, PartitionedFile, TableSchema, }; use datafusion_execution::cache::cache_manager::FileStatisticsCache; use datafusion_execution::cache::cache_unit::DefaultFileStatisticsCache; @@ -338,7 +338,16 @@ impl ListingTable { fn create_file_source_with_schema_adapter( &self, ) -> datafusion_common::Result> { - let mut source = self.options.format.file_source(); + let table_schema = TableSchema::new( + Arc::clone(&self.file_schema), + self.options + .table_partition_cols + .iter() + .map(|(col, field)| Arc::new(Field::new(col, field.clone(), false))) + .collect(), + ); + + let mut source = self.options.format.file_source(table_schema); // Apply schema adapter to source if available // // The source will use this SchemaAdapter to adapt data batches as they flow up the plan. @@ -418,7 +427,7 @@ impl TableProvider for ListingTable { .options .table_partition_cols .iter() - .map(|col| Ok(self.table_schema.field_with_name(&col.0)?.clone())) + .map(|col| Ok(Arc::new(self.table_schema.field_with_name(&col.0)?.clone()))) .collect::>>()?; let table_partition_col_names = table_partition_cols @@ -491,20 +500,15 @@ impl TableProvider for ListingTable { .format .create_physical_plan( state, - FileScanConfigBuilder::new( - object_store_url, - Arc::clone(&self.file_schema), - file_source, - ) - .with_file_groups(partitioned_file_lists) - .with_constraints(self.constraints.clone()) - .with_statistics(statistics) - .with_projection(projection) - .with_limit(limit) - .with_output_ordering(output_ordering) - .with_table_partition_cols(table_partition_cols) - .with_expr_adapter(self.expr_adapter_factory.clone()) - .build(), + FileScanConfigBuilder::new(object_store_url, file_source) + .with_file_groups(partitioned_file_lists) + .with_constraints(self.constraints.clone()) + .with_statistics(statistics) + .with_projection_indices(projection) + .with_limit(limit) + .with_output_ordering(output_ordering) + .with_expr_adapter(self.expr_adapter_factory.clone()) + .build(), ) .await?; diff --git a/datafusion/catalog/Cargo.toml b/datafusion/catalog/Cargo.toml index a1db45654be0..1009e9aee477 100644 --- a/datafusion/catalog/Cargo.toml +++ b/datafusion/catalog/Cargo.toml @@ -49,5 +49,8 @@ object_store = { workspace = true } parking_lot = { workspace = true } tokio = { workspace = true } +# Note: add additional linter rules in lib.rs. +# Rust does not support workspace + new linter rules in subcrates yet +# https://github.com/rust-lang/cargo/issues/13157 [lints] workspace = true diff --git a/datafusion/common-runtime/Cargo.toml b/datafusion/common-runtime/Cargo.toml index e53d97b41360..fd9a818bcb1d 100644 --- a/datafusion/common-runtime/Cargo.toml +++ b/datafusion/common-runtime/Cargo.toml @@ -31,6 +31,9 @@ rust-version = { workspace = true } [package.metadata.docs.rs] all-features = true +# Note: add additional linter rules in lib.rs. 
+# Rust does not support workspace + new linter rules in subcrates yet +# https://github.com/rust-lang/cargo/issues/13157 [lints] workspace = true diff --git a/datafusion/common-runtime/src/lib.rs b/datafusion/common-runtime/src/lib.rs index 5d404d99e776..eb32fead7ecf 100644 --- a/datafusion/common-runtime/src/lib.rs +++ b/datafusion/common-runtime/src/lib.rs @@ -15,6 +15,9 @@ // specific language governing permissions and limitations // under the License. +// https://github.com/apache/datafusion/issues/18503 +#![deny(clippy::needless_pass_by_value)] +#![cfg_attr(test, allow(clippy::needless_pass_by_value))] #![doc( html_logo_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg", html_favicon_url = "https://raw.githubusercontent.com/apache/datafusion/19fe44cf2f30cbdd63d4a4f52c74055163c6cc38/docs/logos/standalone_logo/logo_original.svg" diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index f5e51cb236d4..b222ae12b92f 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -31,6 +31,9 @@ rust-version = { workspace = true } [package.metadata.docs.rs] all-features = true +# Note: add additional linter rules in lib.rs. +# Rust does not support workspace + new linter rules in subcrates yet +# https://github.com/rust-lang/cargo/issues/13157 [lints] workspace = true @@ -66,12 +69,12 @@ half = { workspace = true } hashbrown = { workspace = true } hex = { workspace = true, optional = true } indexmap = { workspace = true } -libc = "0.2.176" +libc = "0.2.177" log = { workspace = true } object_store = { workspace = true, optional = true } parquet = { workspace = true, optional = true, default-features = true } paste = "1.0.15" -pyo3 = { version = "0.25", optional = true } +pyo3 = { version = "0.26", optional = true } recursive = { workspace = true, optional = true } sqlparser = { workspace = true, optional = true } tokio = { workspace = true } diff --git a/datafusion/common/src/cast.rs b/datafusion/common/src/cast.rs index e6eda3c585e8..b95167ca1390 100644 --- a/datafusion/common/src/cast.rs +++ b/datafusion/common/src/cast.rs @@ -24,8 +24,8 @@ use crate::{downcast_value, Result}; use arrow::array::{ BinaryViewArray, Decimal32Array, Decimal64Array, DurationMicrosecondArray, DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, Float16Array, - Int16Array, Int8Array, LargeBinaryArray, LargeStringArray, StringViewArray, - UInt16Array, + Int16Array, Int8Array, LargeBinaryArray, LargeListViewArray, LargeStringArray, + ListViewArray, StringViewArray, UInt16Array, }; use arrow::{ array::{ @@ -324,3 +324,13 @@ pub fn as_generic_string_array( ) -> Result<&GenericStringArray> { Ok(downcast_value!(array, GenericStringArray, T)) } + +// Downcast Array to ListViewArray +pub fn as_list_view_array(array: &dyn Array) -> Result<&ListViewArray> { + Ok(downcast_value!(array, ListViewArray)) +} + +// Downcast Array to LargeListViewArray +pub fn as_large_list_view_array(array: &dyn Array) -> Result<&LargeListViewArray> { + Ok(downcast_value!(array, LargeListViewArray)) +} diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 271ba6ddcff5..0ed499da0475 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -26,14 +26,15 @@ use crate::format::{ExplainAnalyzeLevel, ExplainFormat}; use crate::parsers::CompressionTypeVariant; use crate::utils::get_available_parallelism; use crate::{DataFusionError, Result}; 
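The cast.rs hunk above adds `as_list_view_array` and `as_large_list_view_array` downcast helpers alongside the existing ones. A tiny sketch of the error path (the `Int32Array` input is illustrative, not from the diff):

```rust
// Sketch only: the new helpers return a DataFusion error instead of panicking
// when the input is not the expected array type.
use std::sync::Arc;

use arrow::array::{ArrayRef, Int32Array};
use datafusion_common::cast::{as_large_list_view_array, as_list_view_array};

fn main() {
    let ints: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));

    // An Int32Array is neither a ListViewArray nor a LargeListViewArray,
    // so both helpers report a downcast error rather than panicking.
    assert!(as_list_view_array(ints.as_ref()).is_err());
    assert!(as_large_list_view_array(ints.as_ref()).is_err());
}
```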
+#[cfg(feature = "parquet_encryption")] +use hex; use std::any::Any; use std::collections::{BTreeMap, HashMap}; use std::error::Error; use std::fmt::{self, Display}; use std::str::FromStr; - #[cfg(feature = "parquet_encryption")] -use hex; +use std::sync::Arc; /// A macro that wraps a configuration struct and automatically derives /// [`Default`] and [`ConfigField`] for it, allowing it to be used @@ -56,7 +57,7 @@ use hex; /// /// Field 3 doc /// field3: Option, default = None /// } -///} +/// } /// ``` /// /// Will generate @@ -466,9 +467,8 @@ config_namespace! { /// The default time zone /// - /// Some functions, e.g. `EXTRACT(HOUR from SOME_TIME)`, shift the underlying datetime - /// according to this time zone, and then extract the hour - pub time_zone: String, default = "+00:00".into() + /// Some functions, e.g. `now` return timestamps in this time zone + pub time_zone: Option, default = None /// Parquet options pub parquet: ParquetOptions, default = Default::default() @@ -517,6 +517,23 @@ config_namespace! { /// batches and merged. pub sort_in_place_threshold_bytes: usize, default = 1024 * 1024 + /// Maximum size in bytes for individual spill files before rotating to a new file. + /// + /// When operators spill data to disk (e.g., RepartitionExec), they write + /// multiple batches to the same file until this size limit is reached, then rotate + /// to a new file. This reduces syscall overhead compared to one-file-per-batch + /// while preventing files from growing too large. + /// + /// A larger value reduces file creation overhead but may hold more disk space. + /// A smaller value creates more files but allows finer-grained space reclamation + /// as files can be deleted once fully consumed. + /// + /// Now only `RepartitionExec` supports this spill file rotation feature, other spilling operators + /// may create spill files larger than the limit. + /// + /// Default: 128 MB + pub max_spill_file_size_bytes: usize, default = 128 * 1024 * 1024 + /// Number of files to read in parallel when inferring schema and statistics pub meta_fetch_concurrency: usize, default = 32 @@ -620,7 +637,10 @@ config_namespace! { /// bytes of the parquet file optimistically. If not specified, two reads are required: /// One read to fetch the 8-byte parquet footer and /// another to fetch the metadata length encoded in the footer - pub metadata_size_hint: Option, default = None + /// Default setting to 512 KiB, which should be sufficient for most parquet files, + /// it can reduce one I/O operation per parquet file. If the metadata is larger than + /// the hint, two reads will still be performed. + pub metadata_size_hint: Option, default = Some(512 * 1024) /// (reading) If true, filter expressions are be applied during the parquet decoding operation to /// reduce the number of rows decoded. This optimization is sometimes called "late materialization". @@ -1322,36 +1342,35 @@ impl ConfigOptions { /// # Example /// ``` /// use datafusion_common::{ -/// config::ConfigExtension, extensions_options, -/// config::ConfigOptions, +/// config::ConfigExtension, config::ConfigOptions, extensions_options, /// }; -/// // Define a new configuration struct using the `extensions_options` macro -/// extensions_options! { -/// /// My own config options. -/// pub struct MyConfig { -/// /// Should "foo" be replaced by "bar"? -/// pub foo_to_bar: bool, default = true +/// // Define a new configuration struct using the `extensions_options` macro +/// extensions_options! { +/// /// My own config options. 
+/// pub struct MyConfig { +/// /// Should "foo" be replaced by "bar"? +/// pub foo_to_bar: bool, default = true /// -/// /// How many "baz" should be created? -/// pub baz_count: usize, default = 1337 -/// } -/// } +/// /// How many "baz" should be created? +/// pub baz_count: usize, default = 1337 +/// } +/// } /// -/// impl ConfigExtension for MyConfig { +/// impl ConfigExtension for MyConfig { /// const PREFIX: &'static str = "my_config"; -/// } +/// } /// -/// // set up config struct and register extension -/// let mut config = ConfigOptions::default(); -/// config.extensions.insert(MyConfig::default()); +/// // set up config struct and register extension +/// let mut config = ConfigOptions::default(); +/// config.extensions.insert(MyConfig::default()); /// -/// // overwrite config default -/// config.set("my_config.baz_count", "42").unwrap(); +/// // overwrite config default +/// config.set("my_config.baz_count", "42").unwrap(); /// -/// // check config state -/// let my_config = config.extensions.get::().unwrap(); -/// assert!(my_config.foo_to_bar,); -/// assert_eq!(my_config.baz_count, 42,); +/// // check config state +/// let my_config = config.extensions.get::().unwrap(); +/// assert!(my_config.foo_to_bar,); +/// assert_eq!(my_config.baz_count, 42,); /// ``` /// /// # Note: @@ -2409,13 +2428,13 @@ impl From for FileEncryptionProperties { hex::decode(&val.aad_prefix_as_hex).expect("Invalid AAD prefix"); fep = fep.with_aad_prefix(aad_prefix); } - fep.build().unwrap() + Arc::unwrap_or_clone(fep.build().unwrap()) } } #[cfg(feature = "parquet_encryption")] -impl From<&FileEncryptionProperties> for ConfigFileEncryptionProperties { - fn from(f: &FileEncryptionProperties) -> Self { +impl From<&Arc> for ConfigFileEncryptionProperties { + fn from(f: &Arc) -> Self { let (column_names_vec, column_keys_vec, column_metas_vec) = f.column_keys(); let mut column_encryption_properties: HashMap< @@ -2557,13 +2576,13 @@ impl From for FileDecryptionProperties { fep = fep.with_aad_prefix(aad_prefix); } - fep.build().unwrap() + Arc::unwrap_or_clone(fep.build().unwrap()) } } #[cfg(feature = "parquet_encryption")] -impl From<&FileDecryptionProperties> for ConfigFileDecryptionProperties { - fn from(f: &FileDecryptionProperties) -> Self { +impl From<&Arc> for ConfigFileDecryptionProperties { + fn from(f: &Arc) -> Self { let (column_names_vec, column_keys_vec) = f.column_keys(); let mut column_decryption_properties: HashMap< String, @@ -2834,6 +2853,7 @@ mod tests { }; use std::any::Any; use std::collections::HashMap; + use std::sync::Arc; #[derive(Default, Debug, Clone)] pub struct TestExtensionConfig { @@ -2990,16 +3010,15 @@ mod tests { .unwrap(); // Test round-trip - let config_encrypt: ConfigFileEncryptionProperties = - (&file_encryption_properties).into(); - let encryption_properties_built: FileEncryptionProperties = - config_encrypt.clone().into(); + let config_encrypt = + ConfigFileEncryptionProperties::from(&file_encryption_properties); + let encryption_properties_built = + Arc::new(FileEncryptionProperties::from(config_encrypt.clone())); assert_eq!(file_encryption_properties, encryption_properties_built); - let config_decrypt: ConfigFileDecryptionProperties = - (&decryption_properties).into(); - let decryption_properties_built: FileDecryptionProperties = - config_decrypt.clone().into(); + let config_decrypt = ConfigFileDecryptionProperties::from(&decryption_properties); + let decryption_properties_built = + Arc::new(FileDecryptionProperties::from(config_decrypt.clone())); 
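For the new `max_spill_file_size_bytes` execution option documented earlier in this config.rs hunk, a minimal sketch of reading and overriding it through `ConfigOptions` (direct field access is used here to avoid assuming a particular string key):

```rust
// Sketch only: exercises the new spill-file rotation limit via direct field access.
use datafusion_common::config::ConfigOptions;

fn main() {
    let mut config = ConfigOptions::default();
    // The documented default is 128 MB
    assert_eq!(config.execution.max_spill_file_size_bytes, 128 * 1024 * 1024);

    // Lower the per-file spill limit to 64 MB; per the docs above, RepartitionExec
    // rotates to a new spill file once this many bytes have been written.
    config.execution.max_spill_file_size_bytes = 64 * 1024 * 1024;
}
```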
assert_eq!(decryption_properties, decryption_properties_built); /////////////////////////////////////////////////////////////////////////////////// diff --git a/datafusion/common/src/datatype.rs b/datafusion/common/src/datatype.rs index 544ec0c2468c..65f639521186 100644 --- a/datafusion/common/src/datatype.rs +++ b/datafusion/common/src/datatype.rs @@ -81,7 +81,6 @@ pub trait FieldExt { /// assert_eq!(list_field.data_type(), &DataType::List(Arc::new( /// Field::new("item", DataType::Int32, true) /// ))); - /// fn into_list(self) -> Self; /// Return a new Field representing this Field as the item type of a @@ -107,7 +106,6 @@ pub trait FieldExt { /// Field::new("item", DataType::Int32, true)), /// 3 /// )); - /// fn into_fixed_size_list(self, list_size: i32) -> Self; /// Update the field to have the default list field name ("item") diff --git a/datafusion/common/src/dfschema.rs b/datafusion/common/src/dfschema.rs index 6866b4011f9e..24d152a7dba8 100644 --- a/datafusion/common/src/dfschema.rs +++ b/datafusion/common/src/dfschema.rs @@ -56,12 +56,10 @@ pub type DFSchemaRef = Arc; /// an Arrow schema. /// /// ```rust -/// use datafusion_common::{DFSchema, Column}; /// use arrow::datatypes::{DataType, Field, Schema}; +/// use datafusion_common::{Column, DFSchema}; /// -/// let arrow_schema = Schema::new(vec![ -/// Field::new("c1", DataType::Int32, false), -/// ]); +/// let arrow_schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]); /// /// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema).unwrap(); /// let column = Column::from_qualified_name("t1.c1"); @@ -77,12 +75,10 @@ pub type DFSchemaRef = Arc; /// Create an unqualified schema using TryFrom: /// /// ```rust -/// use datafusion_common::{DFSchema, Column}; /// use arrow::datatypes::{DataType, Field, Schema}; +/// use datafusion_common::{Column, DFSchema}; /// -/// let arrow_schema = Schema::new(vec![ -/// Field::new("c1", DataType::Int32, false), -/// ]); +/// let arrow_schema = Schema::new(vec![Field::new("c1", DataType::Int32, false)]); /// /// let df_schema = DFSchema::try_from(arrow_schema).unwrap(); /// let column = Column::new_unqualified("c1"); @@ -94,13 +90,15 @@ pub type DFSchemaRef = Arc; /// Use the `Into` trait to convert `DFSchema` into an Arrow schema: /// /// ```rust +/// use arrow::datatypes::{Field, Schema}; /// use datafusion_common::DFSchema; -/// use arrow::datatypes::{Schema, Field}; /// use std::collections::HashMap; /// -/// let df_schema = DFSchema::from_unqualified_fields(vec![ -/// Field::new("c1", arrow::datatypes::DataType::Int32, false), -/// ].into(),HashMap::new()).unwrap(); +/// let df_schema = DFSchema::from_unqualified_fields( +/// vec![Field::new("c1", arrow::datatypes::DataType::Int32, false)].into(), +/// HashMap::new(), +/// ) +/// .unwrap(); /// let schema: &Schema = df_schema.as_arrow(); /// assert_eq!(schema.fields().len(), 1); /// ``` @@ -884,22 +882,26 @@ impl DFSchema { /// # Example /// /// ``` - /// use datafusion_common::DFSchema; /// use arrow::datatypes::{DataType, Field, Schema}; + /// use datafusion_common::DFSchema; /// use std::collections::HashMap; /// /// let schema = DFSchema::from_unqualified_fields( /// vec![ /// Field::new("id", DataType::Int32, false), /// Field::new("name", DataType::Utf8, true), - /// ].into(), - /// HashMap::new() - /// ).unwrap(); + /// ] + /// .into(), + /// HashMap::new(), + /// ) + /// .unwrap(); /// - /// assert_eq!(schema.tree_string().to_string(), - /// r#"root + /// assert_eq!( + /// schema.tree_string().to_string(), 
+ /// r#"root /// |-- id: int32 (nullable = false) - /// |-- name: utf8 (nullable = true)"#); + /// |-- name: utf8 (nullable = true)"# + /// ); /// ``` pub fn tree_string(&self) -> impl Display + '_ { let mut result = String::from("root\n"); @@ -1417,7 +1419,7 @@ mod tests { fn from_qualified_schema_into_arrow_schema() -> Result<()> { let schema = DFSchema::try_from_qualified_schema("t1", &test_schema_1())?; let arrow_schema = schema.as_arrow(); - insta::assert_snapshot!(arrow_schema, @r#"Field { name: "c0", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, Field { name: "c1", data_type: Boolean, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }"#); + insta::assert_snapshot!(arrow_schema.to_string(), @r#"Field { "c0": nullable Boolean }, Field { "c1": nullable Boolean }"#); Ok(()) } diff --git a/datafusion/common/src/diagnostic.rs b/datafusion/common/src/diagnostic.rs index 0dce8e6a56ec..b25bf1c12e44 100644 --- a/datafusion/common/src/diagnostic.rs +++ b/datafusion/common/src/diagnostic.rs @@ -30,8 +30,11 @@ use crate::Span; /// ```rust /// # use datafusion_common::{Location, Span, Diagnostic}; /// let span = Some(Span { -/// start: Location{ line: 2, column: 1 }, -/// end: Location{ line: 4, column: 15 } +/// start: Location { line: 2, column: 1 }, +/// end: Location { +/// line: 4, +/// column: 15, +/// }, /// }); /// let diagnostic = Diagnostic::new_error("Something went wrong", span) /// .with_help("Have you tried turning it on and off again?", None); diff --git a/datafusion/common/src/encryption.rs b/datafusion/common/src/encryption.rs index b764ad77cff1..2a8cfdbc8996 100644 --- a/datafusion/common/src/encryption.rs +++ b/datafusion/common/src/encryption.rs @@ -24,38 +24,10 @@ pub use parquet::encryption::decrypt::FileDecryptionProperties; pub use parquet::encryption::encrypt::FileEncryptionProperties; #[cfg(not(feature = "parquet_encryption"))] -#[derive(Default, Debug)] +#[derive(Default, Clone, Debug)] pub struct FileDecryptionProperties; #[cfg(not(feature = "parquet_encryption"))] -#[derive(Default, Debug)] +#[derive(Default, Clone, Debug)] pub struct FileEncryptionProperties; pub use crate::config::{ConfigFileDecryptionProperties, ConfigFileEncryptionProperties}; - -#[cfg(feature = "parquet_encryption")] -pub fn map_encryption_to_config_encryption( - encryption: Option<&FileEncryptionProperties>, -) -> Option { - encryption.map(|fe| fe.into()) -} - -#[cfg(not(feature = "parquet_encryption"))] -pub fn map_encryption_to_config_encryption( - _encryption: Option<&FileEncryptionProperties>, -) -> Option { - None -} - -#[cfg(feature = "parquet_encryption")] -pub fn map_config_decryption_to_decryption( - decryption: &ConfigFileDecryptionProperties, -) -> FileDecryptionProperties { - decryption.clone().into() -} - -#[cfg(not(feature = "parquet_encryption"))] -pub fn map_config_decryption_to_decryption( - _decryption: &ConfigFileDecryptionProperties, -) -> FileDecryptionProperties { - FileDecryptionProperties {} -} diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 210f0442972d..4fa6d28e7324 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -684,7 +684,10 @@ impl DataFusionError { /// let mut builder = DataFusionError::builder(); /// builder.add_error(DataFusionError::Internal("foo".to_owned())); /// // ok_or returns the value if no errors have been added -/// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo"); +/// assert_contains!( 
+/// builder.error_or(42).unwrap_err().to_string(), +/// "Internal error: foo" +/// ); /// ``` #[derive(Debug, Default)] pub struct DataFusionErrorBuilder(Vec); @@ -702,7 +705,10 @@ impl DataFusionErrorBuilder { /// # use datafusion_common::{assert_contains, DataFusionError}; /// let mut builder = DataFusionError::builder(); /// builder.add_error(DataFusionError::Internal("foo".to_owned())); - /// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo"); + /// assert_contains!( + /// builder.error_or(42).unwrap_err().to_string(), + /// "Internal error: foo" + /// ); /// ``` pub fn add_error(&mut self, error: DataFusionError) { self.0.push(error); @@ -714,8 +720,11 @@ impl DataFusionErrorBuilder { /// ``` /// # use datafusion_common::{assert_contains, DataFusionError}; /// let builder = DataFusionError::builder() - /// .with_error(DataFusionError::Internal("foo".to_owned())); - /// assert_contains!(builder.error_or(42).unwrap_err().to_string(), "Internal error: foo"); + /// .with_error(DataFusionError::Internal("foo".to_owned())); + /// assert_contains!( + /// builder.error_or(42).unwrap_err().to_string(), + /// "Internal error: foo" + /// ); /// ``` pub fn with_error(mut self, error: DataFusionError) -> Self { self.0.push(error); @@ -749,6 +758,116 @@ macro_rules! unwrap_or_internal_err { }; } +/// Assert a condition, returning `DataFusionError::Internal` on failure. +/// +/// # Examples +/// +/// ```text +/// assert_or_internal_err!(predicate); +/// assert_or_internal_err!(predicate, "human readable message"); +/// assert_or_internal_err!(predicate, format!("details: {}", value)); +/// ``` +#[macro_export] +macro_rules! assert_or_internal_err { + ($cond:expr) => { + if !$cond { + return Err(DataFusionError::Internal(format!( + "Assertion failed: {}", + stringify!($cond) + ))); + } + }; + ($cond:expr, $($arg:tt)+) => { + if !$cond { + return Err(DataFusionError::Internal(format!( + "Assertion failed: {}: {}", + stringify!($cond), + format!($($arg)+) + ))); + } + }; +} + +/// Assert equality, returning `DataFusionError::Internal` on failure. +/// +/// # Examples +/// +/// ```text +/// assert_eq_or_internal_err!(actual, expected); +/// assert_eq_or_internal_err!(left_expr, right_expr, "values must match"); +/// assert_eq_or_internal_err!(lhs, rhs, "metadata: {}", extra); +/// ``` +#[macro_export] +macro_rules! assert_eq_or_internal_err { + ($left:expr, $right:expr $(,)?) => {{ + let left_val = &$left; + let right_val = &$right; + if left_val != right_val { + return Err(DataFusionError::Internal(format!( + "Assertion failed: {} == {} (left: {:?}, right: {:?})", + stringify!($left), + stringify!($right), + left_val, + right_val + ))); + } + }}; + ($left:expr, $right:expr, $($arg:tt)+) => {{ + let left_val = &$left; + let right_val = &$right; + if left_val != right_val { + return Err(DataFusionError::Internal(format!( + "Assertion failed: {} == {} (left: {:?}, right: {:?}): {}", + stringify!($left), + stringify!($right), + left_val, + right_val, + format!($($arg)+) + ))); + } + }}; +} + +/// Assert inequality, returning `DataFusionError::Internal` on failure. +/// +/// # Examples +/// +/// ```text +/// assert_ne_or_internal_err!(left, right); +/// assert_ne_or_internal_err!(lhs_expr, rhs_expr, "values must differ"); +/// assert_ne_or_internal_err!(a, b, "context {}", info); +/// ``` +#[macro_export] +macro_rules! assert_ne_or_internal_err { + ($left:expr, $right:expr $(,)?) 
=> {{ + let left_val = &$left; + let right_val = &$right; + if left_val == right_val { + return Err(DataFusionError::Internal(format!( + "Assertion failed: {} != {} (left: {:?}, right: {:?})", + stringify!($left), + stringify!($right), + left_val, + right_val + ))); + } + }}; + ($left:expr, $right:expr, $($arg:tt)+) => {{ + let left_val = &$left; + let right_val = &$right; + if left_val == right_val { + return Err(DataFusionError::Internal(format!( + "Assertion failed: {} != {} (left: {:?}, right: {:?}): {}", + stringify!($left), + stringify!($right), + left_val, + right_val, + format!($($arg)+) + ))); + } + }}; +} + /// Add a macros for concise DataFusionError::* errors declaration /// supports placeholders the same way as `format!` /// Examples: @@ -965,6 +1084,115 @@ mod test { use std::sync::Arc; use arrow::error::ArrowError; + use insta::assert_snapshot; + + fn ok_result() -> Result<()> { + Ok(()) + } + + #[test] + fn test_assert_eq_or_internal_err_passes() -> Result<()> { + assert_eq_or_internal_err!(1, 1); + ok_result() + } + + #[test] + fn test_assert_eq_or_internal_err_fails() { + fn check() -> Result<()> { + assert_eq_or_internal_err!(1, 2, "expected equality"); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: 1 == 2 (left: 1, right: 2): expected equality. + This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } + + #[test] + fn test_assert_ne_or_internal_err_passes() -> Result<()> { + assert_ne_or_internal_err!(1, 2); + ok_result() + } + + #[test] + fn test_assert_ne_or_internal_err_fails() { + fn check() -> Result<()> { + assert_ne_or_internal_err!(3, 3, "values must differ"); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: 3 != 3 (left: 3, right: 3): values must differ. + This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } + + #[test] + fn test_assert_or_internal_err_passes() -> Result<()> { + assert_or_internal_err!(true); + assert_or_internal_err!(true, "message"); + ok_result() + } + + #[test] + fn test_assert_or_internal_err_fails_default() { + fn check() -> Result<()> { + assert_or_internal_err!(false); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: false. + This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } + + #[test] + fn test_assert_or_internal_err_fails_with_message() { + fn check() -> Result<()> { + assert_or_internal_err!(false, "custom message"); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: false: custom message. + This issue was likely caused by a bug in DataFusion's code. 
Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } + + #[test] + fn test_assert_or_internal_err_with_format_arguments() { + fn check() -> Result<()> { + assert_or_internal_err!(false, "custom {}", 42); + ok_result() + } + + let err = check().unwrap_err(); + assert_snapshot!( + err.to_string(), + @r" + Internal error: Assertion failed: false: custom 42. + This issue was likely caused by a bug in DataFusion's code. Please help us to resolve this by filing a bug report in our issue tracker: https://github.com/apache/datafusion/issues + " + ); + } #[test] fn test_error_size() { diff --git a/datafusion/common/src/file_options/parquet_writer.rs b/datafusion/common/src/file_options/parquet_writer.rs index 3977f2b489e1..564929c61bab 100644 --- a/datafusion/common/src/file_options/parquet_writer.rs +++ b/datafusion/common/src/file_options/parquet_writer.rs @@ -402,15 +402,14 @@ pub(crate) fn parse_statistics_string(str_setting: &str) -> Result( /// with the new hash using `combine_hashes` #[cfg(not(feature = "force_hash_collisions"))] fn hash_array( - array: T, + array: &T, random_state: &RandomState, hashes_buffer: &mut [u64], rehash: bool, @@ -215,12 +212,11 @@ fn hash_dictionary( // Hash each dictionary value once, and then use that computed // hash for each key value to avoid a potentially expensive // redundant hashing for large dictionary elements (e.g. strings) - let dict_values = Arc::clone(array.values()); + let dict_values = array.values(); let mut dict_hashes = vec![0; dict_values.len()]; - create_hashes(&[dict_values], random_state, &mut dict_hashes)?; + create_hashes([dict_values], random_state, &mut dict_hashes)?; // combine hash for each index in values - let dict_values = array.values(); for (hash, key) in hashes_buffer.iter_mut().zip(array.keys().iter()) { if let Some(key) = key { let idx = key.as_usize(); @@ -308,11 +304,11 @@ fn hash_list_array( where OffsetSize: OffsetSizeTrait, { - let values = Arc::clone(array.values()); + let values = array.values(); let offsets = array.value_offsets(); let nulls = array.nulls(); let mut values_hashes = vec![0u64; values.len()]; - create_hashes(&[values], random_state, &mut values_hashes)?; + create_hashes([values], random_state, &mut values_hashes)?; if let Some(nulls) = nulls { for (i, (start, stop)) in offsets.iter().zip(offsets.iter().skip(1)).enumerate() { if nulls.is_valid(i) { @@ -339,11 +335,11 @@ fn hash_fixed_list_array( random_state: &RandomState, hashes_buffer: &mut [u64], ) -> Result<()> { - let values = Arc::clone(array.values()); + let values = array.values(); let value_length = array.value_length() as usize; let nulls = array.nulls(); let mut values_hashes = vec![0u64; values.len()]; - create_hashes(&[values], random_state, &mut values_hashes)?; + create_hashes([values], random_state, &mut values_hashes)?; if let Some(nulls) = nulls { for i in 0..array.len() { if nulls.is_valid(i) { @@ -366,83 +362,132 @@ fn hash_fixed_list_array( Ok(()) } -/// Test version of `create_hashes` that produces the same value for -/// all hashes (to test collisions) -/// -/// See comments on `hashes_buffer` for more details +/// Internal helper function that hashes a single array and either initializes or combines +/// the hash values in the buffer. +#[cfg(not(feature = "force_hash_collisions"))] +fn hash_single_array( + array: &dyn Array, + random_state: &RandomState, + hashes_buffer: &mut [u64], + rehash: bool, +) -> Result<()> { + downcast_primitive_array! 
{ + array => hash_array_primitive(array, random_state, hashes_buffer, rehash), + DataType::Null => hash_null(random_state, hashes_buffer, rehash), + DataType::Boolean => hash_array(&as_boolean_array(array)?, random_state, hashes_buffer, rehash), + DataType::Utf8 => hash_array(&as_string_array(array)?, random_state, hashes_buffer, rehash), + DataType::Utf8View => hash_array(&as_string_view_array(array)?, random_state, hashes_buffer, rehash), + DataType::LargeUtf8 => hash_array(&as_largestring_array(array), random_state, hashes_buffer, rehash), + DataType::Binary => hash_array(&as_generic_binary_array::(array)?, random_state, hashes_buffer, rehash), + DataType::BinaryView => hash_array(&as_binary_view_array(array)?, random_state, hashes_buffer, rehash), + DataType::LargeBinary => hash_array(&as_generic_binary_array::(array)?, random_state, hashes_buffer, rehash), + DataType::FixedSizeBinary(_) => { + let array: &FixedSizeBinaryArray = array.as_any().downcast_ref().unwrap(); + hash_array(&array, random_state, hashes_buffer, rehash) + } + DataType::Dictionary(_, _) => downcast_dictionary_array! { + array => hash_dictionary(array, random_state, hashes_buffer, rehash)?, + _ => unreachable!() + } + DataType::Struct(_) => { + let array = as_struct_array(array)?; + hash_struct_array(array, random_state, hashes_buffer)?; + } + DataType::List(_) => { + let array = as_list_array(array)?; + hash_list_array(array, random_state, hashes_buffer)?; + } + DataType::LargeList(_) => { + let array = as_large_list_array(array)?; + hash_list_array(array, random_state, hashes_buffer)?; + } + DataType::Map(_, _) => { + let array = as_map_array(array)?; + hash_map_array(array, random_state, hashes_buffer)?; + } + DataType::FixedSizeList(_,_) => { + let array = as_fixed_size_list_array(array)?; + hash_fixed_list_array(array, random_state, hashes_buffer)?; + } + _ => { + // This is internal because we should have caught this before. + return _internal_err!( + "Unsupported data type in hasher: {}", + array.data_type() + ); + } + } + Ok(()) +} + +/// Test version of `hash_single_array` that forces all hashes to collide to zero. #[cfg(feature = "force_hash_collisions")] -pub fn create_hashes<'a>( - _arrays: &[ArrayRef], +fn hash_single_array( + _array: &dyn Array, _random_state: &RandomState, - hashes_buffer: &'a mut Vec, -) -> Result<&'a mut Vec> { + hashes_buffer: &mut [u64], + _rehash: bool, +) -> Result<()> { for hash in hashes_buffer.iter_mut() { *hash = 0 } - Ok(hashes_buffer) + Ok(()) +} + +/// Something that can be returned as a `&dyn Array`. +/// +/// We want `create_hashes` to accept either `&dyn Array` or `ArrayRef`, +/// and this seems the best way to do so. +/// +/// We tried having it accept `AsRef` +/// but that is not implemented for and cannot be implemented for +/// `&dyn Array` so callers that have the latter would not be able +/// to call `create_hashes` directly. This shim trait makes it possible. +pub trait AsDynArray { + fn as_dyn_array(&self) -> &dyn Array; +} + +impl AsDynArray for dyn Array { + fn as_dyn_array(&self) -> &dyn Array { + self + } +} + +impl AsDynArray for &dyn Array { + fn as_dyn_array(&self) -> &dyn Array { + *self + } +} + +impl AsDynArray for ArrayRef { + fn as_dyn_array(&self) -> &dyn Array { + self.as_ref() + } } -/// Creates hash values for every row, based on the values in the -/// columns. +impl AsDynArray for &ArrayRef { + fn as_dyn_array(&self) -> &dyn Array { + self.as_ref() + } +} + +/// Creates hash values for every row, based on the values in the columns. 
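// A minimal standalone sketch (not part of this patch) of what the generic
// `create_hashes` introduced here accepts: any `IntoIterator` whose items
// implement the `AsDynArray` shim above, so both the old `&[ArrayRef]` calling
// convention and plain `&dyn Array` values compile. It assumes the function
// remains re-exported as `datafusion_common::hash_utils::create_hashes` and
// still takes an `ahash::RandomState`, as in the surrounding code.
use std::sync::Arc;

use ahash::RandomState;
use arrow::array::{Array, ArrayRef, Int32Array, StringArray};
use datafusion_common::hash_utils::create_hashes;

fn hash_two_columns() -> datafusion_common::Result<()> {
    let ints: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
    let strings: ArrayRef = Arc::new(StringArray::from(vec!["a", "b", "c"]));
    let random_state = RandomState::with_seeds(0, 0, 0, 0);

    // Old style: a slice of `ArrayRef` works exactly as before.
    let mut hashes = vec![0u64; ints.len()];
    create_hashes(
        &[Arc::clone(&ints), Arc::clone(&strings)],
        &random_state,
        &mut hashes,
    )?;

    // New style: borrowed `&dyn Array` values, no `Arc` cloning needed.
    let columns: [&dyn Array; 2] = [ints.as_ref(), strings.as_ref()];
    let mut hashes_dyn = vec![0u64; ints.len()];
    create_hashes(columns, &random_state, &mut hashes_dyn)?;

    assert_eq!(hashes, hashes_dyn);
    Ok(())
}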
/// /// The number of rows to hash is determined by `hashes_buffer.len()`. -/// `hashes_buffer` should be pre-sized appropriately -#[cfg(not(feature = "force_hash_collisions"))] -pub fn create_hashes<'a>( - arrays: &[ArrayRef], +/// `hashes_buffer` should be pre-sized appropriately. +pub fn create_hashes<'a, I, T>( + arrays: I, random_state: &RandomState, hashes_buffer: &'a mut Vec, -) -> Result<&'a mut Vec> { - for (i, col) in arrays.iter().enumerate() { - let array = col.as_ref(); +) -> Result<&'a mut Vec> +where + I: IntoIterator, + T: AsDynArray, +{ + for (i, array) in arrays.into_iter().enumerate() { // combine hashes with `combine_hashes` for all columns besides the first let rehash = i >= 1; - downcast_primitive_array! { - array => hash_array_primitive(array, random_state, hashes_buffer, rehash), - DataType::Null => hash_null(random_state, hashes_buffer, rehash), - DataType::Boolean => hash_array(as_boolean_array(array)?, random_state, hashes_buffer, rehash), - DataType::Utf8 => hash_array(as_string_array(array)?, random_state, hashes_buffer, rehash), - DataType::Utf8View => hash_array(as_string_view_array(array)?, random_state, hashes_buffer, rehash), - DataType::LargeUtf8 => hash_array(as_largestring_array(array), random_state, hashes_buffer, rehash), - DataType::Binary => hash_array(as_generic_binary_array::(array)?, random_state, hashes_buffer, rehash), - DataType::BinaryView => hash_array(as_binary_view_array(array)?, random_state, hashes_buffer, rehash), - DataType::LargeBinary => hash_array(as_generic_binary_array::(array)?, random_state, hashes_buffer, rehash), - DataType::FixedSizeBinary(_) => { - let array: &FixedSizeBinaryArray = array.as_any().downcast_ref().unwrap(); - hash_array(array, random_state, hashes_buffer, rehash) - } - DataType::Dictionary(_, _) => downcast_dictionary_array! { - array => hash_dictionary(array, random_state, hashes_buffer, rehash)?, - _ => unreachable!() - } - DataType::Struct(_) => { - let array = as_struct_array(array)?; - hash_struct_array(array, random_state, hashes_buffer)?; - } - DataType::List(_) => { - let array = as_list_array(array)?; - hash_list_array(array, random_state, hashes_buffer)?; - } - DataType::LargeList(_) => { - let array = as_large_list_array(array)?; - hash_list_array(array, random_state, hashes_buffer)?; - } - DataType::Map(_, _) => { - let array = as_map_array(array)?; - hash_map_array(array, random_state, hashes_buffer)?; - } - DataType::FixedSizeList(_,_) => { - let array = as_fixed_size_list_array(array)?; - hash_fixed_list_array(array, random_state, hashes_buffer)?; - } - _ => { - // This is internal because we should have caught this before. 
- return _internal_err!( - "Unsupported data type in hasher: {}", - col.data_type() - ); - } - } + hash_single_array(array.as_dyn_array(), random_state, hashes_buffer, rehash)?; } Ok(hashes_buffer) } @@ -465,7 +510,7 @@ mod tests { .collect::() .with_precision_and_scale(20, 3) .unwrap(); - let array_ref = Arc::new(array); + let array_ref: ArrayRef = Arc::new(array); let random_state = RandomState::with_seeds(0, 0, 0, 0); let hashes_buff = &mut vec![0; array_ref.len()]; let hashes = create_hashes(&[array_ref], &random_state, hashes_buff)?; @@ -478,15 +523,21 @@ mod tests { let empty_array = FixedSizeListBuilder::new(StringBuilder::new(), 1).finish(); let random_state = RandomState::with_seeds(0, 0, 0, 0); let hashes_buff = &mut vec![0; 0]; - let hashes = create_hashes(&[Arc::new(empty_array)], &random_state, hashes_buff)?; + let hashes = create_hashes( + &[Arc::new(empty_array) as ArrayRef], + &random_state, + hashes_buff, + )?; assert_eq!(hashes, &Vec::::new()); Ok(()) } #[test] fn create_hashes_for_float_arrays() -> Result<()> { - let f32_arr = Arc::new(Float32Array::from(vec![0.12, 0.5, 1f32, 444.7])); - let f64_arr = Arc::new(Float64Array::from(vec![0.12, 0.5, 1f64, 444.7])); + let f32_arr: ArrayRef = + Arc::new(Float32Array::from(vec![0.12, 0.5, 1f32, 444.7])); + let f64_arr: ArrayRef = + Arc::new(Float64Array::from(vec![0.12, 0.5, 1f64, 444.7])); let random_state = RandomState::with_seeds(0, 0, 0, 0); let hashes_buff = &mut vec![0; f32_arr.len()]; @@ -514,8 +565,10 @@ mod tests { Some(b"Longer than 12 bytes string"), ]; - let binary_array = Arc::new(binary.iter().cloned().collect::<$ARRAY>()); - let ref_array = Arc::new(binary.iter().cloned().collect::()); + let binary_array: ArrayRef = + Arc::new(binary.iter().cloned().collect::<$ARRAY>()); + let ref_array: ArrayRef = + Arc::new(binary.iter().cloned().collect::()); let random_state = RandomState::with_seeds(0, 0, 0, 0); @@ -553,7 +606,7 @@ mod tests { #[test] fn create_hashes_fixed_size_binary() -> Result<()> { let input_arg = vec![vec![1, 2], vec![5, 6], vec![5, 6]]; - let fixed_size_binary_array = + let fixed_size_binary_array: ArrayRef = Arc::new(FixedSizeBinaryArray::try_from_iter(input_arg.into_iter()).unwrap()); let random_state = RandomState::with_seeds(0, 0, 0, 0); @@ -580,8 +633,9 @@ mod tests { Some("Longer than 12 bytes string"), ]; - let string_array = Arc::new(strings.iter().cloned().collect::<$ARRAY>()); - let dict_array = Arc::new( + let string_array: ArrayRef = + Arc::new(strings.iter().cloned().collect::<$ARRAY>()); + let dict_array: ArrayRef = Arc::new( strings .iter() .cloned() @@ -629,8 +683,9 @@ mod tests { fn create_hashes_for_dict_arrays() { let strings = [Some("foo"), None, Some("bar"), Some("foo"), None]; - let string_array = Arc::new(strings.iter().cloned().collect::()); - let dict_array = Arc::new( + let string_array: ArrayRef = + Arc::new(strings.iter().cloned().collect::()); + let dict_array: ArrayRef = Arc::new( strings .iter() .cloned() @@ -865,8 +920,9 @@ mod tests { let strings1 = [Some("foo"), None, Some("bar")]; let strings2 = [Some("blarg"), Some("blah"), None]; - let string_array = Arc::new(strings1.iter().cloned().collect::()); - let dict_array = Arc::new( + let string_array: ArrayRef = + Arc::new(strings1.iter().cloned().collect::()); + let dict_array: ArrayRef = Arc::new( strings2 .iter() .cloned() @@ -896,4 +952,52 @@ mod tests { assert_ne!(one_col_hashes, two_col_hashes); } + + #[test] + fn test_create_hashes_from_arrays() { + let int_array: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 
4])); + let float_array: ArrayRef = + Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0, 4.0])); + + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let hashes_buff = &mut vec![0; int_array.len()]; + let hashes = + create_hashes(&[int_array, float_array], &random_state, hashes_buff).unwrap(); + assert_eq!(hashes.len(), 4,); + } + + #[test] + fn test_create_hashes_from_dyn_arrays() { + let int_array: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4])); + let float_array: ArrayRef = + Arc::new(Float64Array::from(vec![1.0, 2.0, 3.0, 4.0])); + + // Verify that we can call create_hashes with only &dyn Array + fn test(arr1: &dyn Array, arr2: &dyn Array) { + let random_state = RandomState::with_seeds(0, 0, 0, 0); + let hashes_buff = &mut vec![0; arr1.len()]; + let hashes = create_hashes([arr1, arr2], &random_state, hashes_buff).unwrap(); + assert_eq!(hashes.len(), 4,); + } + test(&*int_array, &*float_array); + } + + #[test] + fn test_create_hashes_equivalence() { + let array: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4])); + let random_state = RandomState::with_seeds(0, 0, 0, 0); + + let mut hashes1 = vec![0; array.len()]; + create_hashes( + &[Arc::clone(&array) as ArrayRef], + &random_state, + &mut hashes1, + ) + .unwrap(); + + let mut hashes2 = vec![0; array.len()]; + create_hashes([array], &random_state, &mut hashes2).unwrap(); + + assert_eq!(hashes1, hashes2); + } } diff --git a/datafusion/common/src/lib.rs b/datafusion/common/src/lib.rs index 76c7b46e3273..549c265024f9 100644 --- a/datafusion/common/src/lib.rs +++ b/datafusion/common/src/lib.rs @@ -23,6 +23,9 @@ // Make sure fast / cheap clones on Arc are explicit: // https://github.com/apache/datafusion/issues/11143 #![deny(clippy::clone_on_ref_ptr)] +// https://github.com/apache/datafusion/issues/18503 +#![deny(clippy::needless_pass_by_value)] +#![cfg_attr(test, allow(clippy::needless_pass_by_value))] mod column; mod dfschema; diff --git a/datafusion/common/src/metadata.rs b/datafusion/common/src/metadata.rs index 39065808efb9..3a10cc2b42f9 100644 --- a/datafusion/common/src/metadata.rs +++ b/datafusion/common/src/metadata.rs @@ -171,7 +171,6 @@ pub fn format_type_and_metadata( /// // Add any metadata from `FieldMetadata` to `Field` /// let updated_field = metadata.add_to_field(field); /// ``` -/// #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] pub struct FieldMetadata { /// The inner metadata of a literal expression, which is a map of string diff --git a/datafusion/common/src/nested_struct.rs b/datafusion/common/src/nested_struct.rs index 38060e370bfa..d43816f75b0e 100644 --- a/datafusion/common/src/nested_struct.rs +++ b/datafusion/common/src/nested_struct.rs @@ -110,16 +110,19 @@ fn cast_struct_column( /// temporal values are formatted when cast to strings. 
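// A small standalone sketch (not part of this patch) of what the
// `clippy::needless_pass_by_value` lint newly denied in lib.rs above flags:
// functions that take ownership of a value they only read. `total_owned` and
// `total_borrowed` are hypothetical helpers, not DataFusion APIs. Where an
// owned parameter must stay for API compatibility, individual functions opt
// out with `#[expect(clippy::needless_pass_by_value)]`, as a later hunk in
// this patch does.

// Flagged: takes `Vec<i64>` by value but never consumes or mutates it.
fn total_owned(values: Vec<i64>) -> i64 {
    values.iter().sum()
}

// Preferred: borrow a slice, so callers keep ownership and nothing is moved.
fn total_borrowed(values: &[i64]) -> i64 {
    values.iter().sum()
}

fn main() {
    let v = vec![1, 2, 3];
    assert_eq!(total_borrowed(&v), 6);
    assert_eq!(total_owned(v), 6);
}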
/// /// ``` -/// use std::sync::Arc; -/// use arrow::array::{Int64Array, ArrayRef}; +/// use arrow::array::{ArrayRef, Int64Array}; /// use arrow::compute::CastOptions; /// use arrow::datatypes::{DataType, Field}; /// use datafusion_common::nested_struct::cast_column; +/// use std::sync::Arc; /// /// let source: ArrayRef = Arc::new(Int64Array::from(vec![1, i64::MAX])); /// let target = Field::new("ints", DataType::Int32, true); /// // Permit lossy conversions by producing NULL on overflow instead of erroring -/// let options = CastOptions { safe: true, ..Default::default() }; +/// let options = CastOptions { +/// safe: true, +/// ..Default::default() +/// }; /// let result = cast_column(&source, &target, &options).unwrap(); /// assert!(result.is_null(1)); /// ``` diff --git a/datafusion/common/src/pyarrow.rs b/datafusion/common/src/pyarrow.rs index ff413e08ab07..18c6739735ff 100644 --- a/datafusion/common/src/pyarrow.rs +++ b/datafusion/common/src/pyarrow.rs @@ -22,7 +22,7 @@ use arrow::pyarrow::{FromPyArrow, ToPyArrow}; use pyo3::exceptions::PyException; use pyo3::prelude::PyErr; use pyo3::types::{PyAnyMethods, PyList}; -use pyo3::{Bound, FromPyObject, IntoPyObject, PyAny, PyObject, PyResult, Python}; +use pyo3::{Bound, FromPyObject, IntoPyObject, PyAny, PyResult, Python}; use crate::{DataFusionError, ScalarValue}; @@ -52,11 +52,11 @@ impl FromPyArrow for ScalarValue { } impl ToPyArrow for ScalarValue { - fn to_pyarrow(&self, py: Python) -> PyResult { + fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { let array = self.to_array()?; // convert to pyarrow array using C data interface let pyarray = array.to_data().to_pyarrow(py)?; - let pyscalar = pyarray.call_method1(py, "__getitem__", (0,))?; + let pyscalar = pyarray.call_method1("__getitem__", (0,))?; Ok(pyscalar) } @@ -79,23 +79,22 @@ impl<'source> IntoPyObject<'source> for ScalarValue { let array = self.to_array()?; // convert to pyarrow array using C data interface let pyarray = array.to_data().to_pyarrow(py)?; - let pyarray_bound = pyarray.bind(py); - pyarray_bound.call_method1("__getitem__", (0,)) + pyarray.call_method1("__getitem__", (0,)) } } #[cfg(test)] mod tests { use pyo3::ffi::c_str; - use pyo3::prepare_freethreaded_python; use pyo3::py_run; use pyo3::types::PyDict; + use pyo3::Python; use super::*; fn init_python() { - prepare_freethreaded_python(); - Python::with_gil(|py| { + Python::initialize(); + Python::attach(|py| { if py.run(c_str!("import pyarrow"), None, None).is_err() { let locals = PyDict::new(py); py.run( @@ -127,7 +126,7 @@ mod tests { fn test_roundtrip() { init_python(); - let example_scalars = vec![ + let example_scalars = [ ScalarValue::Boolean(Some(true)), ScalarValue::Int32(Some(23)), ScalarValue::Float64(Some(12.34)), @@ -135,12 +134,11 @@ mod tests { ScalarValue::Date32(Some(1234)), ]; - Python::with_gil(|py| { + Python::attach(|py| { for scalar in example_scalars.iter() { - let result = ScalarValue::from_pyarrow_bound( - scalar.to_pyarrow(py).unwrap().bind(py), - ) - .unwrap(); + let result = + ScalarValue::from_pyarrow_bound(&scalar.to_pyarrow(py).unwrap()) + .unwrap(); assert_eq!(scalar, &result); } }); @@ -150,7 +148,7 @@ mod tests { fn test_py_scalar() -> PyResult<()> { init_python(); - Python::with_gil(|py| -> PyResult<()> { + Python::attach(|py| -> PyResult<()> { let scalar_float = ScalarValue::Float64(Some(12.34)); let py_float = scalar_float .into_pyobject(py)? 
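// A minimal standalone sketch (not part of this patch) of the pyo3 API
// migration the pyarrow.rs hunk above performs: `prepare_freethreaded_python()`
// becomes `Python::initialize()` and `Python::with_gil` becomes
// `Python::attach`. The names come from that hunk; the snippet assumes a pyo3
// release that exposes them and reuses the same `c_str!` / `eval` idiom as the
// tests above.
use pyo3::ffi::c_str;
use pyo3::prelude::*;

fn main() -> PyResult<()> {
    // Start the embedded interpreter once.
    Python::initialize();
    // Attach to the interpreter for the duration of the closure.
    Python::attach(|py| -> PyResult<()> {
        let answer: i64 = py.eval(c_str!("6 * 7"), None, None)?.extract()?;
        assert_eq!(answer, 42);
        Ok(())
    })
}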
diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index a70a027a8fac..fadd2e41eaba 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -171,9 +171,9 @@ pub use struct_builder::ScalarStructBuilder; /// let field_b = Field::new("b", DataType::Utf8, false); /// /// let s1 = ScalarStructBuilder::new() -/// .with_scalar(field_a, ScalarValue::from(1i32)) -/// .with_scalar(field_b, ScalarValue::from("foo")) -/// .build(); +/// .with_scalar(field_a, ScalarValue::from(1i32)) +/// .with_scalar(field_b, ScalarValue::from("foo")) +/// .build(); /// ``` /// /// ## Example: Creating a null [`ScalarValue::Struct`] using [`ScalarStructBuilder`] @@ -199,13 +199,13 @@ pub use struct_builder::ScalarStructBuilder; /// // Build a struct like: {a: 1, b: "foo"} /// // Field description /// let fields = Fields::from(vec![ -/// Field::new("a", DataType::Int32, false), -/// Field::new("b", DataType::Utf8, false), +/// Field::new("a", DataType::Int32, false), +/// Field::new("b", DataType::Utf8, false), /// ]); /// // one row arrays for each field /// let arrays: Vec = vec![ -/// Arc::new(Int32Array::from(vec![1])), -/// Arc::new(StringArray::from(vec!["foo"])), +/// Arc::new(Int32Array::from(vec![1])), +/// Arc::new(StringArray::from(vec!["foo"])), /// ]; /// // no nulls for this array /// let nulls = None; @@ -878,10 +878,10 @@ impl Hash for ScalarValue { fn hash_nested_array(arr: ArrayRef, state: &mut H) { let len = arr.len(); - let arrays = vec![arr]; let hashes_buffer = &mut vec![0; len]; let random_state = ahash::RandomState::with_seeds(0, 0, 0, 0); - let hashes = create_hashes(&arrays, &random_state, hashes_buffer).unwrap(); + let hashes = create_hashes(&[arr], &random_state, hashes_buffer) + .expect("hash_nested_array: failed to create row hashes"); // Hash back to std::hash::Hasher hashes.hash(state); } @@ -1068,8 +1068,8 @@ impl ScalarValue { /// /// Example /// ``` - /// use datafusion_common::ScalarValue; /// use arrow::datatypes::DataType; + /// use datafusion_common::ScalarValue; /// /// let scalar = ScalarValue::try_new_null(&DataType::Int32).unwrap(); /// assert_eq!(scalar.is_null(), true); @@ -1734,7 +1734,7 @@ impl ScalarValue { ) { return _internal_err!("Invalid precision and scale {err}"); } - if *scale <= 0 { + if *scale < 0 { return _internal_err!("Negative scale is not supported"); } match 10_i32.checked_pow((*scale + 1) as u32) { @@ -1750,7 +1750,7 @@ impl ScalarValue { ) { return _internal_err!("Invalid precision and scale {err}"); } - if *scale <= 0 { + if *scale < 0 { return _internal_err!("Negative scale is not supported"); } match i64::from(10).checked_pow((*scale + 1) as u32) { @@ -2231,23 +2231,16 @@ impl ScalarValue { /// /// # Example /// ``` - /// use datafusion_common::ScalarValue; /// use arrow::array::{BooleanArray, Int32Array}; + /// use datafusion_common::ScalarValue; /// /// let arr = Int32Array::from(vec![Some(1), None, Some(10)]); /// let five = ScalarValue::Int32(Some(5)); /// - /// let result = arrow::compute::kernels::cmp::lt( - /// &arr, - /// &five.to_scalar().unwrap(), - /// ).unwrap(); + /// let result = + /// arrow::compute::kernels::cmp::lt(&arr, &five.to_scalar().unwrap()).unwrap(); /// - /// let expected = BooleanArray::from(vec![ - /// Some(true), - /// None, - /// Some(false) - /// ] - /// ); + /// let expected = BooleanArray::from(vec![Some(true), None, Some(false)]); /// /// assert_eq!(&result, &expected); /// ``` @@ -2265,26 +2258,20 @@ impl ScalarValue { /// /// # Example /// ``` 
- /// use datafusion_common::ScalarValue; /// use arrow::array::{ArrayRef, BooleanArray}; + /// use datafusion_common::ScalarValue; /// /// let scalars = vec![ - /// ScalarValue::Boolean(Some(true)), - /// ScalarValue::Boolean(None), - /// ScalarValue::Boolean(Some(false)), + /// ScalarValue::Boolean(Some(true)), + /// ScalarValue::Boolean(None), + /// ScalarValue::Boolean(Some(false)), /// ]; /// /// // Build an Array from the list of ScalarValues - /// let array = ScalarValue::iter_to_array(scalars.into_iter()) - /// .unwrap(); + /// let array = ScalarValue::iter_to_array(scalars.into_iter()).unwrap(); /// - /// let expected: ArrayRef = std::sync::Arc::new( - /// BooleanArray::from(vec![ - /// Some(true), - /// None, - /// Some(false) - /// ] - /// )); + /// let expected: ArrayRef = + /// std::sync::Arc::new(BooleanArray::from(vec![Some(true), None, Some(false)])); /// /// assert_eq!(&array, &expected); /// ``` @@ -2731,23 +2718,24 @@ impl ScalarValue { /// /// Example /// ``` - /// use datafusion_common::ScalarValue; - /// use arrow::array::{ListArray, Int32Array}; + /// use arrow::array::{Int32Array, ListArray}; /// use arrow::datatypes::{DataType, Int32Type}; /// use datafusion_common::cast::as_list_array; + /// use datafusion_common::ScalarValue; /// /// let scalars = vec![ - /// ScalarValue::Int32(Some(1)), - /// ScalarValue::Int32(None), - /// ScalarValue::Int32(Some(2)) + /// ScalarValue::Int32(Some(1)), + /// ScalarValue::Int32(None), + /// ScalarValue::Int32(Some(2)), /// ]; /// /// let result = ScalarValue::new_list(&scalars, &DataType::Int32, true); /// - /// let expected = ListArray::from_iter_primitive::( - /// vec![ - /// Some(vec![Some(1), None, Some(2)]) - /// ]); + /// let expected = ListArray::from_iter_primitive::(vec![Some(vec![ + /// Some(1), + /// None, + /// Some(2), + /// ])]); /// /// assert_eq!(*result, expected); /// ``` @@ -2791,23 +2779,25 @@ impl ScalarValue { /// /// Example /// ``` - /// use datafusion_common::ScalarValue; - /// use arrow::array::{ListArray, Int32Array}; + /// use arrow::array::{Int32Array, ListArray}; /// use arrow::datatypes::{DataType, Int32Type}; /// use datafusion_common::cast::as_list_array; + /// use datafusion_common::ScalarValue; /// /// let scalars = vec![ - /// ScalarValue::Int32(Some(1)), - /// ScalarValue::Int32(None), - /// ScalarValue::Int32(Some(2)) + /// ScalarValue::Int32(Some(1)), + /// ScalarValue::Int32(None), + /// ScalarValue::Int32(Some(2)), /// ]; /// - /// let result = ScalarValue::new_list_from_iter(scalars.into_iter(), &DataType::Int32, true); + /// let result = + /// ScalarValue::new_list_from_iter(scalars.into_iter(), &DataType::Int32, true); /// - /// let expected = ListArray::from_iter_primitive::( - /// vec![ - /// Some(vec![Some(1), None, Some(2)]) - /// ]); + /// let expected = ListArray::from_iter_primitive::(vec![Some(vec![ + /// Some(1), + /// None, + /// Some(2), + /// ])]); /// /// assert_eq!(*result, expected); /// ``` @@ -2833,23 +2823,25 @@ impl ScalarValue { /// /// Example /// ``` - /// use datafusion_common::ScalarValue; - /// use arrow::array::{LargeListArray, Int32Array}; + /// use arrow::array::{Int32Array, LargeListArray}; /// use arrow::datatypes::{DataType, Int32Type}; /// use datafusion_common::cast::as_large_list_array; + /// use datafusion_common::ScalarValue; /// /// let scalars = vec![ - /// ScalarValue::Int32(Some(1)), - /// ScalarValue::Int32(None), - /// ScalarValue::Int32(Some(2)) + /// ScalarValue::Int32(Some(1)), + /// ScalarValue::Int32(None), + /// 
ScalarValue::Int32(Some(2)), /// ]; /// /// let result = ScalarValue::new_large_list(&scalars, &DataType::Int32); /// - /// let expected = LargeListArray::from_iter_primitive::( - /// vec![ - /// Some(vec![Some(1), None, Some(2)]) - /// ]); + /// let expected = + /// LargeListArray::from_iter_primitive::(vec![Some(vec![ + /// Some(1), + /// None, + /// Some(2), + /// ])]); /// /// assert_eq!(*result, expected); /// ``` @@ -3248,14 +3240,14 @@ impl ScalarValue { /// /// Example 1: Array (ScalarValue::Int32) /// ``` - /// use datafusion_common::ScalarValue; /// use arrow::array::ListArray; /// use arrow::datatypes::{DataType, Int32Type}; + /// use datafusion_common::ScalarValue; /// /// // Equivalent to [[1,2,3], [4,5]] /// let list_arr = ListArray::from_iter_primitive::(vec![ - /// Some(vec![Some(1), Some(2), Some(3)]), - /// Some(vec![Some(4), Some(5)]) + /// Some(vec![Some(1), Some(2), Some(3)]), + /// Some(vec![Some(4), Some(5)]), /// ]); /// /// // Convert the array into Scalar Values for each row @@ -3278,15 +3270,15 @@ impl ScalarValue { /// /// Example 2: Nested array (ScalarValue::List) /// ``` - /// use datafusion_common::ScalarValue; /// use arrow::array::ListArray; /// use arrow::datatypes::{DataType, Int32Type}; /// use datafusion_common::utils::SingleRowListArrayBuilder; + /// use datafusion_common::ScalarValue; /// use std::sync::Arc; /// /// let list_arr = ListArray::from_iter_primitive::(vec![ - /// Some(vec![Some(1), Some(2), Some(3)]), - /// Some(vec![Some(4), Some(5)]) + /// Some(vec![Some(1), Some(2), Some(3)]), + /// Some(vec![Some(4), Some(5)]), /// ]); /// /// // Wrap into another layer of list, we got nested array as [ [[1,2,3], [4,5]] ] @@ -3295,33 +3287,34 @@ impl ScalarValue { /// // Convert the array into Scalar Values for each row, we got 1D arrays in this example /// let scalar_vec = ScalarValue::convert_array_to_scalar_vec(&list_arr).unwrap(); /// - /// let l1 = ListArray::from_iter_primitive::(vec![ - /// Some(vec![Some(1), Some(2), Some(3)]), - /// ]); - /// let l2 = ListArray::from_iter_primitive::(vec![ - /// Some(vec![Some(4), Some(5)]), - /// ]); + /// let l1 = ListArray::from_iter_primitive::(vec![Some(vec![ + /// Some(1), + /// Some(2), + /// Some(3), + /// ])]); + /// let l2 = ListArray::from_iter_primitive::(vec![Some(vec![ + /// Some(4), + /// Some(5), + /// ])]); /// - /// let expected = vec![ - /// Some(vec![ + /// let expected = vec![Some(vec![ /// ScalarValue::List(Arc::new(l1)), /// ScalarValue::List(Arc::new(l2)), - /// ]), - /// ]; + /// ])]; /// /// assert_eq!(scalar_vec, expected); /// ``` /// /// Example 3: Nullable array /// ``` - /// use datafusion_common::ScalarValue; /// use arrow::array::ListArray; /// use arrow::datatypes::{DataType, Int32Type}; + /// use datafusion_common::ScalarValue; /// /// let list_arr = ListArray::from_iter_primitive::(vec![ - /// Some(vec![Some(1), Some(2), Some(3)]), - /// None, - /// Some(vec![Some(4), Some(5)]) + /// Some(vec![Some(1), Some(2), Some(3)]), + /// None, + /// Some(vec![Some(4), Some(5)]), /// ]); /// /// // Convert the array into Scalar Values for each row @@ -4414,6 +4407,7 @@ macro_rules! 
impl_scalar { impl_scalar!(f64, Float64); impl_scalar!(f32, Float32); +impl_scalar!(f16, Float16); impl_scalar!(i8, Int8); impl_scalar!(i16, Int16); impl_scalar!(i32, Int32); @@ -4570,6 +4564,7 @@ impl_try_from!(UInt8, u8); impl_try_from!(UInt16, u16); impl_try_from!(UInt32, u32); impl_try_from!(UInt64, u64); +impl_try_from!(Float16, f16); impl_try_from!(Float32, f32); impl_try_from!(Float64, f64); impl_try_from!(Boolean, bool); @@ -4655,9 +4650,9 @@ impl fmt::Display for ScalarValue { } None => write!(f, "NULL")?, }, - ScalarValue::List(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?, - ScalarValue::LargeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?, - ScalarValue::FixedSizeList(arr) => fmt_list(arr.to_owned() as ArrayRef, f)?, + ScalarValue::List(arr) => fmt_list(arr.as_ref(), f)?, + ScalarValue::LargeList(arr) => fmt_list(arr.as_ref(), f)?, + ScalarValue::FixedSizeList(arr) => fmt_list(arr.as_ref(), f)?, ScalarValue::Date32(e) => format_option!( f, e.map(|v| { @@ -4779,12 +4774,11 @@ impl fmt::Display for ScalarValue { } } -fn fmt_list(arr: ArrayRef, f: &mut fmt::Formatter) -> fmt::Result { +fn fmt_list(arr: &dyn Array, f: &mut fmt::Formatter) -> fmt::Result { // ScalarValue List, LargeList, FixedSizeList should always have a single element assert_eq!(arr.len(), 1); let options = FormatOptions::default().with_display_error(true); - let formatter = - ArrayFormatter::try_new(arr.as_ref() as &dyn Array, &options).unwrap(); + let formatter = ArrayFormatter::try_new(arr, &options).unwrap(); let value_formatter = formatter.value(0); write!(f, "{value_formatter}") } @@ -8700,7 +8694,7 @@ mod tests { ])), true, )); - let scalars = vec![ + let scalars = [ ScalarValue::try_new_null(&DataType::List(Arc::clone(&field_ref))).unwrap(), ScalarValue::try_new_null(&DataType::LargeList(Arc::clone(&field_ref))) .unwrap(), diff --git a/datafusion/common/src/scalar/struct_builder.rs b/datafusion/common/src/scalar/struct_builder.rs index fd19dccf8963..045b5778243d 100644 --- a/datafusion/common/src/scalar/struct_builder.rs +++ b/datafusion/common/src/scalar/struct_builder.rs @@ -47,13 +47,11 @@ impl ScalarStructBuilder { /// ```rust /// # use arrow::datatypes::{DataType, Field}; /// # use datafusion_common::scalar::ScalarStructBuilder; - /// let fields = vec![ - /// Field::new("a", DataType::Int32, false), - /// ]; + /// let fields = vec![Field::new("a", DataType::Int32, false)]; /// let sv = ScalarStructBuilder::new_null(fields); /// // Note this is `NULL`, not `{a: NULL}` /// assert_eq!(format!("{sv}"), "NULL"); - ///``` + /// ``` /// /// To create a struct where the *fields* are null, use `Self::new()` and /// pass null values for each field: @@ -65,9 +63,9 @@ impl ScalarStructBuilder { /// let field = Field::new("a", DataType::Int32, true); /// // add a null value for the "a" field /// let sv = ScalarStructBuilder::new() - /// .with_scalar(field, ScalarValue::Int32(None)) - /// .build() - /// .unwrap(); + /// .with_scalar(field, ScalarValue::Int32(None)) + /// .build() + /// .unwrap(); /// // value is not null, but field is /// assert_eq!(format!("{sv}"), "{a:}"); /// ``` @@ -85,6 +83,7 @@ impl ScalarStructBuilder { } /// Add the specified field and `ScalarValue` to the struct. 
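// A short standalone sketch (not part of this patch) of what the new
// `impl_scalar!(f16, Float16)` and `impl_try_from!(Float16, f16)` lines enable,
// assuming those macros generate the usual `From` / `TryFrom` conversions as
// their other invocations do. `half::f16` is the same half-float type arrow
// uses for `Float16` arrays.
use datafusion_common::ScalarValue;
use half::f16;

fn main() {
    let x = f16::from_f32(1.5);

    // `impl_scalar!(f16, Float16)` provides the conversion into a scalar ...
    let scalar = ScalarValue::from(x);
    assert_eq!(scalar, ScalarValue::Float16(Some(x)));

    // ... and `impl_try_from!(Float16, f16)` the conversion back out.
    let roundtrip = f16::try_from(scalar).unwrap();
    assert_eq!(roundtrip, x);
}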
+ #[expect(clippy::needless_pass_by_value)] // Skip for public API's compatibility pub fn with_scalar(self, field: impl IntoFieldRef, value: ScalarValue) -> Self { // valid scalar value should not fail let array = value.to_array().unwrap(); diff --git a/datafusion/common/src/stats.rs b/datafusion/common/src/stats.rs index 2481a88676ef..da298c20ebcb 100644 --- a/datafusion/common/src/stats.rs +++ b/datafusion/common/src/stats.rs @@ -520,33 +520,35 @@ impl Statistics { /// # use arrow::datatypes::{Field, Schema, DataType}; /// # use datafusion_common::stats::Precision; /// let stats1 = Statistics::default() - /// .with_num_rows(Precision::Exact(1)) - /// .with_total_byte_size(Precision::Exact(2)) - /// .add_column_statistics(ColumnStatistics::new_unknown() - /// .with_null_count(Precision::Exact(3)) - /// .with_min_value(Precision::Exact(ScalarValue::from(4))) - /// .with_max_value(Precision::Exact(ScalarValue::from(5))) - /// ); + /// .with_num_rows(Precision::Exact(1)) + /// .with_total_byte_size(Precision::Exact(2)) + /// .add_column_statistics( + /// ColumnStatistics::new_unknown() + /// .with_null_count(Precision::Exact(3)) + /// .with_min_value(Precision::Exact(ScalarValue::from(4))) + /// .with_max_value(Precision::Exact(ScalarValue::from(5))), + /// ); /// /// let stats2 = Statistics::default() - /// .with_num_rows(Precision::Exact(10)) - /// .with_total_byte_size(Precision::Inexact(20)) - /// .add_column_statistics(ColumnStatistics::new_unknown() - /// // absent null count - /// .with_min_value(Precision::Exact(ScalarValue::from(40))) - /// .with_max_value(Precision::Exact(ScalarValue::from(50))) - /// ); + /// .with_num_rows(Precision::Exact(10)) + /// .with_total_byte_size(Precision::Inexact(20)) + /// .add_column_statistics( + /// ColumnStatistics::new_unknown() + /// // absent null count + /// .with_min_value(Precision::Exact(ScalarValue::from(40))) + /// .with_max_value(Precision::Exact(ScalarValue::from(50))), + /// ); /// /// let merged_stats = stats1.try_merge(&stats2).unwrap(); /// let expected_stats = Statistics::default() - /// .with_num_rows(Precision::Exact(11)) - /// .with_total_byte_size(Precision::Inexact(22)) // inexact in stats2 --> inexact - /// .add_column_statistics( - /// ColumnStatistics::new_unknown() - /// .with_null_count(Precision::Absent) // missing from stats2 --> absent - /// .with_min_value(Precision::Exact(ScalarValue::from(4))) - /// .with_max_value(Precision::Exact(ScalarValue::from(50))) - /// ); + /// .with_num_rows(Precision::Exact(11)) + /// .with_total_byte_size(Precision::Inexact(22)) // inexact in stats2 --> inexact + /// .add_column_statistics( + /// ColumnStatistics::new_unknown() + /// .with_null_count(Precision::Absent) // missing from stats2 --> absent + /// .with_min_value(Precision::Exact(ScalarValue::from(4))) + /// .with_max_value(Precision::Exact(ScalarValue::from(50))), + /// ); /// /// assert_eq!(merged_stats, expected_stats) /// ``` diff --git a/datafusion/common/src/table_reference.rs b/datafusion/common/src/table_reference.rs index 574465856760..3163a8b16c8d 100644 --- a/datafusion/common/src/table_reference.rs +++ b/datafusion/common/src/table_reference.rs @@ -69,8 +69,11 @@ impl std::fmt::Display for ResolvedTableReference { /// /// // Get a table reference to 'myschema.mytable' (note the capitalization) /// let table_reference = TableReference::from("MySchema.MyTable"); -/// assert_eq!(table_reference, TableReference::partial("myschema", "mytable")); -///``` +/// assert_eq!( +/// table_reference, +/// 
TableReference::partial("myschema", "mytable") +/// ); +/// ``` #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] pub enum TableReference { /// An unqualified table reference, e.g. "table" @@ -247,7 +250,10 @@ impl TableReference { /// assert_eq!(table_reference.to_quoted_string(), "myschema.mytable"); /// /// let table_reference = TableReference::partial("MySchema", "MyTable"); - /// assert_eq!(table_reference.to_quoted_string(), r#""MySchema"."MyTable""#); + /// assert_eq!( + /// table_reference.to_quoted_string(), + /// r#""MySchema"."MyTable""# + /// ); /// ``` pub fn to_quoted_string(&self) -> String { match self { diff --git a/datafusion/common/src/test_util.rs b/datafusion/common/src/test_util.rs index d97d4003e729..c51dea1c4de0 100644 --- a/datafusion/common/src/test_util.rs +++ b/datafusion/common/src/test_util.rs @@ -55,7 +55,7 @@ pub fn format_batches(results: &[RecordBatch]) -> Result i64 { 2 } /// let expr = orig_expr(); /// let ret = Transformed::no(expr.clone()) -/// .transform_data(|expr| { -/// // closure returns a result and potentially transforms the node -/// // in this example, it does transform the node -/// let new_expr = make_new_expr(expr); -/// Ok(Transformed::yes(new_expr)) -/// }).unwrap(); +/// .transform_data(|expr| { +/// // closure returns a result and potentially transforms the node +/// // in this example, it does transform the node +/// let new_expr = make_new_expr(expr); +/// Ok(Transformed::yes(new_expr)) +/// }) +/// .unwrap(); /// // transformed flag is the union of the original ans closure's transformed flag /// assert!(ret.transformed); /// ``` diff --git a/datafusion/common/src/types/builtin.rs b/datafusion/common/src/types/builtin.rs index ec69db790377..314529b99a34 100644 --- a/datafusion/common/src/types/builtin.rs +++ b/datafusion/common/src/types/builtin.rs @@ -15,9 +15,17 @@ // specific language governing permissions and limitations // under the License. +use arrow::datatypes::IntervalUnit::*; + use crate::types::{LogicalTypeRef, NativeType}; use std::sync::{Arc, LazyLock}; +/// Create a singleton and accompanying static variable for a [`LogicalTypeRef`] +/// of a [`NativeType`]. +/// * `name`: name of the static variable, must be unique. +/// * `getter`: name of the public function that will return the singleton instance +/// of the static variable. +/// * `ty`: the [`NativeType`]. macro_rules! singleton { ($name:ident, $getter:ident, $ty:ident) => { static $name: LazyLock = @@ -31,6 +39,26 @@ macro_rules! singleton { }; } +/// Similar to [`singleton`], but for native types that have variants, such as +/// `NativeType::Interval(MonthDayNano)`. +/// * `name`: name of the static variable, must be unique. +/// * `getter`: name of the public function that will return the singleton instance +/// of the static variable. +/// * `ty`: the [`NativeType`]. +/// * `variant`: specific variant of the `ty`. +macro_rules! 
singleton_variant { + ($name:ident, $getter:ident, $ty:ident, $variant:ident) => { + static $name: LazyLock = + LazyLock::new(|| Arc::new(NativeType::$ty($variant))); + + #[doc = "Getter for singleton instance of a logical type representing"] + #[doc = concat!("[`NativeType::", stringify!($ty), "`] of unit [`", stringify!($variant),"`].`")] + pub fn $getter() -> LogicalTypeRef { + Arc::clone(&$name) + } + }; +} + singleton!(LOGICAL_NULL, logical_null, Null); singleton!(LOGICAL_BOOLEAN, logical_boolean, Boolean); singleton!(LOGICAL_INT8, logical_int8, Int8); @@ -47,3 +75,10 @@ singleton!(LOGICAL_FLOAT64, logical_float64, Float64); singleton!(LOGICAL_DATE, logical_date, Date); singleton!(LOGICAL_BINARY, logical_binary, Binary); singleton!(LOGICAL_STRING, logical_string, String); + +singleton_variant!( + LOGICAL_INTERVAL_MDN, + logical_interval_mdn, + Interval, + MonthDayNano +); diff --git a/datafusion/common/src/types/logical.rs b/datafusion/common/src/types/logical.rs index eb7cf88e0075..674b1a41204d 100644 --- a/datafusion/common/src/types/logical.rs +++ b/datafusion/common/src/types/logical.rs @@ -67,12 +67,12 @@ pub type LogicalTypeRef = Arc; /// &NativeType::String /// } /// -/// fn signature(&self) -> TypeSignature<'_> { -/// TypeSignature::Extension { -/// name: "JSON", -/// parameters: &[], -/// } -/// } +/// fn signature(&self) -> TypeSignature<'_> { +/// TypeSignature::Extension { +/// name: "JSON", +/// parameters: &[], +/// } +/// } /// } /// ``` pub trait LogicalType: Sync + Send { diff --git a/datafusion/common/src/types/native.rs b/datafusion/common/src/types/native.rs index 5cef0adfbde8..a1495b779ac9 100644 --- a/datafusion/common/src/types/native.rs +++ b/datafusion/common/src/types/native.rs @@ -430,22 +430,7 @@ impl From for NativeType { impl NativeType { #[inline] pub fn is_numeric(&self) -> bool { - use NativeType::*; - matches!( - self, - UInt8 - | UInt16 - | UInt32 - | UInt64 - | Int8 - | Int16 - | Int32 - | Int64 - | Float16 - | Float32 - | Float64 - | Decimal(_, _) - ) + self.is_integer() || self.is_float() || self.is_decimal() } #[inline] @@ -486,4 +471,19 @@ impl NativeType { pub fn is_binary(&self) -> bool { matches!(self, NativeType::Binary | NativeType::FixedSizeBinary(_)) } + + #[inline] + pub fn is_null(&self) -> bool { + matches!(self, NativeType::Null) + } + + #[inline] + pub fn is_decimal(&self) -> bool { + matches!(self, Self::Decimal(_, _)) + } + + #[inline] + pub fn is_float(&self) -> bool { + matches!(self, Self::Float16 | Self::Float32 | Self::Float64) + } } diff --git a/datafusion/common/src/utils/memory.rs b/datafusion/common/src/utils/memory.rs index 29e523996cf4..a56b940fab66 100644 --- a/datafusion/common/src/utils/memory.rs +++ b/datafusion/common/src/utils/memory.rs @@ -56,8 +56,8 @@ use std::mem::size_of; /// impl MyStruct { /// fn size(&self) -> Result { /// let num_elements = self.values.len(); -/// let fixed_size = std::mem::size_of_val(self) + -/// std::mem::size_of_val(&self.values); +/// let fixed_size = +/// std::mem::size_of_val(self) + std::mem::size_of_val(&self.values); /// /// estimate_memory_size::(num_elements, fixed_size) /// } @@ -73,8 +73,8 @@ use std::mem::size_of; /// let num_rows = 100; /// let fixed_size = std::mem::size_of::>(); /// let estimated_hashtable_size = -/// estimate_memory_size::<(u64, u64)>(num_rows,fixed_size) -/// .expect("Size estimation failed"); +/// estimate_memory_size::<(u64, u64)>(num_rows, fixed_size) +/// .expect("Size estimation failed"); /// ``` pub fn estimate_memory_size(num_elements: usize, 
fixed_size: usize) -> Result { // For the majority of cases hashbrown overestimates the bucket quantity diff --git a/datafusion/common/src/utils/mod.rs b/datafusion/common/src/utils/mod.rs index 045c02a5a2aa..7b145ac3ae21 100644 --- a/datafusion/common/src/utils/mod.rs +++ b/datafusion/common/src/utils/mod.rs @@ -46,26 +46,23 @@ use std::thread::available_parallelism; /// /// Example: /// ``` -/// use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; +/// use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; /// use datafusion_common::project_schema; /// /// // Schema with columns 'a', 'b', and 'c' /// let schema = SchemaRef::new(Schema::new(vec![ -/// Field::new("a", DataType::Int32, true), -/// Field::new("b", DataType::Int64, true), -/// Field::new("c", DataType::Utf8, true), +/// Field::new("a", DataType::Int32, true), +/// Field::new("b", DataType::Int64, true), +/// Field::new("c", DataType::Utf8, true), /// ])); /// /// // Pick columns 'c' and 'b' -/// let projection = Some(vec![2,1]); -/// let projected_schema = project_schema( -/// &schema, -/// projection.as_ref() -/// ).unwrap(); +/// let projection = Some(vec![2, 1]); +/// let projected_schema = project_schema(&schema, projection.as_ref()).unwrap(); /// /// let expected_schema = SchemaRef::new(Schema::new(vec![ -/// Field::new("c", DataType::Utf8, true), -/// Field::new("b", DataType::Int64, true), +/// Field::new("c", DataType::Utf8, true), +/// Field::new("b", DataType::Int64, true), /// ])); /// /// assert_eq!(projected_schema, expected_schema); @@ -398,9 +395,11 @@ pub fn longest_consecutive_prefix>( /// # use arrow::array::types::Int64Type; /// # use datafusion_common::utils::SingleRowListArrayBuilder; /// // Array is [1, 2, 3] -/// let arr = ListArray::from_iter_primitive::(vec![ -/// Some(vec![Some(1), Some(2), Some(3)]), -/// ]); +/// let arr = ListArray::from_iter_primitive::(vec![Some(vec![ +/// Some(1), +/// Some(2), +/// Some(3), +/// ])]); /// // Wrap as a list array: [[1, 2, 3]] /// let list_arr = SingleRowListArrayBuilder::new(Arc::new(arr)).build_list_array(); /// assert_eq!(list_arr.len(), 1); @@ -554,7 +553,8 @@ pub fn fixed_size_list_to_arrays(a: &ArrayRef) -> Vec { /// use datafusion_common::utils::base_type; /// use std::sync::Arc; /// -/// let data_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); +/// let data_type = +/// DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))); /// assert_eq!(base_type(&data_type), DataType::Int32); /// /// let data_type = DataType::Int32; @@ -906,16 +906,19 @@ pub fn get_available_parallelism() -> usize { /// # use datafusion_common::utils::take_function_args; /// # use datafusion_common::ScalarValue; /// fn my_function(args: &[ScalarValue]) -> Result<()> { -/// // function expects 2 args, so create a 2-element array -/// let [arg1, arg2] = take_function_args("my_function", args)?; -/// // ... do stuff.. -/// Ok(()) +/// // function expects 2 args, so create a 2-element array +/// let [arg1, arg2] = take_function_args("my_function", args)?; +/// // ... do stuff.. 
+/// Ok(()) /// } /// /// // Calling the function with 1 argument produces an error: /// let args = vec![ScalarValue::Int32(Some(10))]; /// let err = my_function(&args).unwrap_err(); -/// assert_eq!(err.to_string(), "Execution error: my_function function requires 2 arguments, got 1"); +/// assert_eq!( +/// err.to_string(), +/// "Execution error: my_function function requires 2 arguments, got 1" +/// ); /// // Calling the function with 2 arguments works great /// let args = vec![ScalarValue::Int32(Some(10)), ScalarValue::Int32(Some(20))]; /// my_function(&args).unwrap(); diff --git a/datafusion/common/src/utils/proxy.rs b/datafusion/common/src/utils/proxy.rs index d940677a5fb3..fb951aa3b028 100644 --- a/datafusion/common/src/utils/proxy.rs +++ b/datafusion/common/src/utils/proxy.rs @@ -47,7 +47,9 @@ pub trait VecAllocExt { /// assert_eq!(allocated, 16); // no new allocation needed /// /// // push more data into the vec - /// for _ in 0..10 { vec.push_accounted(1, &mut allocated); } + /// for _ in 0..10 { + /// vec.push_accounted(1, &mut allocated); + /// } /// assert_eq!(allocated, 64); // underlying vec has space for 10 u32s /// assert_eq!(vec.allocated_size(), 64); /// ``` @@ -82,7 +84,9 @@ pub trait VecAllocExt { /// assert_eq!(vec.allocated_size(), 16); // no new allocation needed /// /// // push more data into the vec - /// for _ in 0..10 { vec.push(1); } + /// for _ in 0..10 { + /// vec.push(1); + /// } /// assert_eq!(vec.allocated_size(), 64); // space for 64 now /// ``` fn allocated_size(&self) -> usize; @@ -133,7 +137,9 @@ pub trait RawTableAllocExt { /// assert_eq!(allocated, 64); /// /// // insert more values - /// for i in 0..100 { table.insert_accounted(i, hash_fn, &mut allocated); } + /// for i in 0..100 { + /// table.insert_accounted(i, hash_fn, &mut allocated); + /// } /// assert_eq!(allocated, 400); /// ``` fn insert_accounted( @@ -200,7 +206,9 @@ pub trait HashTableAllocExt { /// assert_eq!(allocated, 64); /// /// // insert more values - /// for i in 0..100 { table.insert_accounted(i, hash_fn, &mut allocated); } + /// for i in 0..100 { + /// table.insert_accounted(i, hash_fn, &mut allocated); + /// } /// assert_eq!(allocated, 400); /// ``` fn insert_accounted( diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 22c9f43a902e..67a73ac6f669 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -32,6 +32,9 @@ rust-version = { workspace = true } [package.metadata.docs.rs] all-features = true +# Note: add additional linter rules in lib.rs. 
+# Rust does not support workspace + new linter rules in subcrates yet +# https://github.com/rust-lang/cargo/issues/13157 [lints] workspace = true @@ -241,6 +244,10 @@ required-features = ["parquet"] harness = false name = "sql_planner" +[[bench]] +harness = false +name = "sql_planner_extended" + [[bench]] harness = false name = "sql_query_with_io" diff --git a/datafusion/core/benches/aggregate_query_sql.rs b/datafusion/core/benches/aggregate_query_sql.rs index 9da341ce2e92..87aeed49337e 100644 --- a/datafusion/core/benches/aggregate_query_sql.rs +++ b/datafusion/core/benches/aggregate_query_sql.rs @@ -21,17 +21,19 @@ extern crate arrow; extern crate datafusion; mod data_utils; + use crate::criterion::Criterion; use data_utils::create_table_provider; use datafusion::error::Result; use datafusion::execution::context::SessionContext; use parking_lot::Mutex; +use std::hint::black_box; use std::sync::Arc; use tokio::runtime::Runtime; fn query(ctx: Arc>, rt: &Runtime, sql: &str) { let df = rt.block_on(ctx.lock().sql(sql)).unwrap(); - criterion::black_box(rt.block_on(df.collect()).unwrap()); + black_box(rt.block_on(df.collect()).unwrap()); } fn create_context( diff --git a/datafusion/core/benches/csv_load.rs b/datafusion/core/benches/csv_load.rs index 3f984757466d..de0f0d825057 100644 --- a/datafusion/core/benches/csv_load.rs +++ b/datafusion/core/benches/csv_load.rs @@ -21,12 +21,14 @@ extern crate arrow; extern crate datafusion; mod data_utils; + use crate::criterion::Criterion; use datafusion::error::Result; use datafusion::execution::context::SessionContext; use datafusion::prelude::CsvReadOptions; use datafusion::test_util::csv::TestCsvFile; use parking_lot::Mutex; +use std::hint::black_box; use std::sync::Arc; use std::time::Duration; use test_utils::AccessLogGenerator; @@ -39,7 +41,7 @@ fn load_csv( options: CsvReadOptions, ) { let df = rt.block_on(ctx.lock().read_csv(path, options)).unwrap(); - criterion::black_box(rt.block_on(df.collect()).unwrap()); + black_box(rt.block_on(df.collect()).unwrap()); } fn create_context() -> Result>> { diff --git a/datafusion/core/benches/dataframe.rs b/datafusion/core/benches/dataframe.rs index 12eb34719e4b..00fa85918347 100644 --- a/datafusion/core/benches/dataframe.rs +++ b/datafusion/core/benches/dataframe.rs @@ -26,6 +26,7 @@ use datafusion::datasource::MemTable; use datafusion::prelude::SessionContext; use datafusion_expr::col; use datafusion_functions::expr_fn::btrim; +use std::hint::black_box; use std::sync::Arc; use tokio::runtime::Runtime; @@ -45,7 +46,7 @@ fn create_context(field_count: u32) -> datafusion_common::Result, rt: &Runtime) { - criterion::black_box(rt.block_on(async { + black_box(rt.block_on(async { let mut data_frame = ctx.table("t").await.unwrap(); for i in 0..column_count { diff --git a/datafusion/core/benches/distinct_query_sql.rs b/datafusion/core/benches/distinct_query_sql.rs index c1ef55992689..d05e8b13b2af 100644 --- a/datafusion/core/benches/distinct_query_sql.rs +++ b/datafusion/core/benches/distinct_query_sql.rs @@ -30,12 +30,13 @@ use datafusion_execution::config::SessionConfig; use datafusion_execution::TaskContext; use parking_lot::Mutex; +use std::hint::black_box; use std::{sync::Arc, time::Duration}; use tokio::runtime::Runtime; fn query(ctx: Arc>, rt: &Runtime, sql: &str) { let df = rt.block_on(ctx.lock().sql(sql)).unwrap(); - criterion::black_box(rt.block_on(df.collect()).unwrap()); + black_box(rt.block_on(df.collect()).unwrap()); } fn create_context( @@ -124,8 +125,7 @@ async fn distinct_with_limit( } fn run(rt: 
&Runtime, plan: Arc, ctx: Arc) { - criterion::black_box(rt.block_on(distinct_with_limit(plan.clone(), ctx.clone()))) - .unwrap(); + black_box(rt.block_on(distinct_with_limit(plan.clone(), ctx.clone()))).unwrap(); } pub async fn create_context_sampled_data( diff --git a/datafusion/core/benches/filter_query_sql.rs b/datafusion/core/benches/filter_query_sql.rs index c82a1607184d..16905e0f9660 100644 --- a/datafusion/core/benches/filter_query_sql.rs +++ b/datafusion/core/benches/filter_query_sql.rs @@ -24,13 +24,14 @@ use criterion::{criterion_group, criterion_main, Criterion}; use datafusion::prelude::SessionContext; use datafusion::{datasource::MemTable, error::Result}; use futures::executor::block_on; +use std::hint::black_box; use std::sync::Arc; use tokio::runtime::Runtime; async fn query(ctx: &SessionContext, rt: &Runtime, sql: &str) { // execute the query let df = rt.block_on(ctx.sql(sql)).unwrap(); - criterion::black_box(rt.block_on(df.collect()).unwrap()); + black_box(rt.block_on(df.collect()).unwrap()); } fn create_context(array_len: usize, batch_size: usize) -> Result { diff --git a/datafusion/core/benches/map_query_sql.rs b/datafusion/core/benches/map_query_sql.rs index 063b8e6c86bb..09234546b2df 100644 --- a/datafusion/core/benches/map_query_sql.rs +++ b/datafusion/core/benches/map_query_sql.rs @@ -15,10 +15,11 @@ // specific language governing permissions and limitations // under the License. +use std::hint::black_box; use std::sync::Arc; use arrow::array::{ArrayRef, Int32Array, RecordBatch}; -use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use criterion::{criterion_group, criterion_main, Criterion}; use parking_lot::Mutex; use rand::prelude::ThreadRng; use rand::Rng; diff --git a/datafusion/core/benches/parquet_query_sql.rs b/datafusion/core/benches/parquet_query_sql.rs index 14dcdf15f173..e2b381048013 100644 --- a/datafusion/core/benches/parquet_query_sql.rs +++ b/datafusion/core/benches/parquet_query_sql.rs @@ -166,11 +166,12 @@ fn generate_file() -> NamedTempFile { } let metadata = writer.close().unwrap(); + let file_metadata = metadata.file_metadata(); assert_eq!( - metadata.num_rows as usize, + file_metadata.num_rows() as usize, WRITE_RECORD_BATCH_SIZE * NUM_BATCHES ); - assert_eq!(metadata.row_groups.len(), EXPECTED_ROW_GROUPS); + assert_eq!(metadata.row_groups().len(), EXPECTED_ROW_GROUPS); println!( "Generated parquet file in {} seconds", diff --git a/datafusion/core/benches/spm.rs b/datafusion/core/benches/spm.rs index 5c244832300e..ecc3f908d4b1 100644 --- a/datafusion/core/benches/spm.rs +++ b/datafusion/core/benches/spm.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. 
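The benchmark hunks in this patch all make the same mechanical change: `black_box` is imported from `std::hint` rather than being reached through `criterion::black_box`. A minimal sketch of the pattern, using an illustrative benchmark that is not part of this patch:

```rust
use criterion::{criterion_group, criterion_main, Criterion};
use std::hint::black_box;

fn bench_sum(c: &mut Criterion) {
    c.bench_function("sum_1000", |b| {
        b.iter(|| {
            // was: criterion::black_box((0..1000u64).sum::<u64>())
            black_box((0..1000u64).sum::<u64>())
        })
    });
}

criterion_group!(benches, bench_sum);
criterion_main!(benches);
```

The measured closures themselves are untouched; only the source of `black_box` changes.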
+use std::hint::black_box; use std::sync::Arc; use arrow::array::{ArrayRef, Int32Array, Int64Array, RecordBatch, StringArray}; @@ -25,7 +26,7 @@ use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeE use datafusion_physical_plan::{collect, ExecutionPlan}; use criterion::async_executor::FuturesExecutor; -use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use criterion::{criterion_group, criterion_main, Criterion}; use datafusion_datasource::memory::MemorySourceConfig; fn generate_spm_for_round_robin_tie_breaker( diff --git a/datafusion/core/benches/sql_planner.rs b/datafusion/core/benches/sql_planner.rs index 83563099cad6..6266a7184cf5 100644 --- a/datafusion/core/benches/sql_planner.rs +++ b/datafusion/core/benches/sql_planner.rs @@ -25,18 +25,12 @@ mod data_utils; use crate::criterion::Criterion; use arrow::array::{ArrayRef, RecordBatch}; use arrow::datatypes::{DataType, Field, Fields, Schema}; -use arrow_schema::TimeUnit::Nanosecond; use criterion::Bencher; use datafusion::datasource::MemTable; use datafusion::execution::context::SessionContext; -use datafusion::prelude::DataFrame; use datafusion_common::{config::Dialect, ScalarValue}; -use datafusion_expr::Expr::Literal; -use datafusion_expr::{cast, col, lit, not, try_cast, when}; -use datafusion_functions::expr_fn::{ - btrim, length, regexp_like, regexp_replace, to_timestamp, upper, -}; -use std::ops::Rem; +use datafusion_expr::col; +use std::hint::black_box; use std::path::PathBuf; use std::sync::Arc; use test_utils::tpcds::tpcds_schemas; @@ -50,12 +44,12 @@ const CLICKBENCH_DATA_PATH: &str = "data/hits_partitioned/"; /// Create a logical plan from the specified sql fn logical_plan(ctx: &SessionContext, rt: &Runtime, sql: &str) { - criterion::black_box(rt.block_on(ctx.sql(sql)).unwrap()); + black_box(rt.block_on(ctx.sql(sql)).unwrap()); } /// Create a physical ExecutionPlan (by way of logical plan) fn physical_plan(ctx: &SessionContext, rt: &Runtime, sql: &str) { - criterion::black_box(rt.block_on(async { + black_box(rt.block_on(async { ctx.sql(sql) .await .unwrap() @@ -65,150 +59,6 @@ fn physical_plan(ctx: &SessionContext, rt: &Runtime, sql: &str) { })); } -/// Build a dataframe for testing logical plan optimization -fn build_test_data_frame(ctx: &SessionContext, rt: &Runtime) -> DataFrame { - register_string_table(ctx, 100, 1000); - - rt.block_on(async { - let mut df = ctx.table("t").await.unwrap(); - // add some columns in - for i in 100..150 { - df = df - .with_column(&format!("c{i}"), Literal(ScalarValue::Utf8(None), None)) - .unwrap(); - } - // add in some columns with string encoded timestamps - for i in 150..175 { - df = df - .with_column( - &format!("c{i}"), - Literal(ScalarValue::Utf8(Some("2025-08-21 09:43:17".into())), None), - ) - .unwrap(); - } - // do a bunch of ops on the columns - for i in 0..175 { - // trim the columns - df = df - .with_column(&format!("c{i}"), btrim(vec![col(format!("c{i}"))])) - .unwrap(); - } - - for i in 0..175 { - let c_name = format!("c{i}"); - let c = col(&c_name); - - // random ops - if i % 5 == 0 && i < 150 { - // the actual ops here are largely unimportant as they are just a sample - // of ops that could occur on a dataframe - df = df - .with_column(&c_name, cast(c.clone(), DataType::Utf8)) - .unwrap() - .with_column( - &c_name, - when( - cast(c.clone(), DataType::Int32).gt(lit(135)), - cast( - cast(c.clone(), DataType::Int32) - lit(i + 3), - DataType::Utf8, - ), - ) - .otherwise(c.clone()) - .unwrap(), - ) - .unwrap() - .with_column( - 
&c_name, - when( - c.clone().is_not_null().and( - cast(c.clone(), DataType::Int32) - .between(lit(120), lit(130)), - ), - Literal(ScalarValue::Utf8(None), None), - ) - .otherwise( - when( - c.clone().is_not_null().and(regexp_like( - cast(c.clone(), DataType::Utf8View), - lit("[0-9]*"), - None, - )), - upper(c.clone()), - ) - .otherwise(c.clone()) - .unwrap(), - ) - .unwrap(), - ) - .unwrap() - .with_column( - &c_name, - when( - c.clone().is_not_null().and( - cast(c.clone(), DataType::Int32) - .between(lit(90), lit(100)), - ), - cast(c.clone(), DataType::Utf8View), - ) - .otherwise(Literal(ScalarValue::Date32(None), None)) - .unwrap(), - ) - .unwrap() - .with_column( - &c_name, - when( - c.clone().is_not_null().and( - cast(c.clone(), DataType::Int32).rem(lit(10)).gt(lit(7)), - ), - regexp_replace( - cast(c.clone(), DataType::Utf8View), - lit("1"), - lit("a"), - None, - ), - ) - .otherwise(Literal(ScalarValue::Date32(None), None)) - .unwrap(), - ) - .unwrap() - } - if i >= 150 { - df = df - .with_column( - &c_name, - try_cast( - to_timestamp(vec![c.clone(), lit("%Y-%m-%d %H:%M:%S")]), - DataType::Timestamp(Nanosecond, Some("UTC".into())), - ), - ) - .unwrap() - .with_column(&c_name, try_cast(c.clone(), DataType::Date32)) - .unwrap() - } - - // add in a few unions - if i % 30 == 0 { - let df1 = df - .clone() - .filter(length(c.clone()).gt(lit(2))) - .unwrap() - .with_column(&format!("c{i}_filtered"), lit(true)) - .unwrap(); - let df2 = df - .filter(not(length(c.clone()).gt(lit(2)))) - .unwrap() - .with_column(&format!("c{i}_filtered"), lit(false)) - .unwrap(); - - df = df1.union_by_name(df2).unwrap() - } - } - - df - }) -} - /// Create schema with the specified number of columns fn create_schema(column_prefix: &str, num_columns: usize) -> Schema { let fields: Fields = (0..num_columns) @@ -296,7 +146,7 @@ fn benchmark_with_param_values_many_columns( rt.block_on(async { ctx.state().statement_to_plan(statement).await.unwrap() }); b.iter(|| { let plan = plan.clone(); - criterion::black_box(plan.with_param_values(vec![ScalarValue::from(1)]).unwrap()); + black_box(plan.with_param_values(vec![ScalarValue::from(1)]).unwrap()); }); } @@ -334,33 +184,6 @@ fn register_union_order_table(ctx: &SessionContext, num_columns: usize, num_rows ctx.register_table("t", Arc::new(table)).unwrap(); } -/// Registers a table like this: -/// c0,c1,c2...,c99 -/// "0","100"..."9900" -/// "0","200"..."19800" -/// "0","300"..."29700" -fn register_string_table(ctx: &SessionContext, num_columns: usize, num_rows: usize) { - // ("c0", ["0", "0", ...]) - // ("c1": ["100", "200", ...]) - // etc - let iter = (0..num_columns).map(|i| i as u64).map(|i| { - let array: ArrayRef = Arc::new(arrow::array::StringViewArray::from_iter_values( - (0..num_rows) - .map(|j| format!("c{}", j as u64 * 100 + i)) - .collect::>(), - )); - (format!("c{i}"), array) - }); - let batch = RecordBatch::try_from_iter(iter).unwrap(); - let schema = batch.schema(); - let partitions = vec![vec![batch]]; - - // create the table - let table = MemTable::try_new(schema, partitions).unwrap(); - - ctx.register_table("t", Arc::new(table)).unwrap(); -} - /// return a query like /// ```sql /// select c1, 2 as c2, ... 
n as cn from t ORDER BY c1 @@ -579,7 +402,8 @@ fn criterion_benchmark(c: &mut Criterion) { }); // -- Sorted Queries -- - for column_count in [10, 50, 100, 200, 300] { + // 100, 200 && 300 is taking too long - https://github.com/apache/datafusion/issues/18366 + for column_count in [10, 50 /* 100, 200, 300 */] { register_union_order_table(&ctx, column_count, 1000); // this query has many expressions in its sort order so stresses @@ -596,20 +420,6 @@ fn criterion_benchmark(c: &mut Criterion) { let _ = ctx.deregister_table("t"); } - // -- validate logical plan optimize performance - let df = build_test_data_frame(&ctx, &rt); - - c.bench_function("logical_plan_optimize", |b| { - b.iter(|| { - let df_clone = df.clone(); - criterion::black_box( - rt.block_on(async { df_clone.into_optimized_plan().unwrap() }), - ); - }) - }); - - let _ = ctx.deregister_table("t"); - // --- TPC-H --- let tpch_ctx = register_defs(SessionContext::new(), tpch_schemas()); diff --git a/datafusion/core/benches/sql_planner_extended.rs b/datafusion/core/benches/sql_planner_extended.rs new file mode 100644 index 000000000000..aff7cb4d101d --- /dev/null +++ b/datafusion/core/benches/sql_planner_extended.rs @@ -0,0 +1,233 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{ArrayRef, RecordBatch}; +use arrow_schema::DataType; +use arrow_schema::TimeUnit::Nanosecond; +use criterion::{criterion_group, criterion_main, Criterion}; +use datafusion::prelude::{DataFrame, SessionContext}; +use datafusion_catalog::MemTable; +use datafusion_common::ScalarValue; +use datafusion_expr::Expr::Literal; +use datafusion_expr::{cast, col, lit, not, try_cast, when}; +use datafusion_functions::expr_fn::{ + btrim, length, regexp_like, regexp_replace, to_timestamp, upper, +}; +use std::hint::black_box; +use std::ops::Rem; +use std::sync::Arc; +use tokio::runtime::Runtime; + +// This benchmark suite is designed to test the performance of +// logical planning with a large plan containing unions, many columns +// with a variety of operations in it. +// +// Since it is (currently) very slow to execute it has been separated +// out from the sql_planner benchmark suite to this file. +// +// See https://github.com/apache/datafusion/issues/17261 for details. 
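Since the comment above describes what the extracted suite measures, here is a small, hedged sketch (not taken from the patch) of the core operation it times, shown on a trivially small plan:

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    let df = ctx
        .read_csv("tests/data/example.csv", CsvReadOptions::new())
        .await?
        .with_column("ab", col("a") + col("b"))?;
    // `into_optimized_plan` runs the logical optimizer; the
    // `logical_plan_optimize` benchmark below times this call on the much
    // larger plan produced by `build_test_data_frame`.
    let plan = df.into_optimized_plan()?;
    println!("{}", plan.display_indent());
    Ok(())
}
```

With the `[[bench]]` entry added to `datafusion/core/Cargo.toml` above, the suite can also be run on its own, for example via `cargo bench --bench sql_planner_extended`.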
+ +/// Registers a table like this: +/// c0,c1,c2...,c99 +/// "0","100"..."9900" +/// "0","200"..."19800" +/// "0","300"..."29700" +fn register_string_table(ctx: &SessionContext, num_columns: usize, num_rows: usize) { + // ("c0", ["0", "0", ...]) + // ("c1": ["100", "200", ...]) + // etc + let iter = (0..num_columns).map(|i| i as u64).map(|i| { + let array: ArrayRef = Arc::new(arrow::array::StringViewArray::from_iter_values( + (0..num_rows) + .map(|j| format!("c{}", j as u64 * 100 + i)) + .collect::>(), + )); + (format!("c{i}"), array) + }); + let batch = RecordBatch::try_from_iter(iter).unwrap(); + let schema = batch.schema(); + let partitions = vec![vec![batch]]; + + // create the table + let table = MemTable::try_new(schema, partitions).unwrap(); + + ctx.register_table("t", Arc::new(table)).unwrap(); +} + +/// Build a dataframe for testing logical plan optimization +fn build_test_data_frame(ctx: &SessionContext, rt: &Runtime) -> DataFrame { + register_string_table(ctx, 100, 1000); + + rt.block_on(async { + let mut df = ctx.table("t").await.unwrap(); + // add some columns in + for i in 100..150 { + df = df + .with_column(&format!("c{i}"), Literal(ScalarValue::Utf8(None), None)) + .unwrap(); + } + // add in some columns with string encoded timestamps + for i in 150..175 { + df = df + .with_column( + &format!("c{i}"), + Literal(ScalarValue::Utf8(Some("2025-08-21 09:43:17".into())), None), + ) + .unwrap(); + } + // do a bunch of ops on the columns + for i in 0..175 { + // trim the columns + df = df + .with_column(&format!("c{i}"), btrim(vec![col(format!("c{i}"))])) + .unwrap(); + } + + for i in 0..175 { + let c_name = format!("c{i}"); + let c = col(&c_name); + + // random ops + if i % 5 == 0 && i < 150 { + // the actual ops here are largely unimportant as they are just a sample + // of ops that could occur on a dataframe + df = df + .with_column(&c_name, cast(c.clone(), DataType::Utf8)) + .unwrap() + .with_column( + &c_name, + when( + cast(c.clone(), DataType::Int32).gt(lit(135)), + cast( + cast(c.clone(), DataType::Int32) - lit(i + 3), + DataType::Utf8, + ), + ) + .otherwise(c.clone()) + .unwrap(), + ) + .unwrap() + .with_column( + &c_name, + when( + c.clone().is_not_null().and( + cast(c.clone(), DataType::Int32) + .between(lit(120), lit(130)), + ), + Literal(ScalarValue::Utf8(None), None), + ) + .otherwise( + when( + c.clone().is_not_null().and(regexp_like( + cast(c.clone(), DataType::Utf8View), + lit("[0-9]*"), + None, + )), + upper(c.clone()), + ) + .otherwise(c.clone()) + .unwrap(), + ) + .unwrap(), + ) + .unwrap() + .with_column( + &c_name, + when( + c.clone().is_not_null().and( + cast(c.clone(), DataType::Int32) + .between(lit(90), lit(100)), + ), + cast(c.clone(), DataType::Utf8View), + ) + .otherwise(Literal(ScalarValue::Date32(None), None)) + .unwrap(), + ) + .unwrap() + .with_column( + &c_name, + when( + c.clone().is_not_null().and( + cast(c.clone(), DataType::Int32).rem(lit(10)).gt(lit(7)), + ), + regexp_replace( + cast(c.clone(), DataType::Utf8View), + lit("1"), + lit("a"), + None, + ), + ) + .otherwise(Literal(ScalarValue::Date32(None), None)) + .unwrap(), + ) + .unwrap() + } + if i >= 150 { + df = df + .with_column( + &c_name, + try_cast( + to_timestamp(vec![c.clone(), lit("%Y-%m-%d %H:%M:%S")]), + DataType::Timestamp(Nanosecond, Some("UTC".into())), + ), + ) + .unwrap() + .with_column(&c_name, try_cast(c.clone(), DataType::Date32)) + .unwrap() + } + + // add in a few unions + if i % 30 == 0 { + let df1 = df + .clone() + .filter(length(c.clone()).gt(lit(2))) + .unwrap() + 
.with_column(&format!("c{i}_filtered"), lit(true)) + .unwrap(); + let df2 = df + .filter(not(length(c.clone()).gt(lit(2)))) + .unwrap() + .with_column(&format!("c{i}_filtered"), lit(false)) + .unwrap(); + + df = df1.union_by_name(df2).unwrap() + } + } + + df + }) +} + +fn criterion_benchmark(c: &mut Criterion) { + let ctx = SessionContext::new(); + let rt = Runtime::new().unwrap(); + + // validate logical plan optimize performance + // https://github.com/apache/datafusion/issues/17261 + + let df = build_test_data_frame(&ctx, &rt); + + c.bench_function("logical_plan_optimize", |b| { + b.iter(|| { + let df_clone = df.clone(); + black_box(rt.block_on(async { df_clone.into_optimized_plan().unwrap() })); + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/core/benches/struct_query_sql.rs b/datafusion/core/benches/struct_query_sql.rs index f9cc43d1ea2c..5c7b42731082 100644 --- a/datafusion/core/benches/struct_query_sql.rs +++ b/datafusion/core/benches/struct_query_sql.rs @@ -24,13 +24,14 @@ use criterion::{criterion_group, criterion_main, Criterion}; use datafusion::prelude::SessionContext; use datafusion::{datasource::MemTable, error::Result}; use futures::executor::block_on; +use std::hint::black_box; use std::sync::Arc; use tokio::runtime::Runtime; async fn query(ctx: &SessionContext, rt: &Runtime, sql: &str) { // execute the query let df = rt.block_on(ctx.sql(sql)).unwrap(); - criterion::black_box(rt.block_on(df.collect()).unwrap()); + black_box(rt.block_on(df.collect()).unwrap()); } fn create_context(array_len: usize, batch_size: usize) -> Result { diff --git a/datafusion/core/benches/topk_aggregate.rs b/datafusion/core/benches/topk_aggregate.rs index cf3c7fa2e26f..7971293c9ce2 100644 --- a/datafusion/core/benches/topk_aggregate.rs +++ b/datafusion/core/benches/topk_aggregate.rs @@ -16,6 +16,7 @@ // under the License. 
mod data_utils; + use arrow::util::pretty::pretty_format_batches; use criterion::{criterion_group, criterion_main, Criterion}; use data_utils::make_data; @@ -24,6 +25,7 @@ use datafusion::prelude::SessionContext; use datafusion::{datasource::MemTable, error::Result}; use datafusion_execution::config::SessionConfig; use datafusion_execution::TaskContext; +use std::hint::black_box; use std::sync::Arc; use tokio::runtime::Runtime; @@ -44,7 +46,7 @@ async fn create_context( opts.optimizer.enable_topk_aggregation = use_topk; let ctx = SessionContext::new_with_config(cfg); let _ = ctx.register_table("traces", mem_table)?; - let sql = format!("select trace_id, max(timestamp_ms) from traces group by trace_id order by max(timestamp_ms) desc limit {limit};"); + let sql = format!("select max(timestamp_ms) from traces group by trace_id order by max(timestamp_ms) desc limit {limit};"); let df = ctx.sql(sql.as_str()).await?; let physical_plan = df.create_physical_plan().await?; let actual_phys_plan = displayable(physical_plan.as_ref()).indent(true).to_string(); @@ -57,10 +59,8 @@ async fn create_context( } fn run(rt: &Runtime, plan: Arc, ctx: Arc, asc: bool) { - criterion::black_box( - rt.block_on(async { aggregate(plan.clone(), ctx.clone(), asc).await }), - ) - .unwrap(); + black_box(rt.block_on(async { aggregate(plan.clone(), ctx.clone(), asc).await })) + .unwrap(); } async fn aggregate( @@ -75,20 +75,20 @@ async fn aggregate( let actual = format!("{}", pretty_format_batches(&batches)?).to_lowercase(); let expected_asc = r#" -+----------------------------------+--------------------------+ -| trace_id | max(traces.timestamp_ms) | -+----------------------------------+--------------------------+ -| 5868861a23ed31355efc5200eb80fe74 | 16909009999999 | -| 4040e64656804c3d77320d7a0e7eb1f0 | 16909009999998 | -| 02801bbe533190a9f8713d75222f445d | 16909009999997 | -| 9e31b3b5a620de32b68fefa5aeea57f1 | 16909009999996 | -| 2d88a860e9bd1cfaa632d8e7caeaa934 | 16909009999995 | -| a47edcef8364ab6f191dd9103e51c171 | 16909009999994 | -| 36a3fa2ccfbf8e00337f0b1254384db6 | 16909009999993 | -| 0756be84f57369012e10de18b57d8a2f | 16909009999992 | -| d4d6bf9845fa5897710e3a8db81d5907 | 16909009999991 | -| 3c2cc1abe728a66b61e14880b53482a0 | 16909009999990 | -+----------------------------------+--------------------------+ ++--------------------------+ +| max(traces.timestamp_ms) | ++--------------------------+ +| 16909009999999 | +| 16909009999998 | +| 16909009999997 | +| 16909009999996 | +| 16909009999995 | +| 16909009999994 | +| 16909009999993 | +| 16909009999992 | +| 16909009999991 | +| 16909009999990 | ++--------------------------+ "# .trim(); if asc { diff --git a/datafusion/core/benches/window_query_sql.rs b/datafusion/core/benches/window_query_sql.rs index a55d17a7c5dc..6d83959f7eb3 100644 --- a/datafusion/core/benches/window_query_sql.rs +++ b/datafusion/core/benches/window_query_sql.rs @@ -21,17 +21,19 @@ extern crate arrow; extern crate datafusion; mod data_utils; + use crate::criterion::Criterion; use data_utils::create_table_provider; use datafusion::error::Result; use datafusion::execution::context::SessionContext; use parking_lot::Mutex; +use std::hint::black_box; use std::sync::Arc; use tokio::runtime::Runtime; fn query(ctx: Arc>, rt: &Runtime, sql: &str) { let df = rt.block_on(ctx.lock().sql(sql)).unwrap(); - criterion::black_box(rt.block_on(df.collect()).unwrap()); + black_box(rt.block_on(df.collect()).unwrap()); } fn create_context( diff --git a/datafusion/core/src/dataframe/mod.rs 
b/datafusion/core/src/dataframe/mod.rs index 287a133273d8..98804e424b40 100644 --- a/datafusion/core/src/dataframe/mod.rs +++ b/datafusion/core/src/dataframe/mod.rs @@ -258,10 +258,13 @@ impl DataFrame { /// # async fn main() -> Result<()> { /// // datafusion will parse number as i64 first. /// let sql = "a > 1 and b in (1, 10)"; - /// let expected = col("a").gt(lit(1 as i64)) - /// .and(col("b").in_list(vec![lit(1 as i64), lit(10 as i64)], false)); + /// let expected = col("a") + /// .gt(lit(1 as i64)) + /// .and(col("b").in_list(vec![lit(1 as i64), lit(10 as i64)], false)); /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let expr = df.parse_sql_expr(sql)?; /// assert_eq!(expected, expr); /// # Ok(()) @@ -289,14 +292,16 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.select_columns(&["a", "b"])?; /// let expected = vec![ /// "+---+---+", /// "| a | b |", /// "+---+---+", /// "| 1 | 2 |", - /// "+---+---+" + /// "+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -305,12 +310,12 @@ impl DataFrame { pub fn select_columns(self, columns: &[&str]) -> Result { let fields = columns .iter() - .map(|name| { + .flat_map(|name| { self.plan .schema() - .qualified_field_with_unqualified_name(name) + .qualified_fields_with_unqualified_name(name) }) - .collect::>>()?; + .collect::>(); let expr: Vec = fields .into_iter() .map(|(qualifier, field)| Expr::Column(Column::from((qualifier, field)))) @@ -329,8 +334,10 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; - /// let df : DataFrame = df.select_exprs(&["a * b", "c"])?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; + /// let df: DataFrame = df.select_exprs(&["a * b", "c"])?; /// # Ok(()) /// # } /// ``` @@ -357,14 +364,16 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.select(vec![col("a"), col("b") * col("c")])?; /// let expected = vec![ /// "+---+-----------------------+", /// "| a | ?table?.b * ?table?.c |", /// "+---+-----------------------+", /// "| 1 | 6 |", - /// "+---+-----------------------+" + /// "+---+-----------------------+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -407,7 +416,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// // +----+----+----+ /// // | a | b | c | /// // +----+----+----+ @@ -419,7 +430,7 @@ impl DataFrame { /// "| b | c |", /// "+---+---+", /// "| 2 | 3 |", - /// "+---+---+" 
+ /// "+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -428,13 +439,12 @@ impl DataFrame { pub fn drop_columns(self, columns: &[&str]) -> Result { let fields_to_drop = columns .iter() - .map(|name| { + .flat_map(|name| { self.plan .schema() - .qualified_field_with_unqualified_name(name) + .qualified_fields_with_unqualified_name(name) }) - .filter(|r| r.is_ok()) - .collect::>>()?; + .collect::>(); let expr: Vec = self .plan .schema() @@ -518,7 +528,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.filter(col("a").lt_eq(col("b")))?; /// // all rows where a <= b are returned /// let expected = vec![ @@ -528,7 +540,7 @@ impl DataFrame { /// "| 1 | 2 | 3 |", /// "| 4 | 5 | 6 |", /// "| 7 | 8 | 9 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -557,7 +569,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; /// /// // The following use is the equivalent of "SELECT MIN(b) GROUP BY a" /// let df1 = df.clone().aggregate(vec![col("a")], vec![min(col("b"))])?; @@ -568,7 +582,7 @@ impl DataFrame { /// "| 1 | 2 |", /// "| 4 | 5 |", /// "| 7 | 8 |", - /// "+---+----------------+" + /// "+---+----------------+", /// ]; /// assert_batches_sorted_eq!(expected1, &df1.collect().await?); /// // The following use is the equivalent of "SELECT MIN(b)" @@ -578,7 +592,7 @@ impl DataFrame { /// "| min(?table?.b) |", /// "+----------------+", /// "| 2 |", - /// "+----------------+" + /// "+----------------+", /// ]; /// # assert_batches_sorted_eq!(expected2, &df2.collect().await?); /// # Ok(()) @@ -646,7 +660,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.limit(1, Some(2))?; /// let expected = vec![ /// "+---+---+---+", @@ -654,7 +670,7 @@ impl DataFrame { /// "+---+---+---+", /// "| 4 | 5 | 6 |", /// "| 7 | 8 | 9 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -683,7 +699,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await? 
; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let d2 = df.clone(); /// let df = df.union(d2)?; /// let expected = vec![ @@ -692,7 +710,7 @@ impl DataFrame { /// "+---+---+---+", /// "| 1 | 2 | 3 |", /// "| 1 | 2 | 3 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -723,8 +741,13 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; - /// let d2 = df.clone().select_columns(&["b", "c", "a"])?.with_column("d", lit("77"))?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; + /// let d2 = df + /// .clone() + /// .select_columns(&["b", "c", "a"])? + /// .with_column("d", lit("77"))?; /// let df = df.union_by_name(d2)?; /// let expected = vec![ /// "+---+---+---+----+", @@ -732,7 +755,7 @@ impl DataFrame { /// "+---+---+---+----+", /// "| 1 | 2 | 3 | |", /// "| 1 | 2 | 3 | 77 |", - /// "+---+---+---+----+" + /// "+---+---+---+----+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -762,7 +785,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let d2 = df.clone(); /// let df = df.union_distinct(d2)?; /// // df2 are duplicate of df @@ -771,7 +796,7 @@ impl DataFrame { /// "| a | b | c |", /// "+---+---+---+", /// "| 1 | 2 | 3 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -802,7 +827,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let d2 = df.clone().select_columns(&["b", "c", "a"])?; /// let df = df.union_by_name_distinct(d2)?; /// let expected = vec![ @@ -810,7 +837,7 @@ impl DataFrame { /// "| a | b | c |", /// "+---+---+---+", /// "| 1 | 2 | 3 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -837,14 +864,16 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.distinct()?; /// let expected = vec![ /// "+---+---+---+", /// "| a | b | c |", /// "+---+---+---+", /// "| 1 | 2 | 3 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -871,15 +900,17 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await? 
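The `select_columns` and `drop_columns` hunks earlier in this file replace `qualified_field_with_unqualified_name` with a `flat_map` over `qualified_fields_with_unqualified_name`, so an unqualified name that resolves to several qualified fields now yields all of them instead of a lookup error. A hedged sketch of the kind of call this affects (table and column names are illustrative, not from the patch):

```rust
use datafusion::common::JoinType;
use datafusion::error::Result;
use datafusion::prelude::*;

async fn select_all_a(ctx: &SessionContext) -> Result<DataFrame> {
    // "t1" and "t2" are assumed to be registered tables that both expose a
    // column named "a" and share a join key "id".
    let joined = ctx.table("t1").await?.join(
        ctx.table("t2").await?,
        JoinType::Inner,
        &["id"],
        &["id"],
        None,
    )?;
    // With the flat_map-based lookup this projects every field whose
    // unqualified name is "a" (t1.a and t2.a); drop_columns(&["a"]) likewise
    // drops all of them.
    joined.select_columns(&["a"])
}
```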
- /// // Return a single row (a, b) for each distinct value of a - /// .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await? + /// // Return a single row (a, b) for each distinct value of a + /// .distinct_on(vec![col("a")], vec![col("a"), col("b")], None)?; /// let expected = vec![ /// "+---+---+", /// "| a | b |", /// "+---+---+", /// "| 1 | 2 |", - /// "+---+---+" + /// "+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -952,7 +983,7 @@ impl DataFrame { })); //collect recordBatch - let describe_record_batch = vec![ + let describe_record_batch = [ // count aggregation self.clone().aggregate( vec![], @@ -1125,11 +1156,13 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.sort(vec![ - /// col("a").sort(false, true), // a DESC, nulls first - /// col("b").sort(true, false), // b ASC, nulls last - /// ])?; + /// col("a").sort(false, true), // a DESC, nulls first + /// col("b").sort(true, false), // b ASC, nulls last + /// ])?; /// let expected = vec![ /// "+---+---+---+", /// "| a | b | c |", @@ -1176,12 +1209,17 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let left = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; - /// let right = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await? - /// .select(vec![ - /// col("a").alias("a2"), - /// col("b").alias("b2"), - /// col("c").alias("c2")])?; + /// let left = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; + /// let right = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await? + /// .select(vec![ + /// col("a").alias("a2"), + /// col("b").alias("b2"), + /// col("c").alias("c2"), + /// ])?; /// // Perform the equivalent of `left INNER JOIN right ON (a = a2 AND b = b2)` /// // finding all pairs of rows from `left` and `right` where `a = a2` and `b = b2`. 
/// let join = left.join(right, JoinType::Inner, &["a", "b"], &["a2", "b2"], None)?; @@ -1190,13 +1228,12 @@ impl DataFrame { /// "| a | b | c | a2 | b2 | c2 |", /// "+---+---+---+----+----+----+", /// "| 1 | 2 | 3 | 1 | 2 | 3 |", - /// "+---+---+---+----+----+----+" + /// "+---+---+---+----+----+----+", /// ]; /// assert_batches_sorted_eq!(expected, &join.collect().await?); /// # Ok(()) /// # } /// ``` - /// pub fn join( self, right: DataFrame, @@ -1258,7 +1295,7 @@ impl DataFrame { /// "+---+---+---+----+----+----+", /// "| a | b | c | a2 | b2 | c2 |", /// "+---+---+---+----+----+----+", - /// "+---+---+---+----+----+----+" + /// "+---+---+---+----+----+----+", /// ]; /// # assert_batches_sorted_eq!(expected, &join_on.collect().await?); /// # Ok(()) @@ -1290,7 +1327,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; /// let df1 = df.repartition(Partitioning::RoundRobinBatch(4))?; /// let expected = vec![ /// "+---+---+---+", @@ -1299,7 +1338,7 @@ impl DataFrame { /// "| 1 | 2 | 3 |", /// "| 4 | 5 | 6 |", /// "| 7 | 8 | 9 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df1.collect().await?); /// # Ok(()) @@ -1328,7 +1367,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let count = df.count().await?; // 1 /// # assert_eq!(count, 1); /// # Ok(()) @@ -1367,7 +1408,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let batches = df.collect().await?; /// # Ok(()) /// # } @@ -1387,7 +1430,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// df.show().await?; /// # Ok(()) /// # } @@ -1446,7 +1491,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// df.show_limit(10).await?; /// # Ok(()) /// # } @@ -1472,7 +1519,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let stream = df.execute_stream().await?; /// # Ok(()) /// # } @@ -1498,7 +1547,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + 
/// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let batches = df.collect_partitioned().await?; /// # Ok(()) /// # } @@ -1518,7 +1569,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let batches = df.execute_stream_partitioned().await?; /// # Ok(()) /// # } @@ -1547,7 +1600,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let schema = df.schema(); /// # Ok(()) /// # } @@ -1598,7 +1653,19 @@ impl DataFrame { /// Note: This discards the [`SessionState`] associated with this /// [`DataFrame`] in favour of the one passed to [`TableProvider::scan`] pub fn into_view(self) -> Arc { - Arc::new(DataFrameTableProvider { plan: self.plan }) + Arc::new(DataFrameTableProvider { + plan: self.plan, + table_type: TableType::Temporary, + }) + } + + /// See [`Self::into_view`]. The returned [`TableProvider`] will + /// create a transient table. + pub fn into_temporary_view(self) -> Arc { + Arc::new(DataFrameTableProvider { + plan: self.plan, + table_type: TableType::Temporary, + }) } /// Return a DataFrame with the explanation of its plan so far. @@ -1613,8 +1680,14 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; - /// let batches = df.limit(0, Some(100))?.explain(false, false)?.collect().await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; + /// let batches = df + /// .limit(0, Some(100))? + /// .explain(false, false)? + /// .collect() + /// .await?; /// # Ok(()) /// # } /// ``` @@ -1637,8 +1710,18 @@ impl DataFrame { /// # async fn main() -> Result<()> { /// use datafusion_expr::{Explain, ExplainOption}; /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; - /// let batches = df.limit(0, Some(100))?.explain_with_options(ExplainOption::default().with_verbose(false).with_analyze(false))?.collect().await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; + /// let batches = df + /// .limit(0, Some(100))? + /// .explain_with_options( + /// ExplainOption::default() + /// .with_verbose(false) + /// .with_analyze(false), + /// )? 
+ /// .collect() + /// .await?; /// # Ok(()) /// # } /// ``` @@ -1668,7 +1751,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let f = df.registry(); /// // use f.udf("name", vec![...]) to use the udf /// # Ok(()) @@ -1687,15 +1772,19 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; - /// let d2 = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; + /// let d2 = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.intersect(d2)?; /// let expected = vec![ /// "+---+---+---+", /// "| a | b | c |", /// "+---+---+---+", /// "| 1 | 2 | 3 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -1721,15 +1810,19 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; - /// let d2 = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; + /// let d2 = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.intersect_distinct(d2)?; /// let expected = vec![ /// "+---+---+---+", /// "| a | b | c |", /// "+---+---+---+", /// "| 1 | 2 | 3 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &df.collect().await?); /// # Ok(()) @@ -1755,8 +1848,12 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; - /// let d2 = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; + /// let d2 = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let result = df.except(d2)?; /// // those columns are not in example.csv, but in example_long.csv /// let expected = vec![ @@ -1765,7 +1862,7 @@ impl DataFrame { /// "+---+---+---+", /// "| 4 | 5 | 6 |", /// "| 7 | 8 | 9 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &result.collect().await?); /// # Ok(()) @@ -1791,8 +1888,12 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example_long.csv", CsvReadOptions::new()).await?; - /// let d2 = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example_long.csv", CsvReadOptions::new()) + /// .await?; + /// let d2 = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let result = df.except_distinct(d2)?; /// // those columns are not in example.csv, but in example_long.csv 
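The `into_view` hunk above gives `DataFrameTableProvider` an explicit `table_type` field and adds `into_temporary_view`. A hedged usage sketch (the view name is illustrative):

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

async fn register_as_view(ctx: &SessionContext) -> Result<()> {
    let df = ctx
        .read_csv("tests/data/example.csv", CsvReadOptions::new())
        .await?;
    // Turn the DataFrame into a TableProvider and register it so later SQL
    // can refer to it by name; `into_temporary_view` is the variant added in
    // this patch.
    ctx.register_table("example_view", df.into_temporary_view())?;
    let _batches = ctx
        .sql("SELECT a, b FROM example_view")
        .await?
        .collect()
        .await?;
    Ok(())
}
```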
/// let expected = vec![ @@ -1801,7 +1902,7 @@ impl DataFrame { /// "+---+---+---+", /// "| 4 | 5 | 6 |", /// "| 7 | 8 | 9 |", - /// "+---+---+---+" + /// "+---+---+---+", /// ]; /// # assert_batches_sorted_eq!(expected, &result.collect().await?); /// # Ok(()) @@ -1878,13 +1979,15 @@ impl DataFrame { /// use datafusion::dataframe::DataFrameWriteOptions; /// let ctx = SessionContext::new(); /// // Sort the data by column "b" and write it to a new location - /// ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await? - /// .sort(vec![col("b").sort(true, true)])? // sort by b asc, nulls first - /// .write_csv( - /// "output.csv", - /// DataFrameWriteOptions::new(), - /// None, // can also specify CSV writing options here - /// ).await?; + /// ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await? + /// .sort(vec![col("b").sort(true, true)])? // sort by b asc, nulls first + /// .write_csv( + /// "output.csv", + /// DataFrameWriteOptions::new(), + /// None, // can also specify CSV writing options here + /// ) + /// .await?; /// # fs::remove_file("output.csv")?; /// # Ok(()) /// # } @@ -1948,13 +2051,11 @@ impl DataFrame { /// use datafusion::dataframe::DataFrameWriteOptions; /// let ctx = SessionContext::new(); /// // Sort the data by column "b" and write it to a new location - /// ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await? - /// .sort(vec![col("b").sort(true, true)])? // sort by b asc, nulls first - /// .write_json( - /// "output.json", - /// DataFrameWriteOptions::new(), - /// None - /// ).await?; + /// ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await? + /// .sort(vec![col("b").sort(true, true)])? // sort by b asc, nulls first + /// .write_json("output.json", DataFrameWriteOptions::new(), None) + /// .await?; /// # fs::remove_file("output.json")?; /// # Ok(()) /// # } @@ -2015,7 +2116,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.with_column("ab_sum", col("a") + col("b"))?; /// # Ok(()) /// # } @@ -2089,7 +2192,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.with_column_renamed("ab_sum", "total")?; /// /// # Ok(()) @@ -2222,7 +2327,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// let df = df.cache().await?; /// # Ok(()) /// # } @@ -2266,7 +2373,9 @@ impl DataFrame { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// // Fill nulls in only columns "a" and "c": /// let df = df.fill_null(ScalarValue::from(0), vec!["a".to_owned(), "c".to_owned()])?; /// // Fill nulls across all columns: @@ -2337,9 
+2446,9 @@ impl DataFrame { /// Helper for creating DataFrame. /// # Example /// ``` - /// use std::sync::Arc; /// use arrow::array::{ArrayRef, Int32Array, StringArray}; /// use datafusion::prelude::DataFrame; + /// use std::sync::Arc; /// let id: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3])); /// let name: ArrayRef = Arc::new(StringArray::from(vec!["foo", "bar", "baz"])); /// let df = DataFrame::from_columns(vec![("id", id), ("name", name)]).unwrap(); @@ -2426,6 +2535,7 @@ macro_rules! dataframe { #[derive(Debug)] struct DataFrameTableProvider { plan: LogicalPlan, + table_type: TableType, } #[async_trait] @@ -2451,7 +2561,7 @@ impl TableProvider for DataFrameTableProvider { } fn table_type(&self) -> TableType { - TableType::View + self.table_type } async fn scan( diff --git a/datafusion/core/src/dataframe/parquet.rs b/datafusion/core/src/dataframe/parquet.rs index d46a902ca513..cb8a6cf29541 100644 --- a/datafusion/core/src/dataframe/parquet.rs +++ b/datafusion/core/src/dataframe/parquet.rs @@ -42,13 +42,15 @@ impl DataFrame { /// use datafusion::dataframe::DataFrameWriteOptions; /// let ctx = SessionContext::new(); /// // Sort the data by column "b" and write it to a new location - /// ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await? - /// .sort(vec![col("b").sort(true, true)])? // sort by b asc, nulls first - /// .write_parquet( - /// "output.parquet", - /// DataFrameWriteOptions::new(), - /// None, // can also specify parquet writing options here - /// ).await?; + /// ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await? + /// .sort(vec![col("b").sort(true, true)])? // sort by b asc, nulls first + /// .write_parquet( + /// "output.parquet", + /// DataFrameWriteOptions::new(), + /// None, // can also specify parquet writing options here + /// ) + /// .await?; /// # fs::remove_file("output.parquet")?; /// # Ok(()) /// # } @@ -116,6 +118,8 @@ mod tests { use datafusion_execution::config::SessionConfig; use datafusion_expr::{col, lit}; + #[cfg(feature = "parquet_encryption")] + use datafusion_common::config::ConfigFileEncryptionProperties; use object_store::local::LocalFileSystem; use parquet::file::reader::FileReader; use tempfile::TempDir; @@ -280,7 +284,8 @@ mod tests { // Write encrypted parquet using write_parquet let mut options = TableParquetOptions::default(); - options.crypto.file_encryption = Some((&encrypt).into()); + options.crypto.file_encryption = + Some(ConfigFileEncryptionProperties::from(&encrypt)); options.global.allow_single_file_parallelism = allow_single_file_parallelism; df.write_parquet( diff --git a/datafusion/core/src/datasource/file_format/mod.rs b/datafusion/core/src/datasource/file_format/mod.rs index e165707c2eb0..7c55d452c4e1 100644 --- a/datafusion/core/src/datasource/file_format/mod.rs +++ b/datafusion/core/src/datasource/file_format/mod.rs @@ -40,6 +40,7 @@ pub(crate) mod test_util { use datafusion_catalog::Session; use datafusion_common::Result; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; + use datafusion_datasource::TableSchema; use datafusion_datasource::{file_format::FileFormat, PartitionedFile}; use datafusion_execution::object_store::ObjectStoreUrl; use std::sync::Arc; @@ -66,6 +67,8 @@ pub(crate) mod test_util { .await? 
}; + let table_schema = TableSchema::new(file_schema.clone(), vec![]); + let statistics = format .infer_stats(state, &store, file_schema.clone(), &meta) .await?; @@ -85,12 +88,11 @@ pub(crate) mod test_util { state, FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - file_schema, - format.file_source(), + format.file_source(table_schema), ) .with_file_groups(file_groups) .with_statistics(statistics) - .with_projection(projection) + .with_projection_indices(projection) .with_limit(limit) .build(), ) diff --git a/datafusion/core/src/datasource/file_format/options.rs b/datafusion/core/src/datasource/file_format/options.rs index 8c1bb02ef073..e78c5f09553c 100644 --- a/datafusion/core/src/datasource/file_format/options.rs +++ b/datafusion/core/src/datasource/file_format/options.rs @@ -269,6 +269,8 @@ pub struct ParquetReadOptions<'a> { pub file_sort_order: Vec>, /// Properties for decryption of Parquet files that use modular encryption pub file_decryption_properties: Option, + /// Metadata size hint for Parquet files reading (in bytes) + pub metadata_size_hint: Option, } impl Default for ParquetReadOptions<'_> { @@ -281,6 +283,7 @@ impl Default for ParquetReadOptions<'_> { schema: None, file_sort_order: vec![], file_decryption_properties: None, + metadata_size_hint: None, } } } @@ -340,6 +343,12 @@ impl<'a> ParquetReadOptions<'a> { self.file_decryption_properties = Some(file_decryption_properties); self } + + /// Configure metadata size hint for Parquet files reading (in bytes) + pub fn metadata_size_hint(mut self, size_hint: Option) -> Self { + self.metadata_size_hint = size_hint; + self + } } /// Options that control the reading of ARROW files. @@ -606,6 +615,11 @@ impl ReadOptions<'_> for ParquetReadOptions<'_> { if let Some(file_decryption_properties) = &self.file_decryption_properties { options.crypto.file_decryption = Some(file_decryption_properties.clone()); } + // This can be overridden per-read in ParquetReadOptions, if setting. 
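The `ParquetReadOptions` hunks above add a per-read `metadata_size_hint`, and, as the comment just above notes, the `ReadOptions` impl copies it into `options.global.metadata_size_hint`. A hedged sketch of how a caller might set it (the path and the 512 KiB value are illustrative):

```rust
use datafusion::error::Result;
use datafusion::prelude::*;

async fn read_with_hint(ctx: &SessionContext) -> Result<DataFrame> {
    // Suggest prefetching ~512 KiB from the end of each file, which is often
    // enough to read the footer and page indexes in a single request.
    ctx.read_parquet(
        "data/example.parquet",
        ParquetReadOptions::default().metadata_size_hint(Some(512 * 1024)),
    )
    .await
}
```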
+ if let Some(metadata_size_hint) = self.metadata_size_hint { + options.global.metadata_size_hint = Some(metadata_size_hint); + } + let mut file_format = ParquetFormat::new().with_options(options); if let Some(parquet_pruning) = self.parquet_pruning { diff --git a/datafusion/core/src/datasource/file_format/parquet.rs b/datafusion/core/src/datasource/file_format/parquet.rs index 088c4408fff5..52c5393e1031 100644 --- a/datafusion/core/src/datasource/file_format/parquet.rs +++ b/datafusion/core/src/datasource/file_format/parquet.rs @@ -154,7 +154,6 @@ mod tests { use futures::stream::BoxStream; use futures::StreamExt; use insta::assert_snapshot; - use log::error; use object_store::local::LocalFileSystem; use object_store::ObjectMeta; use object_store::{ @@ -163,9 +162,10 @@ mod tests { }; use parquet::arrow::arrow_reader::ArrowReaderOptions; use parquet::arrow::ParquetRecordBatchStreamBuilder; - use parquet::file::metadata::{KeyValue, ParquetColumnIndex, ParquetOffsetIndex}; - use parquet::file::page_index::index::Index; - use parquet::format::FileMetaData; + use parquet::file::metadata::{ + KeyValue, ParquetColumnIndex, ParquetMetaData, ParquetOffsetIndex, + }; + use parquet::file::page_index::column_index::ColumnIndexMetaData; use tokio::fs::File; enum ForceViews { @@ -546,7 +546,8 @@ mod tests { let (files, _file_names) = store_parquet(vec![batch1], false).await?; let state = SessionContext::new().state(); - let format = ParquetFormat::default(); + // Make metadata size hint None to keep original behavior + let format = ParquetFormat::default().with_metadata_size_hint(None); let _schema = format.infer_schema(&state, &store.upcast(), &files).await?; assert_eq!(store.request_count(), 3); // No increase, cache being used. @@ -620,7 +621,9 @@ mod tests { let mut state = SessionContext::new().state(); state = set_view_state(state, force_views); - let format = ParquetFormat::default().with_force_view_types(force_views); + let format = ParquetFormat::default() + .with_force_view_types(force_views) + .with_metadata_size_hint(None); let schema = format.infer_schema(&state, &store.upcast(), &files).await?; assert_eq!(store.request_count(), 6); @@ -1144,18 +1147,14 @@ mod tests { // 325 pages in int_col assert_eq!(int_col_offset.len(), 325); - match int_col_index { - Index::INT32(index) => { - assert_eq!(index.indexes.len(), 325); - for min_max in index.clone().indexes { - assert!(min_max.min.is_some()); - assert!(min_max.max.is_some()); - assert!(min_max.null_count.is_some()); - } - } - _ => { - error!("fail to read page index.") - } + let ColumnIndexMetaData::INT32(index) = int_col_index else { + panic!("fail to read page index.") + }; + assert_eq!(index.min_values().len(), 325); + assert_eq!(index.max_values().len(), 325); + // all values are non null + for idx in 0..325 { + assert_eq!(index.null_count(idx), Some(0)); } } @@ -1556,7 +1555,7 @@ mod tests { Ok(parquet_sink) } - fn get_written(parquet_sink: Arc) -> Result<(Path, FileMetaData)> { + fn get_written(parquet_sink: Arc) -> Result<(Path, ParquetMetaData)> { let mut written = parquet_sink.written(); let written = written.drain(); assert_eq!( @@ -1566,28 +1565,33 @@ mod tests { written.len() ); - let (path, file_metadata) = written.take(1).next().unwrap(); - Ok((path, file_metadata)) + let (path, parquet_meta_data) = written.take(1).next().unwrap(); + Ok((path, parquet_meta_data)) } - fn assert_file_metadata(file_metadata: FileMetaData, expected_kv: &Vec) { - let FileMetaData { - num_rows, - schema, - key_value_metadata, - .. 
- } = file_metadata; - assert_eq!(num_rows, 2, "file metadata to have 2 rows"); + fn assert_file_metadata( + parquet_meta_data: ParquetMetaData, + expected_kv: &Vec, + ) { + let file_metadata = parquet_meta_data.file_metadata(); + let schema_descr = file_metadata.schema_descr(); + assert_eq!(file_metadata.num_rows(), 2, "file metadata to have 2 rows"); assert!( - schema.iter().any(|col_schema| col_schema.name == "a"), + schema_descr + .columns() + .iter() + .any(|col_schema| col_schema.name() == "a"), "output file metadata should contain col a" ); assert!( - schema.iter().any(|col_schema| col_schema.name == "b"), + schema_descr + .columns() + .iter() + .any(|col_schema| col_schema.name() == "b"), "output file metadata should contain col b" ); - let mut key_value_metadata = key_value_metadata.unwrap(); + let mut key_value_metadata = file_metadata.key_value_metadata().unwrap().clone(); key_value_metadata.sort_by(|a, b| a.key.cmp(&b.key)); assert_eq!(&key_value_metadata, expected_kv); } @@ -1644,13 +1648,11 @@ mod tests { // check the file metadata includes partitions let mut expected_partitions = std::collections::HashSet::from(["a=foo", "a=bar"]); - for ( - path, - FileMetaData { - num_rows, schema, .. - }, - ) in written.take(2) - { + for (path, parquet_metadata) in written.take(2) { + let file_metadata = parquet_metadata.file_metadata(); + let schema = file_metadata.schema_descr(); + let num_rows = file_metadata.num_rows(); + let path_parts = path.parts().collect::>(); assert_eq!(path_parts.len(), 2, "should have path prefix"); @@ -1663,11 +1665,17 @@ mod tests { assert_eq!(num_rows, 1, "file metadata to have 1 row"); assert!( - !schema.iter().any(|col_schema| col_schema.name == "a"), + !schema + .columns() + .iter() + .any(|col_schema| col_schema.name() == "a"), "output file metadata will not contain partitioned col a" ); assert!( - schema.iter().any(|col_schema| col_schema.name == "b"), + schema + .columns() + .iter() + .any(|col_schema| col_schema.name() == "b"), "output file metadata should contain col b" ); } diff --git a/datafusion/core/src/datasource/mod.rs b/datafusion/core/src/datasource/mod.rs index 94d651ddadd5..620e389a0fb8 100644 --- a/datafusion/core/src/datasource/mod.rs +++ b/datafusion/core/src/datasource/mod.rs @@ -45,6 +45,7 @@ pub use datafusion_catalog::view; pub use datafusion_datasource::schema_adapter; pub use datafusion_datasource::sink; pub use datafusion_datasource::source; +pub use datafusion_datasource::table_schema; pub use datafusion_execution::object_store; pub use datafusion_physical_expr::create_ordering; @@ -123,16 +124,13 @@ mod tests { let f2 = Field::new("extra_column", DataType::Utf8, true); let schema = Arc::new(Schema::new(vec![f1.clone(), f2.clone()])); - let source = ParquetSource::default() + let source = ParquetSource::new(Arc::clone(&schema)) .with_schema_adapter_factory(Arc::new(TestSchemaAdapterFactory {})) .unwrap(); - let base_conf = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - schema, - source, - ) - .with_file(partitioned_file) - .build(); + let base_conf = + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) + .with_file(partitioned_file) + .build(); let parquet_exec = DataSourceExec::from_data_source(base_conf); diff --git a/datafusion/core/src/datasource/physical_plan/avro.rs b/datafusion/core/src/datasource/physical_plan/avro.rs index 8a00af959ccc..1cf8c573acd9 100644 --- a/datafusion/core/src/datasource/physical_plan/avro.rs +++ b/datafusion/core/src/datasource/physical_plan/avro.rs @@ 
-34,7 +34,7 @@ mod tests { use datafusion_common::{test_util, Result, ScalarValue}; use datafusion_datasource::file_format::FileFormat; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; - use datafusion_datasource::PartitionedFile; + use datafusion_datasource::{PartitionedFile, TableSchema}; use datafusion_datasource_avro::source::AvroSource; use datafusion_datasource_avro::AvroFormat; use datafusion_execution::object_store::ObjectStoreUrl; @@ -81,15 +81,11 @@ mod tests { .infer_schema(&state, &store, std::slice::from_ref(&meta)) .await?; - let source = Arc::new(AvroSource::new()); - let conf = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - file_schema, - source, - ) - .with_file(meta.into()) - .with_projection(Some(vec![0, 1, 2])) - .build(); + let source = Arc::new(AvroSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) + .with_file(meta.into()) + .with_projection_indices(Some(vec![0, 1, 2])) + .build(); let source_exec = DataSourceExec::from_data_source(conf); assert_eq!( @@ -157,10 +153,10 @@ mod tests { // Include the missing column in the projection let projection = Some(vec![0, 1, 2, actual_schema.fields().len()]); - let source = Arc::new(AvroSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(AvroSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file(meta.into()) - .with_projection(projection) + .with_projection_indices(projection) .build(); let source_exec = DataSourceExec::from_data_source(conf); @@ -227,13 +223,16 @@ mod tests { partitioned_file.partition_values = vec![ScalarValue::from("2021-10-26")]; let projection = Some(vec![0, 1, file_schema.fields().len(), 2]); - let source = Arc::new(AvroSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let table_schema = TableSchema::new( + file_schema.clone(), + vec![Arc::new(Field::new("date", DataType::Utf8, false))], + ); + let source = Arc::new(AvroSource::new(table_schema.clone())); + let conf = FileScanConfigBuilder::new(object_store_url, source) // select specific columns of the files as well as the partitioning // column which is supposed to be the last column in the table schema. 
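            // The avro/csv/json/parquet test hunks in this section all follow the
            // same migration pattern: file sources now carry the table schema (via
            // `TableSchema` or a `SchemaRef`), `FileScanConfigBuilder::new` no longer
            // takes a schema argument, partition columns move from
            // `.with_table_partition_cols(..)` into `TableSchema::new(..)`, and
            // `.with_projection(..)` is renamed to `.with_projection_indices(..)`.
            // A condensed before/after sketch, illustrative only and built from the
            // field names used in the surrounding tests (not an additional change
            // in this PR):
            //
            //   // before
            //   let source = Arc::new(AvroSource::new());
            //   let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source)
            //       .with_projection(Some(vec![0, 1, 2]))
            //       .with_table_partition_cols(vec![Field::new("date", DataType::Utf8, false)])
            //       .build();
            //
            //   // after
            //   let table_schema = TableSchema::new(
            //       Arc::clone(&file_schema),
            //       vec![Arc::new(Field::new("date", DataType::Utf8, false))],
            //   );
            //   let source = Arc::new(AvroSource::new(table_schema.clone()));
            //   let conf = FileScanConfigBuilder::new(object_store_url, source)
            //       .with_projection_indices(Some(vec![0, 1, 2]))
            //       .build();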
- .with_projection(projection) + .with_projection_indices(projection) .with_file(partitioned_file) - .with_table_partition_cols(vec![Field::new("date", DataType::Utf8, false)]) .build(); let source_exec = DataSourceExec::from_data_source(conf); diff --git a/datafusion/core/src/datasource/physical_plan/csv.rs b/datafusion/core/src/datasource/physical_plan/csv.rs index b2ef51a76f89..ac5df24d4999 100644 --- a/datafusion/core/src/datasource/physical_plan/csv.rs +++ b/datafusion/core/src/datasource/physical_plan/csv.rs @@ -29,12 +29,14 @@ mod tests { use std::io::Write; use std::sync::Arc; + use datafusion_datasource::TableSchema; use datafusion_datasource_csv::CsvFormat; use object_store::ObjectStore; use crate::prelude::CsvReadOptions; use crate::prelude::SessionContext; use crate::test::partitioned_file_groups; + use datafusion_common::config::CsvOptions; use datafusion_common::test_util::arrow_test_data; use datafusion_common::test_util::batches_to_string; use datafusion_common::{assert_batches_eq, Result}; @@ -94,6 +96,8 @@ mod tests { async fn csv_exec_with_projection( file_compression_type: FileCompressionType, ) -> Result<()> { + use datafusion_datasource::TableSchema; + let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); let file_schema = aggr_test_schema(); @@ -110,16 +114,21 @@ mod tests { tmp_dir.path(), )?; - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_file_compression_type(file_compression_type) - .with_newlines_in_values(false) - .with_projection(Some(vec![0, 2, 4])) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + .with_file_compression_type(file_compression_type) + .with_newlines_in_values(false) + .with_projection_indices(Some(vec![0, 2, 4])) + .build(); assert_eq!(13, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); @@ -158,6 +167,8 @@ mod tests { async fn csv_exec_with_mixed_order_projection( file_compression_type: FileCompressionType, ) -> Result<()> { + use datafusion_datasource::TableSchema; + let cfg = SessionConfig::new().set_str("datafusion.catalog.has_header", "true"); let session_ctx = SessionContext::new_with_config(cfg); let task_ctx = session_ctx.task_ctx(); @@ -175,16 +186,21 @@ mod tests { tmp_dir.path(), )?; - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .with_projection(Some(vec![4, 0, 2])) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type.to_owned()) + .with_projection_indices(Some(vec![4, 
0, 2])) + .build(); assert_eq!(13, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); assert_eq!(3, csv.schema().fields().len()); @@ -221,6 +237,7 @@ mod tests { async fn csv_exec_with_limit( file_compression_type: FileCompressionType, ) -> Result<()> { + use datafusion_datasource::TableSchema; use futures::StreamExt; let cfg = SessionConfig::new().set_str("datafusion.catalog.has_header", "true"); @@ -240,16 +257,21 @@ mod tests { tmp_dir.path(), )?; - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .with_limit(Some(5)) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type.to_owned()) + .with_limit(Some(5)) + .build(); assert_eq!(13, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); assert_eq!(13, csv.schema().fields().len()); @@ -287,6 +309,8 @@ mod tests { async fn csv_exec_with_missing_column( file_compression_type: FileCompressionType, ) -> Result<()> { + use datafusion_datasource::TableSchema; + let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); let file_schema = aggr_test_schema_with_missing_col(); @@ -303,16 +327,21 @@ mod tests { tmp_dir.path(), )?; - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .with_limit(Some(5)) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type.to_owned()) + .with_limit(Some(5)) + .build(); assert_eq!(14, config.file_schema().fields().len()); let csv = DataSourceExec::from_data_source(config); assert_eq!(14, csv.schema().fields().len()); @@ -341,6 +370,7 @@ mod tests { file_compression_type: FileCompressionType, ) -> Result<()> { use datafusion_common::ScalarValue; + use datafusion_datasource::TableSchema; let session_ctx = SessionContext::new(); let task_ctx = session_ctx.task_ctx(); @@ -362,19 +392,26 @@ mod tests { let num_file_schema_fields = file_schema.fields().len(); - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .with_table_partition_cols(vec![Field::new("date", DataType::Utf8, false)]) - // We should be able to project on the partition column - // Which is 
supposed to be after the file fields - .with_projection(Some(vec![0, num_file_schema_fields])) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::new( + Arc::clone(&file_schema), + vec![Arc::new(Field::new("date", DataType::Utf8, false))], + ); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type.to_owned()) + // We should be able to project on the partition column + // Which is supposed to be after the file fields + .with_projection_indices(Some(vec![0, num_file_schema_fields])) + .build(); // we don't have `/date=xx/` in the path but that is ok because // partitions are resolved during scan anyway @@ -463,15 +500,20 @@ mod tests { ) .unwrap(); - let source = Arc::new(CsvSource::new(true, b',', b'"')); - let config = FileScanConfigBuilder::from(partitioned_csv_config( - file_schema, - file_groups, - source, - )) - .with_newlines_in_values(false) - .with_file_compression_type(file_compression_type.to_owned()) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(Arc::clone(&file_schema)); + let source = + Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = + FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + .with_newlines_in_values(false) + .with_file_compression_type(file_compression_type.to_owned()) + .build(); let csv = DataSourceExec::from_data_source(config); let it = csv.execute(0, task_ctx).unwrap(); diff --git a/datafusion/core/src/datasource/physical_plan/json.rs b/datafusion/core/src/datasource/physical_plan/json.rs index 0d45711c76fb..de7e87d25c84 100644 --- a/datafusion/core/src/datasource/physical_plan/json.rs +++ b/datafusion/core/src/datasource/physical_plan/json.rs @@ -176,8 +176,8 @@ mod tests { let (object_store_url, file_groups, file_schema) = prepare_store(&state, file_compression_type.to_owned(), tmp_dir.path()).await; - let source = Arc::new(JsonSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(JsonSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file_groups(file_groups) .with_limit(Some(3)) .with_file_compression_type(file_compression_type.to_owned()) @@ -251,8 +251,8 @@ mod tests { let file_schema = Arc::new(builder.finish()); let missing_field_idx = file_schema.fields.len() - 1; - let source = Arc::new(JsonSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(JsonSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file_groups(file_groups) .with_limit(Some(3)) .with_file_compression_type(file_compression_type.to_owned()) @@ -294,10 +294,10 @@ mod tests { let (object_store_url, file_groups, file_schema) = prepare_store(&state, file_compression_type.to_owned(), tmp_dir.path()).await; - let source = Arc::new(JsonSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(JsonSource::new(Arc::clone(&file_schema))); + let conf = 
FileScanConfigBuilder::new(object_store_url, source) .with_file_groups(file_groups) - .with_projection(Some(vec![0, 2])) + .with_projection_indices(Some(vec![0, 2])) .with_file_compression_type(file_compression_type.to_owned()) .build(); let exec = DataSourceExec::from_data_source(conf); @@ -342,10 +342,10 @@ mod tests { let (object_store_url, file_groups, file_schema) = prepare_store(&state, file_compression_type.to_owned(), tmp_dir.path()).await; - let source = Arc::new(JsonSource::new()); - let conf = FileScanConfigBuilder::new(object_store_url, file_schema, source) + let source = Arc::new(JsonSource::new(Arc::clone(&file_schema))); + let conf = FileScanConfigBuilder::new(object_store_url, source) .with_file_groups(file_groups) - .with_projection(Some(vec![3, 0, 2])) + .with_projection_indices(Some(vec![3, 0, 2])) .with_file_compression_type(file_compression_type.to_owned()) .build(); let exec = DataSourceExec::from_data_source(conf); diff --git a/datafusion/core/src/datasource/physical_plan/parquet.rs b/datafusion/core/src/datasource/physical_plan/parquet.rs index 10a475c1cc9a..b27dcf56e33c 100644 --- a/datafusion/core/src/datasource/physical_plan/parquet.rs +++ b/datafusion/core/src/datasource/physical_plan/parquet.rs @@ -54,7 +54,7 @@ mod tests { use datafusion_datasource::source::DataSourceExec; use datafusion_datasource::file::FileSource; - use datafusion_datasource::{FileRange, PartitionedFile}; + use datafusion_datasource::{FileRange, PartitionedFile, TableSchema}; use datafusion_datasource_parquet::source::ParquetSource; use datafusion_datasource_parquet::{ DefaultParquetFileReaderFactory, ParquetFileReaderFactory, ParquetFormat, @@ -65,7 +65,7 @@ mod tests { use datafusion_physical_plan::analyze::AnalyzeExec; use datafusion_physical_plan::collect; use datafusion_physical_plan::metrics::{ - ExecutionPlanMetricsSet, MetricType, MetricsSet, + ExecutionPlanMetricsSet, MetricType, MetricValue, MetricsSet, }; use datafusion_physical_plan::{ExecutionPlan, ExecutionPlanProperties}; @@ -161,7 +161,7 @@ mod tests { .as_ref() .map(|p| logical2physical(p, &table_schema)); - let mut source = ParquetSource::default(); + let mut source = ParquetSource::new(table_schema); if let Some(predicate) = predicate { source = source.with_predicate(predicate); } @@ -186,23 +186,19 @@ mod tests { source = source.with_bloom_filter_on_read(false); } - source.with_schema(Arc::clone(&table_schema)) + Arc::new(source) } fn build_parquet_exec( &self, - file_schema: SchemaRef, file_group: FileGroup, source: Arc, ) -> Arc { - let base_config = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - file_schema, - source, - ) - .with_file_group(file_group) - .with_projection(self.projection.clone()) - .build(); + let base_config = + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) + .with_file_group(file_group) + .with_projection_indices(self.projection.clone()) + .build(); DataSourceExec::from_data_source(base_config) } @@ -231,11 +227,8 @@ mod tests { // build a ParquetExec to return the results let parquet_source = self.build_file_source(Arc::clone(table_schema)); - let parquet_exec = self.build_parquet_exec( - Arc::clone(table_schema), - file_group.clone(), - Arc::clone(&parquet_source), - ); + let parquet_exec = + self.build_parquet_exec(file_group.clone(), Arc::clone(&parquet_source)); let analyze_exec = Arc::new(AnalyzeExec::new( false, @@ -243,7 +236,6 @@ mod tests { vec![MetricType::SUMMARY, MetricType::DEV], // use a new ParquetSource to avoid sharing execution metrics 
self.build_parquet_exec( - Arc::clone(table_schema), file_group.clone(), self.build_file_source(Arc::clone(table_schema)), ), @@ -1175,8 +1167,10 @@ mod tests { // There are 4 rows pruned in each of batch2, batch3, and // batch4 for a total of 12. batch1 had no pruning as c2 was // filled in as null - assert_eq!(get_value(&metrics, "page_index_rows_pruned"), 12); - assert_eq!(get_value(&metrics, "page_index_rows_matched"), 6); + let (page_index_pruned, page_index_matched) = + get_pruning_metric(&metrics, "page_index_rows_pruned"); + assert_eq!(page_index_pruned, 12); + assert_eq!(page_index_matched, 6); } #[tokio::test] @@ -1548,8 +1542,7 @@ mod tests { ) -> Result<()> { let config = FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - file_schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(file_schema)), ) .with_file_groups(file_groups) .build(); @@ -1651,23 +1644,26 @@ mod tests { ), ]); - let source = Arc::new(ParquetSource::default()); - let config = FileScanConfigBuilder::new(object_store_url, schema.clone(), source) - .with_file(partitioned_file) - // file has 10 cols so index 12 should be month and 13 should be day - .with_projection(Some(vec![0, 1, 2, 12, 13])) - .with_table_partition_cols(vec![ - Field::new("year", DataType::Utf8, false), - Field::new("month", DataType::UInt8, false), - Field::new( + let table_schema = TableSchema::new( + Arc::clone(&schema), + vec![ + Arc::new(Field::new("year", DataType::Utf8, false)), + Arc::new(Field::new("month", DataType::UInt8, false)), + Arc::new(Field::new( "day", DataType::Dictionary( Box::new(DataType::UInt16), Box::new(DataType::Utf8), ), false, - ), - ]) + )), + ], + ); + let source = Arc::new(ParquetSource::new(table_schema.clone())); + let config = FileScanConfigBuilder::new(object_store_url, source) + .with_file(partitioned_file) + // file has 10 cols so index 12 should be month and 13 should be day + .with_projection_indices(Some(vec![0, 1, 2, 12, 13])) .build(); let parquet_exec = DataSourceExec::from_data_source(config); @@ -1729,8 +1725,7 @@ mod tests { let file_schema = Arc::new(Schema::empty()); let config = FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - file_schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(file_schema)), ) .with_file(partitioned_file) .build(); @@ -1776,8 +1771,10 @@ mod tests { | 5 | +-----+ "###); - assert_eq!(get_value(&metrics, "page_index_rows_pruned"), 4); - assert_eq!(get_value(&metrics, "page_index_rows_matched"), 2); + let (page_index_pruned, page_index_matched) = + get_pruning_metric(&metrics, "page_index_rows_pruned"); + assert_eq!(page_index_pruned, 4); + assert_eq!(page_index_matched, 2); assert!( get_value(&metrics, "page_index_eval_time") > 0, "no eval time in metrics: {metrics:#?}" @@ -1866,8 +1863,10 @@ mod tests { assert_contains!(&explain, "predicate=c1@0 != bar"); // there's a single row group, but we can check that it matched - // if no pruning was done this would be 0 instead of 1 - assert_contains!(&explain, "row_groups_matched_statistics=1"); + assert_contains!( + &explain, + "row_groups_pruned_statistics=1 total \u{2192} 1 matched" + ); // check the projection assert_contains!(&explain, "projection=[c1]"); @@ -1898,8 +1897,10 @@ mod tests { // When both matched and pruned are 0, it means that the pruning predicate // was not used at all. 
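        // Note on the assertions below: row-group and page-index pruning counters
        // are now reported as a single combined metric (`MetricValue::PruningMetrics`)
        // carrying both the pruned and matched counts, which EXPLAIN renders as
        // e.g. `row_groups_pruned_statistics=1 total → 1 matched`. An illustrative
        // read of such a metric, mirroring the `get_pruning_metric` helper added
        // further down (a sketch, not an additional change in this PR):
        //
        //   if let Some(MetricValue::PruningMetrics { pruning_metrics, .. }) =
        //       metrics.sum_by_name("page_index_rows_pruned")
        //   {
        //       println!(
        //           "{} pruned, {} matched",
        //           pruning_metrics.pruned(),
        //           pruning_metrics.matched()
        //       );
        //   }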
- assert_contains!(&explain, "row_groups_matched_statistics=0"); - assert_contains!(&explain, "row_groups_pruned_statistics=0"); + assert_contains!( + &explain, + "row_groups_pruned_statistics=1 total \u{2192} 1 matched" + ); // But pushdown predicate should be present assert_contains!( @@ -1952,7 +1953,12 @@ mod tests { /// Panics if no such metric. fn get_value(metrics: &MetricsSet, metric_name: &str) -> usize { match metrics.sum_by_name(metric_name) { - Some(v) => v.as_usize(), + Some(v) => match v { + MetricValue::PruningMetrics { + pruning_metrics, .. + } => pruning_metrics.pruned(), + _ => v.as_usize(), + }, _ => { panic!( "Expected metric not found. Looking for '{metric_name}' in\n\n{metrics:#?}" @@ -1961,6 +1967,20 @@ mod tests { } } + fn get_pruning_metric(metrics: &MetricsSet, metric_name: &str) -> (usize, usize) { + match metrics.sum_by_name(metric_name) { + Some(MetricValue::PruningMetrics { + pruning_metrics, .. + }) => (pruning_metrics.pruned(), pruning_metrics.matched()), + Some(_) => panic!( + "Metric '{metric_name}' is not a pruning metric in\n\n{metrics:#?}" + ), + None => panic!( + "Expected metric not found. Looking for '{metric_name}' in\n\n{metrics:#?}" + ), + } + } + fn populate_csv_partitions( tmp_dir: &TempDir, partition_count: usize, @@ -2252,11 +2272,11 @@ mod tests { let size_hint_calls = reader_factory.metadata_size_hint_calls.clone(); let source = Arc::new( - ParquetSource::default() + ParquetSource::new(Arc::clone(&schema)) .with_parquet_file_reader_factory(reader_factory) .with_metadata_size_hint(456), ); - let config = FileScanConfigBuilder::new(store_url, schema, source) + let config = FileScanConfigBuilder::new(store_url, source) .with_file( PartitionedFile { object_meta: ObjectMeta { diff --git a/datafusion/core/src/execution/context/csv.rs b/datafusion/core/src/execution/context/csv.rs index 15d6d21f038a..e6f95886e91d 100644 --- a/datafusion/core/src/execution/context/csv.rs +++ b/datafusion/core/src/execution/context/csv.rs @@ -37,9 +37,16 @@ impl SessionContext { /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); /// // You can read a single file using `read_csv` - /// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv("tests/data/example.csv", CsvReadOptions::new()) + /// .await?; /// // you can also read multiple files: - /// let df = ctx.read_csv(vec!["tests/data/example.csv", "tests/data/example.csv"], CsvReadOptions::new()).await?; + /// let df = ctx + /// .read_csv( + /// vec!["tests/data/example.csv", "tests/data/example.csv"], + /// CsvReadOptions::new(), + /// ) + /// .await?; /// # Ok(()) /// # } /// ``` diff --git a/datafusion/core/src/execution/context/mod.rs b/datafusion/core/src/execution/context/mod.rs index 448ee5264afd..c732c2c92f64 100644 --- a/datafusion/core/src/execution/context/mod.rs +++ b/datafusion/core/src/execution/context/mod.rs @@ -76,6 +76,7 @@ pub use datafusion_execution::config::SessionConfig; use datafusion_execution::registry::SerializerRegistry; pub use datafusion_execution::TaskContext; pub use datafusion_expr::execution_props::ExecutionProps; +use datafusion_expr::simplify::SimplifyContext; use datafusion_expr::{ expr_rewriter::FunctionRewrite, logical_plan::{DdlStatement, Statement}, @@ -83,6 +84,7 @@ use datafusion_expr::{ Expr, UserDefinedLogicalNode, WindowUDF, }; use datafusion_optimizer::analyzer::type_coercion::TypeCoercion; +use datafusion_optimizer::simplify_expressions::ExprSimplifier; use 
datafusion_optimizer::Analyzer; use datafusion_optimizer::{AnalyzerRule, OptimizerRule}; use datafusion_session::SessionStore; @@ -166,22 +168,23 @@ where /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); -/// let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; -/// let df = df.filter(col("a").lt_eq(col("b")))? -/// .aggregate(vec![col("a")], vec![min(col("b"))])? -/// .limit(0, Some(100))?; -/// let results = df -/// .collect() -/// .await?; +/// let df = ctx +/// .read_csv("tests/data/example.csv", CsvReadOptions::new()) +/// .await?; +/// let df = df +/// .filter(col("a").lt_eq(col("b")))? +/// .aggregate(vec![col("a")], vec![min(col("b"))])? +/// .limit(0, Some(100))?; +/// let results = df.collect().await?; /// assert_batches_eq!( -/// &[ -/// "+---+----------------+", -/// "| a | min(?table?.b) |", -/// "+---+----------------+", -/// "| 1 | 2 |", -/// "+---+----------------+", -/// ], -/// &results +/// &[ +/// "+---+----------------+", +/// "| a | min(?table?.b) |", +/// "+---+----------------+", +/// "| 1 | 2 |", +/// "+---+----------------+", +/// ], +/// &results /// ); /// # Ok(()) /// # } @@ -197,21 +200,22 @@ where /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); -/// ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?; +/// ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()) +/// .await?; /// let results = ctx -/// .sql("SELECT a, min(b) FROM example GROUP BY a LIMIT 100") -/// .await? -/// .collect() -/// .await?; +/// .sql("SELECT a, min(b) FROM example GROUP BY a LIMIT 100") +/// .await? +/// .collect() +/// .await?; /// assert_batches_eq!( -/// &[ -/// "+---+----------------+", -/// "| a | min(example.b) |", -/// "+---+----------------+", -/// "| 1 | 2 |", -/// "+---+----------------+", -/// ], -/// &results +/// &[ +/// "+---+----------------+", +/// "| a | min(example.b) |", +/// "+---+----------------+", +/// "| 1 | 2 |", +/// "+---+----------------+", +/// ], +/// &results /// ); /// # Ok(()) /// # } @@ -231,18 +235,18 @@ where /// let config = SessionConfig::new().with_batch_size(4 * 1024); /// /// // configure a memory limit of 1GB with 20% slop -/// let runtime_env = RuntimeEnvBuilder::new() +/// let runtime_env = RuntimeEnvBuilder::new() /// .with_memory_limit(1024 * 1024 * 1024, 0.80) /// .build_arc() /// .unwrap(); /// /// // Create a SessionState using the config and runtime_env /// let state = SessionStateBuilder::new() -/// .with_config(config) -/// .with_runtime_env(runtime_env) -/// // include support for built in functions and configurations -/// .with_default_features() -/// .build(); +/// .with_config(config) +/// .with_runtime_env(runtime_env) +/// // include support for built in functions and configurations +/// .with_default_features() +/// .build(); /// /// // Create a SessionContext /// let ctx = SessionContext::from(state); @@ -428,16 +432,14 @@ impl SessionContext { /// # use datafusion::prelude::*; /// # use datafusion::execution::SessionStateBuilder; /// # use datafusion_optimizer::push_down_filter::PushDownFilter; - /// let my_rule = PushDownFilter{}; // pretend it is a new rule - /// // Create a new builder with a custom optimizer rule + /// let my_rule = PushDownFilter {}; // pretend it is a new rule + /// // Create a new builder with a custom optimizer rule /// let context: SessionContext = SessionStateBuilder::new() - /// .with_optimizer_rule(Arc::new(my_rule)) - /// 
.build() - /// .into(); + /// .with_optimizer_rule(Arc::new(my_rule)) + /// .build() + /// .into(); /// // Enable local file access and convert context back to a builder - /// let builder = context - /// .enable_url_table() - /// .into_state_builder(); + /// let builder = context.enable_url_table().into_state_builder(); /// ``` pub fn into_state_builder(self) -> SessionStateBuilder { let SessionContext { @@ -585,11 +587,10 @@ impl SessionContext { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// ctx - /// .sql("CREATE TABLE foo (x INTEGER)") - /// .await? - /// .collect() - /// .await?; + /// ctx.sql("CREATE TABLE foo (x INTEGER)") + /// .await? + /// .collect() + /// .await?; /// assert!(ctx.table_exist("foo").unwrap()); /// # Ok(()) /// # } @@ -614,14 +615,14 @@ impl SessionContext { /// # #[tokio::main] /// # async fn main() -> Result<()> { /// let ctx = SessionContext::new(); - /// let options = SQLOptions::new() - /// .with_allow_ddl(false); - /// let err = ctx.sql_with_options("CREATE TABLE foo (x INTEGER)", options) - /// .await - /// .unwrap_err(); - /// assert!( - /// err.to_string().starts_with("Error during planning: DDL not supported: CreateMemoryTable") - /// ); + /// let options = SQLOptions::new().with_allow_ddl(false); + /// let err = ctx + /// .sql_with_options("CREATE TABLE foo (x INTEGER)", options) + /// .await + /// .unwrap_err(); + /// assert!(err + /// .to_string() + /// .starts_with("Error during planning: DDL not supported: CreateMemoryTable")); /// # Ok(()) /// # } /// ``` @@ -653,8 +654,7 @@ impl SessionContext { /// // provide type information that `a` is an Int32 /// let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); /// let df_schema = DFSchema::try_from(schema).unwrap(); - /// let expr = SessionContext::new() - /// .parse_sql_expr(sql, &df_schema)?; + /// let expr = SessionContext::new().parse_sql_expr(sql, &df_schema)?; /// assert_eq!(expected, expr); /// # Ok(()) /// # } @@ -1143,8 +1143,14 @@ impl SessionContext { /// ``` /// use datafusion::execution::context::SessionContext; /// - /// assert_eq!(SessionContext::parse_memory_limit("1M").unwrap(), 1024 * 1024); - /// assert_eq!(SessionContext::parse_memory_limit("1.5G").unwrap(), (1.5 * 1024.0 * 1024.0 * 1024.0) as usize); + /// assert_eq!( + /// SessionContext::parse_memory_limit("1M").unwrap(), + /// 1024 * 1024 + /// ); + /// assert_eq!( + /// SessionContext::parse_memory_limit("1.5G").unwrap(), + /// (1.5 * 1024.0 * 1024.0 * 1024.0) as usize + /// ); /// ``` pub fn parse_memory_limit(limit: &str) -> Result { let (number, unit) = limit.split_at(limit.len() - 1); @@ -1265,14 +1271,18 @@ impl SessionContext { exec_datafusion_err!("Prepared statement '{}' does not exist", name) })?; + let state = self.state.read(); + let context = SimplifyContext::new(state.execution_props()); + let simplifier = ExprSimplifier::new(context); + // Only allow literals as parameters for now. let mut params: Vec = parameters .into_iter() - .map(|e| match e { + .map(|e| match simplifier.simplify(e)? 
{ Expr::Literal(scalar, metadata) => { Ok(ScalarAndMetadata::new(scalar, metadata)) } - _ => not_impl_err!("Unsupported parameter type: {}", e), + e => not_impl_err!("Unsupported parameter type: {e}"), }) .collect::>()?; diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 561e0c363a37..d7a66db28ac4 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -37,7 +37,9 @@ use datafusion_catalog::information_schema::{ use datafusion_catalog::MemoryCatalogProviderList; use datafusion_catalog::{TableFunction, TableFunctionImpl}; use datafusion_common::alias::AliasGenerator; -use datafusion_common::config::{ConfigExtension, ConfigOptions, Dialect, TableOptions}; +#[cfg(feature = "sql")] +use datafusion_common::config::Dialect; +use datafusion_common::config::{ConfigExtension, ConfigOptions, TableOptions}; use datafusion_common::display::{PlanType, StringifiedPlan, ToStringifiedPlan}; use datafusion_common::tree_node::TreeNode; use datafusion_common::{ @@ -114,12 +116,12 @@ use uuid::Uuid; /// # use std::sync::Arc; /// # #[tokio::main] /// # async fn main() -> Result<()> { -/// let state = SessionStateBuilder::new() -/// .with_config(SessionConfig::new()) -/// .with_runtime_env(Arc::new(RuntimeEnv::default())) -/// .with_default_features() -/// .build(); -/// Ok(()) +/// let state = SessionStateBuilder::new() +/// .with_config(SessionConfig::new()) +/// .with_runtime_env(Arc::new(RuntimeEnv::default())) +/// .with_default_features() +/// .build(); +/// Ok(()) /// # } /// ``` /// @@ -545,6 +547,16 @@ impl SessionState { let sql_expr = self.sql_to_expr_with_alias(sql, &dialect)?; + self.create_logical_expr_from_sql_expr(sql_expr, df_schema) + } + + /// Creates a datafusion style AST [`Expr`] from a SQL expression. 
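     /// A usage sketch, mirroring the unit test added later in this diff (the SQL
     /// text and `df_schema` here are illustrative placeholders):
     ///
     /// ```ignore
     /// let sql_expr = state.sql_to_expr_with_alias("a > 10", &dialect)?;
     /// let expr = state.create_logical_expr_from_sql_expr(sql_expr, &df_schema)?;
     /// ```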
+ #[cfg(feature = "sql")] + pub fn create_logical_expr_from_sql_expr( + &self, + sql_expr: SQLExprWithAlias, + df_schema: &DFSchema, + ) -> datafusion_common::Result { let provider = SessionContextProvider { state: self, tables: HashMap::new(), @@ -2095,6 +2107,36 @@ mod tests { assert!(sql_to_expr(&state).is_err()) } + #[test] + #[cfg(feature = "sql")] + fn test_create_logical_expr_from_sql_expr() { + let state = SessionStateBuilder::new().with_default_features().build(); + + let provider = SessionContextProvider { + state: &state, + tables: HashMap::new(), + }; + + let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]); + let df_schema = DFSchema::try_from(schema).unwrap(); + let dialect = state.config.options().sql_parser.dialect; + let query = SqlToRel::new_with_options(&provider, state.get_parser_options()); + + for sql in ["[1,2,3]", "a > 10", "SUM(a)"] { + let sql_expr = state.sql_to_expr(sql, &dialect).unwrap(); + let from_str = query + .sql_to_expr(sql_expr, &df_schema, &mut PlannerContext::new()) + .unwrap(); + + let sql_expr_with_alias = + state.sql_to_expr_with_alias(sql, &dialect).unwrap(); + let from_expr = state + .create_logical_expr_from_sql_expr(sql_expr_with_alias, &df_schema) + .unwrap(); + assert_eq!(from_str, from_expr); + } + } + #[test] fn test_from_existing() -> Result<()> { fn employee_batch() -> RecordBatch { diff --git a/datafusion/core/src/execution/session_state_defaults.rs b/datafusion/core/src/execution/session_state_defaults.rs index baf396f3f1c5..62a575541a5d 100644 --- a/datafusion/core/src/execution/session_state_defaults.rs +++ b/datafusion/core/src/execution/session_state_defaults.rs @@ -101,7 +101,7 @@ impl SessionStateDefaults { expr_planners } - /// returns the list of default [`ScalarUDF']'s + /// returns the list of default [`ScalarUDF`]s pub fn default_scalar_functions() -> Vec> { #[cfg_attr(not(feature = "nested_expressions"), allow(unused_mut))] let mut functions: Vec> = functions::all_default_functions(); @@ -112,12 +112,12 @@ impl SessionStateDefaults { functions } - /// returns the list of default [`AggregateUDF']'s + /// returns the list of default [`AggregateUDF`]s pub fn default_aggregate_functions() -> Vec> { functions_aggregate::all_default_aggregate_functions() } - /// returns the list of default [`WindowUDF']'s + /// returns the list of default [`WindowUDF`]s pub fn default_window_functions() -> Vec> { functions_window::all_default_window_functions() } @@ -127,7 +127,7 @@ impl SessionStateDefaults { functions_table::all_default_table_functions() } - /// returns the list of default [`FileFormatFactory']'s + /// returns the list of default [`FileFormatFactory`]s pub fn default_file_formats() -> Vec> { let file_formats: Vec> = vec![ #[cfg(feature = "parquet")] diff --git a/datafusion/core/src/lib.rs b/datafusion/core/src/lib.rs index e7ace544a11c..381dd5e9e848 100644 --- a/datafusion/core/src/lib.rs +++ b/datafusion/core/src/lib.rs @@ -86,26 +86,29 @@ //! let ctx = SessionContext::new(); //! //! // create the dataframe -//! let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; +//! let df = ctx +//! .read_csv("tests/data/example.csv", CsvReadOptions::new()) +//! .await?; //! //! // create a plan -//! let df = df.filter(col("a").lt_eq(col("b")))? -//! .aggregate(vec![col("a")], vec![min(col("b"))])? -//! .limit(0, Some(100))?; +//! let df = df +//! .filter(col("a").lt_eq(col("b")))? +//! .aggregate(vec![col("a")], vec![min(col("b"))])? +//! .limit(0, Some(100))?; //! //! // execute the plan //! 
let results: Vec = df.collect().await?; //! //! // format the results -//! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)? -//! .to_string(); +//! let pretty_results = +//! arrow::util::pretty::pretty_format_batches(&results)?.to_string(); //! //! let expected = vec![ //! "+---+----------------+", //! "| a | min(?table?.b) |", //! "+---+----------------+", //! "| 1 | 2 |", -//! "+---+----------------+" +//! "+---+----------------+", //! ]; //! //! assert_eq!(pretty_results.trim().lines().collect::>(), expected); @@ -126,24 +129,27 @@ //! # async fn main() -> Result<()> { //! let ctx = SessionContext::new(); //! -//! ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?; +//! ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()) +//! .await?; //! //! // create a plan -//! let df = ctx.sql("SELECT a, MIN(b) FROM example WHERE a <= b GROUP BY a LIMIT 100").await?; +//! let df = ctx +//! .sql("SELECT a, MIN(b) FROM example WHERE a <= b GROUP BY a LIMIT 100") +//! .await?; //! //! // execute the plan //! let results: Vec = df.collect().await?; //! //! // format the results -//! let pretty_results = arrow::util::pretty::pretty_format_batches(&results)? -//! .to_string(); +//! let pretty_results = +//! arrow::util::pretty::pretty_format_batches(&results)?.to_string(); //! //! let expected = vec![ //! "+---+----------------+", //! "| a | min(example.b) |", //! "+---+----------------+", //! "| 1 | 2 |", -//! "+---+----------------+" +//! "+---+----------------+", //! ]; //! //! assert_eq!(pretty_results.trim().lines().collect::>(), expected); @@ -443,7 +449,30 @@ //! other operators read a single [`RecordBatch`] from their input to produce a //! single [`RecordBatch`] as output. //! -//! For example, given this SQL query: +//! For example, given this SQL: +//! +//! ```sql +//! SELECT name FROM 'data.parquet' WHERE id > 10 +//! ``` +//! +//! An simplified DataFusion execution plan is shown below. It first reads +//! data from the Parquet file, then applies the filter, then the projection, +//! and finally produces output. Each step processes one [`RecordBatch`] at a +//! time. Multiple batches are processed concurrently on different CPU cores +//! for plans with multiple partitions. +//! +//! ```text +//! ┌─────────────┐ ┌──────────────┐ ┌────────────────┐ ┌──────────────────┐ ┌──────────┐ +//! │ Parquet │───▶│ DataSource │───▶│ FilterExec │───▶│ ProjectionExec │───▶│ Results │ +//! │ File │ │ │ │ │ │ │ │ │ +//! └─────────────┘ └──────────────┘ └────────────────┘ └──────────────────┘ └──────────┘ +//! (reads data) (id > 10) (keeps "name" col) +//! RecordBatch ───▶ RecordBatch ────▶ RecordBatch ────▶ RecordBatch +//! ``` +//! +//! DataFusion uses the classic "pull" based control flow (explained more in the +//! next section) to implement streaming execution. As an example, +//! consider the following SQL query: //! //! ```sql //! SELECT date_trunc('month', time) FROM data WHERE id IN (10,20,30); @@ -607,7 +636,7 @@ //! └─────────────┘ ┗━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━━━━━━┻━━━━━━━━━━━━━━┛ //! ─────────────────────────────────────────────────────────────▶ //! time -//!``` +//! ``` //! //! Note that DataFusion does not use [`tokio::task::spawn_blocking`] for //! 
CPU-bounded work, because `spawn_blocking` is designed for blocking **IO**, @@ -897,6 +926,12 @@ doc_comment::doctest!("../../../README.md", readme_example_test); // For example, if `user_guide_expressions(line 123)` fails, // go to `docs/source/user-guide/expressions.md` to find the relevant problem. // +#[cfg(doctest)] +doc_comment::doctest!( + "../../../docs/source/user-guide/arrow-introduction.md", + user_guide_arrow_introduction +); + #[cfg(doctest)] doc_comment::doctest!( "../../../docs/source/user-guide/concepts-readings-events.md", diff --git a/datafusion/core/src/physical_planner.rs b/datafusion/core/src/physical_planner.rs index 708c52001ee8..6a75485c6284 100644 --- a/datafusion/core/src/physical_planner.rs +++ b/datafusion/core/src/physical_planner.rs @@ -64,7 +64,9 @@ use datafusion_catalog::ScanArgs; use datafusion_common::display::ToStringifiedPlan; use datafusion_common::format::ExplainAnalyzeLevel; use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion, TreeNodeVisitor}; -use datafusion_common::TableReference; +use datafusion_common::{ + assert_eq_or_internal_err, assert_or_internal_err, TableReference, +}; use datafusion_common::{ exec_err, internal_datafusion_err, internal_err, not_impl_err, plan_err, DFSchema, ScalarValue, @@ -347,11 +349,11 @@ impl DefaultPhysicalPlanner { .flatten() .collect::>(); // Ideally this never happens if we have a valid LogicalPlan tree - if outputs.len() != 1 { - return internal_err!( - "Failed to convert LogicalPlan to ExecutionPlan: More than one root detected" - ); - } + assert_eq_or_internal_err!( + outputs.len(), + 1, + "Failed to convert LogicalPlan to ExecutionPlan: More than one root detected" + ); let plan = outputs.pop().unwrap(); Ok(plan) } @@ -588,9 +590,10 @@ impl DefaultPhysicalPlanner { } } LogicalPlan::Window(Window { window_expr, .. 
}) => { - if window_expr.is_empty() { - return internal_err!("Impossibly got empty window expression"); - } + assert_or_internal_err!( + !window_expr.is_empty(), + "Impossibly got empty window expression" + ); let input_exec = children.one()?; @@ -1764,14 +1767,12 @@ fn qualify_join_schema_sides( .zip(left_fields.iter().chain(right_fields.iter())) .enumerate() { - if field.name() != expected.name() { - return internal_err!( - "Field name mismatch at index {}: expected '{}', found '{}'", - i, - expected.name(), - field.name() - ); - } + assert_eq_or_internal_err!( + field.name(), + expected.name(), + "Field name mismatch at index {}", + i + ); } // qualify sides @@ -2644,7 +2645,7 @@ mod tests { // verify that the plan correctly casts u8 to i64 // the cast from u8 to i64 for literal will be simplified, and get lit(int64(5)) // the cast here is implicit so has CastOptions with safe=true - let expected = r#"BinaryExpr { left: Column { name: "c7", index: 2 }, op: Lt, right: Literal { value: Int64(5), field: Field { name: "lit", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }, fail_on_overflow: false }"#; + let expected = r#"BinaryExpr { left: Column { name: "c7", index: 2 }, op: Lt, right: Literal { value: Int64(5), field: Field { name: "lit", data_type: Int64 } }, fail_on_overflow: false"#; assert_contains!(format!("{exec_plan:?}"), expected); Ok(()) @@ -2704,9 +2705,6 @@ mod tests { name: "lit", data_type: Utf8, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, }, "c1", @@ -2718,9 +2716,6 @@ mod tests { name: "lit", data_type: Int64, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, }, "c2", @@ -2732,9 +2727,6 @@ mod tests { name: "lit", data_type: Int64, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, }, "c3", @@ -2843,9 +2835,6 @@ mod tests { name: "lit", data_type: Utf8, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, }, "c1", @@ -2857,9 +2846,6 @@ mod tests { name: "lit", data_type: Int64, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, }, "c2", @@ -2871,9 +2857,6 @@ mod tests { name: "lit", data_type: Int64, nullable: true, - dict_id: 0, - dict_is_ordered: false, - metadata: {}, }, }, "c3", @@ -3047,7 +3030,7 @@ mod tests { .expect_err("planning error") .strip_backtrace(); - insta::assert_snapshot!(e, @r#"Error during planning: Extension planner for NoOp created an ExecutionPlan with mismatched schema. LogicalPlan schema: DFSchema { inner: Schema { fields: [Field { name: "a", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }, field_qualifiers: [None], functional_dependencies: FunctionalDependencies { deps: [] } }, ExecutionPlan schema: Schema { fields: [Field { name: "b", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }], metadata: {} }"#); + insta::assert_snapshot!(e, @r#"Error during planning: Extension planner for NoOp created an ExecutionPlan with mismatched schema. 
LogicalPlan schema: DFSchema { inner: Schema { fields: [Field { name: "a", data_type: Int32 }], metadata: {} }, field_qualifiers: [None], functional_dependencies: FunctionalDependencies { deps: [] } }, ExecutionPlan schema: Schema { fields: [Field { name: "b", data_type: Int32 }], metadata: {} }"#); } #[tokio::test] @@ -3063,7 +3046,7 @@ mod tests { let execution_plan = plan(&logical_plan).await?; // verify that the plan correctly adds cast from Int64(1) to Utf8, and the const will be evaluated. - let expected = "exprs: [ProjectionExpr { expr: BinaryExpr { left: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"a\"), field: Field { name: \"lit\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }, fail_on_overflow: false }, op: Or, right: BinaryExpr { left: Column { name: \"c1\", index: 0 }, op: Eq, right: Literal { value: Utf8(\"1\"), field: Field { name: \"lit\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} } }, fail_on_overflow: false }, fail_on_overflow: false }"; + let expected = r#"expr: BinaryExpr { left: BinaryExpr { left: Column { name: "c1", index: 0 }, op: Eq, right: Literal { value: Utf8("a"), field: Field { name: "lit", data_type: Utf8 } }, fail_on_overflow: false }"#; assert_contains!(format!("{execution_plan:?}"), expected); @@ -3085,7 +3068,7 @@ mod tests { assert_contains!( &e, - r#"Error during planning: Can not find compatible types to compare Boolean with [Struct(foo Boolean), Utf8]"# + r#"Error during planning: Can not find compatible types to compare Boolean with [Struct("foo": Boolean), Utf8]"# ); Ok(()) diff --git a/datafusion/core/src/test/mod.rs b/datafusion/core/src/test/mod.rs index 68f83e7f1f11..bbc85af7d874 100644 --- a/datafusion/core/src/test/mod.rs +++ b/datafusion/core/src/test/mod.rs @@ -35,12 +35,15 @@ use crate::error::Result; use crate::logical_expr::LogicalPlan; use crate::test_util::{aggr_test_schema, arrow_test_data}; +use datafusion_common::config::CsvOptions; + use arrow::array::{self, Array, ArrayRef, Decimal128Builder, Int32Array}; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; #[cfg(feature = "compression")] use datafusion_common::DataFusionError; use datafusion_datasource::source::DataSourceExec; +use datafusion_datasource::TableSchema; #[cfg(feature = "compression")] use bzip2::write::BzEncoder; @@ -92,11 +95,17 @@ pub fn scan_partitioned_csv( FileCompressionType::UNCOMPRESSED, work_dir, )?; - let source = Arc::new(CsvSource::new(true, b'"', b'"')); - let config = - FileScanConfigBuilder::from(partitioned_csv_config(schema, file_groups, source)) - .with_file_compression_type(FileCompressionType::UNCOMPRESSED) - .build(); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::from_file_schema(schema); + let source = Arc::new(CsvSource::new(table_schema.clone()).with_csv_options(options)); + let config = FileScanConfigBuilder::from(partitioned_csv_config(file_groups, source)) + .with_file_compression_type(FileCompressionType::UNCOMPRESSED) + .build(); Ok(DataSourceExec::from_data_source(config)) } diff --git a/datafusion/core/src/test_util/parquet.rs b/datafusion/core/src/test_util/parquet.rs index eb4c61c02524..b5213cee3f2d 100644 --- a/datafusion/core/src/test_util/parquet.rs +++ b/datafusion/core/src/test_util/parquet.rs @@ -37,7 +37,6 @@ use crate::physical_plan::metrics::MetricsSet; use 
crate::physical_plan::ExecutionPlan; use crate::prelude::{Expr, SessionConfig, SessionContext}; -use datafusion_datasource::file::FileSource; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::source::DataSourceExec; use object_store::path::Path; @@ -156,20 +155,21 @@ impl TestParquetFile { maybe_filter: Option, ) -> Result> { let parquet_options = ctx.copied_table_options().parquet; - let source = Arc::new(ParquetSource::new(parquet_options.clone())); - let scan_config_builder = FileScanConfigBuilder::new( - self.object_store_url.clone(), - Arc::clone(&self.schema), - source, - ) - .with_file(PartitionedFile { - object_meta: self.object_meta.clone(), - partition_values: vec![], - range: None, - statistics: None, - extensions: None, - metadata_size_hint: None, - }); + let source = Arc::new( + ParquetSource::new(Arc::clone(&self.schema)) + .with_table_parquet_options(parquet_options.clone()), + ); + let scan_config_builder = + FileScanConfigBuilder::new(self.object_store_url.clone(), source).with_file( + PartitionedFile { + object_meta: self.object_meta.clone(), + partition_values: vec![], + range: None, + statistics: None, + extensions: None, + metadata_size_hint: None, + }, + ); let df_schema = Arc::clone(&self.schema).to_dfschema_ref()?; @@ -183,10 +183,10 @@ impl TestParquetFile { create_physical_expr(&filter, &df_schema, &ExecutionProps::default())?; let source = Arc::new( - ParquetSource::new(parquet_options) + ParquetSource::new(Arc::clone(&self.schema)) + .with_table_parquet_options(parquet_options) .with_predicate(Arc::clone(&physical_filter_expr)), - ) - .with_schema(Arc::clone(&self.schema)); + ); let config = scan_config_builder.with_source(source).build(); let parquet_exec = DataSourceExec::from_data_source(config); diff --git a/datafusion/core/tests/catalog_listing/mod.rs b/datafusion/core/tests/catalog_listing/mod.rs new file mode 100644 index 000000000000..cb6cac4fb067 --- /dev/null +++ b/datafusion/core/tests/catalog_listing/mod.rs @@ -0,0 +1,18 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +mod pruned_partition_list; diff --git a/datafusion/core/tests/catalog_listing/pruned_partition_list.rs b/datafusion/core/tests/catalog_listing/pruned_partition_list.rs new file mode 100644 index 000000000000..3cdaa3bb9b34 --- /dev/null +++ b/datafusion/core/tests/catalog_listing/pruned_partition_list.rs @@ -0,0 +1,251 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use arrow_schema::DataType; +use futures::{FutureExt, StreamExt as _, TryStreamExt as _}; +use object_store::{memory::InMemory, path::Path, ObjectStore as _}; + +use datafusion::execution::SessionStateBuilder; +use datafusion_catalog_listing::helpers::{ + describe_partition, list_partitions, pruned_partition_list, +}; +use datafusion_common::ScalarValue; +use datafusion_datasource::ListingTableUrl; +use datafusion_expr::{col, lit, Expr}; +use datafusion_session::Session; + +#[tokio::test] +async fn test_pruned_partition_list_empty() { + let (store, state) = make_test_store_and_state(&[ + ("tablepath/mypartition=val1/notparquetfile", 100), + ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), + ("tablepath/file.parquet", 100), + ("tablepath/notapartition/file.parquet", 100), + ("tablepath/notmypartition=val1/file.parquet", 100), + ]); + let filter = Expr::eq(col("mypartition"), lit("val1")); + let pruned = pruned_partition_list( + state.as_ref(), + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + &[filter], + ".parquet", + &[(String::from("mypartition"), DataType::Utf8)], + ) + .await + .expect("partition pruning failed") + .collect::>() + .await; + + assert_eq!(pruned.len(), 0); +} + +#[tokio::test] +async fn test_pruned_partition_list() { + let (store, state) = make_test_store_and_state(&[ + ("tablepath/mypartition=val1/file.parquet", 100), + ("tablepath/mypartition=val2/file.parquet", 100), + ("tablepath/mypartition=val1/ignoresemptyfile.parquet", 0), + ("tablepath/mypartition=val1/other=val3/file.parquet", 100), + ("tablepath/notapartition/file.parquet", 100), + ("tablepath/notmypartition=val1/file.parquet", 100), + ]); + let filter = Expr::eq(col("mypartition"), lit("val1")); + let pruned = pruned_partition_list( + state.as_ref(), + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + &[filter], + ".parquet", + &[(String::from("mypartition"), DataType::Utf8)], + ) + .await + .expect("partition pruning failed") + .try_collect::>() + .await + .unwrap(); + + assert_eq!(pruned.len(), 2); + let f1 = &pruned[0]; + assert_eq!( + f1.object_meta.location.as_ref(), + "tablepath/mypartition=val1/file.parquet" + ); + assert_eq!(&f1.partition_values, &[ScalarValue::from("val1")]); + let f2 = &pruned[1]; + assert_eq!( + f2.object_meta.location.as_ref(), + "tablepath/mypartition=val1/other=val3/file.parquet" + ); + assert_eq!(f2.partition_values, &[ScalarValue::from("val1"),]); +} + +#[tokio::test] +async fn test_pruned_partition_list_multi() { + let (store, state) = make_test_store_and_state(&[ + ("tablepath/part1=p1v1/file.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v1/file1.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v1/file2.parquet", 100), + ("tablepath/part1=p1v3/part2=p2v1/file2.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v2/file2.parquet", 100), + ]); + let filter1 = Expr::eq(col("part1"), lit("p1v2")); + let 
filter2 = Expr::eq(col("part2"), lit("p2v1")); + let pruned = pruned_partition_list( + state.as_ref(), + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + &[filter1, filter2], + ".parquet", + &[ + (String::from("part1"), DataType::Utf8), + (String::from("part2"), DataType::Utf8), + ], + ) + .await + .expect("partition pruning failed") + .try_collect::>() + .await + .unwrap(); + + assert_eq!(pruned.len(), 2); + let f1 = &pruned[0]; + assert_eq!( + f1.object_meta.location.as_ref(), + "tablepath/part1=p1v2/part2=p2v1/file1.parquet" + ); + assert_eq!( + &f1.partition_values, + &[ScalarValue::from("p1v2"), ScalarValue::from("p2v1"),] + ); + let f2 = &pruned[1]; + assert_eq!( + f2.object_meta.location.as_ref(), + "tablepath/part1=p1v2/part2=p2v1/file2.parquet" + ); + assert_eq!( + &f2.partition_values, + &[ScalarValue::from("p1v2"), ScalarValue::from("p2v1")] + ); +} + +#[tokio::test] +async fn test_list_partition() { + let (store, _) = make_test_store_and_state(&[ + ("tablepath/part1=p1v1/file.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v1/file1.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v1/file2.parquet", 100), + ("tablepath/part1=p1v3/part2=p2v1/file3.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v2/file4.parquet", 100), + ("tablepath/part1=p1v2/part2=p2v2/empty.parquet", 0), + ]); + + let partitions = list_partitions( + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + 0, + None, + ) + .await + .expect("listing partitions failed"); + + assert_eq!( + &partitions + .iter() + .map(describe_partition) + .collect::>(), + &vec![ + ("tablepath", 0, vec![]), + ("tablepath/part1=p1v1", 1, vec![]), + ("tablepath/part1=p1v2", 1, vec![]), + ("tablepath/part1=p1v3", 1, vec![]), + ] + ); + + let partitions = list_partitions( + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + 1, + None, + ) + .await + .expect("listing partitions failed"); + + assert_eq!( + &partitions + .iter() + .map(describe_partition) + .collect::>(), + &vec![ + ("tablepath", 0, vec![]), + ("tablepath/part1=p1v1", 1, vec!["file.parquet"]), + ("tablepath/part1=p1v2", 1, vec![]), + ("tablepath/part1=p1v2/part2=p2v1", 2, vec![]), + ("tablepath/part1=p1v2/part2=p2v2", 2, vec![]), + ("tablepath/part1=p1v3", 1, vec![]), + ("tablepath/part1=p1v3/part2=p2v1", 2, vec![]), + ] + ); + + let partitions = list_partitions( + store.as_ref(), + &ListingTableUrl::parse("file:///tablepath/").unwrap(), + 2, + None, + ) + .await + .expect("listing partitions failed"); + + assert_eq!( + &partitions + .iter() + .map(describe_partition) + .collect::>(), + &vec![ + ("tablepath", 0, vec![]), + ("tablepath/part1=p1v1", 1, vec!["file.parquet"]), + ("tablepath/part1=p1v2", 1, vec![]), + ("tablepath/part1=p1v3", 1, vec![]), + ( + "tablepath/part1=p1v2/part2=p2v1", + 2, + vec!["file1.parquet", "file2.parquet"] + ), + ("tablepath/part1=p1v2/part2=p2v2", 2, vec!["file4.parquet"]), + ("tablepath/part1=p1v3/part2=p2v1", 2, vec!["file3.parquet"]), + ] + ); +} + +pub fn make_test_store_and_state( + files: &[(&str, u64)], +) -> (Arc, Arc) { + let memory = InMemory::new(); + + for (name, size) in files { + memory + .put(&Path::from(*name), vec![0; *size as usize].into()) + .now_or_never() + .unwrap() + .unwrap(); + } + + let state = SessionStateBuilder::new().build(); + (Arc::new(memory), Arc::new(state)) +} diff --git a/datafusion/core/tests/core_integration.rs b/datafusion/core/tests/core_integration.rs index edcf039e4e70..cc4dfcf72059 100644 --- 
a/datafusion/core/tests/core_integration.rs +++ b/datafusion/core/tests/core_integration.rs @@ -57,6 +57,9 @@ mod serde; /// Run all tests that are found in the `catalog` directory mod catalog; +/// Run all tests that are found in the `catalog_listing` directory +mod catalog_listing; + /// Run all tests that are found in the `tracing` directory mod tracing; diff --git a/datafusion/core/tests/data/partitioned_table_arrow_stream/part=123/data.arrow b/datafusion/core/tests/data/partitioned_table_arrow_stream/part=123/data.arrow new file mode 100644 index 000000000000..bad9e3de4a57 Binary files /dev/null and b/datafusion/core/tests/data/partitioned_table_arrow_stream/part=123/data.arrow differ diff --git a/datafusion/core/tests/data/partitioned_table_arrow_stream/part=456/data.arrow b/datafusion/core/tests/data/partitioned_table_arrow_stream/part=456/data.arrow new file mode 100644 index 000000000000..4a07fbfa47f3 Binary files /dev/null and b/datafusion/core/tests/data/partitioned_table_arrow_stream/part=456/data.arrow differ diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index d95eb38c19e1..265862ff9af8 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -309,16 +309,16 @@ async fn test_fn_arrow_typeof() -> Result<()> { assert_snapshot!( batches_to_string(&batches), - @r#" - +------------------------------------------------------------------------------------------------------------------+ - | arrow_typeof(test.l) | - +------------------------------------------------------------------------------------------------------------------+ - | List(Field { name: "item", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) | - | List(Field { name: "item", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) | - | List(Field { name: "item", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) | - | List(Field { name: "item", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) | - +------------------------------------------------------------------------------------------------------------------+ - "#); + @r" + +----------------------+ + | arrow_typeof(test.l) | + +----------------------+ + | List(nullable Int32) | + | List(nullable Int32) | + | List(nullable Int32) | + | List(nullable Int32) | + +----------------------+ + "); Ok(()) } diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index 979ada2bc6bb..aab51efa1e34 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -45,6 +45,7 @@ use insta::assert_snapshot; use object_store::local::LocalFileSystem; use std::collections::HashMap; use std::fs; +use std::path::Path; use std::sync::Arc; use tempfile::TempDir; use url::Url; @@ -77,7 +78,7 @@ use datafusion_expr::var_provider::{VarProvider, VarType}; use datafusion_expr::{ cast, col, create_udf, exists, in_subquery, lit, out_ref_col, placeholder, scalar_subquery, when, wildcard, Expr, ExprFunctionExt, ExprSchemable, LogicalPlan, - LogicalPlanBuilder, ScalarFunctionImplementation, SortExpr, WindowFrame, + LogicalPlanBuilder, ScalarFunctionImplementation, SortExpr, TableType, WindowFrame, WindowFrameBound, WindowFrameUnits, WindowFunctionDefinition, }; use datafusion_physical_expr::aggregate::AggregateExprBuilder; @@ -404,6 +405,55 
@@ async fn select_with_periods() -> Result<()> { Ok(()) } +#[tokio::test] +async fn select_columns_duplicated_names_from_different_qualifiers() -> Result<()> { + let t1 = test_table_with_name("t1") + .await? + .select_columns(&["c1"])? + .limit(0, Some(3))?; + let t2 = test_table_with_name("t2") + .await? + .select_columns(&["c1"])? + .limit(3, Some(3))?; + let t3 = test_table_with_name("t3") + .await? + .select_columns(&["c1"])? + .limit(6, Some(3))?; + + let join_res = t1 + .join(t2, JoinType::Left, &["t1.c1"], &["t2.c1"], None)? + .join(t3, JoinType::Left, &["t1.c1"], &["t3.c1"], None)?; + assert_snapshot!( + batches_to_sort_string(&join_res.clone().collect().await.unwrap()), + @r" + +----+----+----+ + | c1 | c1 | c1 | + +----+----+----+ + | b | b | | + | b | b | | + | c | | | + | d | | d | + +----+----+----+ + " + ); + + let select_res = join_res.select_columns(&["c1"])?; + assert_snapshot!( + batches_to_sort_string(&select_res.clone().collect().await.unwrap()), + @r" + +----+----+----+ + | c1 | c1 | c1 | + +----+----+----+ + | b | b | | + | b | b | | + | c | | | + | d | | d | + +----+----+----+ + " + ); + Ok(()) +} + #[tokio::test] async fn drop_columns() -> Result<()> { // build plan using Table API @@ -542,6 +592,54 @@ async fn drop_with_periods() -> Result<()> { Ok(()) } +#[tokio::test] +async fn drop_columns_duplicated_names_from_different_qualifiers() -> Result<()> { + let t1 = test_table_with_name("t1") + .await? + .select_columns(&["c1"])? + .limit(0, Some(3))?; + let t2 = test_table_with_name("t2") + .await? + .select_columns(&["c1"])? + .limit(3, Some(3))?; + let t3 = test_table_with_name("t3") + .await? + .select_columns(&["c1"])? + .limit(6, Some(3))?; + + let join_res = t1 + .join(t2, JoinType::LeftMark, &["c1"], &["c1"], None)? + .join(t3, JoinType::LeftMark, &["c1"], &["c1"], None)?; + assert_snapshot!( + batches_to_sort_string(&join_res.clone().collect().await.unwrap()), + @r" + +----+-------+-------+ + | c1 | mark | mark | + +----+-------+-------+ + | b | true | false | + | c | false | false | + | d | false | true | + +----+-------+-------+ + " + ); + + let drop_res = join_res.drop_columns(&["mark"])?; + assert_snapshot!( + batches_to_sort_string(&drop_res.clone().collect().await.unwrap()), + @r" + +----+ + | c1 | + +----+ + | b | + | c | + | d | + +----+ + " + ); + + Ok(()) +} + #[tokio::test] async fn aggregate() -> Result<()> { // build plan using DataFrame API @@ -1577,6 +1675,23 @@ async fn register_table() -> Result<()> { Ok(()) } +#[tokio::test] +async fn register_temporary_table() -> Result<()> { + let df = test_table().await?.select_columns(&["c1", "c12"])?; + let ctx = SessionContext::new(); + let df_impl = DataFrame::new(ctx.state(), df.logical_plan().clone()); + + let df_table_provider = df_impl.clone().into_temporary_view(); + + // check that we set the correct table_type + assert_eq!(df_table_provider.table_type(), TableType::Temporary); + + // check that we can register a dataframe as a temporary table + ctx.register_table("test_table", df_table_provider)?; + + Ok(()) +} + /// Compare the formatted string representation of two plans for equality fn assert_same_plan(plan1: &LogicalPlan, plan2: &LogicalPlan) { assert_eq!(format!("{plan1:?}"), format!("{plan2:?}")); @@ -2749,10 +2864,9 @@ async fn test_count_wildcard_on_sort() -> Result<()> { | | ProjectionExec: expr=[b@0 as b, count(Int64(1))@1 as count(*), count(Int64(1))@1 as count(Int64(1))] | | | AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[count(Int64(1))] | | | CoalesceBatchesExec: 
target_batch_size=8192 | - | | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=4 | - | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | - | | AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(Int64(1))] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=1 | + | | AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(Int64(1))] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | | | | +---------------+------------------------------------------------------------------------------------------------------------+ "### @@ -2761,22 +2875,21 @@ async fn test_count_wildcard_on_sort() -> Result<()> { assert_snapshot!( pretty_format_batches(&df_results).unwrap(), @r###" - +---------------+--------------------------------------------------------------------------------+ - | plan_type | plan | - +---------------+--------------------------------------------------------------------------------+ - | logical_plan | Sort: count(*) ASC NULLS LAST | - | | Aggregate: groupBy=[[t1.b]], aggr=[[count(Int64(1)) AS count(*)]] | - | | TableScan: t1 projection=[b] | - | physical_plan | SortPreservingMergeExec: [count(*)@1 ASC NULLS LAST] | - | | SortExec: expr=[count(*)@1 ASC NULLS LAST], preserve_partitioning=[true] | - | | AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[count(*)] | - | | CoalesceBatchesExec: target_batch_size=8192 | - | | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=4 | - | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | - | | AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(*)] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | | - +---------------+--------------------------------------------------------------------------------+ + +---------------+----------------------------------------------------------------------------+ + | plan_type | plan | + +---------------+----------------------------------------------------------------------------+ + | logical_plan | Sort: count(*) ASC NULLS LAST | + | | Aggregate: groupBy=[[t1.b]], aggr=[[count(Int64(1)) AS count(*)]] | + | | TableScan: t1 projection=[b] | + | physical_plan | SortPreservingMergeExec: [count(*)@1 ASC NULLS LAST] | + | | SortExec: expr=[count(*)@1 ASC NULLS LAST], preserve_partitioning=[true] | + | | AggregateExec: mode=FinalPartitioned, gby=[b@0 as b], aggr=[count(*)] | + | | CoalesceBatchesExec: target_batch_size=8192 | + | | RepartitionExec: partitioning=Hash([b@0], 4), input_partitions=1 | + | | AggregateExec: mode=Partial, gby=[b@0 as b], aggr=[count(*)] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | | + +---------------+----------------------------------------------------------------------------+ "### ); Ok(()) @@ -2944,18 +3057,18 @@ async fn test_count_wildcard_on_window() -> Result<()> { assert_snapshot!( pretty_format_batches(&sql_results).unwrap(), @r#" - +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | plan_type | plan | - 
+---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | logical_plan | Projection: count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS count(*) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING | - | | WindowAggr: windowExpr=[[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING]] | - | | TableScan: t1 projection=[a] | - | physical_plan | ProjectionExec: expr=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@1 as count(*) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING] | - | | BoundedWindowAggExec: wdw=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Field { name: "count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING], mode=[Sorted] | - | | SortExec: expr=[a@0 DESC], preserve_partitioning=[false] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | | - +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | plan_type | plan | + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | logical_plan | Projection: count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING AS count(*) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING | + | | WindowAggr: windowExpr=[[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING]] | + | | TableScan: t1 projection=[a] | + | physical_plan | ProjectionExec: expr=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@1 as count(*) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING] | + | | BoundedWindowAggExec: wdw=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Field { "count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING": Int64 }, frame: RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING], mode=[Sorted] | + | | SortExec: expr=[a@0 DESC], preserve_partitioning=[false] | + | | 
DataSourceExec: partitions=1, partition_sizes=[1] | + | | | + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ "# ); @@ -2978,24 +3091,174 @@ async fn test_count_wildcard_on_window() -> Result<()> { assert_snapshot!( pretty_format_batches(&df_results).unwrap(), @r#" - +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | plan_type | plan | - +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ - | logical_plan | Projection: count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING | - | | WindowAggr: windowExpr=[[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING]] | - | | TableScan: t1 projection=[a] | - | physical_plan | ProjectionExec: expr=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@1 as count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING] | - | | BoundedWindowAggExec: wdw=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Field { name: "count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING], mode=[Sorted] | - | | SortExec: expr=[a@0 DESC], preserve_partitioning=[false] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | - | | | - +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | plan_type | plan | + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + | logical_plan | Projection: count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE 
BETWEEN 6 PRECEDING AND 2 FOLLOWING | + | | WindowAggr: windowExpr=[[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING]] | + | | TableScan: t1 projection=[a] | + | physical_plan | ProjectionExec: expr=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING@1 as count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING] | + | | BoundedWindowAggExec: wdw=[count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING: Field { "count(Int64(1)) ORDER BY [t1.a DESC NULLS FIRST] RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING": Int64 }, frame: RANGE BETWEEN 6 PRECEDING AND 2 FOLLOWING], mode=[Sorted] | + | | SortExec: expr=[a@0 DESC], preserve_partitioning=[false] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | | + +---------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ "# ); Ok(()) } +#[tokio::test] +// Test with `repartition_sorts` disabled, causing a full resort of the data +async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_false( +) -> Result<()> { + assert_snapshot!( + union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(false).await?, + @r#" + AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted + SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] + CoalescePartitionsExec + AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[] + UnionExec + DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet + DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet + "#); + Ok(()) +} + +#[ignore] // See https://github.com/apache/datafusion/issues/18380 +#[tokio::test] +// Test with `repartition_sorts` enabled to preserve pre-sorted partitions and avoid resorting +async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_true( +) -> Result<()> { + assert_snapshot!( + union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(true).await?, + @r#" + AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted + SortPreservingMergeExec: [id@0 ASC NULLS LAST] + AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[], ordering_mode=Sorted + UnionExec + DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet + SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet + "#); + + // 💥 Doesn't pass, and generates this plan: + // + // AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted + // SortPreservingMergeExec: [id@0 ASC NULLS LAST] + // SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true] + // AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[] + // UnionExec + // DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet + // DataSourceExec: 
file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet + // + // + // === Excerpt from the verbose explain === + // + // +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + // | plan_type | plan | + // +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + // | initial_physical_plan | AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | + // | | AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | + // | | UnionExec | + // | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet | + // | | SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] | + // | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet | + // ... + // | physical_plan after EnforceDistribution | OutputRequirementExec: order_by=[], dist_by=Unspecified | + // | | AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | + // | | SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] | + // | | CoalescePartitionsExec | + // | | AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | + // | | UnionExec | + // | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet | + // | | SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[false] | + // | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet | + // | | | + // | physical_plan after CombinePartialFinalAggregate | SAME TEXT AS ABOVE + // | | | + // | physical_plan after EnforceSorting | OutputRequirementExec: order_by=[], dist_by=Unspecified | + // | | AggregateExec: mode=Final, gby=[id@0 as id], aggr=[], ordering_mode=Sorted | + // | | SortPreservingMergeExec: [id@0 ASC NULLS LAST] | + // | | SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true] | + // | | AggregateExec: mode=Partial, gby=[id@0 as id], aggr=[] | + // | | UnionExec | + // | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], output_ordering=[id@0 ASC NULLS LAST], file_type=parquet | + // | | DataSourceExec: file_groups={1 group: [[{testdata}/alltypes_tiny_pages.parquet]]}, projection=[id], file_type=parquet | + // ... 
+ // +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + Ok(()) +} + +async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl( + repartition_sorts: bool, +) -> Result { + let config = SessionConfig::default() + .with_target_partitions(1) + .with_repartition_sorts(repartition_sorts); + let ctx = SessionContext::new_with_config(config); + + let testdata = parquet_test_data(); + + // Register "sorted" table, that is sorted + ctx.register_parquet( + "sorted", + &format!("{testdata}/alltypes_tiny_pages.parquet"), + ParquetReadOptions::default() + .file_sort_order(vec![vec![col("id").sort(true, false)]]), + ) + .await?; + + // Register "unsorted" table + ctx.register_parquet( + "unsorted", + &format!("{testdata}/alltypes_tiny_pages.parquet"), + ParquetReadOptions::default(), + ) + .await?; + + let source_sorted = ctx + .table("sorted") + .await + .unwrap() + .select(vec![col("id")]) + .unwrap(); + + let source_unsorted = ctx + .table("unsorted") + .await + .unwrap() + .select(vec![col("id")]) + .unwrap(); + + let source_unsorted_resorted = + source_unsorted.sort(vec![col("id").sort(true, false)])?; + + let union = source_sorted.union(source_unsorted_resorted)?; + + let agg = union.aggregate(vec![col("id")], vec![])?; + + let df = agg; + + // To be able to remove user specific paths from the plan, for stable assertions + let testdata_clean = Path::new(&testdata).canonicalize()?.display().to_string(); + let testdata_clean = testdata_clean.strip_prefix("/").unwrap_or(&testdata_clean); + + // Use displayable() rather than explain().collect() to avoid table formatting issues. We need + // to replace machine-specific paths with variable lengths, which breaks table alignment and + // causes snapshot mismatches. 
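 + // In other words: render the plan with `displayable(...).indent(true)` (one
 + // operator per line, no table borders) and swap the absolute test-data path
 + // for a "{testdata}" placeholder so the snapshot is identical on every machine.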
+ let physical_plan = df.create_physical_plan().await?; + let displayable_plan = displayable(physical_plan.as_ref()) + .indent(true) + .to_string() + .replace(testdata_clean, "{testdata}"); + + Ok(displayable_plan) +} + #[tokio::test] async fn test_count_wildcard_on_aggregate() -> Result<()> { let ctx = create_join_context()?; @@ -3086,10 +3349,9 @@ async fn test_count_wildcard_on_where_scalar_subquery() -> Result<()> { | | ProjectionExec: expr=[count(Int64(1))@1 as count(*), a@0 as a, true as __always_true] | | | AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(Int64(1))] | | | CoalesceBatchesExec: target_batch_size=8192 | - | | RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 | - | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | - | | AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(Int64(1))] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1 | + | | AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(Int64(1))] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | | | | +---------------+---------------------------------------------------------------------------------------------------------------------------+ " @@ -3143,10 +3405,9 @@ async fn test_count_wildcard_on_where_scalar_subquery() -> Result<()> { | | ProjectionExec: expr=[count(*)@1 as count(*), a@0 as a, true as __always_true] | | | AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[count(*)] | | | CoalesceBatchesExec: target_batch_size=8192 | - | | RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 | - | | RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 | - | | AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(*)] | - | | DataSourceExec: partitions=1, partition_sizes=[1] | + | | RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=1 | + | | AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count(*)] | + | | DataSourceExec: partitions=1, partition_sizes=[1] | | | | +---------------+---------------------------------------------------------------------------------------------------------------------------+ " @@ -4435,12 +4696,12 @@ async fn unnest_with_redundant_columns() -> Result<()> { let actual = formatted.trim(); assert_snapshot!( actual, - @r###" + @r" Projection: shapes.shape_id [shape_id:UInt32] Unnest: lists[shape_id2|depth=1] structs[] [shape_id:UInt32, shape_id2:UInt32;N] - Aggregate: groupBy=[[shapes.shape_id]], aggr=[[array_agg(shapes.shape_id) AS shape_id2]] [shape_id:UInt32, shape_id2:List(Field { name: "item", data_type: UInt32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} });N] + Aggregate: groupBy=[[shapes.shape_id]], aggr=[[array_agg(shapes.shape_id) AS shape_id2]] [shape_id:UInt32, shape_id2:List(Field { data_type: UInt32, nullable: true });N] TableScan: shapes projection=[shape_id] [shape_id:UInt32] - "### + " ); let results = df.collect().await?; @@ -6311,7 +6572,7 @@ async fn test_copy_schema() -> Result<()> { let target_path = tmp_dir.path().join("target.csv"); let query = format!( - "COPY source_table TO '{:?}' STORED AS csv", + "COPY source_table TO '{}' STORED AS csv", target_path.to_str().unwrap() ); @@ -6459,10 +6720,10 @@ async fn test_duplicate_state_fields_for_dfschema_construct() -> Result<()> { "ticker", "first_value(value)[first_value]", "timestamp@0", - "is_set", + "first_value(value)[first_value_is_set]", "last_value(value)[last_value]", "timestamp@0", - 
"is_set", + "last_value(value)[last_value_is_set]", ]; let binding = partial_agg.schema(); diff --git a/datafusion/core/tests/datasource/object_store_access.rs b/datafusion/core/tests/datasource/object_store_access.rs index 6b9585f408a1..33129150db58 100644 --- a/datafusion/core/tests/datasource/object_store_access.rs +++ b/datafusion/core/tests/datasource/object_store_access.rs @@ -27,7 +27,10 @@ use arrow::array::{ArrayRef, Int32Array, RecordBatch}; use async_trait::async_trait; use bytes::Bytes; -use datafusion::prelude::{CsvReadOptions, SessionContext}; +use datafusion::prelude::{CsvReadOptions, ParquetReadOptions, SessionContext}; +use datafusion_catalog_listing::{ListingOptions, ListingTable, ListingTableConfig}; +use datafusion_datasource::ListingTableUrl; +use datafusion_datasource_csv::CsvFormat; use futures::stream::BoxStream; use insta::assert_snapshot; use object_store::memory::InMemory; @@ -45,8 +48,9 @@ use url::Url; #[tokio::test] async fn create_single_csv_file() { + let test = Test::new().with_single_file_csv().await; assert_snapshot!( - single_file_csv_test().await.requests(), + test.requests(), @r" RequestCountingObjectStore() Total Requests: 2 @@ -58,8 +62,9 @@ async fn create_single_csv_file() { #[tokio::test] async fn query_single_csv_file() { + let test = Test::new().with_single_file_csv().await; assert_snapshot!( - single_file_csv_test().await.query("select * from csv_table").await, + test.query("select * from csv_table").await, @r" ------- Query Output (2 rows) ------- +---------+-------+-------+ @@ -79,8 +84,9 @@ async fn query_single_csv_file() { #[tokio::test] async fn create_multi_file_csv_file() { + let test = Test::new().with_multi_file_csv().await; assert_snapshot!( - multi_file_csv_test().await.requests(), + test.requests(), @r" RequestCountingObjectStore() Total Requests: 4 @@ -94,8 +100,9 @@ async fn create_multi_file_csv_file() { #[tokio::test] async fn query_multi_csv_file() { + let test = Test::new().with_multi_file_csv().await; assert_snapshot!( - multi_file_csv_test().await.query("select * from csv_table").await, + test.query("select * from csv_table").await, @r" ------- Query Output (6 rows) ------- +---------+-------+-------+ @@ -120,24 +127,250 @@ async fn query_multi_csv_file() { } #[tokio::test] -async fn create_single_parquet_file() { +async fn query_partitioned_csv_file() { + let test = Test::new().with_partitioned_csv().await; assert_snapshot!( - single_file_parquet_test().await.requests(), + test.query("select * from csv_table_partitioned").await, + @r" + ------- Query Output (6 rows) ------- + +---------+-------+-------+---+----+-----+ + | d1 | d2 | d3 | a | b | c | + +---------+-------+-------+---+----+-----+ + | 0.00001 | 1e-12 | true | 1 | 10 | 100 | + | 0.00003 | 5e-12 | false | 1 | 10 | 100 | + | 0.00002 | 2e-12 | true | 2 | 20 | 200 | + | 0.00003 | 5e-12 | false | 2 | 20 | 200 | + | 0.00003 | 3e-12 | true | 3 | 30 | 300 | + | 0.00003 | 5e-12 | false | 3 | 30 | 300 | + +---------+-------+-------+---+----+-----+ + ------- Object Store Request Summary ------- + RequestCountingObjectStore() + Total Requests: 4 + - LIST prefix=data + - GET (opts) path=data/a=1/b=10/c=100/file_1.csv + - GET (opts) path=data/a=2/b=20/c=200/file_2.csv + - GET (opts) path=data/a=3/b=30/c=300/file_3.csv + " + ); + + assert_snapshot!( + test.query("select * from csv_table_partitioned WHERE a=2").await, + @r" + ------- Query Output (2 rows) ------- + +---------+-------+-------+---+----+-----+ + | d1 | d2 | d3 | a | b | c | + 
+---------+-------+-------+---+----+-----+ + | 0.00002 | 2e-12 | true | 2 | 20 | 200 | + | 0.00003 | 5e-12 | false | 2 | 20 | 200 | + +---------+-------+-------+---+----+-----+ + ------- Object Store Request Summary ------- + RequestCountingObjectStore() + Total Requests: 2 + - LIST prefix=data + - GET (opts) path=data/a=2/b=20/c=200/file_2.csv + " + ); + + assert_snapshot!( + test.query("select * from csv_table_partitioned WHERE b=20").await, + @r" + ------- Query Output (2 rows) ------- + +---------+-------+-------+---+----+-----+ + | d1 | d2 | d3 | a | b | c | + +---------+-------+-------+---+----+-----+ + | 0.00002 | 2e-12 | true | 2 | 20 | 200 | + | 0.00003 | 5e-12 | false | 2 | 20 | 200 | + +---------+-------+-------+---+----+-----+ + ------- Object Store Request Summary ------- + RequestCountingObjectStore() + Total Requests: 2 + - LIST prefix=data + - GET (opts) path=data/a=2/b=20/c=200/file_2.csv + " + ); + + assert_snapshot!( + test.query("select * from csv_table_partitioned WHERE c=200").await, + @r" + ------- Query Output (2 rows) ------- + +---------+-------+-------+---+----+-----+ + | d1 | d2 | d3 | a | b | c | + +---------+-------+-------+---+----+-----+ + | 0.00002 | 2e-12 | true | 2 | 20 | 200 | + | 0.00003 | 5e-12 | false | 2 | 20 | 200 | + +---------+-------+-------+---+----+-----+ + ------- Object Store Request Summary ------- + RequestCountingObjectStore() + Total Requests: 2 + - LIST prefix=data + - GET (opts) path=data/a=2/b=20/c=200/file_2.csv + " + ); + + assert_snapshot!( + test.query("select * from csv_table_partitioned WHERE a=2 AND b=20").await, + @r" + ------- Query Output (2 rows) ------- + +---------+-------+-------+---+----+-----+ + | d1 | d2 | d3 | a | b | c | + +---------+-------+-------+---+----+-----+ + | 0.00002 | 2e-12 | true | 2 | 20 | 200 | + | 0.00003 | 5e-12 | false | 2 | 20 | 200 | + +---------+-------+-------+---+----+-----+ + ------- Object Store Request Summary ------- + RequestCountingObjectStore() + Total Requests: 2 + - LIST prefix=data + - GET (opts) path=data/a=2/b=20/c=200/file_2.csv + " + ); + + assert_snapshot!( + test.query("select * from csv_table_partitioned WHERE a<2 AND b=10 AND c=100").await, + @r" + ------- Query Output (2 rows) ------- + +---------+-------+-------+---+----+-----+ + | d1 | d2 | d3 | a | b | c | + +---------+-------+-------+---+----+-----+ + | 0.00001 | 1e-12 | true | 1 | 10 | 100 | + | 0.00003 | 5e-12 | false | 1 | 10 | 100 | + +---------+-------+-------+---+----+-----+ + ------- Object Store Request Summary ------- + RequestCountingObjectStore() + Total Requests: 2 + - LIST prefix=data + - GET (opts) path=data/a=1/b=10/c=100/file_1.csv + " + ); +} + +#[tokio::test] +async fn create_single_parquet_file_default() { + // The default metadata size hint is 512KB + // which is enough to fetch the entire footer metadata and PageIndex + // in a single GET request. 
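 + // As a rough sketch (editor's assumption, not taken from this patch), the same
 + // hint can be set when registering a table directly; `metadata_size_hint` on
 + // `ParquetReadOptions` takes an `Option<usize>`:
 + //
 + //     let options = ParquetReadOptions::new().metadata_size_hint(Some(512 * 1024));
 + //     ctx.register_parquet("t", "path/to/t.parquet", options).await?;
 + //
 + // A hint that covers the footer plus the PageIndex lets the reader fetch all
 + // metadata in the single ranged GET counted below.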
+ let test = Test::new().with_single_file_parquet().await; + // expect 1 get request which reads the footer metadata and page index + assert_snapshot!( + test.requests(), + @r" + RequestCountingObjectStore() + Total Requests: 2 + - HEAD path=parquet_table.parquet + - GET (range) range=0-2994 path=parquet_table.parquet + " + ); +} + +#[tokio::test] +async fn create_single_parquet_file_prefetch() { + // Explicitly specify a prefetch hint that is adequate for the footer and page index + let test = Test::new() + .with_parquet_metadata_size_hint(Some(1000)) + .with_single_file_parquet() + .await; + // expect 1 1000 byte request which reads the footer metadata and page index + assert_snapshot!( + test.requests(), + @r" + RequestCountingObjectStore() + Total Requests: 2 + - HEAD path=parquet_table.parquet + - GET (range) range=1994-2994 path=parquet_table.parquet + " + ); +} + +#[tokio::test] +async fn create_single_parquet_file_too_small_prefetch() { + // configure a prefetch size that is too small to fetch the footer + // metadata + // + // Using the ranges from the test below (with no_prefetch), + // pick a number less than 730: + // -------- + // 2286-2294: (8 bytes) footer + length + // 2264-2986: (722 bytes) footer metadata + let test = Test::new() + .with_parquet_metadata_size_hint(Some(500)) + .with_single_file_parquet() + .await; + // expect three get requests: + // 1. read the footer (500 bytes per hint, not enough for the footer metadata) + // 2. Read the footer metadata + // 3. reads the PageIndex + assert_snapshot!( + test.requests(), @r" RequestCountingObjectStore() Total Requests: 4 - HEAD path=parquet_table.parquet - - GET (range) range=2986-2994 path=parquet_table.parquet + - GET (range) range=2494-2994 path=parquet_table.parquet - GET (range) range=2264-2986 path=parquet_table.parquet - GET (range) range=2124-2264 path=parquet_table.parquet " ); } +#[tokio::test] +async fn create_single_parquet_file_small_prefetch() { + // configure a prefetch size that is large enough for the footer + // metadata but **not** the PageIndex + // + // Using the ranges from the test below (with no_prefetch), + // the 730 is determined as follows; + // -------- + // 2286-2294: (8 bytes) footer + length + // 2264-2986: (722 bytes) footer metadata + let test = Test::new() + // 740 is enough to get both the footer + length (8 bytes) + // but not the entire PageIndex + .with_parquet_metadata_size_hint(Some(740)) + .with_single_file_parquet() + .await; + // expect two get requests: + // 1. read the footer metadata + // 2. reads the PageIndex + assert_snapshot!( + test.requests(), + @r" + RequestCountingObjectStore() + Total Requests: 3 + - HEAD path=parquet_table.parquet + - GET (range) range=2254-2994 path=parquet_table.parquet + - GET (range) range=2124-2264 path=parquet_table.parquet + " + ); +} + +#[tokio::test] +async fn create_single_parquet_file_no_prefetch() { + let test = Test::new() + // force no prefetch by setting size hint to None + .with_parquet_metadata_size_hint(None) + .with_single_file_parquet() + .await; + // Without a metadata size hint, the parquet reader + // does *three* range requests to read the footer metadata: + // 1. The footer length (last 8 bytes) + // 2. The footer metadata + // 3. 
The PageIndex metadata + assert_snapshot!( + test.requests(), + @r" + RequestCountingObjectStore() + Total Requests: 2 + - HEAD path=parquet_table.parquet + - GET (range) range=0-2994 path=parquet_table.parquet + " + ); +} + #[tokio::test] async fn query_single_parquet_file() { + let test = Test::new().with_single_file_parquet().await; assert_snapshot!( - single_file_parquet_test().await.query("select count(distinct a), count(b) from parquet_table").await, + test.query("select count(distinct a), count(b) from parquet_table").await, @r" ------- Query Output (1 rows) ------- +---------------------------------+------------------------+ @@ -157,10 +390,11 @@ async fn query_single_parquet_file() { #[tokio::test] async fn query_single_parquet_file_with_single_predicate() { + let test = Test::new().with_single_file_parquet().await; // Note that evaluating predicates requires additional object store requests // (to evaluate predicates) assert_snapshot!( - single_file_parquet_test().await.query("select min(a), max(b) from parquet_table WHERE a > 150").await, + test.query("select min(a), max(b) from parquet_table WHERE a > 150").await, @r" ------- Query Output (1 rows) ------- +----------------------+----------------------+ @@ -179,10 +413,12 @@ async fn query_single_parquet_file_with_single_predicate() { #[tokio::test] async fn query_single_parquet_file_multi_row_groups_multiple_predicates() { + let test = Test::new().with_single_file_parquet().await; + // Note that evaluating predicates requires additional object store requests // (to evaluate predicates) assert_snapshot!( - single_file_parquet_test().await.query("select min(a), max(b) from parquet_table WHERE a > 50 AND b < 1150").await, + test.query("select min(a), max(b) from parquet_table WHERE a > 50 AND b < 1150").await, @r" ------- Query Output (1 rows) ------- +----------------------+----------------------+ @@ -200,75 +436,16 @@ async fn query_single_parquet_file_multi_row_groups_multiple_predicates() { ); } -/// Create a test with a single CSV file with three columns and two rows -async fn single_file_csv_test() -> Test { - // upload CSV data to object store - let csv_data = r#"c1,c2,c3 -0.00001,5e-12,true -0.00002,4e-12,false -"#; - - Test::new() - .with_bytes("/csv_table.csv", csv_data) - .await - .register_csv("csv_table", "/csv_table.csv") - .await -} - -/// Create a test with three CSV files in a directory -async fn multi_file_csv_test() -> Test { - let mut test = Test::new(); - // upload CSV data to object store - for i in 0..3 { - let csv_data1 = format!( - r#"c1,c2,c3 -0.0000{i},{i}e-12,true -0.00003,5e-12,false -"# - ); - test = test - .with_bytes(&format!("/data/file_{i}.csv"), csv_data1) - .await; - } - // register table - test.register_csv("csv_table", "/data/").await -} - -/// Create a test with a single parquet file that has two -/// columns and two row groups -/// -/// Column "a": Int32 with values 0-100] in row group 1 -/// and [101-200] in row group 2 -/// -/// Column "b": Int32 with values 1000-1100] in row group 1 -/// and [1101-1200] in row group 2 -async fn single_file_parquet_test() -> Test { - // Create parquet bytes - let a: ArrayRef = Arc::new(Int32Array::from_iter_values(0..200)); - let b: ArrayRef = Arc::new(Int32Array::from_iter_values(1000..1200)); - let batch = RecordBatch::try_from_iter([("a", a), ("b", b)]).unwrap(); - - let mut buffer = vec![]; - let props = parquet::file::properties::WriterProperties::builder() - .set_max_row_group_size(100) - .build(); - let mut writer = - 
parquet::arrow::ArrowWriter::try_new(&mut buffer, batch.schema(), Some(props)) - .unwrap(); - writer.write(&batch).unwrap(); - writer.close().unwrap(); - - Test::new() - .with_bytes("/parquet_table.parquet", buffer) - .await - .register_parquet("parquet_table", "/parquet_table.parquet") - .await -} - /// Runs tests with a request counting object store struct Test { object_store: Arc<RequestCountingObjectStore>, session_context: SessionContext, + /// metadata size hint to use when registering parquet files + /// + /// * `None`: uses the default (does not set a size_hint) + /// * `Some(None)`: set prefetch hint to None (no prefetching) + /// * `Some(Some(size))`: set prefetch hint to size + parquet_metadata_size_hint: Option<Option<usize>>, } impl Test { @@ -281,9 +458,16 @@ impl Test { Self { object_store, session_context, + parquet_metadata_size_hint: None, } } + /// Specify the metadata size hint to use when registering parquet files + fn with_parquet_metadata_size_hint(mut self, size_hint: Option<usize>) -> Self { + self.parquet_metadata_size_hint = Some(size_hint); + self + } + /// Returns a string representation of all recorded requests thus far fn requests(&self) -> String { format!("{}", self.object_store) @@ -300,7 +484,7 @@ impl Test { self } - /// Register a CSV file at the given path relative to the [`datafusion_test_data`] directory + /// Register a CSV file at the given path async fn register_csv(self, table_name: &str, path: &str) -> Self { let mut options = CsvReadOptions::new(); options.has_header = true; @@ -312,16 +496,133 @@ impl Test { self } - /// Register a CSV file at the given path relative to the [`datafusion_test_data`] directory + /// Register a partitioned CSV table at the given path + async fn register_partitioned_csv(self, table_name: &str, path: &str) -> Self { + let file_format = Arc::new(CsvFormat::default().with_has_header(true)); + let options = ListingOptions::new(file_format); + + let url = format!("mem://{path}").parse().unwrap(); + let table_url = ListingTableUrl::try_new(url, None).unwrap(); + + let session_state = self.session_context.state(); + let mut config = ListingTableConfig::new(table_url).with_listing_options(options); + config = config + .infer_partitions_from_path(&session_state) + .await + .unwrap(); + config = config.infer_schema(&session_state).await.unwrap(); + + let table = Arc::new(ListingTable::try_new(config).unwrap()); + self.session_context + .register_table(table_name, table) + .unwrap(); + self + } + + /// Register a Parquet file at the given path async fn register_parquet(self, table_name: &str, path: &str) -> Self { let path = format!("mem://{path}"); + let mut options: ParquetReadOptions<'_> = ParquetReadOptions::new(); + + // If a metadata size hint was specified, apply it + if let Some(parquet_metadata_size_hint) = self.parquet_metadata_size_hint { + options = options.metadata_size_hint(parquet_metadata_size_hint); + } + self.session_context - .register_parquet(table_name, path, Default::default()) + .register_parquet(table_name, path, options) .await .unwrap(); self } + + /// Register a single CSV file with three columns and two rows named + /// `csv_table` + async fn with_single_file_csv(self) -> Test { + // upload CSV data to object store + let csv_data = r#"c1,c2,c3 +0.00001,5e-12,true +0.00002,4e-12,false +"#; + self.with_bytes("/csv_table.csv", csv_data) + .await + .register_csv("csv_table", "/csv_table.csv") + .await + } + + /// Register three CSV files in a directory, called `csv_table` + async fn with_multi_file_csv(mut self) -> Test { + // upload CSV data to object 
store + for i in 0..3 { + let csv_data1 = format!( + r#"c1,c2,c3 +0.0000{i},{i}e-12,true +0.00003,5e-12,false +"# + ); + self = self + .with_bytes(&format!("/data/file_{i}.csv"), csv_data1) + .await; + } + // register table + self.register_csv("csv_table", "/data/").await + } + + /// Register three CSV files in a partitioned directory structure, called + /// `csv_table_partitioned` + async fn with_partitioned_csv(mut self) -> Test { + for i in 1..4 { + // upload CSV data to object store + let csv_data1 = format!( + r#"d1,d2,d3 +0.0000{i},{i}e-12,true +0.00003,5e-12,false +"# + ); + self = self + .with_bytes( + &format!("/data/a={i}/b={}/c={}/file_{i}.csv", i * 10, i * 100,), + csv_data1, + ) + .await; + } + // register table + self.register_partitioned_csv("csv_table_partitioned", "/data/") + .await + } + + /// Add a single parquet file that has two columns and two row groups named `parquet_table` + /// + /// Column "a": Int32 with values 0-100] in row group 1 + /// and [101-200] in row group 2 + /// + /// Column "b": Int32 with values 1000-1100] in row group 1 + /// and [1101-1200] in row group 2 + async fn with_single_file_parquet(self) -> Test { + // Create parquet bytes + let a: ArrayRef = Arc::new(Int32Array::from_iter_values(0..200)); + let b: ArrayRef = Arc::new(Int32Array::from_iter_values(1000..1200)); + let batch = RecordBatch::try_from_iter([("a", a), ("b", b)]).unwrap(); + + let mut buffer = vec![]; + let props = parquet::file::properties::WriterProperties::builder() + .set_max_row_group_size(100) + .build(); + let mut writer = parquet::arrow::ArrowWriter::try_new( + &mut buffer, + batch.schema(), + Some(props), + ) + .unwrap(); + writer.write(&batch).unwrap(); + writer.close().unwrap(); + + self.with_bytes("/parquet_table.parquet", buffer) + .await + .register_parquet("parquet_table", "/parquet_table.parquet") + .await + } + /// Runs the specified query and returns a string representation of the results /// suitable for comparison with insta snapshots /// diff --git a/datafusion/core/tests/execution/mod.rs b/datafusion/core/tests/execution/mod.rs index 8770b2a20105..f33ef87aa302 100644 --- a/datafusion/core/tests/execution/mod.rs +++ b/datafusion/core/tests/execution/mod.rs @@ -18,3 +18,4 @@ mod coop; mod datasource_split; mod logical_plan; +mod register_arrow; diff --git a/datafusion/core/tests/execution/register_arrow.rs b/datafusion/core/tests/execution/register_arrow.rs new file mode 100644 index 000000000000..4ce16dc0906c --- /dev/null +++ b/datafusion/core/tests/execution/register_arrow.rs @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! 
Integration tests for register_arrow API + +use datafusion::{execution::options::ArrowReadOptions, prelude::*}; +use datafusion_common::Result; + +#[tokio::test] +async fn test_register_arrow_auto_detects_format() -> Result<()> { + let ctx = SessionContext::new(); + + ctx.register_arrow( + "file_format", + "../../datafusion/datasource-arrow/tests/data/example.arrow", + ArrowReadOptions::default(), + ) + .await?; + + ctx.register_arrow( + "stream_format", + "../../datafusion/datasource-arrow/tests/data/example_stream.arrow", + ArrowReadOptions::default(), + ) + .await?; + + let file_result = ctx.sql("SELECT * FROM file_format ORDER BY f0").await?; + let stream_result = ctx.sql("SELECT * FROM stream_format ORDER BY f0").await?; + + let file_batches = file_result.collect().await?; + let stream_batches = stream_result.collect().await?; + + assert_eq!(file_batches.len(), stream_batches.len()); + assert_eq!(file_batches[0].schema(), stream_batches[0].schema()); + + let file_rows: usize = file_batches.iter().map(|b| b.num_rows()).sum(); + let stream_rows: usize = stream_batches.iter().map(|b| b.num_rows()).sum(); + assert_eq!(file_rows, stream_rows); + + Ok(()) +} + +#[tokio::test] +async fn test_register_arrow_join_file_and_stream() -> Result<()> { + let ctx = SessionContext::new(); + + ctx.register_arrow( + "file_table", + "../../datafusion/datasource-arrow/tests/data/example.arrow", + ArrowReadOptions::default(), + ) + .await?; + + ctx.register_arrow( + "stream_table", + "../../datafusion/datasource-arrow/tests/data/example_stream.arrow", + ArrowReadOptions::default(), + ) + .await?; + + let result = ctx + .sql( + "SELECT a.f0, a.f1, b.f0, b.f1 + FROM file_table a + JOIN stream_table b ON a.f0 = b.f0 + WHERE a.f0 <= 2 + ORDER BY a.f0", + ) + .await?; + let batches = result.collect().await?; + + let total_rows: usize = batches.iter().map(|b| b.num_rows()).sum(); + assert_eq!(total_rows, 2); + + Ok(()) +} diff --git a/datafusion/core/tests/expr_api/simplification.rs b/datafusion/core/tests/expr_api/simplification.rs index 572a7e2b335c..46c36c6abdac 100644 --- a/datafusion/core/tests/expr_api/simplification.rs +++ b/datafusion/core/tests/expr_api/simplification.rs @@ -514,7 +514,7 @@ fn multiple_now() -> Result<()> { // expect the same timestamp appears in both exprs let actual = get_optimized_plan_formatted(plan, &time); let expected = format!( - "Projection: TimestampNanosecond({}, Some(\"+00:00\")) AS now(), TimestampNanosecond({}, Some(\"+00:00\")) AS t2\n TableScan: test", + "Projection: TimestampNanosecond({}, None) AS now(), TimestampNanosecond({}, None) AS t2\n TableScan: test", time.timestamp_nanos_opt().unwrap(), time.timestamp_nanos_opt().unwrap() ); diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs index 2abfcd8417cb..fa8ea0b31c02 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/context_generator.rs @@ -44,7 +44,6 @@ use crate::fuzz_cases::aggregation_fuzzer::data_generator::Dataset; /// - hint `sorted` or not /// - `spilling` or not (TODO, I think a special `MemoryPool` may be needed /// to support this) -/// pub struct SessionContextGenerator { /// Current testing dataset dataset: Arc, diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs index 753a74995d8f..aaf2d1b9bad4 100644 --- 
a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs @@ -39,7 +39,6 @@ use crate::fuzz_cases::record_batch_generator::{ColumnDescr, RecordBatchGenerato /// will generate one `base dataset` firstly. Then the `base dataset` will be sorted /// based on each `sort_key` respectively. And finally `len(sort_keys) + 1` datasets /// will be returned -/// #[derive(Debug, Clone)] pub struct DatasetGeneratorConfig { /// Descriptions of columns in datasets, it's `required` @@ -115,7 +114,6 @@ impl DatasetGeneratorConfig { /// /// - Split each batch to multiple batches which each sub-batch in has the randomly `rows num`, /// and this multiple batches will be used to create the `Dataset`. -/// pub struct DatasetGenerator { batch_generator: RecordBatchGenerator, sort_keys_set: Vec>, diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs index b90b3e5e32df..1a8ef278cc29 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/fuzzer.rs @@ -253,7 +253,6 @@ impl AggregationFuzzer { /// /// - `dataset_ref`, the input dataset, store it for error reported when found /// the inconsistency between the one for `ctx` and `expected results`. -/// struct AggregationFuzzTestTask { /// Generated session context in current test case ctx_with_params: SessionContextWithParams, diff --git a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs index 209278385b7b..766e2bedd74c 100644 --- a/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs +++ b/datafusion/core/tests/fuzz_cases/aggregation_fuzzer/query_builder.rs @@ -24,7 +24,7 @@ use rand::{rng, seq::SliceRandom, Rng}; /// Creates queries like /// ```sql /// SELECT AGG(..) FROM table_name GROUP BY -///``` +/// ``` #[derive(Debug, Default, Clone)] pub struct QueryBuilder { // =================================== @@ -95,7 +95,6 @@ pub struct QueryBuilder { /// More details can see [`GroupOrdering`]. 
/// /// [`GroupOrdering`]: datafusion_physical_plan::aggregates::order::GroupOrdering - /// dataset_sort_keys: Vec<Vec<String>>, /// If we will also test the no grouping case like: @@ -103,7 +102,6 @@ pub struct QueryBuilder { /// ```text /// SELECT aggr FROM t; /// ``` - /// no_grouping: bool, // ==================================== diff --git a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs index 69639b3e09fd..a72a1558b2e4 100644 --- a/datafusion/core/tests/fuzz_cases/equivalence/projection.rs +++ b/datafusion/core/tests/fuzz_cases/equivalence/projection.rs @@ -58,7 +58,7 @@ fn project_orderings_random() -> Result<()> { Operator::Plus, col("b", &test_schema)?, )) as Arc<dyn PhysicalExpr>; - let proj_exprs = vec![ + let proj_exprs = [ (col("a", &test_schema)?, "a_new"), (col("b", &test_schema)?, "b_new"), (col("c", &test_schema)?, "c_new"), @@ -132,7 +132,7 @@ fn ordering_satisfy_after_projection_random() -> Result<()> { Operator::Plus, col("b", &test_schema)?, )) as Arc<dyn PhysicalExpr>; - let proj_exprs = vec![ + let proj_exprs = [ (col("a", &test_schema)?, "a_new"), (col("b", &test_schema)?, "b_new"), (col("c", &test_schema)?, "c_new"), diff --git a/datafusion/core/tests/fuzz_cases/pruning.rs b/datafusion/core/tests/fuzz_cases/pruning.rs index f8bd4dbc1a76..51ec8f03e5d2 100644 --- a/datafusion/core/tests/fuzz_cases/pruning.rs +++ b/datafusion/core/tests/fuzz_cases/pruning.rs @@ -276,13 +276,12 @@ async fn execute_with_predicate( ctx: &SessionContext, ) -> Vec<RecordBatch> { let parquet_source = if prune_stats { - ParquetSource::default().with_predicate(predicate.clone()) + ParquetSource::new(schema.clone()).with_predicate(predicate.clone()) } else { - ParquetSource::default() + ParquetSource::new(schema.clone()) }; let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("memory://").unwrap(), - schema.clone(), Arc::new(parquet_source), ) .with_file_group( diff --git a/datafusion/core/tests/optimizer/mod.rs b/datafusion/core/tests/optimizer/mod.rs index aec32d05624c..9b2a5596827d 100644 --- a/datafusion/core/tests/optimizer/mod.rs +++ b/datafusion/core/tests/optimizer/mod.rs @@ -287,7 +287,7 @@ fn test_nested_schema_nullability() { #[test] fn test_inequalities_non_null_bounded() { - let guarantees = vec![ + let guarantees = [ // x ∈ [1, 3] (not null) ( col("x"), diff --git a/datafusion/core/tests/parquet/custom_reader.rs b/datafusion/core/tests/parquet/custom_reader.rs index 3a1f06656236..0a147d15a6fd 100644 --- a/datafusion/core/tests/parquet/custom_reader.rs +++ b/datafusion/core/tests/parquet/custom_reader.rs @@ -80,7 +80,7 @@ async fn route_data_access_ops_to_parquet_file_reader_factory() { .collect(); let source = Arc::new( - ParquetSource::default() + ParquetSource::new(file_schema.clone()) // prepare the scan .with_parquet_file_reader_factory(Arc::new( InMemoryParquetFileReaderFactory(Arc::clone(&in_memory_object_store)), @@ -89,7 +89,6 @@ async fn route_data_access_ops_to_parquet_file_reader_factory() { let base_config = FileScanConfigBuilder::new( // just any url that doesn't point to in memory object store ObjectStoreUrl::local_filesystem(), - file_schema, source, ) .with_file_group(file_group) diff --git a/datafusion/core/tests/parquet/encryption.rs b/datafusion/core/tests/parquet/encryption.rs index 819d8bf3a283..09b93f06ce85 100644 --- a/datafusion/core/tests/parquet/encryption.rs +++ b/datafusion/core/tests/parquet/encryption.rs @@ -314,7 +314,7 @@ async fn verify_file_encrypted( for col in row_group.columns() { assert!(matches!( col.crypto_metadata(), 
- Some(ColumnCryptoMetaData::EncryptionWithFooterKey) + Some(ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY) )); } } @@ -336,7 +336,7 @@ impl EncryptionFactory for MockEncryptionFactory { config: &EncryptionFactoryOptions, _schema: &SchemaRef, file_path: &object_store::path::Path, - ) -> datafusion_common::Result> { + ) -> datafusion_common::Result>> { assert_eq!( config.options.get("test_key"), Some(&"test value".to_string()) @@ -353,7 +353,7 @@ impl EncryptionFactory for MockEncryptionFactory { &self, config: &EncryptionFactoryOptions, file_path: &object_store::path::Path, - ) -> datafusion_common::Result> { + ) -> datafusion_common::Result>> { assert_eq!( config.options.get("test_key"), Some(&"test value".to_string()) diff --git a/datafusion/core/tests/parquet/external_access_plan.rs b/datafusion/core/tests/parquet/external_access_plan.rs index a5397c5a397c..b35cb6e09cfb 100644 --- a/datafusion/core/tests/parquet/external_access_plan.rs +++ b/datafusion/core/tests/parquet/external_access_plan.rs @@ -33,7 +33,7 @@ use datafusion_common::{assert_contains, DFSchema}; use datafusion_datasource_parquet::{ParquetAccessPlan, RowGroupAccess}; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_expr::{col, lit, Expr}; -use datafusion_physical_plan::metrics::MetricsSet; +use datafusion_physical_plan::metrics::{MetricValue, MetricsSet}; use datafusion_physical_plan::ExecutionPlan; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; @@ -178,12 +178,21 @@ async fn plan_and_filter() { .unwrap(); // Verify that row group pruning still happens for just that group - let row_groups_pruned_statistics = - metric_value(&parquet_metrics, "row_groups_pruned_statistics").unwrap(); - assert_eq!( - row_groups_pruned_statistics, 1, - "metrics : {parquet_metrics:#?}", - ); + let row_groups_pruned_statistics = parquet_metrics + .sum_by_name("row_groups_pruned_statistics") + .unwrap(); + if let MetricValue::PruningMetrics { + pruning_metrics, .. 
+ } = row_groups_pruned_statistics + { + assert_eq!( + pruning_metrics.pruned(), + 1, + "metrics : {parquet_metrics:#?}", + ); + } else { + unreachable!("metrics `row_groups_pruned_statistics` should exist") + } } #[tokio::test] @@ -346,11 +355,11 @@ impl TestFull { let source = if let Some(predicate) = predicate { let df_schema = DFSchema::try_from(schema.clone())?; let predicate = ctx.create_physical_expr(predicate, &df_schema)?; - Arc::new(ParquetSource::default().with_predicate(predicate)) + Arc::new(ParquetSource::new(schema.clone()).with_predicate(predicate)) } else { - Arc::new(ParquetSource::default()) + Arc::new(ParquetSource::new(schema.clone())) }; - let config = FileScanConfigBuilder::new(object_store_url, schema.clone(), source) + let config = FileScanConfigBuilder::new(object_store_url, source) .with_file(partitioned_file) .build(); diff --git a/datafusion/core/tests/parquet/filter_pushdown.rs b/datafusion/core/tests/parquet/filter_pushdown.rs index b769fec7d372..966f25161397 100644 --- a/datafusion/core/tests/parquet/filter_pushdown.rs +++ b/datafusion/core/tests/parquet/filter_pushdown.rs @@ -29,7 +29,7 @@ use arrow::compute::concat_batches; use arrow::record_batch::RecordBatch; use datafusion::physical_plan::collect; -use datafusion::physical_plan::metrics::MetricsSet; +use datafusion::physical_plan::metrics::{MetricValue, MetricsSet}; use datafusion::prelude::{ col, lit, lit_timestamp_nano, Expr, ParquetReadOptions, SessionContext, }; @@ -563,9 +563,9 @@ impl<'a> TestCase<'a> { } }; - let page_index_rows_pruned = get_value(&metrics, "page_index_rows_pruned"); + let (page_index_rows_pruned, page_index_rows_matched) = + get_pruning_metrics(&metrics, "page_index_rows_pruned"); println!(" page_index_rows_pruned: {page_index_rows_pruned}"); - let page_index_rows_matched = get_value(&metrics, "page_index_rows_matched"); println!(" page_index_rows_matched: {page_index_rows_matched}"); let page_index_filtering_expected = if scan_options.enable_page_index { @@ -592,14 +592,29 @@ impl<'a> TestCase<'a> { } } +fn get_pruning_metrics(metrics: &MetricsSet, metric_name: &str) -> (usize, usize) { + match metrics.sum_by_name(metric_name) { + Some(MetricValue::PruningMetrics { + pruning_metrics, .. + }) => (pruning_metrics.pruned(), pruning_metrics.matched()), + Some(_) => { + panic!("Metric '{metric_name}' is not a pruning metric in\n\n{metrics:#?}") + } + None => panic!( + "Expected metric not found. Looking for '{metric_name}' in\n\n{metrics:#?}" + ), + } +} + fn get_value(metrics: &MetricsSet, metric_name: &str) -> usize { match metrics.sum_by_name(metric_name) { + Some(MetricValue::PruningMetrics { + pruning_metrics, .. + }) => pruning_metrics.pruned(), Some(v) => v.as_usize(), - _ => { - panic!( - "Expected metric not found. Looking for '{metric_name}' in\n\n{metrics:#?}" - ); - } + None => panic!( + "Expected metric not found. Looking for '{metric_name}' in\n\n{metrics:#?}" + ), } } @@ -631,8 +646,8 @@ async fn predicate_cache_pushdown_default() -> datafusion_common::Result<()> { #[tokio::test] async fn predicate_cache_pushdown_disable() -> datafusion_common::Result<()> { - // Can disable the cache even with filter pushdown by setting the size to 0. In this case we - // expect the inner records are reported but no records are read from the cache + // Can disable the cache even with filter pushdown by setting the size to 0. 
+ // This results in no records read from the cache and no metrics reported let mut config = SessionConfig::new(); config.options_mut().execution.parquet.pushdown_filters = true; config @@ -641,13 +656,10 @@ async fn predicate_cache_pushdown_disable() -> datafusion_common::Result<()> { .parquet .max_predicate_cache_size = Some(0); let ctx = SessionContext::new_with_config(config); + // Since the cache is disabled, there is no reporting or use of the cache PredicateCacheTest { - // file has 8 rows, which need to be read twice, one for filter, one for - // final output - expected_inner_records: 16, - // Expect this to 0 records read as the cache is disabled. However, it is - // non zero due to https://github.com/apache/arrow-rs/issues/8307 - expected_records: 3, + expected_inner_records: 0, + expected_records: 0, } .run(&ctx) .await diff --git a/datafusion/core/tests/parquet/mod.rs b/datafusion/core/tests/parquet/mod.rs index c44d14abd381..097600e45ead 100644 --- a/datafusion/core/tests/parquet/mod.rs +++ b/datafusion/core/tests/parquet/mod.rs @@ -37,6 +37,7 @@ use datafusion::{ prelude::{ParquetReadOptions, SessionConfig, SessionContext}, }; use datafusion_expr::{Expr, LogicalPlan, LogicalPlanBuilder}; +use datafusion_physical_plan::metrics::MetricValue; use parquet::arrow::ArrowWriter; use parquet::file::properties::{EnabledStatistics, WriterProperties}; use std::sync::Arc; @@ -125,9 +126,44 @@ struct TestOutput { impl TestOutput { /// retrieve the value of the named metric, if any fn metric_value(&self, metric_name: &str) -> Option { + if let Some((pruned, _matched)) = self.pruning_metric(metric_name) { + return Some(pruned); + } + self.parquet_metrics .sum(|metric| metric.value().name() == metric_name) - .map(|v| v.as_usize()) + .map(|v| match v { + MetricValue::PruningMetrics { + pruning_metrics, .. + } => pruning_metrics.pruned(), + _ => v.as_usize(), + }) + } + + fn pruning_metric(&self, metric_name: &str) -> Option<(usize, usize)> { + let mut total_pruned = 0; + let mut total_matched = 0; + let mut found = false; + + for metric in self.parquet_metrics.iter() { + let metric = metric.as_ref(); + if metric.value().name() == metric_name { + if let MetricValue::PruningMetrics { + pruning_metrics, .. 
+ } = metric.value() { + total_pruned += pruning_metrics.pruned(); + total_matched += pruning_metrics.matched(); + found = true; + } + } + } + + if found { + Some((total_pruned, total_matched)) + } else { + None + } } /// The number of times the pruning predicate evaluation errors @@ -135,47 +171,52 @@ impl TestOutput { self.metric_value("predicate_evaluation_errors") } - /// The number of row_groups matched by bloom filter - fn row_groups_matched_bloom_filter(&self) -> Option { - self.metric_value("row_groups_matched_bloom_filter") - } - - /// The number of row_groups pruned by bloom filter - fn row_groups_pruned_bloom_filter(&self) -> Option { - self.metric_value("row_groups_pruned_bloom_filter") + /// The number of row_groups pruned / matched by bloom filter + fn row_groups_bloom_filter(&self) -> Option<(usize, usize)> { + self.pruning_metric("row_groups_pruned_bloom_filter") } /// The number of row_groups matched by statistics fn row_groups_matched_statistics(&self) -> Option { - self.metric_value("row_groups_matched_statistics") + self.pruning_metric("row_groups_pruned_statistics") + .map(|(_pruned, matched)| matched) } /// The number of row_groups pruned by statistics fn row_groups_pruned_statistics(&self) -> Option { - self.metric_value("row_groups_pruned_statistics") + self.pruning_metric("row_groups_pruned_statistics") + .map(|(pruned, _matched)| pruned) } + /// Metric `files_ranges_pruned_statistics` tracks both pruned and matched counts; + /// for testing purposes, this helper only aggregates the `pruned` count. fn files_ranges_pruned_statistics(&self) -> Option { - self.metric_value("files_ranges_pruned_statistics") + self.pruning_metric("files_ranges_pruned_statistics") + .map(|(pruned, _matched)| pruned) } /// The number of row_groups matched by bloom filter or statistics + /// + /// E.g. starting with 10 row groups: statistics go from 10 total -> 7 matched, the + /// bloom filter goes from 7 total -> 3 matched, and this function returns 3 as the + /// final matched count.
fn row_groups_matched(&self) -> Option { - self.row_groups_matched_bloom_filter() - .zip(self.row_groups_matched_statistics()) - .map(|(a, b)| a + b) + self.row_groups_bloom_filter() + .map(|(_pruned, matched)| matched) } /// The number of row_groups pruned fn row_groups_pruned(&self) -> Option { - self.row_groups_pruned_bloom_filter() + self.row_groups_bloom_filter() + .map(|(pruned, _matched)| pruned) .zip(self.row_groups_pruned_statistics()) .map(|(a, b)| a + b) } /// The number of row pages pruned fn row_pages_pruned(&self) -> Option { - self.metric_value("page_index_rows_pruned") + self.pruning_metric("page_index_rows_pruned") + .map(|(pruned, _matched)| pruned) } fn description(&self) -> String { diff --git a/datafusion/core/tests/parquet/page_pruning.rs b/datafusion/core/tests/parquet/page_pruning.rs index 27bee10234b5..fb2a196b0aa6 100644 --- a/datafusion/core/tests/parquet/page_pruning.rs +++ b/datafusion/core/tests/parquet/page_pruning.rs @@ -81,12 +81,12 @@ async fn get_parquet_exec( let predicate = create_physical_expr(&filter, &df_schema, &execution_props).unwrap(); let source = Arc::new( - ParquetSource::default() + ParquetSource::new(schema.clone()) .with_predicate(predicate) .with_enable_page_index(true) .with_pushdown_filters(pushdown_filters), ); - let base_config = FileScanConfigBuilder::new(object_store_url, schema, source) + let base_config = FileScanConfigBuilder::new(object_store_url, source) .with_file(partitioned_file) .build(); diff --git a/datafusion/core/tests/parquet/row_group_pruning.rs b/datafusion/core/tests/parquet/row_group_pruning.rs index 44409166d3ce..0411298055f2 100644 --- a/datafusion/core/tests/parquet/row_group_pruning.rs +++ b/datafusion/core/tests/parquet/row_group_pruning.rs @@ -133,13 +133,14 @@ impl RowGroupPruningTest { self.expected_files_pruned_by_statistics, "mismatched files_ranges_pruned_statistics", ); + let bloom_filter_metrics = output.row_groups_bloom_filter(); assert_eq!( - output.row_groups_matched_bloom_filter(), + bloom_filter_metrics.map(|(_pruned, matched)| matched), self.expected_row_group_matched_by_bloom_filter, "mismatched row_groups_matched_bloom_filter", ); assert_eq!( - output.row_groups_pruned_bloom_filter(), + bloom_filter_metrics.map(|(pruned, _matched)| pruned), self.expected_row_group_pruned_by_bloom_filter, "mismatched row_groups_pruned_bloom_filter", ); @@ -163,7 +164,7 @@ async fn prune_timestamps_nanos() { .with_matched_by_stats(Some(3)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(3)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(10) .test_row_group_prune() @@ -181,7 +182,7 @@ async fn prune_timestamps_micros() { .with_matched_by_stats(Some(3)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(3)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(10) .test_row_group_prune() @@ -199,7 +200,7 @@ async fn prune_timestamps_millis() { .with_matched_by_stats(Some(3)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(3)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(10) .test_row_group_prune() @@ -217,7 +218,7 @@ async fn prune_timestamps_seconds() { .with_matched_by_stats(Some(3)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(3)) 
.with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(10) .test_row_group_prune() @@ -233,7 +234,7 @@ async fn prune_date32() { .with_matched_by_stats(Some(1)) .with_pruned_by_stats(Some(3)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(1)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(1) .test_row_group_prune() @@ -262,8 +263,9 @@ async fn prune_date64() { println!("{}", output.description()); // This should prune out groups without error assert_eq!(output.predicate_evaluation_errors(), Some(0)); - assert_eq!(output.row_groups_matched(), Some(1)); - assert_eq!(output.row_groups_pruned(), Some(3)); + // 'dates' table has 4 row groups, and only the first one is matched by the predicate + assert_eq!(output.row_groups_matched_statistics(), Some(1)); + assert_eq!(output.row_groups_pruned_statistics(), Some(3)); assert_eq!(output.result_rows, 1, "{}", output.description()); } @@ -276,7 +278,7 @@ async fn prune_disabled() { .with_matched_by_stats(Some(3)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(3)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(10) .test_row_group_prune() @@ -296,7 +298,7 @@ async fn prune_disabled() { // This should not prune any assert_eq!(output.predicate_evaluation_errors(), Some(0)); - assert_eq!(output.row_groups_matched(), Some(0)); + assert_eq!(output.row_groups_matched(), Some(4)); assert_eq!(output.row_groups_pruned(), Some(0)); assert_eq!( output.result_rows, @@ -322,7 +324,7 @@ macro_rules! int_tests { .with_matched_by_stats(Some(3)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(3)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(11) .test_row_group_prune() @@ -337,7 +339,7 @@ macro_rules! int_tests { .with_matched_by_stats(Some(3)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(3)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(11) .test_row_group_prune() @@ -381,10 +383,10 @@ macro_rules! int_tests { .with_scenario(Scenario::Int) .with_query(&format!("SELECT * FROM t where abs(i{}) = 1", $bits)) .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(0)) + .with_matched_by_stats(Some(4)) .with_pruned_by_stats(Some(0)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(4)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(3) .test_row_group_prune() @@ -397,10 +399,10 @@ macro_rules! int_tests { .with_scenario(Scenario::Int) .with_query(&format!("SELECT * FROM t where i{}+1 = 1", $bits)) .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(0)) + .with_matched_by_stats(Some(4)) .with_pruned_by_stats(Some(0)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(4)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(2) .test_row_group_prune() @@ -413,10 +415,10 @@ macro_rules! 
int_tests { .with_scenario(Scenario::Int) .with_query(&format!("SELECT * FROM t where 1-i{} > 1", $bits)) .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(0)) + .with_matched_by_stats(Some(4)) .with_pruned_by_stats(Some(0)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(4)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(9) .test_row_group_prune() @@ -498,7 +500,7 @@ macro_rules! uint_tests { .with_matched_by_stats(Some(3)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(3)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(11) .test_row_group_prune() @@ -542,10 +544,10 @@ macro_rules! uint_tests { .with_scenario(Scenario::UInt) .with_query(&format!("SELECT * FROM t where power(u{}, 2) = 25", $bits)) .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(0)) + .with_matched_by_stats(Some(4)) .with_pruned_by_stats(Some(0)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(4)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(2) .test_row_group_prune() @@ -558,10 +560,10 @@ macro_rules! uint_tests { .with_scenario(Scenario::UInt) .with_query(&format!("SELECT * FROM t where u{}+1 = 6", $bits)) .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(0)) + .with_matched_by_stats(Some(4)) .with_pruned_by_stats(Some(0)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(4)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(2) .test_row_group_prune() @@ -682,7 +684,7 @@ async fn prune_f64_lt() { .with_matched_by_stats(Some(3)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(3)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(11) .test_row_group_prune() @@ -694,7 +696,7 @@ async fn prune_f64_lt() { .with_matched_by_stats(Some(3)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(3)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(11) .test_row_group_prune() @@ -712,7 +714,7 @@ async fn prune_f64_scalar_fun_and_gt() { .with_matched_by_stats(Some(2)) .with_pruned_by_stats(Some(2)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(2)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(1) .test_row_group_prune() @@ -726,10 +728,10 @@ async fn prune_f64_scalar_fun() { .with_scenario(Scenario::Float64) .with_query("SELECT * FROM t where abs(f-1) <= 0.000001") .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(0)) + .with_matched_by_stats(Some(4)) .with_pruned_by_stats(Some(0)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(4)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(1) .test_row_group_prune() @@ -743,10 +745,10 @@ async fn prune_f64_complex_expr() { .with_scenario(Scenario::Float64) .with_query("SELECT * FROM t where f+1 > 1.1") .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(0)) + .with_matched_by_stats(Some(4)) .with_pruned_by_stats(Some(0)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(4)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(9) .test_row_group_prune() @@ 
-760,10 +762,10 @@ async fn prune_f64_complex_expr_subtract() { .with_scenario(Scenario::Float64) .with_query("SELECT * FROM t where 1-f > 1") .with_expected_errors(Some(0)) - .with_matched_by_stats(Some(0)) + .with_matched_by_stats(Some(4)) .with_pruned_by_stats(Some(0)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(4)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(9) .test_row_group_prune() @@ -782,7 +784,7 @@ async fn prune_decimal_lt() { .with_matched_by_stats(Some(2)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(2)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(6) .test_row_group_prune() @@ -794,7 +796,7 @@ async fn prune_decimal_lt() { .with_matched_by_stats(Some(2)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(2)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(8) .test_row_group_prune() @@ -806,7 +808,7 @@ async fn prune_decimal_lt() { .with_matched_by_stats(Some(2)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(2)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(6) .test_row_group_prune() @@ -818,7 +820,7 @@ async fn prune_decimal_lt() { .with_matched_by_stats(Some(2)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(2)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(8) .test_row_group_prune() @@ -894,7 +896,7 @@ async fn prune_decimal_in_list() { .with_matched_by_stats(Some(2)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(2)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(5) .test_row_group_prune() @@ -906,7 +908,7 @@ async fn prune_decimal_in_list() { .with_matched_by_stats(Some(2)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(2)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(6) .test_row_group_prune() @@ -918,7 +920,7 @@ async fn prune_decimal_in_list() { .with_matched_by_stats(Some(2)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(2)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(5) .test_row_group_prune() @@ -930,7 +932,7 @@ async fn prune_decimal_in_list() { .with_matched_by_stats(Some(2)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(2)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(6) .test_row_group_prune() @@ -1064,7 +1066,7 @@ async fn prune_string_lt() { .with_matched_by_stats(Some(1)) .with_pruned_by_stats(Some(2)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(1)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(3) .test_row_group_prune() @@ -1079,7 +1081,7 @@ async fn prune_string_lt() { .with_matched_by_stats(Some(2)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(2)) .with_pruned_by_bloom_filter(Some(0)) // all backends from 'mixed' 
and 'all backends' .with_expected_rows(8) @@ -1172,7 +1174,7 @@ async fn prune_binary_lt() { .with_matched_by_stats(Some(1)) .with_pruned_by_stats(Some(2)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(1)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(3) .test_row_group_prune() @@ -1187,7 +1189,7 @@ async fn prune_binary_lt() { .with_matched_by_stats(Some(2)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(2)) .with_pruned_by_bloom_filter(Some(0)) // all backends from 'mixed' and 'all backends' .with_expected_rows(8) @@ -1279,7 +1281,7 @@ async fn prune_fixedsizebinary_lt() { .with_matched_by_stats(Some(1)) .with_pruned_by_stats(Some(2)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(1)) .with_pruned_by_bloom_filter(Some(0)) .with_expected_rows(2) .test_row_group_prune() @@ -1294,7 +1296,7 @@ async fn prune_fixedsizebinary_lt() { .with_matched_by_stats(Some(2)) .with_pruned_by_stats(Some(1)) .with_pruned_files(Some(0)) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(2)) .with_pruned_by_bloom_filter(Some(0)) // all backends from 'mixed' and 'all backends' .with_expected_rows(8) @@ -1362,7 +1364,7 @@ async fn test_row_group_with_null_values() { .with_pruned_files(Some(0)) .with_pruned_by_stats(Some(2)) .with_expected_rows(5) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(1)) .with_pruned_by_bloom_filter(Some(0)) .test_row_group_prune() .await; @@ -1376,7 +1378,7 @@ async fn test_row_group_with_null_values() { .with_pruned_files(Some(0)) .with_pruned_by_stats(Some(1)) .with_expected_rows(10) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(2)) .with_pruned_by_bloom_filter(Some(0)) .test_row_group_prune() .await; @@ -1390,7 +1392,7 @@ async fn test_row_group_with_null_values() { .with_pruned_files(Some(0)) .with_pruned_by_stats(Some(2)) .with_expected_rows(5) - .with_matched_by_bloom_filter(Some(0)) + .with_matched_by_bloom_filter(Some(1)) .with_pruned_by_bloom_filter(Some(0)) .test_row_group_prune() .await; diff --git a/datafusion/core/tests/parquet/schema_adapter.rs b/datafusion/core/tests/parquet/schema_adapter.rs index 40fc6176e212..0e76d626aac5 100644 --- a/datafusion/core/tests/parquet/schema_adapter.rs +++ b/datafusion/core/tests/parquet/schema_adapter.rs @@ -482,7 +482,7 @@ fn test_apply_schema_adapter_with_factory() { ])); // Create a parquet source - let source = ParquetSource::default(); + let source = ParquetSource::new(schema.clone()); // Create a file scan config with source that has a schema adapter factory let factory = Arc::new(PrefixAdapterFactory { @@ -491,12 +491,9 @@ fn test_apply_schema_adapter_with_factory() { let file_source = source.clone().with_schema_adapter_factory(factory).unwrap(); - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - schema.clone(), - file_source, - ) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), file_source) + .build(); // Apply schema adapter to a new source let result_source = source.apply_schema_adapter(&config).unwrap(); @@ -532,18 +529,15 @@ fn test_apply_schema_adapter_without_factory() { ])); // Create a parquet source - let source = ParquetSource::default(); + let source = ParquetSource::new(schema.clone()); // Convert to Arc let file_source: Arc = 
Arc::new(source.clone()); // Create a file scan config without a schema adapter factory - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - schema.clone(), - file_source, - ) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), file_source) + .build(); // Apply schema adapter function - should pass through the source unchanged let result_source = source.apply_schema_adapter(&config).unwrap(); diff --git a/datafusion/core/tests/parquet/schema_coercion.rs b/datafusion/core/tests/parquet/schema_coercion.rs index 59cbf4b0872e..51e5242cbafd 100644 --- a/datafusion/core/tests/parquet/schema_coercion.rs +++ b/datafusion/core/tests/parquet/schema_coercion.rs @@ -62,14 +62,10 @@ async fn multi_parquet_coercion() { Field::new("c2", DataType::Int32, true), Field::new("c3", DataType::Float64, true), ])); - let source = Arc::new(ParquetSource::default()); - let conf = FileScanConfigBuilder::new( - ObjectStoreUrl::local_filesystem(), - file_schema, - source, - ) - .with_file_group(file_group) - .build(); + let source = Arc::new(ParquetSource::new(file_schema.clone())); + let conf = FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), source) + .with_file_group(file_group) + .build(); let parquet_exec = DataSourceExec::from_data_source(conf); @@ -122,11 +118,10 @@ async fn multi_parquet_coercion_projection() { ])); let config = FileScanConfigBuilder::new( ObjectStoreUrl::local_filesystem(), - file_schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(file_schema)), ) .with_file_group(file_group) - .with_projection(Some(vec![1, 0, 2])) + .with_projection_indices(Some(vec![1, 0, 2])) .build(); let parquet_exec = DataSourceExec::from_data_source(config); diff --git a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs index 63111f43806b..aa5a2d053926 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_distribution.rs @@ -37,6 +37,7 @@ use datafusion::datasource::physical_plan::{CsvSource, ParquetSource}; use datafusion::datasource::source::DataSourceExec; use datafusion::datasource::MemTable; use datafusion::prelude::{SessionConfig, SessionContext}; +use datafusion_common::config::CsvOptions; use datafusion_common::error::Result; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::ScalarValue; @@ -66,9 +67,52 @@ use datafusion_physical_plan::projection::{ProjectionExec, ProjectionExpr}; use datafusion_physical_plan::sorts::sort_preserving_merge::SortPreservingMergeExec; use datafusion_physical_plan::union::UnionExec; use datafusion_physical_plan::{ - get_plan_string, DisplayAs, DisplayFormatType, ExecutionPlanProperties, - PlanProperties, Statistics, + displayable, DisplayAs, DisplayFormatType, ExecutionPlanProperties, PlanProperties, + Statistics, }; +use insta::Settings; + +/// Helper function to replace only the first occurrence of a regex pattern in a plan +/// Returns (captured_group_1, modified_string) +fn hide_first( + plan: &dyn ExecutionPlan, + regex: &str, + replacement: &str, +) -> (String, String) { + let plan_str = displayable(plan).indent(true).to_string(); + let pattern = regex::Regex::new(regex).unwrap(); + + if let Some(captures) = pattern.captures(&plan_str) { + let full_match = captures.get(0).unwrap(); + let captured_value = captures + .get(1) + .map(|m| m.as_str().to_string()) + 
.unwrap_or_default(); + let pos = full_match.start(); + let end_pos = full_match.end(); + let mut result = String::with_capacity(plan_str.len()); + result.push_str(&plan_str[..pos]); + result.push_str(replacement); + result.push_str(&plan_str[end_pos..]); + (captured_value, result) + } else { + (String::new(), plan_str) + } +} + +macro_rules! assert_plan { + ($plan: expr, @ $expected:literal) => { + insta::assert_snapshot!( + displayable($plan.as_ref()).indent(true).to_string(), + @ $expected + ) + }; + ($plan: expr, $another_plan: expr) => { + let plan1 = displayable($plan.as_ref()).indent(true).to_string(); + let plan2 = displayable($another_plan.as_ref()).indent(true).to_string(); + assert_eq!(plan1, plan2); + } +} /// Models operators like BoundedWindowExec that require an input /// ordering but is easy to construct @@ -186,8 +230,7 @@ fn parquet_exec_multiple_sorted( ) -> Arc { let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(schema())), ) .with_file_groups(vec![ FileGroup::new(vec![PartitionedFile::new("x".to_string(), 100)]), @@ -204,14 +247,19 @@ fn csv_exec() -> Arc { } fn csv_exec_with_sort(output_ordering: Vec) -> Arc { - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(CsvSource::new(false, b',', b'"')), - ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_output_ordering(output_ordering) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + Arc::new(CsvSource::new(schema()).with_csv_options(options)) + }) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_output_ordering(output_ordering) + .build(); DataSourceExec::from_data_source(config) } @@ -222,17 +270,22 @@ fn csv_exec_multiple() -> Arc { // Created a sorted parquet exec with multiple files fn csv_exec_multiple_sorted(output_ordering: Vec) -> Arc { - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(CsvSource::new(false, b',', b'"')), - ) - .with_file_groups(vec![ - FileGroup::new(vec![PartitionedFile::new("x".to_string(), 100)]), - FileGroup::new(vec![PartitionedFile::new("y".to_string(), 100)]), - ]) - .with_output_ordering(output_ordering) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + Arc::new(CsvSource::new(schema()).with_csv_options(options)) + }) + .with_file_groups(vec![ + FileGroup::new(vec![PartitionedFile::new("x".to_string(), 100)]), + FileGroup::new(vec![PartitionedFile::new("y".to_string(), 100)]), + ]) + .with_output_ordering(output_ordering) + .build(); DataSourceExec::from_data_source(config) } @@ -352,22 +405,6 @@ fn ensure_distribution_helper( ensure_distribution(distribution_context, &config).map(|item| item.data.plan) } -/// Test whether plan matches with expected plan -macro_rules! 
plans_matches_expected { - ($EXPECTED_LINES: expr, $PLAN: expr) => { - let physical_plan = $PLAN; - let actual = get_plan_string(&physical_plan); - - let expected_plan_lines: Vec<&str> = $EXPECTED_LINES - .iter().map(|s| *s).collect(); - - assert_eq!( - expected_plan_lines, actual, - "\n**Original Plan Mismatch\n\nexpected:\n\n{expected_plan_lines:#?}\nactual:\n\n{actual:#?}\n\n" - ); - } -} - fn test_suite_default_config_options() -> ConfigOptions { let mut config = ConfigOptions::new(); @@ -445,14 +482,11 @@ impl TestConfig { /// Perform a series of runs using the current [`TestConfig`], /// assert the expected plan result, /// and return the result plan (for potential subsequent runs). - fn run( + fn try_to_plan( &self, - expected_lines: &[&str], plan: Arc, optimizers_to_run: &[Run], ) -> Result> { - let expected_lines: Vec<&str> = expected_lines.to_vec(); - // Add the ancillary output requirements operator at the start: let optimizer = OutputRequirements::new_add_mode(); let mut optimized = optimizer.optimize(plan.clone(), &self.config)?; @@ -507,30 +541,16 @@ impl TestConfig { let optimizer = OutputRequirements::new_remove_mode(); let optimized = optimizer.optimize(optimized, &self.config)?; - // Now format correctly - let actual_lines = get_plan_string(&optimized); - - assert_eq!( - &expected_lines, &actual_lines, - "\n\nexpected:\n\n{expected_lines:#?}\nactual:\n\n{actual_lines:#?}\n\n" - ); - Ok(optimized) } -} -macro_rules! assert_plan_txt { - ($EXPECTED_LINES: expr, $PLAN: expr) => { - let expected_lines: Vec<&str> = $EXPECTED_LINES.iter().map(|s| *s).collect(); - // Now format correctly - let actual_lines = get_plan_string(&$PLAN); - - assert_eq!( - &expected_lines, &actual_lines, - "\n\nexpected:\n\n{:#?}\nactual:\n\n{:#?}\n\n", - expected_lines, actual_lines - ); - }; + fn to_plan( + &self, + plan: Arc, + optimizers_to_run: &[Run], + ) -> Arc { + self.try_to_plan(plan, optimizers_to_run).unwrap() + } } #[test] @@ -556,6 +576,8 @@ fn multi_hash_joins() -> Result<()> { JoinType::RightAnti, ]; + let settings = Settings::clone_current(); + // Join on (a == b1) let join_on = vec![( Arc::new(Column::new_with_schema("a", &schema()).unwrap()) as _, @@ -564,11 +586,17 @@ fn multi_hash_joins() -> Result<()> { for join_type in join_types { let join = hash_join_exec(left.clone(), right.clone(), &join_on, &join_type); - let join_plan = |shift| -> String { - format!("{}HashJoinExec: mode=Partitioned, join_type={join_type}, on=[(a@0, b1@1)]", " ".repeat(shift)) - }; - let join_plan_indent2 = join_plan(2); - let join_plan_indent4 = join_plan(4); + + let mut settings = settings.clone(); + settings.add_filter( + // join_type={} replace with join_type=... to avoid snapshot name issue + format!("join_type={join_type}").as_str(), + "join_type=...", + ); + + insta::allow_duplicates! 
{ + settings.bind( || { + match join_type { JoinType::Inner @@ -589,50 +617,52 @@ fn multi_hash_joins() -> Result<()> { &top_join_on, &join_type, ); - let top_join_plan = - format!("HashJoinExec: mode=Partitioned, join_type={join_type}, on=[(a@0, c@2)]"); - let expected = match join_type { + let test_config = TestConfig::default(); + let plan_distrib = test_config.to_plan(top_join.clone(), &DISTRIB_DISTRIB_SORT); + + match join_type { // Should include 3 RepartitionExecs - JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => vec![ - top_join_plan.as_str(), - &join_plan_indent2, - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ], + JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => { + + assert_plan!(plan_distrib, @r" + HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, c@2)] + HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, b1@1)] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); + }, // Should include 4 RepartitionExecs - _ => vec![ - top_join_plan.as_str(), - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - &join_plan_indent4, - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ], + _ => { + assert_plan!(plan_distrib, @r" + HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, c@2)] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 + HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, b1@1)] + 
RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); + }, }; - let test_config = TestConfig::default(); - test_config.run(&expected, top_join.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(&expected, top_join, &SORT_DISTRIB_DISTRIB)?; + + let plan_sort = test_config.to_plan(top_join, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); } JoinType::RightSemi | JoinType::RightAnti | JoinType::RightMark => {} } + + match join_type { JoinType::Inner | JoinType::Left @@ -650,55 +680,58 @@ fn multi_hash_joins() -> Result<()> { let top_join = hash_join_exec(join, parquet_exec(), &top_join_on, &join_type); - let top_join_plan = match join_type { - JoinType::RightSemi | JoinType::RightAnti => - format!("HashJoinExec: mode=Partitioned, join_type={join_type}, on=[(b1@1, c@2)]"), - _ => - format!("HashJoinExec: mode=Partitioned, join_type={join_type}, on=[(b1@6, c@2)]"), - }; - let expected = match join_type { + let test_config = TestConfig::default(); + let plan_distrib = test_config.to_plan(top_join.clone(), &DISTRIB_DISTRIB_SORT); + + match join_type { // Should include 3 RepartitionExecs - JoinType::Inner | JoinType::Right | JoinType::RightSemi | JoinType::RightAnti => - vec![ - top_join_plan.as_str(), - &join_plan_indent2, - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ], + JoinType::Inner | JoinType::Right => { + assert_plan!(parquet_exec(), @"DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet"); + }, + // Should include 3 RepartitionExecs but have a different "on" + JoinType::RightSemi | JoinType::RightAnti => { + assert_plan!(plan_distrib, @r" + HashJoinExec: mode=Partitioned, join_type=..., on=[(b1@1, c@2)] + HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, b1@1)] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: 
file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); + + } + // Should include 4 RepartitionExecs - _ => - vec![ - top_join_plan.as_str(), - " RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10", - &join_plan_indent4, - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ], + _ => { + assert_plan!(plan_distrib, @r" + HashJoinExec: mode=Partitioned, join_type=..., on=[(b1@6, c@2)] + RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10 + HashJoinExec: mode=Partitioned, join_type=..., on=[(a@0, b1@1)] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); + + }, }; - let test_config = TestConfig::default(); - test_config.run(&expected, top_join.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(&expected, top_join, &SORT_DISTRIB_DISTRIB)?; + + let plan_sort = test_config.to_plan(top_join, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); } JoinType::LeftSemi | JoinType::LeftAnti | JoinType::LeftMark => {} } + + }); + } } Ok(()) @@ -737,23 +770,24 @@ fn multi_joins_after_alias() -> Result<()> { ); // Output partition need to respect the Alias and should not introduce additional RepartitionExec - let expected = &[ - "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a1@0, c@2)]", - " ProjectionExec: expr=[a@0 as a1, a@0 as a2]", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)]", - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; let test_config = TestConfig::default(); - test_config.run(expected, top_join.clone(), &DISTRIB_DISTRIB_SORT)?; - 
test_config.run(expected, top_join, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(top_join.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!( + plan_distrib, + @r" + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a1@0, c@2)] + ProjectionExec: expr=[a@0 as a1, a@0 as a2] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + " + ); + let plan_sort = test_config.to_plan(top_join, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); // Join on (a2 == c) let top_join_on = vec![( @@ -764,23 +798,24 @@ fn multi_joins_after_alias() -> Result<()> { let top_join = hash_join_exec(projection, right, &top_join_on, &JoinType::Inner); // Output partition need to respect the Alias and should not introduce additional RepartitionExec - let expected = &[ - "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a2@1, c@2)]", - " ProjectionExec: expr=[a@0 as a1, a@0 as a2]", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)]", - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; let test_config = TestConfig::default(); - test_config.run(expected, top_join.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, top_join, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(top_join.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!( + plan_distrib, + @r" + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a2@1, c@2)] + ProjectionExec: expr=[a@0 as a1, a@0 as a2] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + " + ); + let plan_sort = test_config.to_plan(top_join, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -816,26 +851,26 @@ fn multi_joins_after_multi_alias() -> Result<()> { // The Column 'a' has different meaning now after the two Projections // The original Output partition can not satisfy the Join requirements and need to add an 
additional RepartitionExec - let expected = &[ - "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, c@2)]", - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " ProjectionExec: expr=[c1@0 as a]", - " ProjectionExec: expr=[c@2 as c1]", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)]", - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(expected, top_join.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, top_join, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(top_join.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!( + plan_distrib, + @r" + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, c@2)] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 + ProjectionExec: expr=[c1@0 as a] + ProjectionExec: expr=[c@2 as c1] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, b@1)] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b@1], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + " + ); + let plan_sort = test_config.to_plan(top_join, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -861,22 +896,26 @@ fn join_after_agg_alias() -> Result<()> { let join = hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner); // Only two RepartitionExecs added - let expected = &[ - "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a1@0, a2@0)]", - " AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[]", - " RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " AggregateExec: mode=FinalPartitioned, gby=[a2@0 as a2], aggr=[]", - " RepartitionExec: partitioning=Hash([a2@0], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[a@0 as a2], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; let test_config = TestConfig::default(); - test_config.run(expected, join.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, join, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(join.clone(), &DISTRIB_DISTRIB_SORT); + 
assert_plan!( + plan_distrib, + @r" + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a1@0, a2@0)] + AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[] + RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + AggregateExec: mode=FinalPartitioned, gby=[a2@0 as a2], aggr=[] + RepartitionExec: partitioning=Hash([a2@0], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[a@0 as a2], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + " + ); + let plan_sort = test_config.to_plan(join, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -914,23 +953,27 @@ fn hash_join_key_ordering() -> Result<()> { let join = hash_join_exec(left, right.clone(), &join_on, &JoinType::Inner); // Only two RepartitionExecs added - let expected = &[ - "HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b1@1, b@0), (a1@0, a@1)]", - " ProjectionExec: expr=[a1@1 as a1, b1@0 as b1]", - " AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[]", - " RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; let test_config = TestConfig::default(); - test_config.run(expected, join.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, join, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(join.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!( + plan_distrib, + @r" + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b1@1, b@0), (a1@0, a@1)] + ProjectionExec: expr=[a1@1 as a1, b1@0 as b1] + AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[] + RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[] + RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + " + ); + let plan_sort = test_config.to_plan(join, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -1034,30 +1077,31 @@ fn multi_hash_join_key_ordering() -> Result<()> { Arc::new(FilterExec::try_new(predicate, top_join)?); // The bottom joins' join key 
ordering is adjusted based on the top join. And the top join should not introduce additional RepartitionExec - let expected = &[ - "FilterExec: c@6 > 1", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(B@2, b1@6), (C@3, c@2), (AA@1, a1@5)]", - " ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C]", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b@1, b1@1), (c@2, c1@2), (a@0, a1@0)]", - " RepartitionExec: partitioning=Hash([b@1, c@2, a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([b1@1, c1@2, a1@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b@1, b1@1), (c@2, c1@2), (a@0, a1@0)]", - " RepartitionExec: partitioning=Hash([b@1, c@2, a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([b1@1, c1@2, a1@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; let test_config = TestConfig::default(); - test_config.run(expected, filter_top_join.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, filter_top_join, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = + test_config.to_plan(filter_top_join.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!( + plan_distrib, + @r" + FilterExec: c@6 > 1 + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(B@2, b1@6), (C@3, c@2), (AA@1, a1@5)] + ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b@1, b1@1), (c@2, c1@2), (a@0, a1@0)] + RepartitionExec: partitioning=Hash([b@1, c@2, a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1, c1@2, a1@0], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(b@1, b1@1), (c@2, c1@2), (a@0, a1@0)] + RepartitionExec: partitioning=Hash([b@1, c@2, a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1, c1@2, a1@0], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + " + ); + let plan_sort = test_config.to_plan(filter_top_join, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -1168,34 +1212,30 @@ fn reorder_join_keys_to_left_input() -> Result<()> { &top_join_on, &join_type, ); - let top_join_plan = - format!("HashJoinExec: mode=Partitioned, join_type={:?}, on=[(AA@1, a1@5), (B@2, b1@6), (C@3, c@2)]", 
&join_type); - let reordered = reorder_join_keys_to_inputs(top_join)?; + let reordered = reorder_join_keys_to_inputs(top_join).unwrap(); // The top joins' join key ordering is adjusted based on the children inputs. - let expected = &[ - top_join_plan.as_str(), - " ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C]", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a1@0), (b@1, b1@1), (c@2, c1@2)]", - " RepartitionExec: partitioning=Hash([a@0, b@1, c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([a1@0, b1@1, c1@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@2, c1@2), (b@1, b1@1), (a@0, a1@0)]", - " RepartitionExec: partitioning=Hash([c@2, b@1, a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([c1@2, b1@1, a1@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - - assert_plan_txt!(expected, reordered); + let (captured_join_type, modified_plan) = + hide_first(reordered.as_ref(), r"join_type=(\w+)", "join_type=..."); + assert_eq!(captured_join_type, join_type.to_string()); + + insta::allow_duplicates! 
{insta::assert_snapshot!(modified_plan, @r" +HashJoinExec: mode=Partitioned, join_type=..., on=[(AA@1, a1@5), (B@2, b1@6), (C@3, c@2)] + ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a1@0), (b@1, b1@1), (c@2, c1@2)] + RepartitionExec: partitioning=Hash([a@0, b@1, c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a1@0, b1@1, c1@2], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@2, c1@2), (b@1, b1@1), (a@0, a1@0)] + RepartitionExec: partitioning=Hash([c@2, b@1, a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c1@2, b1@1, a1@0], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +");} } Ok(()) @@ -1302,34 +1342,28 @@ fn reorder_join_keys_to_right_input() -> Result<()> { &top_join_on, &join_type, ); - let top_join_plan = - format!("HashJoinExec: mode=Partitioned, join_type={:?}, on=[(C@3, c@2), (B@2, b1@6), (AA@1, a1@5)]", &join_type); - let reordered = reorder_join_keys_to_inputs(top_join)?; + let reordered = reorder_join_keys_to_inputs(top_join).unwrap(); // The top joins' join key ordering is adjusted based on the children inputs. - let expected = &[ - top_join_plan.as_str(), - " ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C]", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a1@0), (b@1, b1@1)]", - " RepartitionExec: partitioning=Hash([a@0, b@1], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([a1@0, b1@1], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@2, c1@2), (b@1, b1@1), (a@0, a1@0)]", - " RepartitionExec: partitioning=Hash([c@2, b@1, a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([c1@2, b1@1, a1@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - - assert_plan_txt!(expected, reordered); + let (_, plan_str) = + hide_first(reordered.as_ref(), r"join_type=(\w+)", "join_type=..."); + insta::allow_duplicates! 
{insta::assert_snapshot!(plan_str, @r" +HashJoinExec: mode=Partitioned, join_type=..., on=[(C@3, c@2), (B@2, b1@6), (AA@1, a1@5)] + ProjectionExec: expr=[a@0 as A, a@0 as AA, b@1 as B, c@2 as C] + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(a@0, a1@0), (b@1, b1@1)] + RepartitionExec: partitioning=Hash([a@0, b@1], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([a1@0, b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c@2, c1@2), (b@1, b1@1), (a@0, a1@0)] + RepartitionExec: partitioning=Hash([c@2, b@1, a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c1@2, b1@1, a1@0], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +");} } Ok(()) @@ -1369,15 +1403,6 @@ fn multi_smj_joins() -> Result<()> { for join_type in join_types { let join = sort_merge_join_exec(left.clone(), right.clone(), &join_on, &join_type); - let join_plan = |shift| -> String { - format!( - "{}SortMergeJoin: join_type={join_type}, on=[(a@0, b1@1)]", - " ".repeat(shift) - ) - }; - let join_plan_indent2 = join_plan(2); - let join_plan_indent6 = join_plan(6); - let join_plan_indent10 = join_plan(10); // Top join on (a == c) let top_join_on = vec![( @@ -1386,235 +1411,220 @@ fn multi_smj_joins() -> Result<()> { )]; let top_join = sort_merge_join_exec(join.clone(), parquet_exec(), &top_join_on, &join_type); - let top_join_plan = - format!("SortMergeJoin: join_type={join_type}, on=[(a@0, c@2)]"); - - let expected = match join_type { - // Should include 6 RepartitionExecs (3 hash, 3 round-robin), 3 SortExecs - JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => - vec![ - top_join_plan.as_str(), - &join_plan_indent2, - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " SortExec: expr=[c@2 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ], - // Should include 7 RepartitionExecs (4 hash, 3 round-robin), 4 SortExecs - // Since ordering of the left child is not preserved after SortMergeJoin - // when mode is Right, RightSemi, RightAnti, Full - // - We need to add one additional SortExec after SortMergeJoin in contrast the test cases - // when mode is Inner, Left, LeftSemi, LeftAnti - // Similarly, since 
partitioning of the left side is not preserved - // when mode is Right, RightSemi, RightAnti, Full - // - We need to add one additional Hash Repartition after SortMergeJoin in contrast the test - // cases when mode is Inner, Left, LeftSemi, LeftAnti - _ => vec![ - top_join_plan.as_str(), - // Below 2 operators are differences introduced, when join mode is changed - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - &join_plan_indent6, - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " SortExec: expr=[c@2 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ], - }; - // TODO(wiedld): show different test result if enforce sorting first. - test_config.run(&expected, top_join.clone(), &DISTRIB_DISTRIB_SORT)?; - - let expected_first_sort_enforcement = match join_type { - // Should include 6 RepartitionExecs (3 hash, 3 round-robin), 3 SortExecs - JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => - vec![ - top_join_plan.as_str(), - &join_plan_indent2, - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ], - // Should include 8 RepartitionExecs (4 hash, 8 round-robin), 4 SortExecs - // Since ordering of the left child is not preserved after SortMergeJoin - // when mode is Right, RightSemi, RightAnti, Full - // - We need to add one additional SortExec after SortMergeJoin in contrast the test cases - // when mode is Inner, Left, LeftSemi, LeftAnti - // Similarly, since partitioning of the left side is not preserved - // when mode is Right, RightSemi, 
RightAnti, Full - // - We need to add one additional Hash Repartition and Roundrobin repartition after - // SortMergeJoin in contrast the test cases when mode is Inner, Left, LeftSemi, LeftAnti - _ => vec![ - top_join_plan.as_str(), - // Below 4 operators are differences introduced, when join mode is changed - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - &join_plan_indent10, - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ], - }; - // TODO(wiedld): show different test result if enforce distribution first. 
- test_config.run( - &expected_first_sort_enforcement, - top_join, - &SORT_DISTRIB_DISTRIB, - )?; - match join_type { - JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => { - // This time we use (b1 == c) for top join - // Join on (b1 == c) - let top_join_on = vec![( - Arc::new(Column::new_with_schema("b1", &join.schema()).unwrap()) as _, - Arc::new(Column::new_with_schema("c", &schema()).unwrap()) as _, - )]; - let top_join = - sort_merge_join_exec(join, parquet_exec(), &top_join_on, &join_type); - let top_join_plan = - format!("SortMergeJoin: join_type={join_type}, on=[(b1@6, c@2)]"); - - let expected = match join_type { - // Should include 6 RepartitionExecs(3 hash, 3 round-robin) and 3 SortExecs - JoinType::Inner | JoinType::Right => vec![ - top_join_plan.as_str(), - &join_plan_indent2, - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " SortExec: expr=[c@2 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ], - // Should include 7 RepartitionExecs (4 hash, 3 round-robin) and 4 SortExecs - JoinType::Left | JoinType::Full => vec![ - top_join_plan.as_str(), - " SortExec: expr=[b1@6 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10", - &join_plan_indent6, - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " SortExec: expr=[c@2 ASC], preserve_partitioning=[true]", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ], - // this match arm cannot be reached - _ => unreachable!() - }; - // TODO(wiedld): show different test result if enforce sorting first. 
- test_config.run(&expected, top_join.clone(), &DISTRIB_DISTRIB_SORT)?; - - let expected_first_sort_enforcement = match join_type { - // Should include 6 RepartitionExecs (3 of them preserves order) and 3 SortExecs - JoinType::Inner | JoinType::Right => vec![ - top_join_plan.as_str(), - &join_plan_indent2, - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ], - // Should include 8 RepartitionExecs (4 of them preserves order) and 4 SortExecs - JoinType::Left | JoinType::Full => vec![ - top_join_plan.as_str(), - " RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@6 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[b1@6 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - &join_plan_indent10, - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=10, preserve_order=true, sort_exprs=b1@1 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]", - " ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=c@2 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ], - // this match arm cannot be reached - _ => unreachable!() - }; + let mut settings = Settings::clone_current(); + settings.add_filter(&format!("join_type={join_type}"), "join_type=..."); + + #[rustfmt::skip] + insta::allow_duplicates! 
{ + settings.bind(|| { + let plan_distrib = test_config.to_plan(top_join.clone(), &DISTRIB_DISTRIB_SORT); + + match join_type { + // Should include 6 RepartitionExecs (3 hash, 3 round-robin), 3 SortExecs + JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => { + assert_plan!(plan_distrib, @r" +SortMergeJoin: join_type=..., on=[(a@0, c@2)] + SortMergeJoin: join_type=..., on=[(a@0, b1@1)] + SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + SortExec: expr=[b1@1 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + SortExec: expr=[c@2 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + } + // Should include 7 RepartitionExecs (4 hash, 3 round-robin), 4 SortExecs + // Since ordering of the left child is not preserved after SortMergeJoin + // when mode is Right, RightSemi, RightAnti, Full + // - We need to add one additional SortExec after SortMergeJoin in contrast the test cases + // when mode is Inner, Left, LeftSemi, LeftAnti + // Similarly, since partitioning of the left side is not preserved + // when mode is Right, RightSemi, RightAnti, Full + // - We need to add one additional Hash Repartition after SortMergeJoin in contrast the test + // cases when mode is Inner, Left, LeftSemi, LeftAnti + _ => { + assert_plan!(plan_distrib, @r" +SortMergeJoin: join_type=..., on=[(a@0, c@2)] + SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 + SortMergeJoin: join_type=..., on=[(a@0, b1@1)] + SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + SortExec: expr=[b1@1 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + SortExec: expr=[c@2 ASC], preserve_partitioning=[true] + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + } + } - // TODO(wiedld): show different test result if enforce distribution first. - test_config.run( - &expected_first_sort_enforcement, - top_join, - &SORT_DISTRIB_DISTRIB, - )?; - } - _ => {} + let plan_sort = test_config.to_plan(top_join.clone(), &SORT_DISTRIB_DISTRIB); + + match join_type { + // Should include 6 RepartitionExecs (3 hash, 3 round-robin), 3 SortExecs + JoinType::Inner | JoinType::Left | JoinType::LeftSemi | JoinType::LeftAnti => { + // TODO(wiedld): show different test result if enforce distribution first. 
+ assert_plan!(plan_sort, @r"
+SortMergeJoin: join_type=..., on=[(a@0, c@2)]
+ SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
+ RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
+ SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1
+ SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]
+ ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1
+ SortExec: expr=[c@2 ASC], preserve_partitioning=[false]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+");
+ }
+ // Should include 8 RepartitionExecs (4 hash, 8 round-robin), 4 SortExecs
+ // Since ordering of the left child is not preserved after SortMergeJoin
+ // when mode is Right, RightSemi, RightAnti, Full
+ // - We need to add one additional SortExec after SortMergeJoin in contrast to the test cases
+ // when mode is Inner, Left, LeftSemi, LeftAnti
+ // Similarly, since partitioning of the left side is not preserved
+ // when mode is Right, RightSemi, RightAnti, Full
+ // - We need to add one additional Hash Repartition and Roundrobin repartition after
+ // SortMergeJoin in contrast to the test cases when mode is Inner, Left, LeftSemi, LeftAnti
+ _ => {
+ // TODO(wiedld): show different test result if enforce distribution first.
+ assert_plan!(plan_sort, @r"
+SortMergeJoin: join_type=..., on=[(a@0, c@2)]
+ RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
+ SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
+ CoalescePartitionsExec
+ SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
+ RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
+ SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1
+ SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]
+ ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1
+ SortExec: expr=[c@2 ASC], preserve_partitioning=[false]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+");
+ }
+ }
+
+ match join_type {
+ JoinType::Inner | JoinType::Left | JoinType::Right | JoinType::Full => {
+ // This time we use (b1 == c) for top join
+ // Join on (b1 == c)
+ let top_join_on = vec![(
+ Arc::new(Column::new_with_schema("b1", &join.schema()).unwrap()) as _,
+ Arc::new(Column::new_with_schema("c", &schema()).unwrap()) as _,
+ )];
+ let top_join = sort_merge_join_exec(join, parquet_exec(), &top_join_on, &join_type);
+
+ let plan_distrib = test_config.to_plan(top_join.clone(), &DISTRIB_DISTRIB_SORT);
+
+ match join_type {
+ // Should include 6 RepartitionExecs(3 hash, 3 round-robin) and 3 SortExecs
+ JoinType::Inner | JoinType::Right => {
+ // TODO(wiedld): show different test result if enforce sorting first.
+ assert_plan!(plan_distrib, @r"
+SortMergeJoin: join_type=..., on=[(b1@6, c@2)]
+ SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
+ SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1
+ ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ SortExec: expr=[c@2 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+");
+ }
+ // Should include 7 RepartitionExecs (4 hash, 3 round-robin) and 4 SortExecs
+ JoinType::Left | JoinType::Full => {
+ // TODO(wiedld): show different test result if enforce sorting first.
+ assert_plan!(plan_distrib, @r"
+SortMergeJoin: join_type=..., on=[(b1@6, c@2)]
+ SortExec: expr=[b1@6 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=10
+ SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
+ SortExec: expr=[a@0 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ SortExec: expr=[b1@1 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1
+ ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ SortExec: expr=[c@2 ASC], preserve_partitioning=[true]
+ RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+");
+ }
+ // this match arm cannot be reached
+ _ => unreachable!()
+ }
+
+ let plan_sort = test_config.to_plan(top_join, &SORT_DISTRIB_DISTRIB);
+
+ match join_type {
+ // Should include 6 RepartitionExecs (3 of them preserve order) and 3 SortExecs
+ JoinType::Inner | JoinType::Right => {
+ // TODO(wiedld): show different test result if enforce distribution first.
+ assert_plan!(plan_sort, @r"
+SortMergeJoin: join_type=..., on=[(b1@6, c@2)]
+ SortMergeJoin: join_type=..., on=[(a@0, b1@1)]
+ RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1
+ SortExec: expr=[a@0 ASC], preserve_partitioning=[false]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1
+ SortExec: expr=[b1@1 ASC], preserve_partitioning=[false]
+ ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+ RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1
+ SortExec: expr=[c@2 ASC], preserve_partitioning=[false]
+ DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet
+");
+ }
+ // Should include 8 RepartitionExecs (4 of them preserve order) and 4 SortExecs
+ JoinType::Left | JoinType::Full => {
+ // TODO(wiedld): show different test result if enforce distribution first.
+ assert_plan!(plan_sort, @r" +SortMergeJoin: join_type=..., on=[(b1@6, c@2)] + RepartitionExec: partitioning=Hash([b1@6], 10), input_partitions=1 + SortExec: expr=[b1@6 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + SortMergeJoin: join_type=..., on=[(a@0, b1@1)] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=1 + SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b1@1], 10), input_partitions=1 + SortExec: expr=[b1@1 ASC], preserve_partitioning=[false] + ProjectionExec: expr=[a@0 as a1, b@1 as b1, c@2 as c1, d@3 as d1, e@4 as e1] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=1 + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + } + // this match arm cannot be reached + _ => unreachable!() + } + } + _ => {} + } + }); } } - Ok(()) } @@ -1670,52 +1680,50 @@ fn smj_join_key_ordering() -> Result<()> { // Test: run EnforceDistribution, then EnforceSort. // Only two RepartitionExecs added - let expected = &[ - "SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)]", - " SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[true]", - " ProjectionExec: expr=[a1@0 as a3, b1@1 as b3]", - " ProjectionExec: expr=[a1@1 as a1, b1@0 as b1]", - " AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[]", - " RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " SortExec: expr=[b2@1 ASC, a2@0 ASC], preserve_partitioning=[true]", - " ProjectionExec: expr=[a@1 as a2, b@0 as b2]", - " AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - test_config.run(expected, join.clone(), &DISTRIB_DISTRIB_SORT)?; + let plan_distrib = test_config.to_plan(join.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, @r" +SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)] + SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[true] + ProjectionExec: expr=[a1@0 as a3, b1@1 as b3] + ProjectionExec: expr=[a1@1 as a1, b1@0 as b1] + AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[] + RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + SortExec: expr=[b2@1 ASC, a2@0 ASC], preserve_partitioning=[true] + ProjectionExec: expr=[a@1 as a2, b@0 as b2] + AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[] + RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10 + 
AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); // Test: result IS DIFFERENT, if EnforceSorting is run first: - let expected_first_sort_enforcement = &[ - "SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)]", - " RepartitionExec: partitioning=Hash([b3@1, a3@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b3@1 ASC, a3@0 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " ProjectionExec: expr=[a1@0 as a3, b1@1 as b3]", - " ProjectionExec: expr=[a1@1 as a1, b1@0 as b1]", - " AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[]", - " RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " RepartitionExec: partitioning=Hash([b2@1, a2@0], 10), input_partitions=10, preserve_order=true, sort_exprs=b2@1 ASC, a2@0 ASC", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[b2@1 ASC, a2@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " ProjectionExec: expr=[a@1 as a2, b@0 as b2]", - " AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - test_config.run(expected_first_sort_enforcement, join, &SORT_DISTRIB_DISTRIB)?; + let plan_sort = test_config.to_plan(join, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_sort, @r" +SortMergeJoin: join_type=Inner, on=[(b3@1, b2@1), (a3@0, a2@0)] + RepartitionExec: partitioning=Hash([b3@1, a3@0], 10), input_partitions=1 + SortExec: expr=[b3@1 ASC, a3@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + ProjectionExec: expr=[a1@0 as a3, b1@1 as b3] + ProjectionExec: expr=[a1@1 as a1, b1@0 as b1] + AggregateExec: mode=FinalPartitioned, gby=[b1@0 as b1, a1@1 as a1], aggr=[] + RepartitionExec: partitioning=Hash([b1@0, a1@1], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[b@1 as b1, a@0 as a1], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + RepartitionExec: partitioning=Hash([b2@1, a2@0], 10), input_partitions=1 + SortExec: expr=[b2@1 ASC, a2@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + ProjectionExec: expr=[a@1 as a2, b@0 as b2] + AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], aggr=[] + RepartitionExec: partitioning=Hash([b@0, a@1], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); Ok(()) } @@ -1744,13 +1752,14 @@ fn merge_does_not_need_sort() -> 
Result<()> { // // The optimizer should not add an additional SortExec as the // data is already sorted - let expected = &[ - "SortPreservingMergeExec: [a@0 ASC]", - " CoalesceBatchesExec: target_batch_size=4096", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet", - ]; let test_config = TestConfig::default(); - test_config.run(expected, exec.clone(), &DISTRIB_DISTRIB_SORT)?; + let plan_distrib = test_config.to_plan(exec.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +SortPreservingMergeExec: [a@0 ASC] + CoalesceBatchesExec: target_batch_size=4096 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet +"); // Test: result IS DIFFERENT, if EnforceSorting is run first: // @@ -1758,13 +1767,14 @@ fn merge_does_not_need_sort() -> Result<()> { // (according to flag: PREFER_EXISTING_SORT) // hence in this case ordering lost during CoalescePartitionsExec and re-introduced with // SortExec at the top. - let expected_first_sort_enforcement = &[ - "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " CoalesceBatchesExec: target_batch_size=4096", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet", - ]; - test_config.run(expected_first_sort_enforcement, exec, &SORT_DISTRIB_DISTRIB)?; + let plan_sort = test_config.to_plan(exec, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_sort, + @r" +SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + CoalesceBatchesExec: target_batch_size=4096 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet +"); Ok(()) } @@ -1790,25 +1800,26 @@ fn union_to_interleave() -> Result<()> { aggregate_exec_with_alias(plan, vec![("a1".to_string(), "a2".to_string())]); // Only two RepartitionExecs added, no final RepartitionExec required - let expected = &[ - "AggregateExec: mode=FinalPartitioned, gby=[a2@0 as a2], aggr=[]", - " AggregateExec: mode=Partial, gby=[a1@0 as a2], aggr=[]", - " InterleaveExec", - " AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[]", - " RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[]", - " RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" + AggregateExec: mode=FinalPartitioned, gby=[a2@0 as a2], aggr=[] + AggregateExec: mode=Partial, gby=[a1@0 as a2], aggr=[] + InterleaveExec + AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[] + RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10 + 
AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[] + RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -1834,28 +1845,29 @@ fn union_not_to_interleave() -> Result<()> { aggregate_exec_with_alias(plan, vec![("a1".to_string(), "a2".to_string())]); // Only two RepartitionExecs added, no final RepartitionExec required - let expected = &[ - "AggregateExec: mode=FinalPartitioned, gby=[a2@0 as a2], aggr=[]", - " RepartitionExec: partitioning=Hash([a2@0], 10), input_partitions=20", - " AggregateExec: mode=Partial, gby=[a1@0 as a2], aggr=[]", - " UnionExec", - " AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[]", - " RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[]", - " RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - // TestConfig: Prefer existing union. 
let test_config = TestConfig::default().with_prefer_existing_union(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" + AggregateExec: mode=FinalPartitioned, gby=[a2@0 as a2], aggr=[] + RepartitionExec: partitioning=Hash([a2@0], 10), input_partitions=20 + AggregateExec: mode=Partial, gby=[a1@0 as a2], aggr=[] + UnionExec + AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[] + RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + AggregateExec: mode=FinalPartitioned, gby=[a1@0 as a1], aggr=[] + RepartitionExec: partitioning=Hash([a1@0], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[a@0 as a1], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -1865,17 +1877,18 @@ fn added_repartition_to_single_partition() -> Result<()> { let alias = vec![("a".to_string(), "a".to_string())]; let plan = aggregate_exec_with_alias(parquet_exec(), alias); - let expected = [ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(&expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(&expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -1885,18 +1898,19 @@ fn repartition_deepest_node() -> Result<()> { let alias = vec![("a".to_string(), "a".to_string())]; let plan = aggregate_exec_with_alias(filter_exec(parquet_exec()), alias); - let expected = &[ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = 
test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -1905,19 +1919,20 @@ fn repartition_deepest_node() -> Result<()> { fn repartition_unsorted_limit() -> Result<()> { let plan = limit_exec(filter_exec(parquet_exec())); - let expected = &[ - "GlobalLimitExec: skip=0, fetch=100", - " CoalescePartitionsExec", - " LocalLimitExec: fetch=100", - " FilterExec: c@2 = 0", - // nothing sorts the data, so the local limit doesn't require sorted data either - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" + GlobalLimitExec: skip=0, fetch=100 + CoalescePartitionsExec + LocalLimitExec: fetch=100 + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); + // nothing sorts the data, so the local limit doesn't require sorted data either + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -1932,17 +1947,18 @@ fn repartition_sorted_limit() -> Result<()> { .into(); let plan = limit_exec(sort_exec(sort_key, parquet_exec())); - let expected = &[ - "GlobalLimitExec: skip=0, fetch=100", - " LocalLimitExec: fetch=100", - // data is sorted so can't repartition here - " SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +GlobalLimitExec: skip=0, fetch=100 + LocalLimitExec: fetch=100 + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + // data is sorted so can't repartition here + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -1960,19 +1976,20 @@ fn repartition_sorted_limit_with_filter() -> Result<()> { sort_key, ); - let expected = &[ - "SortRequiredExec: [c@2 ASC]", - " FilterExec: c@2 = 0", - // We can use repartition here, ordering requirement by SortRequiredExec - // is still satisfied. 
- " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +SortRequiredExec: [c@2 ASC] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + // We can use repartition here, ordering requirement by SortRequiredExec + // is still satisfied. + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -1985,26 +2002,28 @@ fn repartition_ignores_limit() -> Result<()> { alias, ); - let expected = &[ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " GlobalLimitExec: skip=0, fetch=100", - " CoalescePartitionsExec", - " LocalLimitExec: fetch=100", - " FilterExec: c@2 = 0", - // repartition should happen prior to the filter to maximize parallelism - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " GlobalLimitExec: skip=0, fetch=100", - " LocalLimitExec: fetch=100", - // Expect no repartition to happen for local limit - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + GlobalLimitExec: skip=0, fetch=100 + CoalescePartitionsExec + LocalLimitExec: fetch=100 + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + GlobalLimitExec: skip=0, fetch=100 + LocalLimitExec: fetch=100 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + // repartition should happen prior to the filter to maximize parallelism + // Expect no repartition to happen for local limit (DataSourceExec) + + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -2013,19 +2032,20 @@ fn repartition_ignores_limit() -> Result<()> { fn repartition_ignores_union() -> Result<()> { let plan = union_exec(vec![parquet_exec(); 5]); - let expected = &[ - "UnionExec", - // Expect no repartition of DataSourceExec - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, 
projection=[a, b, c, d, e], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + // Expect no repartition of DataSourceExec + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -2041,15 +2061,15 @@ fn repartition_through_sort_preserving_merge() -> Result<()> { .into(); let plan = sort_preserving_merge_exec(sort_key, parquet_exec()); - // need resort as the data was not sorted correctly - let expected = &[ - "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -2068,24 +2088,25 @@ fn repartition_ignores_sort_preserving_merge() -> Result<()> { parquet_exec_multiple_sorted(vec![sort_key]), ); + let test_config = TestConfig::default(); + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); // Test: run EnforceDistribution, then EnforceSort - // + assert_plan!(plan_distrib, + @r" +SortPreservingMergeExec: [c@2 ASC] + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet +"); // should not sort (as the data was already sorted) // should not repartition, since increased parallelism is not beneficial for SortPReservingMerge - let expected = &[ - "SortPreservingMergeExec: [c@2 ASC]", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; // Test: result IS DIFFERENT, if EnforceSorting is run first: - let expected_first_sort_enforcement = &[ - "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - test_config.run(expected_first_sort_enforcement, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_sort = 
test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_sort, + @r" +SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet +"); Ok(()) } @@ -2105,27 +2126,29 @@ fn repartition_ignores_sort_preserving_merge_with_union() -> Result<()> { ]); let plan = sort_preserving_merge_exec(sort_key, input); + let test_config = TestConfig::default(); + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); // Test: run EnforceDistribution, then EnforceSort. + assert_plan!(plan_distrib, + @r" +SortPreservingMergeExec: [c@2 ASC] + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet +"); // // should not repartition / sort (as the data was already sorted) - let expected = &[ - "SortPreservingMergeExec: [c@2 ASC]", - " UnionExec", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; // test: result IS DIFFERENT, if EnforceSorting is run first: - let expected_first_sort_enforcement = &[ - "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " UnionExec", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - test_config.run(expected_first_sort_enforcement, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_sort, + @r" +SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet +"); Ok(()) } @@ -2149,16 +2172,17 @@ fn repartition_does_not_destroy_sort() -> Result<()> { // TestConfig: Prefer existing sort. 
let test_config = TestConfig::default().with_prefer_existing_sort(); + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +SortRequiredExec: [d@3 ASC] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[d@3 ASC], file_type=parquet +"); // during repartitioning ordering is preserved - let expected = &[ - "SortRequiredExec: [d@3 ASC]", - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[d@3 ASC], file_type=parquet", - ]; - - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -2187,22 +2211,25 @@ fn repartition_does_not_destroy_sort_more_complex() -> Result<()> { let input2 = filter_exec(parquet_exec()); let plan = union_exec(vec![input1, input2]); + let test_config = TestConfig::default(); + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +UnionExec + SortRequiredExec: [c@2 ASC] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + // union input 1: no repartitioning + // union input 2: should repartition + // // should not repartition below the SortRequired as that // branch doesn't benefit from increased parallelism - let expected = &[ - "UnionExec", - // union input 1: no repartitioning - " SortRequiredExec: [c@2 ASC]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - // union input 2: should repartition - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -2227,28 +2254,28 @@ fn repartition_transitively_with_projection() -> Result<()> { .into(); let plan = sort_preserving_merge_exec(sort_key, proj); - // Test: run EnforceDistribution, then EnforceSort. 
- let expected = &[ - "SortPreservingMergeExec: [sum@0 ASC]", - " SortExec: expr=[sum@0 ASC], preserve_partitioning=[true]", - // Since this projection is not trivial, increasing parallelism is beneficial - " ProjectionExec: expr=[a@0 + b@1 as sum]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +SortPreservingMergeExec: [sum@0 ASC] + SortExec: expr=[sum@0 ASC], preserve_partitioning=[true] + ProjectionExec: expr=[a@0 + b@1 as sum] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); // Test: result IS DIFFERENT, if EnforceSorting is run first: - let expected_first_sort_enforcement = &[ - "SortExec: expr=[sum@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - // Since this projection is not trivial, increasing parallelism is beneficial - " ProjectionExec: expr=[a@0 + b@1 as sum]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - test_config.run(expected_first_sort_enforcement, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_sort, + @r" +SortExec: expr=[sum@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + ProjectionExec: expr=[a@0 + b@1 as sum] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + // Since this projection is not trivial, increasing parallelism is beneficial Ok(()) } @@ -2275,16 +2302,18 @@ fn repartition_ignores_transitively_with_projection() -> Result<()> { sort_key, ); - let expected = &[ - "SortRequiredExec: [c@2 ASC]", - // Since this projection is trivial, increasing parallelism is not beneficial - " ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c]", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +SortRequiredExec: [c@2 ASC] + ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c] + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet +"); + // Since this projection is trivial, increasing parallelism is not beneficial + + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -2310,16 +2339,17 @@ fn repartition_transitively_past_sort_with_projection() -> Result<()> { ), ); - let expected = &[ - "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - // Since this projection is trivial, increasing parallelism is not beneficial - " ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, 
c, d, e], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + // Since this projection is trivial, increasing parallelism is not beneficial + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -2334,28 +2364,30 @@ fn repartition_transitively_past_sort_with_filter() -> Result<()> { .into(); let plan = sort_exec(sort_key, filter_exec(parquet_exec())); - // Test: run EnforceDistribution, then EnforceSort. - let expected = &[ - "SortPreservingMergeExec: [a@0 ASC]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - // Expect repartition on the input to the sort (as it can benefit from additional parallelism) - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +SortPreservingMergeExec: [a@0 ASC] + SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + + // Expect repartition on the input to the sort (as it can benefit from additional parallelism) // Test: result IS DIFFERENT, if EnforceSorting is run first: - let expected_first_sort_enforcement = &[ - "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " FilterExec: c@2 = 0", - // Expect repartition on the input of the filter (as it can benefit from additional parallelism) - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - test_config.run(expected_first_sort_enforcement, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_sort, + @r" +SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + // Expect repartition on the input of the filter (as it can benefit from additional parallelism) Ok(()) } @@ -2381,30 +2413,32 @@ fn repartition_transitively_past_sort_with_projection_and_filter() -> Result<()> ), ); - // Test: run EnforceDistribution, then EnforceSort. 
- let expected = &[ - "SortPreservingMergeExec: [a@0 ASC]", - // Expect repartition on the input to the sort (as it can benefit from additional parallelism) - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c]", - " FilterExec: c@2 = 0", - // repartition is lowest down - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; let test_config = TestConfig::default(); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +SortPreservingMergeExec: [a@0 ASC] + SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + + // Expect repartition on the input to the sort (as it can benefit from additional parallelism) + // repartition is lowest down // Test: result IS DIFFERENT, if EnforceSorting is run first: - let expected_first_sort_enforcement = &[ - "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c]", - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - test_config.run(expected_first_sort_enforcement, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_sort, + @r" +SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); Ok(()) } @@ -2420,28 +2454,29 @@ fn parallelization_single_partition() -> Result<()> { .with_query_execution_partitions(2); // Test: with parquet - let expected_parquet = [ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - " DataSourceExec: file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - test_config.run( - &expected_parquet, - plan_parquet.clone(), - &DISTRIB_DISTRIB_SORT, - )?; - test_config.run(&expected_parquet, plan_parquet, &SORT_DISTRIB_DISTRIB)?; + let plan_parquet_distrib = + test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_parquet_distrib, + @r" +AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e], file_type=parquet +"); + let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_parquet_distrib, plan_parquet_sort); // Test: with csv - let expected_csv = [ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " RepartitionExec: 
partitioning=Hash([a@0], 2), input_partitions=2", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - " DataSourceExec: file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - ]; - test_config.run(&expected_csv, plan_csv.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(&expected_csv, plan_csv, &SORT_DISTRIB_DISTRIB)?; + let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_csv_distrib, + @r" +AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false +"); + let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_csv_distrib, plan_csv_sort); Ok(()) } @@ -2465,40 +2500,31 @@ fn parallelization_multiple_files() -> Result<()> { // The groups must have only contiguous ranges of rows from the same file // if any group has rows from multiple files, the data is no longer sorted destroyed // https://github.com/apache/datafusion/issues/8451 - let expected_with_3_target_partitions = [ - "SortRequiredExec: [a@0 ASC]", - " FilterExec: c@2 = 0", - " DataSourceExec: file_groups={3 groups: [[x:0..50], [y:0..100], [x:50..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet", - ]; let test_config_concurrency_3 = test_config.clone().with_query_execution_partitions(3); - test_config_concurrency_3.run( - &expected_with_3_target_partitions, - plan.clone(), - &DISTRIB_DISTRIB_SORT, - )?; - test_config_concurrency_3.run( - &expected_with_3_target_partitions, - plan.clone(), - &SORT_DISTRIB_DISTRIB, - )?; + let plan_3_distrib = + test_config_concurrency_3.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_3_distrib, + @r" +SortRequiredExec: [a@0 ASC] + FilterExec: c@2 = 0 + DataSourceExec: file_groups={3 groups: [[x:0..50], [y:0..100], [x:50..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet +"); + let plan_3_sort = + test_config_concurrency_3.to_plan(plan.clone(), &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_3_distrib, plan_3_sort); - let expected_with_8_target_partitions = [ - "SortRequiredExec: [a@0 ASC]", - " FilterExec: c@2 = 0", - " DataSourceExec: file_groups={8 groups: [[x:0..25], [y:0..25], [x:25..50], [y:25..50], [x:50..75], [y:50..75], [x:75..100], [y:75..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet", - ]; let test_config_concurrency_8 = test_config.with_query_execution_partitions(8); - test_config_concurrency_8.run( - &expected_with_8_target_partitions, - plan.clone(), - &DISTRIB_DISTRIB_SORT, - )?; - test_config_concurrency_8.run( - &expected_with_8_target_partitions, - plan, - &SORT_DISTRIB_DISTRIB, - )?; + let plan_8_distrib = + test_config_concurrency_8.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_8_distrib, + @r" +SortRequiredExec: [a@0 ASC] + FilterExec: c@2 = 0 + DataSourceExec: file_groups={8 groups: [[x:0..25], [y:0..25], [x:25..50], [y:25..50], [x:50..75], [y:50..75], [x:75..100], [y:75..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet +"); + let plan_8_sort = test_config_concurrency_8.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_8_distrib, plan_8_sort); Ok(()) } @@ -2515,46 +2541,55 @@ fn 
parallelization_compressed_csv() -> Result<()> { FileCompressionType::UNCOMPRESSED, ]; - let expected_not_partitioned = [ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - ]; - - let expected_partitioned = [ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - " DataSourceExec: file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - ]; + #[rustfmt::skip] + insta::allow_duplicates! { + for compression_type in compression_types { + let plan = aggregate_exec_with_alias( + DataSourceExec::from_data_source( + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + Arc::new(CsvSource::new(schema()).with_csv_options(options)) + }) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_file_compression_type(compression_type) + .build(), + ), + vec![("a".to_string(), "a".to_string())], + ); + let test_config = TestConfig::default() + .with_query_execution_partitions(2) + .with_prefer_repartition_file_scans(10); + + let plan_distrib = test_config.to_plan(plan.clone(), &DISTRIB_DISTRIB_SORT); + if compression_type.is_compressed() { + // Compressed files cannot be partitioned + assert_plan!(plan_distrib, + @r" +AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false +"); + } else { + // Uncompressed files can be partitioned + assert_plan!(plan_distrib, + @r" +AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={2 groups: [[x:0..50], [x:50..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false +"); + } - for compression_type in compression_types { - let expected = if compression_type.is_compressed() { - &expected_not_partitioned[..] - } else { - &expected_partitioned[..] 
- }; - - let plan = aggregate_exec_with_alias( - DataSourceExec::from_data_source( - FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(CsvSource::new(false, b',', b'"')), - ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_file_compression_type(compression_type) - .build(), - ), - vec![("a".to_string(), "a".to_string())], - ); - let test_config = TestConfig::default() - .with_query_execution_partitions(2) - .with_prefer_repartition_file_scans(10); - test_config.run(expected, plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, plan, &SORT_DISTRIB_DISTRIB)?; + let plan_sort = test_config.to_plan(plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); + } } Ok(()) } @@ -2570,30 +2605,30 @@ fn parallelization_two_partitions() -> Result<()> { .with_prefer_repartition_file_scans(10); // Test: with parquet - let expected_parquet = [ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - // Plan already has two partitions - " DataSourceExec: file_groups={2 groups: [[x:0..100], [y:0..100]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - test_config.run( - &expected_parquet, - plan_parquet.clone(), - &DISTRIB_DISTRIB_SORT, - )?; - test_config.run(&expected_parquet, plan_parquet, &SORT_DISTRIB_DISTRIB)?; + let plan_parquet_distrib = + test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_parquet_distrib, + @r" +AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={2 groups: [[x:0..100], [y:0..100]]}, projection=[a, b, c, d, e], file_type=parquet +"); + // Plan already has two partitions + let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_parquet_distrib, plan_parquet_sort); // Test: with csv - let expected_csv = [ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - // Plan already has two partitions - " DataSourceExec: file_groups={2 groups: [[x:0..100], [y:0..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - ]; - test_config.run(&expected_csv, plan_csv.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(&expected_csv, plan_csv, &SORT_DISTRIB_DISTRIB)?; + let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_csv_distrib, @r" +AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 2), input_partitions=2 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={2 groups: [[x:0..100], [y:0..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false +"); + // Plan already has two partitions + let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_csv_distrib, plan_csv_sort); Ok(()) } @@ -2609,30 +2644,32 @@ fn parallelization_two_partitions_into_four() -> Result<()> { .with_prefer_repartition_file_scans(10); // Test: with parquet - let expected_parquet = [ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([a@0], 4), 
input_partitions=4", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - // Multiple source files split across partitions - " DataSourceExec: file_groups={4 groups: [[x:0..50], [x:50..100], [y:0..50], [y:50..100]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - test_config.run( - &expected_parquet, - plan_parquet.clone(), - &DISTRIB_DISTRIB_SORT, - )?; - test_config.run(&expected_parquet, plan_parquet, &SORT_DISTRIB_DISTRIB)?; + let plan_parquet_distrib = + test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); + // Multiple source files split across partitions + assert_plan!(plan_parquet_distrib, + @r" +AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={4 groups: [[x:0..50], [x:50..100], [y:0..50], [y:50..100]]}, projection=[a, b, c, d, e], file_type=parquet +"); + // Multiple source files split across partitions + let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_parquet_distrib, plan_parquet_sort); // Test: with csv - let expected_csv = [ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - // Multiple source files split across partitions - " DataSourceExec: file_groups={4 groups: [[x:0..50], [x:50..100], [y:0..50], [y:50..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - ]; - test_config.run(&expected_csv, plan_csv.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(&expected_csv, plan_csv, &SORT_DISTRIB_DISTRIB)?; + let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); + // Multiple source files split across partitions + assert_plan!(plan_csv_distrib, @r" +AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={4 groups: [[x:0..50], [x:50..100], [y:0..50], [y:50..100]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false +"); + // Multiple source files split across partitions + let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_csv_distrib, plan_csv_sort); Ok(()) } @@ -2651,32 +2688,32 @@ fn parallelization_sorted_limit() -> Result<()> { let test_config = TestConfig::default(); // Test: with parquet - let expected_parquet = &[ - "GlobalLimitExec: skip=0, fetch=100", - " LocalLimitExec: fetch=100", - // data is sorted so can't repartition here - " SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - // Doesn't parallelize for SortExec without preserve_partitioning - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - test_config.run( - expected_parquet, - plan_parquet.clone(), - &DISTRIB_DISTRIB_SORT, - )?; - test_config.run(expected_parquet, plan_parquet, &SORT_DISTRIB_DISTRIB)?; + let plan_parquet_distrib = + test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_parquet_distrib, @r" +GlobalLimitExec: skip=0, fetch=100 + LocalLimitExec: fetch=100 + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + // data is sorted so can't repartition here + // 
Doesn't parallelize for SortExec without preserve_partitioning + let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_parquet_distrib, plan_parquet_sort); // Test: with csv - let expected_csv = &[ - "GlobalLimitExec: skip=0, fetch=100", - " LocalLimitExec: fetch=100", - // data is sorted so can't repartition here - " SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - // Doesn't parallelize for SortExec without preserve_partitioning - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - ]; - test_config.run(expected_csv, plan_csv.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected_csv, plan_csv, &SORT_DISTRIB_DISTRIB)?; + let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_csv_distrib, + @r" +GlobalLimitExec: skip=0, fetch=100 + LocalLimitExec: fetch=100 + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false +"); + // data is sorted so can't repartition here + // Doesn't parallelize for SortExec without preserve_partitioning + let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_csv_distrib, plan_csv_sort); Ok(()) } @@ -2696,40 +2733,41 @@ fn parallelization_limit_with_filter() -> Result<()> { let test_config = TestConfig::default(); // Test: with parquet - let expected_parquet = &[ - "GlobalLimitExec: skip=0, fetch=100", - " CoalescePartitionsExec", - " LocalLimitExec: fetch=100", - " FilterExec: c@2 = 0", - // even though data is sorted, we can use repartition here. Since - // ordering is not used in subsequent stages anyway. - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - // SortExec doesn't benefit from input partitioning - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - test_config.run( - expected_parquet, - plan_parquet.clone(), - &DISTRIB_DISTRIB_SORT, - )?; - test_config.run(expected_parquet, plan_parquet, &SORT_DISTRIB_DISTRIB)?; + let plan_parquet_distrib = + test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); + // even though data is sorted, we can use repartition here. Since + // ordering is not used in subsequent stages anyway. + // SortExec doesn't benefit from input partitioning + assert_plan!(plan_parquet_distrib, + @r" +GlobalLimitExec: skip=0, fetch=100 + CoalescePartitionsExec + LocalLimitExec: fetch=100 + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_parquet_distrib, plan_parquet_sort); // Test: with csv - let expected_csv = &[ - "GlobalLimitExec: skip=0, fetch=100", - " CoalescePartitionsExec", - " LocalLimitExec: fetch=100", - " FilterExec: c@2 = 0", - // even though data is sorted, we can use repartition here. Since - // ordering is not used in subsequent stages anyway. 
- " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - // SortExec doesn't benefit from input partitioning - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - ]; - test_config.run(expected_csv, plan_csv.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected_csv, plan_csv, &SORT_DISTRIB_DISTRIB)?; + let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); + // even though data is sorted, we can use repartition here. Since + // ordering is not used in subsequent stages anyway. + // SortExec doesn't benefit from input partitioning + assert_plan!(plan_csv_distrib, + @r" +GlobalLimitExec: skip=0, fetch=100 + CoalescePartitionsExec + LocalLimitExec: fetch=100 + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false +"); + let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_csv_distrib, plan_csv_sort); Ok(()) } @@ -2747,48 +2785,49 @@ fn parallelization_ignores_limit() -> Result<()> { let test_config = TestConfig::default(); // Test: with parquet - let expected_parquet = &[ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " GlobalLimitExec: skip=0, fetch=100", - " CoalescePartitionsExec", - " LocalLimitExec: fetch=100", - " FilterExec: c@2 = 0", - // repartition should happen prior to the filter to maximize parallelism - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " GlobalLimitExec: skip=0, fetch=100", - // Limit doesn't benefit from input partitioning - no parallelism - " LocalLimitExec: fetch=100", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - test_config.run( - expected_parquet, - plan_parquet.clone(), - &DISTRIB_DISTRIB_SORT, - )?; - test_config.run(expected_parquet, plan_parquet, &SORT_DISTRIB_DISTRIB)?; + let plan_parquet_distrib = + test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_parquet_distrib, + @r" + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + GlobalLimitExec: skip=0, fetch=100 + CoalescePartitionsExec + LocalLimitExec: fetch=100 + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + GlobalLimitExec: skip=0, fetch=100 + LocalLimitExec: fetch=100 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + "); + // repartition should happen prior to the filter to maximize parallelism + // Limit doesn't benefit from input partitioning - no parallelism + let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_parquet_distrib, plan_parquet_sort); // Test: with csv - let expected_csv = &[ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " RepartitionExec: 
partitioning=Hash([a@0], 10), input_partitions=10", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " GlobalLimitExec: skip=0, fetch=100", - " CoalescePartitionsExec", - " LocalLimitExec: fetch=100", - " FilterExec: c@2 = 0", - // repartition should happen prior to the filter to maximize parallelism - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " GlobalLimitExec: skip=0, fetch=100", - // Limit doesn't benefit from input partitioning - no parallelism - " LocalLimitExec: fetch=100", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - ]; - test_config.run(expected_csv, plan_csv.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected_csv, plan_csv, &SORT_DISTRIB_DISTRIB)?; + let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_csv_distrib, + @r" + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 10), input_partitions=10 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + GlobalLimitExec: skip=0, fetch=100 + CoalescePartitionsExec + LocalLimitExec: fetch=100 + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + GlobalLimitExec: skip=0, fetch=100 + LocalLimitExec: fetch=100 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + "); + // repartition should happen prior to the filter to maximize parallelism + // Limit doesn't benefit from input partitioning - no parallelism + let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_csv_distrib, plan_csv_sort); Ok(()) } @@ -2801,34 +2840,35 @@ fn parallelization_union_inputs() -> Result<()> { let test_config = TestConfig::default(); // Test: with parquet - let expected_parquet = &[ - "UnionExec", - // Union doesn't benefit from input partitioning - no parallelism - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - test_config.run( - expected_parquet, - plan_parquet.clone(), - &DISTRIB_DISTRIB_SORT, - )?; - test_config.run(expected_parquet, plan_parquet, &SORT_DISTRIB_DISTRIB)?; + let plan_parquet_distrib = + test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_parquet_distrib, + @r" +UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + // Union doesn't benefit from input partitioning - no parallelism + let plan_parquet_sort = 
test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_parquet_distrib, plan_parquet_sort); // Test: with csv - let expected_csv = &[ - "UnionExec", - // Union doesn't benefit from input partitioning - no parallelism - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false", - ]; - test_config.run(expected_csv, plan_csv.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected_csv, plan_csv, &SORT_DISTRIB_DISTRIB)?; + let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_csv_distrib, + @r" +UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false +"); + // Union doesn't benefit from input partitioning - no parallelism + let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_csv_distrib, plan_csv_sort); Ok(()) } @@ -2855,22 +2895,21 @@ fn parallelization_prior_to_sort_preserving_merge() -> Result<()> { // parallelization is not beneficial for SortPreservingMerge // Test: with parquet - let expected_parquet = &[ - "DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - test_config.run( - expected_parquet, - plan_parquet.clone(), - &DISTRIB_DISTRIB_SORT, - )?; - test_config.run(expected_parquet, plan_parquet, &SORT_DISTRIB_DISTRIB)?; + let plan_parquet_distrib = + test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_parquet_distrib, + @"DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet" + ); + let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_parquet_distrib, plan_parquet_sort); // Test: with csv - let expected_csv = &[ - "DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false", - ]; - test_config.run(expected_csv, plan_csv.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected_csv, plan_csv, &SORT_DISTRIB_DISTRIB)?; + let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_csv_distrib, + @"DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false" + ); + let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_csv_distrib, plan_csv_sort); Ok(()) } @@ -2900,54 +2939,47 @@ fn parallelization_sort_preserving_merge_with_union() -> Result<()> { 
// should not sort (as the data was already sorted) // Test: with parquet - let expected_parquet = &[ - "SortPreservingMergeExec: [c@2 ASC]", - " UnionExec", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - test_config.run( - expected_parquet, - plan_parquet.clone(), - &DISTRIB_DISTRIB_SORT, - )?; - let expected_parquet_first_sort_enforcement = &[ - // no SPM - "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - // has coalesce - " CoalescePartitionsExec", - " UnionExec", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - test_config.run( - expected_parquet_first_sort_enforcement, - plan_parquet, - &SORT_DISTRIB_DISTRIB, - )?; + let plan_parquet_distrib = + test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_parquet_distrib, + @r" + SortPreservingMergeExec: [c@2 ASC] + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); + let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_parquet_sort, + @r" + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); + // no SPM + // has coalesce // Test: with csv - let expected_csv = &[ - "SortPreservingMergeExec: [c@2 ASC]", - " UnionExec", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false", - ]; - test_config.run(expected_csv, plan_csv.clone(), &DISTRIB_DISTRIB_SORT)?; - let expected_csv_first_sort_enforcement = &[ - // no SPM - "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - // has coalesce - " CoalescePartitionsExec", - " UnionExec", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false", - ]; - test_config.run( - expected_csv_first_sort_enforcement, - plan_csv.clone(), - &SORT_DISTRIB_DISTRIB, - )?; + let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_csv_distrib, + @r" + SortPreservingMergeExec: [c@2 ASC] + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false + "); + let plan_csv_sort = test_config.to_plan(plan_csv.clone(), 
&SORT_DISTRIB_DISTRIB); + assert_plan!(plan_csv_sort, + @r" + SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false + "); + // no SPM + // has coalesce Ok(()) } @@ -2975,24 +3007,25 @@ fn parallelization_does_not_benefit() -> Result<()> { // no parallelization, because SortRequiredExec doesn't benefit from increased parallelism // Test: with parquet - let expected_parquet = &[ - "SortRequiredExec: [c@2 ASC]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - test_config.run( - expected_parquet, - plan_parquet.clone(), - &DISTRIB_DISTRIB_SORT, - )?; - test_config.run(expected_parquet, plan_parquet, &SORT_DISTRIB_DISTRIB)?; + let plan_parquet_distrib = + test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_parquet_distrib, + @r" + SortRequiredExec: [c@2 ASC] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); + let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_parquet_distrib, plan_parquet_sort); // Test: with csv - let expected_csv = &[ - "SortRequiredExec: [c@2 ASC]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false", - ]; - test_config.run(expected_csv, plan_csv.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected_csv, plan_csv, &SORT_DISTRIB_DISTRIB)?; + let plan_csv_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_csv_distrib, + @r" + SortRequiredExec: [c@2 ASC] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false + "); + let plan_csv_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_csv_distrib, plan_csv_sort); Ok(()) } @@ -3023,26 +3056,26 @@ fn parallelization_ignores_transitively_with_projection_parquet() -> Result<()> .into(); let plan_parquet = sort_preserving_merge_exec(sort_key_after_projection, proj_parquet); - let expected = &[ - "SortPreservingMergeExec: [c2@1 ASC]", - " ProjectionExec: expr=[a@0 as a2, c@2 as c2]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - plans_matches_expected!(expected, &plan_parquet); + assert_plan!(plan_parquet, + @r" + SortPreservingMergeExec: [c2@1 ASC] + ProjectionExec: expr=[a@0 as a2, c@2 as c2] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet + "); + + let test_config = TestConfig::default(); + let plan_parquet_distrib = + test_config.to_plan(plan_parquet.clone(), &DISTRIB_DISTRIB_SORT); // Expected Outcome: // data should not be repartitioned / resorted - let expected_parquet = &[ - "ProjectionExec: expr=[a@0 as a2, c@2 as c2]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - let test_config = TestConfig::default(); - test_config.run( - expected_parquet, - plan_parquet.clone(), - &DISTRIB_DISTRIB_SORT, - )?; - 
test_config.run(expected_parquet, plan_parquet, &SORT_DISTRIB_DISTRIB)?; + assert_plan!(plan_parquet_distrib, + @r" +ProjectionExec: expr=[a@0 as a2, c@2 as c2] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet +"); + let plan_parquet_sort = test_config.to_plan(plan_parquet, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_parquet_distrib, plan_parquet_sort); Ok(()) } @@ -3071,22 +3104,24 @@ fn parallelization_ignores_transitively_with_projection_csv() -> Result<()> { }] .into(); let plan_csv = sort_preserving_merge_exec(sort_key_after_projection, proj_csv); - let expected = &[ - "SortPreservingMergeExec: [c2@1 ASC]", - " ProjectionExec: expr=[a@0 as a2, c@2 as c2]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false", - ]; - plans_matches_expected!(expected, &plan_csv); + assert_plan!(plan_csv, + @r" +SortPreservingMergeExec: [c2@1 ASC] + ProjectionExec: expr=[a@0 as a2, c@2 as c2] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false +"); + let test_config = TestConfig::default(); + let plan_distrib = test_config.to_plan(plan_csv.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +ProjectionExec: expr=[a@0 as a2, c@2 as c2] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false +"); // Expected Outcome: // data should not be repartitioned / resorted - let expected_csv = &[ - "ProjectionExec: expr=[a@0 as a2, c@2 as c2]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=csv, has_header=false", - ]; - let test_config = TestConfig::default(); - test_config.run(expected_csv, plan_csv.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected_csv, plan_csv, &SORT_DISTRIB_DISTRIB)?; + let plan_sort = test_config.to_plan(plan_csv, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -3096,24 +3131,25 @@ fn remove_redundant_roundrobins() -> Result<()> { let input = parquet_exec(); let repartition = repartition_exec(repartition_exec(input)); let physical_plan = repartition_exec(filter_exec(repartition)); - let expected = &[ - "RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - plans_matches_expected!(expected, &physical_plan); - - let expected = &[ - "FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; + assert_plan!(physical_plan, + @r" +RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10 + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); let test_config = TestConfig::default(); - test_config.run(expected, physical_plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, physical_plan, 
&SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -3133,18 +3169,19 @@ fn remove_unnecessary_spm_after_filter() -> Result<()> { // TestConfig: Prefer existing sort. let test_config = TestConfig::default().with_prefer_existing_sort(); + let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); // Expected Outcome: // Original plan expects its output to be ordered by c@2 ASC. // This is still satisfied since, after filter that column is constant. - let expected = &[ - "CoalescePartitionsExec", - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2, preserve_order=true, sort_exprs=c@2 ASC", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - - test_config.run(expected, physical_plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, physical_plan, &SORT_DISTRIB_DISTRIB)?; + assert_plan!(plan_distrib, + @r" +CoalescePartitionsExec + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2, preserve_order=true, sort_exprs=c@2 ASC + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet +"); + let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -3164,14 +3201,16 @@ fn preserve_ordering_through_repartition() -> Result<()> { // TestConfig: Prefer existing sort. let test_config = TestConfig::default().with_prefer_existing_sort(); - let expected = &[ - "SortPreservingMergeExec: [d@3 ASC]", - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2, preserve_order=true, sort_exprs=d@3 ASC", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[d@3 ASC], file_type=parquet", - ]; - test_config.run(expected, physical_plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, physical_plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +SortPreservingMergeExec: [d@3 ASC] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2, preserve_order=true, sort_exprs=d@3 ASC + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[d@3 ASC], file_type=parquet +"); + let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -3189,29 +3228,27 @@ fn do_not_preserve_ordering_through_repartition() -> Result<()> { let test_config = TestConfig::default(); + let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); // Test: run EnforceDistribution, then EnforceSort. 
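Across these hunks the string-array expectations handed to test_config.run(...) are replaced by a two-step pattern: materialize the optimized plan once with test_config.to_plan(plan, &RUNS), snapshot its rendered tree with assert_plan!(plan, @r"..."), and check that both optimizer run orders agree by passing the two plans to assert_plan!. The real macro lives in the shared physical-optimizer test utilities and is not part of this diff; the following is only a minimal sketch of what it is assumed to do, rendering the plan with displayable(..).indent(true) and deferring to insta's assert_snapshot!:

// Hypothetical sketch only; the actual assert_plan! helper is defined in the
// shared test utilities and may differ in detail.
use std::sync::Arc;

use datafusion_physical_plan::{displayable, ExecutionPlan};

/// Render a plan as the indented tree that appears in the snapshots above.
fn render_plan(plan: &Arc<dyn ExecutionPlan>) -> String {
    displayable(plan.as_ref()).indent(true).to_string()
}

macro_rules! assert_plan {
    // One-plan form: inline-snapshot the rendered tree via insta.
    ($plan:expr, @ $snapshot:literal) => {
        insta::assert_snapshot!(render_plan(&$plan), @ $snapshot)
    };
    // Two-plan form: both optimizer run orders must produce the same plan.
    ($left:expr, $right:expr) => {
        assert_eq!(render_plan(&$left), render_plan(&$right))
    };
}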
- let expected = &[ - "SortPreservingMergeExec: [a@0 ASC]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet", - ]; - test_config.run(expected, physical_plan.clone(), &DISTRIB_DISTRIB_SORT)?; + assert_plan!(plan_distrib, + @r" +SortPreservingMergeExec: [a@0 ASC] + SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet +"); // Test: result IS DIFFERENT, if EnforceSorting is run first: - let expected_first_sort_enforcement = &[ - "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet", - ]; - test_config.run( - expected_first_sort_enforcement, - physical_plan, - &SORT_DISTRIB_DISTRIB, - )?; + let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_sort, + @r" +SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet +"); Ok(()) } @@ -3227,17 +3264,18 @@ fn no_need_for_sort_after_filter() -> Result<()> { let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); let physical_plan = sort_preserving_merge_exec(sort_key, filter_exec(input)); - let expected = &[ - // After CoalescePartitionsExec c is still constant. Hence c@2 ASC ordering is already satisfied. - "CoalescePartitionsExec", - // Since after this stage c is constant. c@2 ASC ordering is already satisfied. - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; let test_config = TestConfig::default(); - test_config.run(expected, physical_plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, physical_plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, @r" +CoalescePartitionsExec + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet +"); + let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); + // After CoalescePartitionsExec c is still constant. Hence c@2 ASC ordering is already satisfied. + // Since after this stage c is constant. c@2 ASC ordering is already satisfied. 
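The two trailing comments capture the invariant this test relies on: after FilterExec: c@2 = 0 every surviving row carries the same value in column c, so the required c@2 ASC ordering holds no matter how CoalescePartitionsExec concatenates the partitions, and no extra SortExec or SortPreservingMergeExec is needed. A small standalone illustration of that reasoning (plain Rust, not DataFusion API):

// Rows are (a, c) pairs; "sorted by c" in the sense required by `c@2 ASC`.
fn is_sorted_by_c(rows: &[(i64, i64)]) -> bool {
    rows.windows(2).all(|w| w[0].1 <= w[1].1)
}

fn main() {
    // Two "partitions", standing in for the two parquet file groups.
    let partition_a = vec![(7, 3), (1, 0), (5, 0)];
    let partition_b = vec![(2, 0), (9, 1), (4, 0)];

    // The equivalent of `FilterExec: c@2 = 0`; afterwards c is constant.
    let filtered: Vec<(i64, i64)> = partition_a
        .into_iter()
        .chain(partition_b) // arbitrary concatenation, like CoalescePartitionsExec
        .filter(|&(_, c)| c == 0)
        .collect();

    // The ordering requirement on c is satisfied without any re-sort.
    assert!(is_sorted_by_c(&filtered));
}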
Ok(()) } @@ -3261,30 +3299,28 @@ fn do_not_preserve_ordering_through_repartition2() -> Result<()> { let test_config = TestConfig::default(); + let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); // Test: run EnforceDistribution, then EnforceSort. - let expected = &[ - "SortPreservingMergeExec: [a@0 ASC]", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - test_config.run(expected, physical_plan.clone(), &DISTRIB_DISTRIB_SORT)?; + assert_plan!(plan_distrib, + @r" +SortPreservingMergeExec: [a@0 ASC] + SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet +"); // Test: result IS DIFFERENT, if EnforceSorting is run first: - let expected_first_sort_enforcement = &[ - "SortExec: expr=[a@0 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " SortExec: expr=[a@0 ASC], preserve_partitioning=[true]", - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - test_config.run( - expected_first_sort_enforcement, - physical_plan, - &SORT_DISTRIB_DISTRIB, - )?; + let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_sort, + @r" +SortExec: expr=[a@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + SortExec: expr=[a@0 ASC], preserve_partitioning=[true] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet +"); Ok(()) } @@ -3300,14 +3336,16 @@ fn do_not_preserve_ordering_through_repartition3() -> Result<()> { let input = parquet_exec_multiple_sorted(vec![sort_key]); let physical_plan = filter_exec(input); - let expected = &[ - "FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; let test_config = TestConfig::default(); - test_config.run(expected, physical_plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, physical_plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet +"); + let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -3322,30 +3360,27 @@ fn do_not_put_sort_when_input_is_invalid() -> Result<()> { .into(); let input = parquet_exec(); let physical_plan = sort_required_exec_with_req(filter_exec(input), sort_key); - let expected = &[ - // Ordering requirement of sort required exec is NOT satisfied - // by 
existing ordering at the source. - "SortRequiredExec: [a@0 ASC]", - " FilterExec: c@2 = 0", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - assert_plan_txt!(expected, physical_plan); - - let expected = &[ - "SortRequiredExec: [a@0 ASC]", - // Since at the start of the rule ordering requirement is not satisfied - // EnforceDistribution rule doesn't satisfy this requirement either. - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; + // Ordering requirement of sort required exec is NOT satisfied + // by existing ordering at the source. + assert_plan!(physical_plan, @r" +SortRequiredExec: [a@0 ASC] + FilterExec: c@2 = 0 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); let mut config = ConfigOptions::new(); config.execution.target_partitions = 10; config.optimizer.enable_round_robin_repartition = true; config.optimizer.prefer_existing_sort = false; let dist_plan = EnforceDistribution::new().optimize(physical_plan, &config)?; - assert_plan_txt!(expected, dist_plan); + // Since at the start of the rule ordering requirement is not satisfied + // EnforceDistribution rule doesn't satisfy this requirement either. + assert_plan!(dist_plan, @r" +SortRequiredExec: [a@0 ASC] + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); Ok(()) } @@ -3361,29 +3396,26 @@ fn put_sort_when_input_is_valid() -> Result<()> { let input = parquet_exec_multiple_sorted(vec![sort_key.clone()]); let physical_plan = sort_required_exec_with_req(filter_exec(input), sort_key); - let expected = &[ - // Ordering requirement of sort required exec is satisfied - // by existing ordering at the source. - "SortRequiredExec: [a@0 ASC]", - " FilterExec: c@2 = 0", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet", - ]; - assert_plan_txt!(expected, physical_plan); - - let expected = &[ - // Since at the start of the rule ordering requirement is satisfied - // EnforceDistribution rule satisfy this requirement also. - "SortRequiredExec: [a@0 ASC]", - " FilterExec: c@2 = 0", - " DataSourceExec: file_groups={10 groups: [[x:0..20], [y:0..20], [x:20..40], [y:20..40], [x:40..60], [y:40..60], [x:60..80], [y:60..80], [x:80..100], [y:80..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet", - ]; + // Ordering requirement of sort required exec is satisfied + // by existing ordering at the source. + assert_plan!(physical_plan, @r" +SortRequiredExec: [a@0 ASC] + FilterExec: c@2 = 0 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet +"); let mut config = ConfigOptions::new(); config.execution.target_partitions = 10; config.optimizer.enable_round_robin_repartition = true; config.optimizer.prefer_existing_sort = false; let dist_plan = EnforceDistribution::new().optimize(physical_plan, &config)?; - assert_plan_txt!(expected, dist_plan); + // Since at the start of the rule ordering requirement is satisfied + // EnforceDistribution rule satisfy this requirement also. 
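These two tests drive EnforceDistribution directly rather than through TestConfig: they build ConfigOptions with target_partitions = 10 and call EnforceDistribution::new().optimize(physical_plan, &config). In the satisfied case, the assertion that follows shows the two 100-byte sorted files being split into ten single-range groups ([x:0..20], [y:0..20], ..., [y:80..100]) so that each group still reads its rows in a@0 ASC order. The grouping in that snapshot follows from simple arithmetic; the sketch below is not the real file-group partitioner, it only reproduces the listed ranges under the assumption of two equally sized, equally split files:

fn main() {
    // Two sorted parquet files of 100 bytes each, target_partitions = 10.
    let files = [("x", 100u64), ("y", 100u64)];
    let target_partitions = 10u64;

    let total: u64 = files.iter().map(|(_, len)| len).sum(); // 200 bytes
    let bytes_per_group = total / target_partitions; // 20 bytes per group
    let ranges_per_file = files[0].1 / bytes_per_group; // 5 ranges per file

    // Alternate over the files so every group is a single contiguous,
    // still-sorted byte range of one file.
    let mut groups = Vec::new();
    for i in 0..ranges_per_file {
        for (name, _) in &files {
            groups.push(format!(
                "{name}:{}..{}",
                i * bytes_per_group,
                (i + 1) * bytes_per_group
            ));
        }
    }

    assert_eq!(groups.len(), 10);
    assert_eq!(groups[0], "x:0..20");
    assert_eq!(groups[9], "y:80..100");
    println!("{groups:?}");
}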
+ assert_plan!(dist_plan, @r" +SortRequiredExec: [a@0 ASC] + FilterExec: c@2 = 0 + DataSourceExec: file_groups={10 groups: [[x:0..20], [y:0..20], [x:20..40], [y:20..40], [x:40..60], [y:40..60], [x:60..80], [y:60..80], [x:80..100], [y:80..100]]}, projection=[a, b, c, d, e], output_ordering=[a@0 ASC], file_type=parquet +"); Ok(()) } @@ -3404,13 +3436,15 @@ fn do_not_add_unnecessary_hash() -> Result<()> { // Make sure target partition number is 1. In this case hash repartition is unnecessary. let test_config = TestConfig::default().with_query_execution_partitions(1); - let expected = &[ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - test_config.run(expected, physical_plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, physical_plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet +"); + let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -3432,19 +3466,21 @@ fn do_not_add_unnecessary_hash2() -> Result<()> { // Make sure target partition number is larger than 2 (e.g partition number at the source). let test_config = TestConfig::default().with_query_execution_partitions(4); - let expected = &[ - "AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - // Since hash requirements of this operator is satisfied. There shouldn't be - // a hash repartition here - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - " AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4", - " AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[]", - " RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2", - " DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet", - ]; - test_config.run(expected, physical_plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, physical_plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + AggregateExec: mode=FinalPartitioned, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=Hash([a@0], 4), input_partitions=4 + AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[] + RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=2 + DataSourceExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], file_type=parquet +"); + // Since hash requirements of this operator is satisfied. 
There shouldn't be + // a hash repartition here + let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -3452,19 +3488,21 @@ fn do_not_add_unnecessary_hash2() -> Result<()> { #[test] fn optimize_away_unnecessary_repartition() -> Result<()> { let physical_plan = coalesce_partitions_exec(repartition_exec(parquet_exec())); - let expected = &[ - "CoalescePartitionsExec", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - plans_matches_expected!(expected, physical_plan.clone()); - - let expected = - &["DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet"]; + assert_plan!(physical_plan, + @r" +CoalescePartitionsExec + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); let test_config = TestConfig::default(); - test_config.run(expected, physical_plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, physical_plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -3474,25 +3512,27 @@ fn optimize_away_unnecessary_repartition2() -> Result<()> { let physical_plan = filter_exec(repartition_exec(coalesce_partitions_exec( filter_exec(repartition_exec(parquet_exec())), ))); - let expected = &[ - "FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " CoalescePartitionsExec", - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - plans_matches_expected!(expected, physical_plan.clone()); + assert_plan!(physical_plan, + @r" +FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + CoalescePartitionsExec + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); - let expected = &[ - "FilterExec: c@2 = 0", - " FilterExec: c@2 = 0", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; let test_config = TestConfig::default(); - test_config.run(expected, physical_plan.clone(), &DISTRIB_DISTRIB_SORT)?; - test_config.run(expected, physical_plan, &SORT_DISTRIB_DISTRIB)?; + let plan_distrib = test_config.to_plan(physical_plan.clone(), &DISTRIB_DISTRIB_SORT); + assert_plan!(plan_distrib, + @r" +FilterExec: c@2 = 0 + FilterExec: c@2 = 0 + RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); + let plan_sort = test_config.to_plan(physical_plan, &SORT_DISTRIB_DISTRIB); + assert_plan!(plan_distrib, plan_sort); Ok(()) } @@ -3512,27 +3552,31 @@ async fn test_distribute_sort_parquet() -> Result<()> { let physical_plan = 
sort_exec(sort_key, parquet_exec_with_stats(10000 * 8192)); // prior to optimization, this is the starting plan - let starting = &[ - "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - " DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - plans_matches_expected!(starting, physical_plan.clone()); + assert_plan!(physical_plan, + @r" +SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=parquet +"); // what the enforce distribution run does. - let expected = &[ - "SortExec: expr=[c@2 ASC], preserve_partitioning=[false]", - " CoalescePartitionsExec", - " DataSourceExec: file_groups={10 groups: [[x:0..8192000], [x:8192000..16384000], [x:16384000..24576000], [x:24576000..32768000], [x:32768000..40960000], [x:40960000..49152000], [x:49152000..57344000], [x:57344000..65536000], [x:65536000..73728000], [x:73728000..81920000]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - test_config.run(expected, physical_plan.clone(), &[Run::Distribution])?; + let plan_distribution = + test_config.to_plan(physical_plan.clone(), &[Run::Distribution]); + assert_plan!(plan_distribution, + @r" +SortExec: expr=[c@2 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + DataSourceExec: file_groups={10 groups: [[x:0..8192000], [x:8192000..16384000], [x:16384000..24576000], [x:24576000..32768000], [x:32768000..40960000], [x:40960000..49152000], [x:49152000..57344000], [x:57344000..65536000], [x:65536000..73728000], [x:73728000..81920000]]}, projection=[a, b, c, d, e], file_type=parquet +"); // what the sort parallelization (in enforce sorting), does after the enforce distribution changes - let expected = &[ - "SortPreservingMergeExec: [c@2 ASC]", - " SortExec: expr=[c@2 ASC], preserve_partitioning=[true]", - " DataSourceExec: file_groups={10 groups: [[x:0..8192000], [x:8192000..16384000], [x:16384000..24576000], [x:24576000..32768000], [x:32768000..40960000], [x:40960000..49152000], [x:49152000..57344000], [x:57344000..65536000], [x:65536000..73728000], [x:73728000..81920000]]}, projection=[a, b, c, d, e], file_type=parquet", - ]; - test_config.run(expected, physical_plan, &[Run::Distribution, Run::Sorting])?; + let plan_both = + test_config.to_plan(physical_plan, &[Run::Distribution, Run::Sorting]); + assert_plan!(plan_both, + @r" +SortPreservingMergeExec: [c@2 ASC] + SortExec: expr=[c@2 ASC], preserve_partitioning=[true] + DataSourceExec: file_groups={10 groups: [[x:0..8192000], [x:8192000..16384000], [x:16384000..24576000], [x:24576000..32768000], [x:32768000..40960000], [x:40960000..49152000], [x:49152000..57344000], [x:57344000..65536000], [x:65536000..73728000], [x:73728000..81920000]]}, projection=[a, b, c, d, e], file_type=parquet +"); Ok(()) } @@ -3557,12 +3601,12 @@ async fn test_distribute_sort_memtable() -> Result<()> { let physical_plan = dataframe.create_physical_plan().await?; // this is the final, optimized plan - let expected = &[ - "SortPreservingMergeExec: [id@0 ASC NULLS LAST]", - " SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true]", - " DataSourceExec: partitions=3, partition_sizes=[34, 33, 33]", - ]; - plans_matches_expected!(expected, physical_plan); + assert_plan!(physical_plan, + @r" +SortPreservingMergeExec: [id@0 ASC NULLS LAST] + SortExec: expr=[id@0 ASC NULLS LAST], preserve_partitioning=[true] + DataSourceExec: partitions=3, partition_sizes=[34, 33, 33] +"); Ok(()) } diff --git 
a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs index ad77a453350f..c0cfa46733f1 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting.rs @@ -31,7 +31,7 @@ use crate::physical_optimizer::test_utils::{ use arrow::compute::SortOptions; use arrow::datatypes::{DataType, SchemaRef}; -use datafusion_common::config::ConfigOptions; +use datafusion_common::config::{ConfigOptions, CsvOptions}; use datafusion_common::tree_node::{TreeNode, TransformedResult}; use datafusion_common::{Result, TableReference}; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; @@ -72,10 +72,15 @@ fn csv_exec_sorted( schema: &SchemaRef, sort_exprs: impl IntoIterator, ) -> Arc { + let options = CsvOptions { + has_header: Some(false), + delimiter: 0, + quote: 0, + ..Default::default() + }; let mut builder = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - schema.clone(), - Arc::new(CsvSource::new(false, 0, 0)), + Arc::new(CsvSource::new(schema.clone()).with_csv_options(options)), ) .with_file(PartitionedFile::new("x".to_string(), 100)); if let Some(ordering) = LexOrdering::new(sort_exprs) { @@ -359,6 +364,94 @@ async fn test_union_inputs_different_sorted2() -> Result<()> { Ok(()) } +#[tokio::test] +// Test with `repartition_sorts` enabled to preserve pre-sorted partitions and avoid resorting +async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_true( +) -> Result<()> { + assert_snapshot!( + union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(true).await?, + @r" + Input Plan: + OutputRequirementExec: order_by=[(nullable_col@0, asc)], dist_by=SinglePartition + CoalescePartitionsExec + UnionExec + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + + Optimized Plan: + OutputRequirementExec: order_by=[(nullable_col@0, asc)], dist_by=SinglePartition + SortPreservingMergeExec: [nullable_col@0 ASC] + UnionExec + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + "); + Ok(()) +} + +#[tokio::test] +// Test with `repartition_sorts` disabled, causing a full resort of the data +async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_with_repartition_sorts_false( +) -> Result<()> { + assert_snapshot!( + union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl(false).await?, + @r" + Input Plan: + OutputRequirementExec: order_by=[(nullable_col@0, asc)], dist_by=SinglePartition + CoalescePartitionsExec + UnionExec + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + + Optimized Plan: + OutputRequirementExec: order_by=[(nullable_col@0, asc)], 
dist_by=SinglePartition + SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] + CoalescePartitionsExec + UnionExec + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet + "); + Ok(()) +} + +async fn union_with_mix_of_presorted_and_explicitly_resorted_inputs_impl( + repartition_sorts: bool, +) -> Result { + let schema = create_test_schema()?; + + // Source 1, will be sorted explicitly (on `nullable_col`) + let source1 = parquet_exec(schema.clone()); + let ordering1 = [sort_expr("nullable_col", &schema)].into(); + let sort1 = sort_exec(ordering1, source1.clone()); + + // Source 2, pre-sorted (on `nullable_col`) + let parquet_ordering: LexOrdering = [sort_expr("nullable_col", &schema)].into(); + let source2 = parquet_exec_with_sort(schema.clone(), vec![parquet_ordering.clone()]); + + let union = union_exec(vec![sort1, source2]); + + let coalesced = coalesce_partitions_exec(union); + + // Required sorted / single partitioned output + let requirement = [PhysicalSortRequirement::new( + col("nullable_col", &schema)?, + Some(SortOptions::new(false, true)), + )] + .into(); + let physical_plan = Arc::new(OutputRequirementExec::new( + coalesced, + Some(OrderingRequirements::new(requirement)), + Distribution::SinglePartition, + None, + )); + + let test = + EnforceSortingTest::new(physical_plan).with_repartition_sorts(repartition_sorts); + Ok(test.run()) +} + #[tokio::test] async fn test_union_inputs_different_sorted3() -> Result<()> { let schema = create_test_schema()?; @@ -667,12 +760,12 @@ async fn test_soft_hard_requirements_remove_soft_requirement() -> Result<()> { let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet Optimized Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet "#); @@ -716,13 +809,13 @@ async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns( assert_snapshot!(test.run(), @r#" Input Plan: ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as count] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN 
UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet Optimized Plan: ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as count] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet "#); @@ -763,13 +856,13 @@ async fn test_soft_hard_requirements_remove_soft_requirement_without_pushdowns( let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet Optimized Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] @@ -824,15 +917,15 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet Optimized Plan: - 
BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] @@ -889,17 +982,17 @@ async fn test_soft_hard_requirements_multiple_soft_requirements() -> Result<()> let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet Optimized Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] @@ -961,14 +1054,14 @@ async fn test_soft_hard_requirements_multiple_sorts() -> Result<()> { Input Plan: SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: 
Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet Optimized Plan: SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] ProjectionExec: expr=[nullable_col@0 + non_nullable_col@1 as nullable_col] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] @@ -1023,16 +1116,16 @@ async fn test_soft_hard_requirements_with_multiple_soft_requirements_and_output_ assert_snapshot!(test.run(), @r#" Input Plan: OutputRequirementExec: order_by=[(non_nullable_col@1, asc)], dist_by=SinglePartition - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet Optimized Plan: OutputRequirementExec: order_by=[(non_nullable_col@1, asc)], dist_by=SinglePartition - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], file_type=parquet "#); @@ -1081,7 +1174,7 @@ async fn test_window_multi_path_sort() -> Result<()> { 
let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortPreservingMergeExec: [nullable_col@0 DESC NULLS LAST] UnionExec SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] @@ -1090,7 +1183,7 @@ async fn test_window_multi_path_sort() -> Result<()> { DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet Optimized Plan: - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] SortPreservingMergeExec: [nullable_col@0 ASC] UnionExec DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC], file_type=parquet @@ -1122,7 +1215,7 @@ async fn test_window_multi_path_sort2() -> Result<()> { let test = EnforceSortingTest::new(physical_plan).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortPreservingMergeExec: [nullable_col@0 ASC, non_nullable_col@1 ASC] UnionExec SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] @@ -1131,7 +1224,7 @@ async fn test_window_multi_path_sort2() -> Result<()> { DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet Optimized Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortPreservingMergeExec: [nullable_col@0 ASC] UnionExec DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC], file_type=parquet @@ -1678,7 +1771,7 @@ async fn test_window_multi_layer_requirement() -> Result<()> { EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": 
Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortPreservingMergeExec: [a@0 ASC, b@1 ASC] RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC, b@1 ASC RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 @@ -1686,7 +1779,7 @@ async fn test_window_multi_layer_requirement() -> Result<()> { DataSourceExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], file_type=csv, has_header=false Optimized Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortPreservingMergeExec: [a@0 ASC, b@1 ASC] SortExec: expr=[a@0 ASC, b@1 ASC], preserve_partitioning=[true] RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10 @@ -1783,18 +1876,18 @@ async fn test_remove_unnecessary_sort_window_multilayer() -> Result<()> { EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] FilterExec: NOT non_nullable_col@1 SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] CoalesceBatchesExec: target_batch_size=128 SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] Optimized Plan: - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] FilterExec: NOT non_nullable_col@1 - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] CoalesceBatchesExec: target_batch_size=128 SortExec: expr=[non_nullable_col@1 DESC], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] @@ -2238,17 +2331,17 @@ async fn test_multiple_sort_window_exec() -> Result<()> { EnforceSortingTest::new(physical_plan.clone()).with_repartition_sorts(true); assert_snapshot!(test.run(), @r#" Input Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", 
data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] Optimized Plan: - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC, non_nullable_col@1 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] "#); @@ -2273,7 +2366,7 @@ async fn test_commutativity() -> Result<()> { assert_snapshot!(displayable(orig_plan.as_ref()).indent(true), @r#" SortExec: expr=[nullable_col@0 ASC], preserve_partitioning=[false] RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1 - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: partitions=1, partition_sizes=[0] "#); @@ -2483,7 +2576,6 @@ async fn test_not_replaced_with_partial_sort_for_unbounded_input() -> Result<()> Ok(()) } -// Test that verifies that an orthogonal sort (a sort on columns not in the input ordering) #[test] fn test_removes_unused_orthogonal_sort() -> Result<()> { let schema = create_test_schema3()?; diff --git a/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs 
b/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs index 7d6c0484b624..ef233e222912 100644 --- a/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs +++ b/datafusion/core/tests/physical_optimizer/enforce_sorting_monotonicity.rs @@ -229,11 +229,11 @@ fn test_window_partial_constant_and_set_monotonicity_0() { @ r#" Input Plan: SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet Optimized Plan: - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -253,11 +253,11 @@ fn test_window_partial_constant_and_set_monotonicity_1() { @ r#" Input Plan: SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet Optimized Plan: - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -275,15 +275,15 @@ fn test_window_partial_constant_and_set_monotonicity_2() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, 
nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -299,15 +299,15 @@ fn test_window_partial_constant_and_set_monotonicity_3() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 
group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -323,16 +323,16 @@ fn test_window_partial_constant_and_set_monotonicity_4() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -348,16 +348,16 @@ fn test_window_partial_constant_and_set_monotonicity_5() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[non_nullable_col@1 DESC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[non_nullable_col@1 DESC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -SortExec: expr=[non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: 
Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + SortExec: expr=[non_nullable_col@1 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -373,16 +373,16 @@ fn test_window_partial_constant_and_set_monotonicity_6() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[min@2 ASC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[min@2 ASC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + SortExec: expr=[non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -398,16 +398,16 @@ fn test_window_partial_constant_and_set_monotonicity_7() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: 
file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + SortExec: expr=[nullable_col@0 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -427,15 +427,15 @@ fn test_window_partial_constant_and_set_monotonicity_8() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], 
output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -451,15 +451,15 @@ fn test_window_partial_constant_and_set_monotonicity_9() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -477,7 +477,7 @@ fn test_window_partial_constant_and_set_monotonicity_10() { @ r#" Input / Optimized Plan: SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -497,7 +497,7 @@ fn test_window_partial_constant_and_set_monotonicity_11() { @ r#" Input / Optimized Plan: SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: 
Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -522,7 +522,7 @@ fn test_window_partial_constant_and_set_monotonicity_12() { @ r#" Input / Optimized Plan: SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -543,7 +543,7 @@ fn test_window_partial_constant_and_set_monotonicity_13() { @ r#" Input / Optimized Plan: SortExec: expr=[non_nullable_col@1 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -564,7 +564,7 @@ fn test_window_partial_constant_and_set_monotonicity_14() { @ r#" Input / Optimized Plan: SortExec: expr=[min@2 DESC NULLS LAST, non_nullable_col@1 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -585,7 +585,7 @@ fn test_window_partial_constant_and_set_monotonicity_15() { @ r#" Input / Optimized Plan: SortExec: expr=[avg@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 
group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -608,15 +608,15 @@ fn test_window_partial_constant_and_set_monotonicity_16() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -633,15 +633,15 @@ fn test_window_partial_constant_and_set_monotonicity_17() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[max@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[max@2 DESC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], 
file_type=parquet -"# + Optimized Plan: + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -658,15 +658,15 @@ fn test_window_partial_constant_and_set_monotonicity_18() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[min@2 ASC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[min@2 ASC, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -685,7 +685,7 @@ fn test_window_partial_constant_and_set_monotonicity_19() { @ r#" Input / Optimized Plan: SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -710,7 +710,7 @@ fn test_window_partial_constant_and_set_monotonicity_20() { @ r#" Input / Optimized Plan: SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, 
start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -729,15 +729,15 @@ fn test_window_partial_constant_and_set_monotonicity_21() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -756,7 +756,7 @@ fn test_window_partial_constant_and_set_monotonicity_22() { @ r#" Input / Optimized Plan: SortExec: expr=[min@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -777,7 +777,7 @@ fn test_window_partial_constant_and_set_monotonicity_23() { @ r#" Input / Optimized Plan: SortExec: expr=[avg@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: 
Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -800,15 +800,15 @@ fn test_window_partial_constant_and_set_monotonicity_24() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 DESC NULLS LAST], preserve_partitioning=[false] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -827,7 +827,7 @@ fn test_window_partial_constant_and_set_monotonicity_25() { @ r#" Input / Optimized Plan: SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -847,7 +847,7 @@ fn test_window_partial_constant_and_set_monotonicity_26() { @ r#" Input / Optimized Plan: SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: 
Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "#); } @@ -867,7 +867,7 @@ fn test_window_partial_constant_and_set_monotonicity_27() { @ r#" Input / Optimized Plan: SortExec: expr=[avg@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "#); } @@ -893,7 +893,7 @@ fn test_window_partial_constant_and_set_monotonicity_28() { @ r#" Input / Optimized Plan: SortExec: expr=[count@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[count: Ok(Field { name: "count", data_type: Int64 }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -912,15 +912,15 @@ fn test_window_partial_constant_and_set_monotonicity_29() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false] - WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC], preserve_partitioning=[false] + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], 
output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"#) + Optimized Plan: + WindowAggExec: wdw=[max: Ok(Field { name: "max", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "#) } // Case 30: @@ -937,7 +937,7 @@ fn test_window_partial_constant_and_set_monotonicity_30() { @ r#" Input / Optimized Plan: SortExec: expr=[min@2 DESC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[min: Ok(Field { name: "min", data_type: Int32, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "#); } @@ -957,7 +957,7 @@ fn test_window_partial_constant_and_set_monotonicity_31() { @ r#" Input / Optimized Plan: SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] - WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] + WindowAggExec: wdw=[avg: Ok(Field { name: "avg", data_type: Float64, nullable: true }), frame: WindowFrame { units: Rows, start_bound: CurrentRow, end_bound: Following(UInt64(NULL)), is_causal: false }] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -981,15 +981,15 @@ fn test_window_partial_constant_and_set_monotonicity_32() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + BoundedWindowAggExec: 
wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -1008,7 +1008,7 @@ fn test_window_partial_constant_and_set_monotonicity_33() { @ r#" Input / Optimized Plan: SortExec: expr=[max@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1027,15 +1027,15 @@ fn test_window_partial_constant_and_set_monotonicity_34() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } // Case 35: @@ -1053,7 +1053,7 @@ fn test_window_partial_constant_and_set_monotonicity_35() { @ r#" Input / Optimized Plan: SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1077,15 +1077,15 @@ fn 
test_window_partial_constant_and_set_monotonicity_36() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -1102,15 +1102,15 @@ fn test_window_partial_constant_and_set_monotonicity_37() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -1129,7 +1129,7 @@ fn test_window_partial_constant_and_set_monotonicity_38() { @ r#" Input / Optimized Plan: SortExec: expr=[min@2 DESC, 
nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1149,7 +1149,7 @@ fn test_window_partial_constant_and_set_monotonicity_39() { @ r#" Input / Optimized Plan: SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1173,15 +1173,15 @@ fn test_window_partial_constant_and_set_monotonicity_40() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -1200,7 +1200,7 @@ fn test_window_partial_constant_and_set_monotonicity_41() { @ r#" Input / Optimized Plan: SortExec: expr=[max@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT 
ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1221,7 +1221,7 @@ fn test_window_partial_constant_and_set_monotonicity_42() { @ r#" Input / Optimized Plan: SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1242,7 +1242,7 @@ fn test_window_partial_constant_and_set_monotonicity_43() { @ r#" Input / Optimized Plan: SortExec: expr=[nullable_col@0 ASC NULLS LAST, avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1267,7 +1267,7 @@ fn test_window_partial_constant_and_set_monotonicity_44() { @ r#" Input / Optimized Plan: SortExec: expr=[count@2 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1288,7 +1288,7 @@ fn test_window_partial_constant_and_set_monotonicity_45() { @ r#" Input / Optimized Plan: SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1307,15 +1307,15 @@ fn test_window_partial_constant_and_set_monotonicity_46() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, 
projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -1331,15 +1331,15 @@ fn test_window_partial_constant_and_set_monotonicity_47() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -1361,15 +1361,15 @@ fn test_window_partial_constant_and_set_monotonicity_48() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: 
expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -1387,7 +1387,7 @@ fn test_window_partial_constant_and_set_monotonicity_49() { @ r#" Input / Optimized Plan: SortExec: expr=[max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1406,15 +1406,15 @@ fn test_window_partial_constant_and_set_monotonicity_50() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -1432,7 +1432,7 @@ fn test_window_partial_constant_and_set_monotonicity_51() { @ r#" Input / 
Optimized Plan: SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1458,7 +1458,7 @@ fn test_window_partial_constant_and_set_monotonicity_52() { @ r#" Input / Optimized Plan: SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1479,7 +1479,7 @@ fn test_window_partial_constant_and_set_monotonicity_53() { @ r#" Input / Optimized Plan: SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1499,7 +1499,7 @@ fn test_window_partial_constant_and_set_monotonicity_54() { @ r#" Input / Optimized Plan: SortExec: expr=[min@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1517,15 +1517,15 @@ fn test_window_partial_constant_and_set_monotonicity_55() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, 
projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -1547,15 +1547,15 @@ fn test_window_partial_constant_and_set_monotonicity_56() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[count@2 ASC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } @@ -1574,7 +1574,7 @@ fn test_window_partial_constant_and_set_monotonicity_57() { @ r#" Input / Optimized Plan: SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1595,7 +1595,7 @@ fn test_window_partial_constant_and_set_monotonicity_58() { @ r#" Input / Optimized Plan: SortExec: expr=[min@2 DESC NULLS LAST, nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1615,7 +1615,7 @@ fn test_window_partial_constant_and_set_monotonicity_59() { @ r#" Input / Optimized Plan: SortExec: expr=[avg@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1641,7 +1641,7 @@ fn test_window_partial_constant_and_set_monotonicity_60() { @ r#" Input / Optimized Plan: SortExec: expr=[nullable_col@0 ASC NULLS LAST, count@2 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1662,7 +1662,7 @@ fn test_window_partial_constant_and_set_monotonicity_61() { @ r#" Input / Optimized Plan: SortExec: expr=[nullable_col@0 ASC NULLS LAST, max@2 ASC], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[max: Field { name: "max", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[max: Field { "max": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1683,7 +1683,7 @@ fn test_window_partial_constant_and_set_monotonicity_62() { @ r#" Input / Optimized Plan: SortExec: expr=[nullable_col@0 ASC NULLS LAST, min@2 DESC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[min: Field { name: "min", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[min: Field { "min": nullable Int32 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet "# ); @@ -1701,15 +1701,15 @@ fn test_window_partial_constant_and_set_monotonicity_63() { ], }.run(), @ r#" -Input Plan: -SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] - BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, 
frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + Input Plan: + SortExec: expr=[nullable_col@0 ASC NULLS LAST], preserve_partitioning=[false] + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -Optimized Plan: -BoundedWindowAggExec: wdw=[avg: Field { name: "avg", data_type: Float64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] - DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet -"# + Optimized Plan: + BoundedWindowAggExec: wdw=[avg: Field { "avg": nullable Float64 }, frame: ROWS BETWEEN 1 PRECEDING AND CURRENT ROW], mode=[Sorted] + DataSourceExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC NULLS LAST], file_type=parquet + "# ); } // =============================================REGION ENDS============================================= diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs index b91c1732260c..31909415a286 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/mod.rs @@ -543,11 +543,11 @@ fn test_push_down_through_transparent_nodes() { } #[test] -fn test_no_pushdown_through_aggregates() { - // There are 2 important points here: - // 1. The outer filter **is not** pushed down at all because we haven't implemented pushdown support - // yet for AggregateExec. - // 2. The inner filter **is** pushed down into the DataSource. +fn test_pushdown_through_aggregates_on_grouping_columns() { + // Test that filters on grouping columns can be pushed through AggregateExec. + // This test has two filters: + // 1. An inner filter (a@0 = foo) below the aggregate - gets pushed to DataSource + // 2. 
An outer filter (b@1 = bar) above the aggregate - also gets pushed through because 'b' is a grouping column let scan = TestScanBuilder::new(schema()).with_support(true).build(); let coalesce = Arc::new(CoalesceBatchesExec::new(scan, 10)); @@ -586,7 +586,7 @@ fn test_no_pushdown_through_aggregates() { let predicate = col_lit_predicate("b", "bar", &schema()); let plan = Arc::new(FilterExec::try_new(predicate, coalesce).unwrap()); - // expect the predicate to be pushed down into the DataSource + // Both filters should be pushed down to the DataSource since both reference grouping columns insta::assert_snapshot!( OptimizationTest::new(plan, FilterPushdown::new(), true), @r" @@ -600,11 +600,10 @@ fn test_no_pushdown_through_aggregates() { - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true output: Ok: - - FilterExec: b@1 = bar - - CoalesceBatchesExec: target_batch_size=100 - - AggregateExec: mode=Final, gby=[a@0 as a, b@1 as b], aggr=[cnt], ordering_mode=PartiallySorted([0]) - - CoalesceBatchesExec: target_batch_size=10 - - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=a@0 = foo + - CoalesceBatchesExec: target_batch_size=100 + - AggregateExec: mode=Final, gby=[a@0 as a, b@1 as b], aggr=[cnt], ordering_mode=Sorted + - CoalesceBatchesExec: target_batch_size=10 + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=a@0 = foo AND b@1 = bar " ); } @@ -860,20 +859,17 @@ async fn test_topk_filter_passes_through_coalesce_partitions() { ]; // Create a source that supports all batches - let source = Arc::new(TestSource::new(true, batches)); - - let base_config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test://").unwrap(), - Arc::clone(&schema()), - source, - ) - .with_file_groups(vec![ - // Partition 0 - FileGroup::new(vec![PartitionedFile::new("test1.parquet", 123)]), - // Partition 1 - FileGroup::new(vec![PartitionedFile::new("test2.parquet", 123)]), - ]) - .build(); + let source = Arc::new(TestSource::new(schema(), true, batches)); + + let base_config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test://").unwrap(), source) + .with_file_groups(vec![ + // Partition 0 + FileGroup::new(vec![PartitionedFile::new("test1.parquet", 123)]), + // Partition 1 + FileGroup::new(vec![PartitionedFile::new("test2.parquet", 123)]), + ]) + .build(); let scan = DataSourceExec::from_data_source(base_config); @@ -1892,3 +1888,445 @@ fn col_lit_predicate( Arc::new(Literal::new(scalar_value)), )) } + +#[tokio::test] +async fn test_aggregate_filter_pushdown() { + // Test that filters can pass through AggregateExec even with aggregate functions + // when the filter references grouping columns + // Simulates: SELECT a, COUNT(b) FROM table WHERE a = 'x' GROUP BY a + + let batches = + vec![ + record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(), + ]; + + let scan = TestScanBuilder::new(schema()) + .with_support(true) + .with_batches(batches) + .build(); + + // Create an aggregate: GROUP BY a with COUNT(b) + let group_by = PhysicalGroupBy::new_single(vec![( + col("a", &schema()).unwrap(), + "a".to_string(), + )]); + + // Add COUNT aggregate + let count_expr = + AggregateExprBuilder::new(count_udaf(), vec![col("b", &schema()).unwrap()]) + .schema(schema()) + .alias("count") + .build() + .unwrap(); + + let aggregate = Arc::new( + AggregateExec::try_new( + 
AggregateMode::Partial, + group_by, + vec![count_expr.into()], // Has aggregate function + vec![None], // No filter on the aggregate function + Arc::clone(&scan), + schema(), + ) + .unwrap(), + ); + + // Add a filter on the grouping column 'a' + let predicate = col_lit_predicate("a", "x", &schema()); + let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap()) + as Arc; + + // Even with aggregate functions, filter on grouping column should be pushed through + insta::assert_snapshot!( + OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new(), true), + @r" + OptimizationTest: + input: + - FilterExec: a@0 = x + - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true + output: + Ok: + - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count], ordering_mode=Sorted + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=a@0 = x + " + ); +} + +#[tokio::test] +async fn test_no_pushdown_filter_on_aggregate_result() { + // Test that filters on aggregate results (not grouping columns) are NOT pushed through + // SELECT a, COUNT(b) as cnt FROM table GROUP BY a HAVING cnt > 5 + // The filter on 'cnt' cannot be pushed down because it's an aggregate result + + let batches = + vec![ + record_batch!(("a", Utf8, ["x", "y"]), ("b", Utf8, ["foo", "bar"])).unwrap(), + ]; + + let scan = TestScanBuilder::new(schema()) + .with_support(true) + .with_batches(batches) + .build(); + + // Create an aggregate: GROUP BY a with COUNT(b) + let group_by = PhysicalGroupBy::new_single(vec![( + col("a", &schema()).unwrap(), + "a".to_string(), + )]); + + // Add COUNT aggregate + let count_expr = + AggregateExprBuilder::new(count_udaf(), vec![col("b", &schema()).unwrap()]) + .schema(schema()) + .alias("count") + .build() + .unwrap(); + + let aggregate = Arc::new( + AggregateExec::try_new( + AggregateMode::Partial, + group_by, + vec![count_expr.into()], + vec![None], + Arc::clone(&scan), + schema(), + ) + .unwrap(), + ); + + // Add a filter on the aggregate output column + // This simulates filtering on COUNT result, which should NOT be pushed through + let agg_schema = aggregate.schema(); + let predicate = Arc::new(BinaryExpr::new( + Arc::new(Column::new_with_schema("count[count]", &agg_schema).unwrap()), + Operator::Gt, + Arc::new(Literal::new(ScalarValue::Int64(Some(5)))), + )); + let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap()) + as Arc; + + // The filter should NOT be pushed through the aggregate since it's on an aggregate result + insta::assert_snapshot!( + OptimizationTest::new(Arc::clone(&plan), FilterPushdown::new(), true), + @r" + OptimizationTest: + input: + - FilterExec: count[count]@1 > 5 + - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true + output: + Ok: + - FilterExec: count[count]@1 > 5 + - AggregateExec: mode=Partial, gby=[a@0 as a], aggr=[count] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true + " + ); +} + +#[test] +fn test_pushdown_filter_on_non_first_grouping_column() { + // Test that filters on non-first grouping columns are still pushed down + // SELECT a, b, count(*) as cnt FROM table GROUP BY a, b HAVING b = 'bar' + // The filter is on 'b' (second grouping 
column), should push down + let scan = TestScanBuilder::new(schema()).with_support(true).build(); + + let aggregate_expr = + vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; + + let group_by = PhysicalGroupBy::new_single(vec![ + (col("a", &schema()).unwrap(), "a".to_string()), + (col("b", &schema()).unwrap(), "b".to_string()), + ]); + + let aggregate = Arc::new( + AggregateExec::try_new( + AggregateMode::Final, + group_by, + aggregate_expr.clone(), + vec![None], + scan, + schema(), + ) + .unwrap(), + ); + + let predicate = col_lit_predicate("b", "bar", &schema()); + let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap()); + + insta::assert_snapshot!( + OptimizationTest::new(plan, FilterPushdown::new(), true), + @r" + OptimizationTest: + input: + - FilterExec: b@1 = bar + - AggregateExec: mode=Final, gby=[a@0 as a, b@1 as b], aggr=[cnt] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true + output: + Ok: + - AggregateExec: mode=Final, gby=[a@0 as a, b@1 as b], aggr=[cnt], ordering_mode=PartiallySorted([1]) + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=b@1 = bar + " + ); +} + +#[test] +fn test_no_pushdown_grouping_sets_filter_on_missing_column() { + // Test that filters on columns missing from some grouping sets are NOT pushed through + let scan = TestScanBuilder::new(schema()).with_support(true).build(); + + let aggregate_expr = + vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; + + // Create GROUPING SETS with (a, b) and (b) + let group_by = PhysicalGroupBy::new( + vec![ + (col("a", &schema()).unwrap(), "a".to_string()), + (col("b", &schema()).unwrap(), "b".to_string()), + ], + vec![ + ( + Arc::new(Literal::new(ScalarValue::Utf8(None))), + "a".to_string(), + ), + ( + Arc::new(Literal::new(ScalarValue::Utf8(None))), + "b".to_string(), + ), + ], + vec![ + vec![false, false], // (a, b) - both present + vec![true, false], // (b) - a is NULL, b present + ], + ); + + let aggregate = Arc::new( + AggregateExec::try_new( + AggregateMode::Final, + group_by, + aggregate_expr.clone(), + vec![None], + scan, + schema(), + ) + .unwrap(), + ); + + // Filter on column 'a' which is missing in the second grouping set, should not be pushed down + let predicate = col_lit_predicate("a", "foo", &schema()); + let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap()); + + insta::assert_snapshot!( + OptimizationTest::new(plan, FilterPushdown::new(), true), + @r" + OptimizationTest: + input: + - FilterExec: a@0 = foo + - AggregateExec: mode=Final, gby=[(a@0 as a, b@1 as b), (NULL as a, b@1 as b)], aggr=[cnt] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true + output: + Ok: + - FilterExec: a@0 = foo + - AggregateExec: mode=Final, gby=[(a@0 as a, b@1 as b), (NULL as a, b@1 as b)], aggr=[cnt] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true + " + ); +} + +#[test] +fn test_pushdown_grouping_sets_filter_on_common_column() { + // Test that filters on columns present in ALL grouping sets ARE pushed through + let scan = 
TestScanBuilder::new(schema()).with_support(true).build(); + + let aggregate_expr = + vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; + + // Create GROUPING SETS with (a, b) and (b) + let group_by = PhysicalGroupBy::new( + vec![ + (col("a", &schema()).unwrap(), "a".to_string()), + (col("b", &schema()).unwrap(), "b".to_string()), + ], + vec![ + ( + Arc::new(Literal::new(ScalarValue::Utf8(None))), + "a".to_string(), + ), + ( + Arc::new(Literal::new(ScalarValue::Utf8(None))), + "b".to_string(), + ), + ], + vec![ + vec![false, false], // (a, b) - both present + vec![true, false], // (b) - a is NULL, b present + ], + ); + + let aggregate = Arc::new( + AggregateExec::try_new( + AggregateMode::Final, + group_by, + aggregate_expr.clone(), + vec![None], + scan, + schema(), + ) + .unwrap(), + ); + + // Filter on column 'b' which is present in all grouping sets will be pushed down + let predicate = col_lit_predicate("b", "bar", &schema()); + let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap()); + + insta::assert_snapshot!( + OptimizationTest::new(plan, FilterPushdown::new(), true), + @r" + OptimizationTest: + input: + - FilterExec: b@1 = bar + - AggregateExec: mode=Final, gby=[(a@0 as a, b@1 as b), (NULL as a, b@1 as b)], aggr=[cnt] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true + output: + Ok: + - AggregateExec: mode=Final, gby=[(a@0 as a, b@1 as b), (NULL as a, b@1 as b)], aggr=[cnt], ordering_mode=PartiallySorted([1]) + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=b@1 = bar + " + ); +} + +#[test] +fn test_pushdown_with_empty_group_by() { + // Test that filters can be pushed down when GROUP BY is empty (no grouping columns) + // SELECT count(*) as cnt FROM table WHERE a = 'foo' + // There are no grouping columns, so the filter should still push down + let scan = TestScanBuilder::new(schema()).with_support(true).build(); + + let aggregate_expr = + vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("c", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; + + // Empty GROUP BY - no grouping columns + let group_by = PhysicalGroupBy::new_single(vec![]); + + let aggregate = Arc::new( + AggregateExec::try_new( + AggregateMode::Final, + group_by, + aggregate_expr.clone(), + vec![None], + scan, + schema(), + ) + .unwrap(), + ); + + // Filter on 'a' + let predicate = col_lit_predicate("a", "foo", &schema()); + let plan = Arc::new(FilterExec::try_new(predicate, aggregate).unwrap()); + + // The filter should be pushed down even with empty GROUP BY + insta::assert_snapshot!( + OptimizationTest::new(plan, FilterPushdown::new(), true), + @r" + OptimizationTest: + input: + - FilterExec: a@0 = foo + - AggregateExec: mode=Final, gby=[], aggr=[cnt] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true + output: + Ok: + - AggregateExec: mode=Final, gby=[], aggr=[cnt] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=a@0 = foo + " + ); +} + +#[test] +fn test_pushdown_with_computed_grouping_key() { + // Test filter pushdown with computed grouping expression + // SELECT (c + 1.0) as c_plus_1, count(*) FROM table WHERE c 
> 5.0 GROUP BY (c + 1.0) + + let scan = TestScanBuilder::new(schema()).with_support(true).build(); + + let predicate = Arc::new(BinaryExpr::new( + col("c", &schema()).unwrap(), + Operator::Gt, + Arc::new(Literal::new(ScalarValue::Float64(Some(5.0)))), + )) as Arc; + let filter = Arc::new(FilterExec::try_new(predicate, scan).unwrap()); + + let aggregate_expr = + vec![ + AggregateExprBuilder::new(count_udaf(), vec![col("a", &schema()).unwrap()]) + .schema(schema()) + .alias("cnt") + .build() + .map(Arc::new) + .unwrap(), + ]; + + let c_plus_one = Arc::new(BinaryExpr::new( + col("c", &schema()).unwrap(), + Operator::Plus, + Arc::new(Literal::new(ScalarValue::Float64(Some(1.0)))), + )) as Arc; + + let group_by = + PhysicalGroupBy::new_single(vec![(c_plus_one, "c_plus_1".to_string())]); + + let plan = Arc::new( + AggregateExec::try_new( + AggregateMode::Final, + group_by, + aggregate_expr.clone(), + vec![None], + filter, + schema(), + ) + .unwrap(), + ); + + // The filter should be pushed down because 'c' is extracted from the grouping expression (c + 1.0) + insta::assert_snapshot!( + OptimizationTest::new(plan, FilterPushdown::new(), true), + @r" + OptimizationTest: + input: + - AggregateExec: mode=Final, gby=[c@2 + 1 as c_plus_1], aggr=[cnt] + - FilterExec: c@2 > 5 + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true + output: + Ok: + - AggregateExec: mode=Final, gby=[c@2 + 1 as c_plus_1], aggr=[cnt] + - DataSourceExec: file_groups={1 group: [[test.parquet]]}, projection=[a, b, c], file_type=test, pushdown_supported=true, predicate=c@2 > 5 + " + ); +} diff --git a/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs b/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs index f05f3f00281d..2bd70221f41e 100644 --- a/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs +++ b/datafusion/core/tests/physical_optimizer/filter_pushdown/util.rs @@ -52,7 +52,7 @@ use std::{ pub struct TestOpener { batches: Vec, batch_size: Option, - schema: Option, + schema: SchemaRef, projection: Option>, predicate: Option>, } @@ -70,23 +70,23 @@ impl FileOpener for TestOpener { } batches = new_batches.into_iter().collect(); } - if let Some(schema) = &self.schema { - let factory = DefaultSchemaAdapterFactory::from_schema(Arc::clone(schema)); - let (mapper, projection) = factory.map_schema(&batches[0].schema()).unwrap(); - let mut new_batches = Vec::new(); - for batch in batches { - let batch = if let Some(predicate) = &self.predicate { - batch_filter(&batch, predicate)? - } else { - batch - }; - let batch = batch.project(&projection).unwrap(); - let batch = mapper.map_batch(batch).unwrap(); - new_batches.push(batch); - } - batches = new_batches; + let factory = DefaultSchemaAdapterFactory::from_schema(Arc::clone(&self.schema)); + let (mapper, projection) = factory.map_schema(&batches[0].schema()).unwrap(); + let mut new_batches = Vec::new(); + for batch in batches { + let batch = if let Some(predicate) = &self.predicate { + batch_filter(&batch, predicate)? 
+ } else { + batch + }; + + let batch = batch.project(&projection).unwrap(); + let batch = mapper.map_batch(batch).unwrap(); + new_batches.push(batch); } + batches = new_batches; + if let Some(projection) = &self.projection { batches = batches .into_iter() @@ -101,26 +101,35 @@ impl FileOpener for TestOpener { } /// A placeholder data source that accepts filter pushdown -#[derive(Clone, Default)] +#[derive(Clone)] pub struct TestSource { support: bool, predicate: Option>, statistics: Option, batch_size: Option, batches: Vec, - schema: Option, + schema: SchemaRef, metrics: ExecutionPlanMetricsSet, projection: Option>, schema_adapter_factory: Option>, + table_schema: datafusion_datasource::TableSchema, } impl TestSource { - pub fn new(support: bool, batches: Vec) -> Self { + pub fn new(schema: SchemaRef, support: bool, batches: Vec) -> Self { + let table_schema = + datafusion_datasource::TableSchema::new(Arc::clone(&schema), vec![]); Self { + schema, support, metrics: ExecutionPlanMetricsSet::new(), batches, - ..Default::default() + predicate: None, + statistics: None, + batch_size: None, + projection: None, + schema_adapter_factory: None, + table_schema, } } } @@ -135,7 +144,7 @@ impl FileSource for TestSource { Arc::new(TestOpener { batches: self.batches.clone(), batch_size: self.batch_size, - schema: self.schema.clone(), + schema: Arc::clone(&self.schema), projection: self.projection.clone(), predicate: self.predicate.clone(), }) @@ -156,16 +165,9 @@ impl FileSource for TestSource { }) } - fn with_schema(&self, schema: SchemaRef) -> Arc { - Arc::new(TestSource { - schema: Some(schema), - ..self.clone() - }) - } - fn with_projection(&self, config: &FileScanConfig) -> Arc { Arc::new(TestSource { - projection: config.projection.clone(), + projection: config.projection_exprs.as_ref().map(|p| p.column_indices()), ..self.clone() }) } @@ -255,6 +257,10 @@ impl FileSource for TestSource { fn schema_adapter_factory(&self) -> Option> { self.schema_adapter_factory.clone() } + + fn table_schema(&self) -> &datafusion_datasource::TableSchema { + &self.table_schema + } } #[derive(Debug, Clone)] @@ -284,14 +290,15 @@ impl TestScanBuilder { } pub fn build(self) -> Arc { - let source = Arc::new(TestSource::new(self.support, self.batches)); - let base_config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test://").unwrap(), + let source = Arc::new(TestSource::new( Arc::clone(&self.schema), - source, - ) - .with_file(PartitionedFile::new("test.parquet", 123)) - .build(); + self.support, + self.batches, + )); + let base_config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test://").unwrap(), source) + .with_file(PartitionedFile::new("test.parquet", 123)) + .build(); DataSourceExec::from_data_source(base_config) } } diff --git a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs index c51a5e02c9c3..9d39a80fb9df 100644 --- a/datafusion/core/tests/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/tests/physical_optimizer/projection_pushdown.rs @@ -24,9 +24,10 @@ use datafusion::datasource::listing::PartitionedFile; use datafusion::datasource::memory::MemorySourceConfig; use datafusion::datasource::physical_plan::CsvSource; use datafusion::datasource::source::DataSourceExec; -use datafusion_common::config::ConfigOptions; +use datafusion_common::config::{ConfigOptions, CsvOptions}; use datafusion_common::{JoinSide, JoinType, NullEquality, Result, ScalarValue}; use 
datafusion_datasource::file_scan_config::FileScanConfigBuilder; +use datafusion_datasource::TableSchema; use datafusion_execution::object_store::ObjectStoreUrl; use datafusion_execution::{SendableRecordBatchStream, TaskContext}; use datafusion_expr::{ @@ -384,14 +385,19 @@ fn create_simple_csv_exec() -> Arc { Field::new("d", DataType::Int32, true), Field::new("e", DataType::Int32, true), ])); - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema, - Arc::new(CsvSource::new(false, 0, 0)), - ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_projection(Some(vec![0, 1, 2, 3, 4])) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: 0, + quote: 0, + ..Default::default() + }; + Arc::new(CsvSource::new(schema.clone()).with_csv_options(options)) + }) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_projection_indices(Some(vec![0, 1, 2, 3, 4])) + .build(); DataSourceExec::from_data_source(config) } @@ -403,14 +409,19 @@ fn create_projecting_csv_exec() -> Arc { Field::new("c", DataType::Int32, true), Field::new("d", DataType::Int32, true), ])); - let config = FileScanConfigBuilder::new( - ObjectStoreUrl::parse("test:///").unwrap(), - schema, - Arc::new(CsvSource::new(false, 0, 0)), - ) - .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_projection(Some(vec![3, 2, 1])) - .build(); + let config = + FileScanConfigBuilder::new(ObjectStoreUrl::parse("test:///").unwrap(), { + let options = CsvOptions { + has_header: Some(false), + delimiter: 0, + quote: 0, + ..Default::default() + }; + Arc::new(CsvSource::new(schema.clone()).with_csv_options(options)) + }) + .with_file(PartitionedFile::new("x".to_string(), 100)) + .with_projection_indices(Some(vec![3, 2, 1])) + .build(); DataSourceExec::from_data_source(config) } @@ -1589,14 +1600,22 @@ fn partitioned_data_source() -> Arc { Field::new("string_col", DataType::Utf8, true), ])); + let options = CsvOptions { + has_header: Some(false), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let table_schema = TableSchema::new( + Arc::clone(&file_schema), + vec![Arc::new(Field::new("partition_col", DataType::Utf8, true))], + ); let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - file_schema.clone(), - Arc::new(CsvSource::default()), + Arc::new(CsvSource::new(table_schema).with_csv_options(options)), ) .with_file(PartitionedFile::new("x".to_string(), 100)) - .with_table_partition_cols(vec![Field::new("partition_col", DataType::Utf8, true)]) - .with_projection(Some(vec![0, 1, 2])) + .with_projection_indices(Some(vec![0, 1, 2])) .build(); DataSourceExec::from_data_source(config) diff --git a/datafusion/core/tests/physical_optimizer/sanity_checker.rs b/datafusion/core/tests/physical_optimizer/sanity_checker.rs index ce6eb13c86c4..9867ed173341 100644 --- a/datafusion/core/tests/physical_optimizer/sanity_checker.rs +++ b/datafusion/core/tests/physical_optimizer/sanity_checker.rs @@ -421,7 +421,7 @@ async fn test_bounded_window_agg_sort_requirement() -> Result<()> { assert_snapshot!( actual, @r#" - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND 
CURRENT ROW], mode=[Sorted] SortExec: expr=[c9@0 ASC NULLS LAST], preserve_partitioning=[false] DataSourceExec: partitions=1, partition_sizes=[0] "# @@ -449,7 +449,7 @@ async fn test_bounded_window_agg_no_sort_requirement() -> Result<()> { assert_snapshot!( actual, @r#" - BoundedWindowAggExec: wdw=[count: Field { name: "count", data_type: Int64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] + BoundedWindowAggExec: wdw=[count: Field { "count": Int64 }, frame: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], mode=[Sorted] DataSourceExec: partitions=1, partition_sizes=[0] "# ); diff --git a/datafusion/core/tests/physical_optimizer/test_utils.rs b/datafusion/core/tests/physical_optimizer/test_utils.rs index 8ca33f3d4abb..60fec2243621 100644 --- a/datafusion/core/tests/physical_optimizer/test_utils.rs +++ b/datafusion/core/tests/physical_optimizer/test_utils.rs @@ -73,8 +73,7 @@ use datafusion_physical_plan::{ pub fn parquet_exec(schema: SchemaRef) -> Arc { let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(schema)), ) .with_file(PartitionedFile::new("x".to_string(), 100)) .build(); @@ -89,8 +88,7 @@ pub(crate) fn parquet_exec_with_sort( ) -> Arc { let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - schema, - Arc::new(ParquetSource::default()), + Arc::new(ParquetSource::new(schema)), ) .with_file(PartitionedFile::new("x".to_string(), 100)) .with_output_ordering(output_ordering) @@ -127,8 +125,7 @@ pub(crate) fn parquet_exec_with_stats(file_size: u64) -> Arc { let config = FileScanConfigBuilder::new( ObjectStoreUrl::parse("test:///").unwrap(), - schema(), - Arc::new(ParquetSource::new(Default::default())), + Arc::new(ParquetSource::new(schema())), ) .with_file(PartitionedFile::new("x".to_string(), file_size)) .with_statistics(statistics) diff --git a/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs b/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs index c3c92a9028d6..191529816481 100644 --- a/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs +++ b/datafusion/core/tests/schema_adapter/schema_adapter_integration_tests.rs @@ -27,12 +27,14 @@ use datafusion::datasource::physical_plan::{ }; use datafusion::physical_plan::ExecutionPlan; use datafusion::prelude::SessionContext; +use datafusion_common::config::CsvOptions; use datafusion_common::ColumnStatistics; use datafusion_datasource::file_scan_config::FileScanConfigBuilder; use datafusion_datasource::schema_adapter::{ SchemaAdapter, SchemaAdapterFactory, SchemaMapper, }; use datafusion_datasource::source::DataSourceExec; +use datafusion_datasource::TableSchema; use datafusion_execution::object_store::ObjectStoreUrl; use object_store::{memory::InMemory, path::Path, ObjectStore}; use parquet::arrow::ArrowWriter; @@ -182,17 +184,17 @@ async fn test_parquet_integration_with_schema_adapter() -> Result<()> { let ctx = SessionContext::new(); ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); - // Create a ParquetSource with the adapter factory - let file_source = ParquetSource::default() - .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?; - // Create a table schema with uppercase column names let table_schema = Arc::new(Schema::new(vec![ Field::new("ID", DataType::Int32, false), Field::new("NAME", 
DataType::Utf8, true), ])); - let config = FileScanConfigBuilder::new(store_url, table_schema.clone(), file_source) + // Create a ParquetSource with the adapter factory + let file_source = ParquetSource::new(table_schema.clone()) + .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?; + + let config = FileScanConfigBuilder::new(store_url, file_source) .with_file(PartitionedFile::new(path, file_size)) .build(); @@ -245,10 +247,10 @@ async fn test_parquet_integration_with_schema_adapter_and_expression_rewriter( ctx.register_object_store(store_url.as_ref(), Arc::clone(&store)); // Create a ParquetSource with the adapter factory - let file_source = ParquetSource::default() + let file_source = ParquetSource::new(batch.schema()) .with_schema_adapter_factory(Arc::new(UppercaseAdapterFactory {}))?; - let config = FileScanConfigBuilder::new(store_url, batch.schema(), file_source) + let config = FileScanConfigBuilder::new(store_url, file_source) .with_file(PartitionedFile::new(path, file_size)) .build(); @@ -282,9 +284,12 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { // Create a test factory let factory = Arc::new(UppercaseAdapterFactory {}); - // Test ArrowSource + // Test ArrowFileSource { - let source = ArrowSource::default(); + let schema = + Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); + let table_schema = TableSchema::new(schema, vec![]); + let source = ArrowSource::new_file_source(table_schema); let source_with_adapter = source .clone() .with_schema_adapter_factory(factory.clone()) @@ -304,7 +309,9 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { // Test ParquetSource #[cfg(feature = "parquet")] { - let source = ParquetSource::default(); + let schema = + Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); + let source = ParquetSource::new(schema); let source_with_adapter = source .clone() .with_schema_adapter_factory(factory.clone()) @@ -323,7 +330,15 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { // Test CsvSource { - let source = CsvSource::default(); + let schema = + Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); + let options = CsvOptions { + has_header: Some(true), + delimiter: b',', + quote: b'"', + ..Default::default() + }; + let source = CsvSource::new(schema).with_csv_options(options); let source_with_adapter = source .clone() .with_schema_adapter_factory(factory.clone()) @@ -342,7 +357,10 @@ async fn test_multi_source_schema_adapter_reuse() -> Result<()> { // Test JsonSource { - let source = JsonSource::default(); + let schema = + Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)])); + let table_schema = TableSchema::new(schema, vec![]); + let source = JsonSource::new(table_schema); let source_with_adapter = source .clone() .with_schema_adapter_factory(factory.clone()) diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index 6d386cc456d8..929de7a5304d 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -61,37 +61,86 @@ async fn explain_analyze_baseline_metrics() { assert_metrics!( &formatted, "AggregateExec: mode=Partial, gby=[]", - "metrics=[output_rows=3, elapsed_compute=" + "metrics=[output_rows=3, elapsed_compute=", + "output_bytes=", + "output_batches=3" ); + assert_metrics!( &formatted, - "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1]", - "metrics=[output_rows=5, elapsed_compute=" + 
"AggregateExec: mode=Partial, gby=[c1@0 as c1]", + "reduction_factor=5.1% (5/99)" ); + + { + let expected_batch_count_after_repartition = + if cfg!(not(feature = "force_hash_collisions")) { + "output_batches=3" + } else { + "output_batches=1" + }; + + assert_metrics!( + &formatted, + "AggregateExec: mode=FinalPartitioned, gby=[c1@0 as c1]", + "metrics=[output_rows=5, elapsed_compute=", + "output_bytes=", + expected_batch_count_after_repartition + ); + + assert_metrics!( + &formatted, + "RepartitionExec: partitioning=Hash([c1@0], 3), input_partitions=3", + "metrics=[output_rows=5, elapsed_compute=", + "output_bytes=", + expected_batch_count_after_repartition + ); + + assert_metrics!( + &formatted, + "ProjectionExec: expr=[]", + "metrics=[output_rows=5, elapsed_compute=", + "output_bytes=", + expected_batch_count_after_repartition + ); + + assert_metrics!( + &formatted, + "CoalesceBatchesExec: target_batch_size=4096", + "metrics=[output_rows=5, elapsed_compute", + "output_bytes=", + expected_batch_count_after_repartition + ); + } + assert_metrics!( &formatted, "FilterExec: c13@1 != C2GT5KVyOPZpgKVl110TyZO0NcJ434", - "metrics=[output_rows=99, elapsed_compute=" - ); - assert_metrics!( - &formatted, - "ProjectionExec: expr=[]", - "metrics=[output_rows=5, elapsed_compute=" + "metrics=[output_rows=99, elapsed_compute=", + "output_bytes=", + "output_batches=1" ); + assert_metrics!( &formatted, - "CoalesceBatchesExec: target_batch_size=4096", - "metrics=[output_rows=5, elapsed_compute" + "FilterExec: c13@1 != C2GT5KVyOPZpgKVl110TyZO0NcJ434", + "selectivity=99% (99/100)" ); + assert_metrics!( &formatted, "UnionExec", - "metrics=[output_rows=3, elapsed_compute=" + "metrics=[output_rows=3, elapsed_compute=", + "output_bytes=", + "output_batches=3" ); + assert_metrics!( &formatted, "WindowAggExec", - "metrics=[output_rows=1, elapsed_compute=" + "metrics=[output_rows=1, elapsed_compute=", + "output_bytes=", + "output_batches=1" ); fn expected_to_have_metrics(plan: &dyn ExecutionPlan) -> bool { @@ -193,9 +242,13 @@ async fn explain_analyze_level() { for (level, needle, should_contain) in [ (ExplainAnalyzeLevel::Summary, "spill_count", false), + (ExplainAnalyzeLevel::Summary, "output_batches", false), (ExplainAnalyzeLevel::Summary, "output_rows", true), + (ExplainAnalyzeLevel::Summary, "output_bytes", true), (ExplainAnalyzeLevel::Dev, "spill_count", true), (ExplainAnalyzeLevel::Dev, "output_rows", true), + (ExplainAnalyzeLevel::Dev, "output_bytes", true), + (ExplainAnalyzeLevel::Dev, "output_batches", true), ] { let plan = collect_plan(sql, level).await; assert_eq!( @@ -234,6 +287,37 @@ async fn explain_analyze_level_datasource_parquet() { } } +#[tokio::test] +async fn explain_analyze_parquet_pruning_metrics() { + let table_name = "tpch_lineitem_small"; + let parquet_path = "tests/data/tpch_lineitem_small.parquet"; + let ctx = SessionContext::new(); + ctx.register_parquet(table_name, parquet_path, ParquetReadOptions::default()) + .await + .expect("register parquet table for explain analyze test"); + + // Test scenario: + // This table's l_orderkey has range [1, 7] + // So the following query can't prune the file: + // select * from tpch_lineitem_small where l_orderkey = 5; + // If change filter to `l_orderkey=10`, the whole file can be pruned using stat. 
+ for (l_orderkey, expected_pruning_metrics) in + [(5, "1 total → 1 matched"), (10, "1 total → 0 matched")] + { + let sql = format!( + "explain analyze select * from {table_name} where l_orderkey = {l_orderkey};" + ); + + let plan = + collect_plan_with_context(&sql, &ctx, ExplainAnalyzeLevel::Summary).await; + + let expected_metrics = + format!("files_ranges_pruned_statistics={expected_pruning_metrics}"); + + assert_metrics!(&plan, "DataSourceExec", &expected_metrics); + } +} + #[tokio::test] async fn csv_explain_plans() { // This test verify the look of each plan in its full cycle plan creation @@ -732,14 +816,12 @@ async fn test_physical_plan_display_indent_multi_children() { CoalesceBatchesExec: target_batch_size=4096 HashJoinExec: mode=Partitioned, join_type=Inner, on=[(c1@0, c2@0)], projection=[c1@0] CoalesceBatchesExec: target_batch_size=4096 - RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=9000 - RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1 - DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true + RepartitionExec: partitioning=Hash([c1@0], 9000), input_partitions=1 + DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true CoalesceBatchesExec: target_batch_size=4096 - RepartitionExec: partitioning=Hash([c2@0], 9000), input_partitions=9000 - RepartitionExec: partitioning=RoundRobinBatch(9000), input_partitions=1 - ProjectionExec: expr=[c1@0 as c2] - DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true + RepartitionExec: partitioning=Hash([c2@0], 9000), input_partitions=1 + ProjectionExec: expr=[c1@0 as c2] + DataSourceExec: file_groups={1 group: [[ARROW_TEST_DATA/csv/aggregate_test_100.csv]]}, projection=[c1], file_type=csv, has_header=true "### ); } @@ -798,10 +880,29 @@ async fn parquet_explain_analyze() { // should contain aggregated stats assert_contains!(&formatted, "output_rows=8"); - assert_contains!(&formatted, "row_groups_matched_bloom_filter=0"); - assert_contains!(&formatted, "row_groups_pruned_bloom_filter=0"); - assert_contains!(&formatted, "row_groups_matched_statistics=1"); - assert_contains!(&formatted, "row_groups_pruned_statistics=0"); + assert_contains!( + &formatted, + "row_groups_pruned_bloom_filter=1 total \u{2192} 1 matched" + ); + assert_contains!( + &formatted, + "row_groups_pruned_statistics=1 total \u{2192} 1 matched" + ); + + // The order of metrics is expected to be the same as the actual pruning order + // (file -> row-group -> page) + let i_file = formatted.find("files_ranges_pruned_statistics").unwrap(); + let i_rowgroup_stat = formatted.find("row_groups_pruned_statistics").unwrap(); + let i_rowgroup_bloomfilter = + formatted.find("row_groups_pruned_bloom_filter").unwrap(); + let i_page = formatted.find("page_index_rows_pruned").unwrap(); + + assert!( + (i_file < i_rowgroup_stat) + && (i_rowgroup_stat < i_rowgroup_bloomfilter) + && (i_rowgroup_bloomfilter < i_page), + "The parquet pruning metrics should be displayed in the order: file range -> row group statistics -> row group bloom filter -> page index."
+ ); } // This test reproduces the behavior described in @@ -941,9 +1042,7 @@ async fn parquet_explain_analyze_verbose() { .to_string(); // should contain the raw per file stats (with the label) - assert_contains!(&formatted, "row_groups_matched_bloom_filter{partition=0"); assert_contains!(&formatted, "row_groups_pruned_bloom_filter{partition=0"); - assert_contains!(&formatted, "row_groups_matched_statistics{partition=0"); assert_contains!(&formatted, "row_groups_pruned_statistics{partition=0"); } @@ -1027,3 +1126,33 @@ async fn csv_explain_analyze_with_statistics() { ", statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:)]]" ); } + +#[tokio::test] +async fn nested_loop_join_selectivity() { + for (join_type, expected_selectivity) in [ + ("INNER", "1% (1/100)"), + ("LEFT", "10% (10/100)"), + ("RIGHT", "10% (10/100)"), + // 1 match + 9 left + 9 right = 19 + ("FULL", "19% (19/100)"), + ] { + let ctx = SessionContext::new(); + let sql = format!( + "EXPLAIN ANALYZE SELECT * \ + FROM generate_series(1, 10) as t1(a) \ + {join_type} JOIN generate_series(1, 10) as t2(b) \ + ON (t1.a + t2.b) = 20" + ); + + let actual = execute_to_batches(&ctx, sql.as_str()).await; + let formatted = arrow::util::pretty::pretty_format_batches(&actual) + .unwrap() + .to_string(); + + assert_metrics!( + &formatted, + "NestedLoopJoinExec", + &format!("selectivity={expected_selectivity}") + ); + } +} diff --git a/datafusion/core/tests/sql/joins.rs b/datafusion/core/tests/sql/joins.rs index 7a5983447592..b64a1e18f3f6 100644 --- a/datafusion/core/tests/sql/joins.rs +++ b/datafusion/core/tests/sql/joins.rs @@ -73,13 +73,11 @@ async fn join_change_in_planner() -> Result<()> { @r" SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a1@0 AS Int64) > CAST(a1@1 AS Int64) + 3 AND CAST(a1@0 AS Int64) < CAST(a1@1 AS Int64) + 10 CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a1@0 ASC NULLS LAST - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8, preserve_order=true, sort_exprs=a1@0 ASC NULLS LAST - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] " ); Ok(()) @@ -134,13 +132,11 @@ async fn join_no_order_on_filter() -> Result<()> { @r" SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a3@0 AS Int64) > CAST(a3@1 AS Int64) + 3 AND CAST(a3@0 AS Int64) < CAST(a3@1 AS Int64) + 10 CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2, a3], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] + RepartitionExec: 
partitioning=Hash([a2@1], 8), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a1, a2, a3], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2, a3], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a1, a2, a3], infinite_source=true, output_ordering=[a1@0 ASC NULLS LAST] " ); Ok(()) @@ -177,13 +173,11 @@ async fn join_change_in_planner_without_sort() -> Result<()> { @r" SymmetricHashJoinExec: mode=Partitioned, join_type=Full, on=[(a2@1, a2@1)], filter=CAST(a1@0 AS Int64) > CAST(a1@1 AS Int64) + 3 AND CAST(a1@0 AS Int64) < CAST(a1@1 AS Int64) + 10 CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true CoalesceBatchesExec: target_batch_size=8192 - RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=8 - RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1 - StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true + RepartitionExec: partitioning=Hash([a2@1], 8), input_partitions=1 + StreamingTableExec: partition_sizes=1, projection=[a1, a2], infinite_source=true " ); Ok(()) diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index e212ee269b15..426ec213b324 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -40,19 +40,24 @@ use std::io::Write; use std::path::PathBuf; use tempfile::TempDir; -/// A macro to assert that some particular line contains two substrings -/// -/// Usage: `assert_metrics!(actual, operator_name, metrics)` +/// A macro to assert that some particular line contains the given substrings /// +/// Usage: `assert_metrics!(actual, operator_name, metrics_1, metrics_2, ...)` macro_rules! 
diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs
index e212ee269b15..426ec213b324 100644
--- a/datafusion/core/tests/sql/mod.rs
+++ b/datafusion/core/tests/sql/mod.rs
@@ -40,19 +40,24 @@ use std::io::Write;
 use std::path::PathBuf;
 use tempfile::TempDir;
 
-/// A macro to assert that some particular line contains two substrings
-///
-/// Usage: `assert_metrics!(actual, operator_name, metrics)`
+/// A macro to assert that some particular line contains the given substrings
 ///
+/// Usage: `assert_metrics!(actual, operator_name, metrics_1, metrics_2, ...)`
 macro_rules! assert_metrics {
-    ($ACTUAL: expr, $OPERATOR_NAME: expr, $METRICS: expr) => {
+    ($ACTUAL: expr, $OPERATOR_NAME: expr, $($METRICS: expr),+) => {
         let found = $ACTUAL
             .lines()
-            .any(|line| line.contains($OPERATOR_NAME) && line.contains($METRICS));
+            .any(|line| line.contains($OPERATOR_NAME) $( && line.contains($METRICS))+);
+
+        let mut metrics = String::new();
+        $(metrics.push_str(format!(" '{}',", $METRICS).as_str());)+
+        // remove the last `,` from the string
+        metrics.pop();
+
         assert!(
             found,
-            "Can not find a line with both '{}' and '{}' in\n\n{}",
-            $OPERATOR_NAME, $METRICS, $ACTUAL
+            "Cannot find a line with operator name '{}' and metrics containing values {} in:\n\n{}",
+            $OPERATOR_NAME, metrics, $ACTUAL
        );
    };
 }
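With the variadic form above, a call site can assert several metric substrings against the same operator line at once. A hedged usage sketch (the operator line and metric strings below are made up for illustration, not taken from a real plan):

#[test]
fn assert_metrics_variadic_usage_sketch() {
    // Illustrative only: a fake EXPLAIN ANALYZE line containing two metrics.
    let formatted =
        "NestedLoopJoinExec: ..., output_rows=19, selectivity=19% (19/100)".to_string();

    // Old single-substring form still works.
    assert_metrics!(&formatted, "NestedLoopJoinExec", "output_rows=19");

    // New variadic form: every listed substring must appear on the same operator line.
    assert_metrics!(
        &formatted,
        "NestedLoopJoinExec",
        "output_rows=19",
        "selectivity=19% (19/100)"
    );
}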
diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs
index 2eb3ba36dd90..8a0f62062738 100644
--- a/datafusion/core/tests/sql/select.rs
+++ b/datafusion/core/tests/sql/select.rs
@@ -222,11 +222,11 @@ async fn test_parameter_invalid_types() -> Result<()> {
         .collect()
         .await;
     assert_snapshot!(results.unwrap_err().strip_backtrace(),
-    @r#"
-    type_coercion
-    caused by
-    Error during planning: Cannot infer common argument type for comparison operation List(Field { name: "item", data_type: Int32, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) = Int32
-    "#);
+    @r"
+    type_coercion
+    caused by
+    Error during planning: Cannot infer common argument type for comparison operation List(nullable Int32) = Int32
+    ");
     Ok(())
 }
diff --git a/datafusion/core/tests/user_defined/user_defined_aggregates.rs b/datafusion/core/tests/user_defined/user_defined_aggregates.rs
index 982b4804597e..62e8ab18b9be 100644
--- a/datafusion/core/tests/user_defined/user_defined_aggregates.rs
+++ b/datafusion/core/tests/user_defined/user_defined_aggregates.rs
@@ -954,13 +954,7 @@ impl AggregateUDFImpl for MetadataBasedAggregateUdf {
     }
 
     fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
-        let input_expr = acc_args
-            .exprs
-            .first()
-            .ok_or(exec_datafusion_err!("Expected one argument"))?;
-        let input_field = input_expr.return_field(acc_args.schema)?;
-
-        let double_output = input_field
+        let double_output = acc_args.expr_fields[0]
             .metadata()
             .get("modify_values")
             .map(|v| v == "double_output")
diff --git a/datafusion/core/tests/user_defined/user_defined_plan.rs b/datafusion/core/tests/user_defined/user_defined_plan.rs
index f0bf15d3483b..ffe0ba021edb 100644
--- a/datafusion/core/tests/user_defined/user_defined_plan.rs
+++ b/datafusion/core/tests/user_defined/user_defined_plan.rs
@@ -56,7 +56,6 @@
 //!
 //! The same answer can be produced by simply keeping track of the top
 //! N elements, reducing the total amount of required buffer memory.
-//!
 
 use std::fmt::Debug;
 use std::hash::Hash;
diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
index fb1371da6ceb..3ca8f846aa5e 100644
--- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
+++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs
@@ -1812,7 +1812,7 @@ async fn test_config_options_work_for_scalar_func() -> Result<()> {
     });
 
     let mut config = SessionConfig::new();
-    config.options_mut().execution.time_zone = "AEST".into();
+    config.options_mut().execution.time_zone = Some("AEST".into());
 
     let ctx = SessionContext::new_with_config(config);
diff --git a/datafusion/datasource-arrow/Cargo.toml b/datafusion/datasource-arrow/Cargo.toml
index b3d1e3f2accc..fbadc8708ca6 100644
--- a/datafusion/datasource-arrow/Cargo.toml
+++ b/datafusion/datasource-arrow/Cargo.toml
@@ -51,6 +51,9 @@ tokio = { workspace = true }
 
 [dev-dependencies]
 chrono = { workspace = true }
 
+# Note: add additional linter rules in lib.rs.
+# Rust does not support workspace + new linter rules in subcrates yet
+# https://github.com/rust-lang/cargo/issues/13157
 [lints]
 workspace = true
diff --git a/datafusion/datasource-arrow/src/file_format.rs b/datafusion/datasource-arrow/src/file_format.rs
index 3b8564080421..ef478e268890 100644
--- a/datafusion/datasource-arrow/src/file_format.rs
+++ b/datafusion/datasource-arrow/src/file_format.rs
@@ -20,15 +20,15 @@
 //! Works with files following the [Arrow IPC format](https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format)
 
 use std::any::Any;
-use std::borrow::Cow;
 use std::collections::HashMap;
 use std::fmt::{self, Debug};
+use std::io::{Seek, SeekFrom};
 use std::sync::Arc;
 
 use arrow::datatypes::{Schema, SchemaRef};
 use arrow::error::ArrowError;
 use arrow::ipc::convert::fb_to_schema;
-use arrow::ipc::reader::FileReader;
+use arrow::ipc::reader::{FileReader, StreamReader};
 use arrow::ipc::writer::IpcWriteOptions;
 use arrow::ipc::{root_as_message, CompressionType};
 use datafusion_common::error::Result;
@@ -45,6 +45,7 @@ use datafusion_datasource::sink::{DataSink, DataSinkExec};
 use datafusion_datasource::write::{
     get_writer_schema, ObjectWriterBuilder, SharedBuffer,
 };
+use datafusion_datasource::TableSchema;
 use datafusion_execution::{SendableRecordBatchStream, TaskContext};
 use datafusion_expr::dml::InsertOp;
 use datafusion_physical_expr_common::sort_expr::LexRequirement;
@@ -61,7 +62,9 @@ use datafusion_physical_plan::{DisplayAs, DisplayFormatType, ExecutionPlan};
 use datafusion_session::Session;
 use futures::stream::BoxStream;
 use futures::StreamExt;
-use object_store::{GetResultPayload, ObjectMeta, ObjectStore};
+use object_store::{
+    path::Path, GetOptions, GetRange, GetResultPayload, ObjectMeta, ObjectStore,
+};
 use tokio::io::AsyncWriteExt;
 
 /// Initial writing buffer size. Note this is just a size hint for efficiency. It
@@ -71,8 +74,8 @@ const INITIAL_BUFFER_BYTES: usize = 1048576;
 /// If the buffered Arrow data exceeds this size, it is flushed to object store
 const BUFFER_FLUSH_BYTES: usize = 1024000;
 
+/// Factory struct used to create [`ArrowFormat`]
 #[derive(Default, Debug)]
-/// Factory struct used to create [ArrowFormat]
 pub struct ArrowFormatFactory;
 
 impl ArrowFormatFactory {
@@ -107,7 +110,7 @@ impl GetExt for ArrowFormatFactory {
     }
 }
 
-/// Arrow `FileFormat` implementation.
+/// Arrow [`FileFormat`] implementation.
 #[derive(Default, Debug)]
 pub struct ArrowFormat;
 
@@ -150,12 +153,23 @@ impl FileFormat for ArrowFormat {
             let schema = match r.payload {
                 #[cfg(not(target_arch = "wasm32"))]
                 GetResultPayload::File(mut file, _) => {
-                    let reader = FileReader::try_new(&mut file, None)?;
-                    reader.schema()
-                }
-                GetResultPayload::Stream(stream) => {
-                    infer_schema_from_file_stream(stream).await?
+                    match FileReader::try_new(&mut file, None) {
+                        Ok(reader) => reader.schema(),
+                        Err(file_error) => {
+                            // not in the file format, but FileReader read some bytes
+                            // while trying to parse the file and so we need to rewind
+                            // it to the beginning of the file
+                            file.seek(SeekFrom::Start(0))?;
+                            match StreamReader::try_new(&mut file, None) {
+                                Ok(reader) => reader.schema(),
+                                Err(stream_error) => {
+                                    return Err(internal_datafusion_err!("Failed to parse Arrow file as either file format or stream format. File format error: {file_error}. Stream format error: {stream_error}"));
+                                }
+                            }
+                        }
+                    }
                 }
+                GetResultPayload::Stream(stream) => infer_stream_schema(stream).await?,
             };
             schemas.push(schema.as_ref().clone());
         }
@@ -175,10 +189,33 @@ impl FileFormat for ArrowFormat {
     async fn create_physical_plan(
         &self,
-        _state: &dyn Session,
+        state: &dyn Session,
         conf: FileScanConfig,
     ) -> Result<Arc<dyn ExecutionPlan>> {
-        let source = Arc::new(ArrowSource::default());
+        let object_store = state.runtime_env().object_store(&conf.object_store_url)?;
+        let object_location = &conf
+            .file_groups
+            .first()
+            .ok_or_else(|| internal_datafusion_err!("No files found in file group"))?
+            .files()
+            .first()
+            .ok_or_else(|| internal_datafusion_err!("No files found in file group"))?
+            .object_meta
+            .location;
+
+        let table_schema = TableSchema::new(
+            Arc::clone(conf.file_schema()),
+            conf.table_partition_cols().clone(),
+        );
+
+        let source: Arc<dyn FileSource> =
+            match is_object_in_arrow_ipc_file_format(object_store, object_location).await
+            {
+                Ok(true) => Arc::new(ArrowSource::new_file_source(table_schema)),
+                Ok(false) => Arc::new(ArrowSource::new_stream_file_source(table_schema)),
+                Err(e) => Err(e)?,
+            };
+
         let config = FileScanConfigBuilder::from(conf)
             .with_source(source)
             .build();
@@ -202,12 +239,12 @@ impl FileFormat for ArrowFormat {
         Ok(Arc::new(DataSinkExec::new(input, sink, order_requirements)) as _)
     }
 
-    fn file_source(&self) -> Arc<dyn FileSource> {
-        Arc::new(ArrowSource::default())
+    fn file_source(&self, table_schema: TableSchema) -> Arc<dyn FileSource> {
+        Arc::new(ArrowSource::new_file_source(table_schema))
     }
 }
 
-/// Implements [`FileSink`] for writing to arrow_ipc files
+/// Implements [`FileSink`] for Arrow IPC files
 struct ArrowFileSink {
     config: FileSinkConfig,
 }
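The `is_object_in_arrow_ipc_file_format` helper referenced above is not shown in this hunk. Conceptually it only has to look at the first bytes of the object: the IPC file format begins with the `ARROW1` magic, while the stream format begins directly with an encapsulated message (see the format notes in the next hunk). A minimal sketch of that check, with illustrative names and without the object store plumbing (the real helper presumably does a small ranged read first):

// Sketch only: classify a header as IPC file format vs. stream format.
// The real helper presumably fetches these bytes with a ranged object store read
// before choosing between ArrowSource::new_file_source and new_stream_file_source.
const ARROW_MAGIC: [u8; 6] = *b"ARROW1";
const CONTINUATION_MARKER: [u8; 4] = [0xff; 4];

/// True if `header` starts with the "ARROW1" magic (file format); false otherwise
/// (a stream starts directly with an encapsulated message).
fn looks_like_ipc_file_format(header: &[u8]) -> bool {
    header.len() >= ARROW_MAGIC.len() && header[..ARROW_MAGIC.len()] == ARROW_MAGIC
}

fn main() {
    assert!(looks_like_ipc_file_format(b"ARROW1\0\0"));
    // A stream written by arrow >= 0.15 starts with the continuation marker instead.
    assert!(!looks_like_ipc_file_format(&CONTINUATION_MARKER));
}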
@@ -344,94 +381,160 @@ impl DataSink for ArrowFileSink {
     }
 }
 
+// Custom implementation of inferring schema. Should eventually be moved upstream to arrow-rs.
+// See
+
 const ARROW_MAGIC: [u8; 6] = [b'A', b'R', b'R', b'O', b'W', b'1'];
 const CONTINUATION_MARKER: [u8; 4] = [0xff; 4];
 
-/// Custom implementation of inferring schema. Should eventually be moved upstream to arrow-rs.
-/// See
-async fn infer_schema_from_file_stream(
+async fn infer_stream_schema(
     mut stream: BoxStream<'static, object_store::Result<Bytes>>,
 ) -> Result<SchemaRef> {
-    // Expected format:
-    // <magic number "ARROW1"> - 6 bytes
-    // <empty padding bytes [to 8 byte boundary]> - 2 bytes
-    // <continuation: 0xFFFFFFFF> - 4 bytes, not present below v0.15.0
-    // <metadata_size: int32> - 4 bytes
-    // <metadata_flatbuffer: bytes>
-    // <rest of file bytes>
-
-    // So in first read we need at least all known sized sections,
-    // which is 6 + 2 + 4 + 4 = 16 bytes.
-    let bytes = collect_at_least_n_bytes(&mut stream, 16, None).await?;
-
-    // Files should start with these magic bytes
-    if bytes[0..6] != ARROW_MAGIC {
-        return Err(ArrowError::ParseError(
-            "Arrow file does not contain correct header".to_string(),
-        ))?;
-    }
-
-    // Since continuation marker bytes added in later versions
-    let (meta_len, rest_of_bytes_start_index) = if bytes[8..12] == CONTINUATION_MARKER {
-        (&bytes[12..16], 16)
+    // IPC streaming format.
+    // See https://arrow.apache.org/docs/format/Columnar.html#ipc-streaming-format
+    //
+    // <SCHEMA>
+    // <DICTIONARY 0>
+    // ...
+    // <DICTIONARY k - 1>
+    // <RECORD BATCH 0>
+    // ...
+    // <DICTIONARY x DELTA>
+    // ...
+    // <DICTIONARY y DELTA>
+    // ...
+    // <RECORD BATCH n - 1>
+    // <EOS [optional]: 0xFFFFFFFF 0x00000000>
+
+    // The streaming format is made up of a sequence of encapsulated messages.
+    // See https://arrow.apache.org/docs/format/Columnar.html#encapsulated-message-format
+    //
+    // <continuation: 0xFFFFFFFF> (added in v0.15.0)
+    // <metadata_size: int32>
+    // <metadata_flatbuffer: bytes>
+    // <padding>
+    // <message body>
+    //
+    // The first message is the schema.
+
+    // IPC file format is a wrapper around the streaming format with indexing information.
+    // See https://arrow.apache.org/docs/format/Columnar.html#ipc-file-format
+    //
+    // <magic number "ARROW1">
+    // <empty padding bytes [to 8 byte boundary]>
+    // <STREAMING FORMAT with EOS>