From 5b550dcebf95675fa7b0c49a17bf0676f686853b Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Fri, 16 May 2025 18:42:24 +0800 Subject: [PATCH 1/5] Bump iceberg-rust version to 0.5.0 (Round 1) Signed-off-by: Xuanwo --- .github/workflows/ci.yml | 12 +- .github/workflows/publish.yml | 6 +- .github/workflows/release_python.yml | 76 +++++++------ .github/workflows/release_python_nightly.yml | 110 ++++++++++--------- Cargo.toml | 4 +- bindings/python/Cargo.lock | 1 + bindings/python/Cargo.toml | 4 +- crates/catalog/rest/Cargo.toml | 1 - crates/catalog/rest/src/client.rs | 18 +-- crates/iceberg/src/delete_vector.rs | 4 +- crates/iceberg/src/io/file_io.rs | 6 +- rust-toolchain.toml | 2 +- 12 files changed, 128 insertions(+), 116 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2eb891a939..7587580843 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,7 +30,7 @@ concurrency: cancel-in-progress: true env: - rust_msrv: "1.84.0" + rust_msrv: "1.85.0" jobs: check: @@ -118,11 +118,11 @@ jobs: - name: Maximize build space uses: easimon/maximize-build-space@master with: - remove-dotnet: 'true' - remove-android: 'true' - remove-haskell: 'true' - remove-codeql: 'true' - remove-docker-images: 'true' + remove-dotnet: "true" + remove-android: "true" + remove-haskell: "true" + remove-codeql: "true" + remove-docker-images: "true" root-reserve-mb: 10240 temp-reserve-mb: 10240 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 486d662464..1faaf14be3 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -20,11 +20,11 @@ name: Publish on: push: tags: - - '*' + - "*" workflow_dispatch: env: - rust_msrv: "1.77.1" + rust_msrv: "1.85" jobs: publish: @@ -57,4 +57,4 @@ jobs: if: ${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }} run: cargo publish --all-features env: - CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} \ No newline at end of file + CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} diff --git a/.github/workflows/release_python.yml b/.github/workflows/release_python.yml index 047800a2d0..a721355198 100644 --- a/.github/workflows/release_python.yml +++ b/.github/workflows/release_python.yml @@ -20,7 +20,7 @@ name: Publish Python 🐍 distribution 📦 to PyPI on: push: tags: - - '*' + - "*" pull_request: branches: - main @@ -29,7 +29,7 @@ on: workflow_dispatch: env: - rust_msrv: "1.77.1" + rust_msrv: "1.85" concurrency: group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} @@ -62,33 +62,37 @@ jobs: - { os: windows-latest } - { os: macos-latest, target: "universal2-apple-darwin" } - { os: ubuntu-latest, target: "x86_64" } - - { os: ubuntu-latest, target: "aarch64", manylinux: "manylinux_2_28" } + - { + os: ubuntu-latest, + target: "aarch64", + manylinux: "manylinux_2_28", + } - { os: ubuntu-latest, target: "armv7l" } steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: 3.9 - - name: Setup Rust toolchain - uses: ./.github/actions/setup-builder - with: - rust-version: ${{ env.rust_msrv }} - - uses: PyO3/maturin-action@v1 - with: - target: ${{ matrix.target }} - manylinux: ${{ matrix.manylinux || 'auto' }} - working-directory: "bindings/python" - command: build - args: --release -o dist - - name: Upload wheels - uses: actions/upload-artifact@v4 - with: - name: wheels-${{ matrix.os }}-${{ matrix.target }} - path: bindings/python/dist + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + 
python-version: 3.9 + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + with: + rust-version: ${{ env.rust_msrv }} + - uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.target }} + manylinux: ${{ matrix.manylinux || 'auto' }} + working-directory: "bindings/python" + command: build + args: --release -o dist + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-${{ matrix.os }}-${{ matrix.target }} + path: bindings/python/dist pypi-publish: name: Publish Python 🐍 distribution 📦 to Pypi - needs: [ sdist, wheels ] + needs: [sdist, wheels] runs-on: ubuntu-latest # Only publish to PyPi if the tag is not a pre-release if: ${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }} @@ -98,18 +102,18 @@ jobs: url: https://pypi.org/p/pyiceberg-core permissions: - id-token: write # IMPORTANT: mandatory for trusted publishing + id-token: write # IMPORTANT: mandatory for trusted publishing steps: - - name: Download all the dists - uses: actions/download-artifact@v4 - with: - pattern: wheels-* - merge-multiple: true - path: bindings/python/dist - - name: Publish to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + pattern: wheels-* + merge-multiple: true + path: bindings/python/dist + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 - with: - skip-existing: true - packages-dir: bindings/python/dist + with: + skip-existing: true + packages-dir: bindings/python/dist diff --git a/.github/workflows/release_python_nightly.yml b/.github/workflows/release_python_nightly.yml index 9333fe597c..3e9d9c74a7 100644 --- a/.github/workflows/release_python_nightly.yml +++ b/.github/workflows/release_python_nightly.yml @@ -19,11 +19,11 @@ name: "Nightly PyPI Build" on: schedule: - - cron: "0 0 * * *" # Runs at midnight UTC every day - workflow_dispatch: # Allows manual triggering + - cron: "0 0 * * *" # Runs at midnight UTC every day + workflow_dispatch: # Allows manual triggering env: - rust_msrv: "1.77.1" + rust_msrv: "1.85" permissions: contents: read @@ -40,12 +40,12 @@ jobs: sdist: needs: set-version - if: github.repository == 'apache/iceberg-rust' # Only run for apache repo + if: github.repository == 'apache/iceberg-rust' # Only run for apache repo runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - - uses: ./.github/actions/overwrite-package-version # Overwrite package version with timestamp + + - uses: ./.github/actions/overwrite-package-version # Overwrite package version with timestamp with: timestamp: ${{ needs.set-version.outputs.TIMESTAMP }} @@ -54,7 +54,7 @@ jobs: working-directory: "bindings/python" command: sdist args: -o dist - + - name: Upload sdist uses: actions/upload-artifact@v4 with: @@ -63,7 +63,7 @@ jobs: wheels: needs: set-version - if: github.repository == 'apache/iceberg-rust' # Only run for apache repo + if: github.repository == 'apache/iceberg-rust' # Only run for apache repo runs-on: "${{ matrix.os }}" strategy: matrix: @@ -71,40 +71,44 @@ jobs: - { os: windows-latest } - { os: macos-latest, target: "universal2-apple-darwin" } - { os: ubuntu-latest, target: "x86_64" } - - { os: ubuntu-latest, target: "aarch64", manylinux: "manylinux_2_28" } + - { + os: ubuntu-latest, + target: "aarch64", + manylinux: "manylinux_2_28", + } - { os: ubuntu-latest, target: "armv7l" } steps: - - uses: actions/checkout@v4 - - - uses: ./.github/actions/overwrite-package-version # Overwrite package version with timestamp - with: - 
timestamp: ${{ needs.set-version.outputs.TIMESTAMP }} - - - uses: actions/setup-python@v5 - with: - python-version: 3.9 - - - name: Setup Rust toolchain - uses: ./.github/actions/setup-builder - with: - rust-version: ${{ env.rust_msrv }} - - - uses: PyO3/maturin-action@v1 - with: - target: ${{ matrix.target }} - manylinux: ${{ matrix.manylinux || 'auto' }} - working-directory: "bindings/python" - command: build - args: --release -o dist - - - name: Upload wheels - uses: actions/upload-artifact@v4 - with: - name: wheels-${{ matrix.os }}-${{ matrix.target }} - path: bindings/python/dist + - uses: actions/checkout@v4 + + - uses: ./.github/actions/overwrite-package-version # Overwrite package version with timestamp + with: + timestamp: ${{ needs.set-version.outputs.TIMESTAMP }} + + - uses: actions/setup-python@v5 + with: + python-version: 3.9 + + - name: Setup Rust toolchain + uses: ./.github/actions/setup-builder + with: + rust-version: ${{ env.rust_msrv }} + + - uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.target }} + manylinux: ${{ matrix.manylinux || 'auto' }} + working-directory: "bindings/python" + command: build + args: --release -o dist + + - name: Upload wheels + uses: actions/upload-artifact@v4 + with: + name: wheels-${{ matrix.os }}-${{ matrix.target }} + path: bindings/python/dist testpypi-publish: - needs: [ sdist, wheels ] + needs: [sdist, wheels] runs-on: ubuntu-latest environment: @@ -112,20 +116,20 @@ jobs: url: https://test.pypi.org/p/pyiceberg-core permissions: - id-token: write # IMPORTANT: mandatory for trusted publishing + id-token: write # IMPORTANT: mandatory for trusted publishing steps: - - name: Download all the dists - uses: actions/download-artifact@v4 - with: - pattern: wheels-* - merge-multiple: true - path: bindings/python/dist - - name: List downloaded artifacts - run: ls -R bindings/python/dist - - name: Publish to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - repository-url: https://test.pypi.org/legacy/ - skip-existing: true - packages-dir: bindings/python/dist + - name: Download all the dists + uses: actions/download-artifact@v4 + with: + pattern: wheels-* + merge-multiple: true + path: bindings/python/dist + - name: List downloaded artifacts + run: ls -R bindings/python/dist + - name: Publish to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ + skip-existing: true + packages-dir: bindings/python/dist diff --git a/Cargo.toml b/Cargo.toml index e543fdaa3d..a10e185e2a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,14 +29,14 @@ members = [ resolver = "2" [workspace.package] -edition = "2021" +edition = "2024" homepage = "https://rust.iceberg.apache.org/" version = "0.4.0" license = "Apache-2.0" repository = "https://github.com/apache/iceberg-rust" # Check the MSRV policy in README.md before changing this -rust-version = "1.84" +rust-version = "1.85" [workspace.dependencies] anyhow = "1.0.72" diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index 03f1a4ca52..b8b5653677 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -2180,6 +2180,7 @@ dependencies = [ "arrow-select", "arrow-string", "async-trait", + "base64", "bimap", "bytes", "chrono", diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 7d126650a0..8a92744bce 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -16,10 +16,10 @@ # under the License. 
[package]
-edition = "2021"
+edition = "2024"
 homepage = "https://rust.iceberg.apache.org"
 name = "pyiceberg_core_rust"
-rust-version = "1.84"
+rust-version = "1.85"
 version = "0.4.0"
 # This crate is used to build python bindings, we don't want to publish it
 publish = false

diff --git a/crates/catalog/rest/Cargo.toml b/crates/catalog/rest/Cargo.toml
index 70a1e89987..916b5ccf75 100644
--- a/crates/catalog/rest/Cargo.toml
+++ b/crates/catalog/rest/Cargo.toml
@@ -29,7 +29,6 @@ license = { workspace = true }
 repository = { workspace = true }
 
 [dependencies]
-# async-trait = { workspace = true }
 async-trait = { workspace = true }
 chrono = { workspace = true }
 http = { workspace = true }
diff --git a/crates/catalog/rest/src/client.rs b/crates/catalog/rest/src/client.rs
index 778ec7582f..75c2d45604 100644
--- a/crates/catalog/rest/src/client.rs
+++ b/crates/catalog/rest/src/client.rs
@@ -25,8 +25,8 @@ use reqwest::{Client, IntoUrl, Method, Request, RequestBuilder, Response};
 use serde::de::DeserializeOwned;
 use tokio::sync::Mutex;
 
-use crate::types::{ErrorResponse, TokenResponse};
 use crate::RestCatalogConfig;
+use crate::types::{ErrorResponse, TokenResponse};
 
 pub(crate) struct HttpClient {
     client: Client,
@@ -80,14 +80,18 @@ impl HttpClient {
         Ok(HttpClient {
             client: cfg.client().unwrap_or(self.client),
             token: Mutex::new(cfg.token().or_else(|| self.token.into_inner())),
-            token_endpoint: (!cfg.get_token_endpoint().is_empty())
-                .then(|| cfg.get_token_endpoint())
-                .unwrap_or(self.token_endpoint),
+            token_endpoint: if !cfg.get_token_endpoint().is_empty() {
+                cfg.get_token_endpoint()
+            } else {
+                self.token_endpoint
+            },
             credential: cfg.credential().or(self.credential),
             extra_headers,
-            extra_oauth_params: (!cfg.extra_oauth_params().is_empty())
-                .then(|| cfg.extra_oauth_params())
-                .unwrap_or(self.extra_oauth_params),
+            extra_oauth_params: if !cfg.extra_oauth_params().is_empty() {
+                cfg.extra_oauth_params()
+            } else {
+                self.extra_oauth_params
+            },
         })
     }
 
diff --git a/crates/iceberg/src/delete_vector.rs b/crates/iceberg/src/delete_vector.rs
index 57c15ffec1..7bde3c43d0 100644
--- a/crates/iceberg/src/delete_vector.rs
+++ b/crates/iceberg/src/delete_vector.rs
@@ -15,9 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use roaring::RoaringTreemap;
 use roaring::bitmap::Iter;
 use roaring::treemap::BitmapIter;
-use roaring::RoaringTreemap;
 
 #[allow(unused)]
 pub struct DeleteVector {
@@ -61,7 +61,7 @@ impl Iterator for DeleteVectorIterator<'_> {
     type Item = u64;
 
     fn next(&mut self) -> Option<Self::Item> {
-        if let Some(ref mut inner) = &mut self.inner {
+        if let Some(inner) = &mut self.inner {
             if let Some(inner_next) = inner.bitmap_iter.next() {
                 return Some(u64::from(inner.high_bits) << 32 | u64::from(inner_next));
             }
diff --git a/crates/iceberg/src/io/file_io.rs b/crates/iceberg/src/io/file_io.rs
index 7557883b06..2f0ae1736c 100644
--- a/crates/iceberg/src/io/file_io.rs
+++ b/crates/iceberg/src/io/file_io.rs
@@ -300,7 +300,7 @@ impl InputFile {
     /// Creates [`FileRead`] for continuous reading.
    ///
     /// For one-time reading, use [`Self::read`] instead.
-    pub async fn reader(&self) -> crate::Result<impl FileRead> {
+    pub async fn reader(&self) -> crate::Result<impl FileRead + use<>> {
        Ok(self.op.reader(&self.path[self.relative_path_pos..]).await?)
    }
 }
 
@@ -399,13 +399,13 @@ impl OutputFile {
 
 #[cfg(test)]
 mod tests {
-    use std::fs::{create_dir_all, File};
+    use std::fs::{File, create_dir_all};
     use std::io::Write;
     use std::path::Path;
 
     use bytes::Bytes;
-    use futures::io::AllowStdIo;
     use futures::AsyncReadExt;
+    use futures::io::AllowStdIo;
     use tempfile::TempDir;
 
     use super::{FileIO, FileIOBuilder};
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
index 98a7df6e0f..a9a807133e 100644
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -20,5 +20,5 @@
 #
 # The channel is exactly same day for our MSRV.
 [toolchain]
-channel = "nightly-2024-11-22"
+channel = "nightly-2025-02-20"
 components = ["rustfmt", "clippy"]

From 9cedf6ce87f2917336b0e97935c324aa47b7f89b Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Fri, 16 May 2025 18:45:24 +0800
Subject: [PATCH 2/5] Update version for python

Signed-off-by: Xuanwo
---
 bindings/python/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml
index 8a92744bce..d6807c5250 100644
--- a/bindings/python/Cargo.toml
+++ b/bindings/python/Cargo.toml
@@ -20,7 +20,7 @@ edition = "2024"
 homepage = "https://rust.iceberg.apache.org"
 name = "pyiceberg_core_rust"
 rust-version = "1.85"
-version = "0.4.0"
+version = "0.5.0"
 # This crate is used to build python bindings, we don't want to publish it
 publish = false

From e56534c48585161f33d778fe55198dfc94035554 Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Fri, 16 May 2025 18:45:39 +0800
Subject: [PATCH 3/5] Update version for python

Signed-off-by: Xuanwo
---
 bindings/python/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml
index 62104e0eef..59bd24f4f0 100644
--- a/bindings/python/pyproject.toml
+++ b/bindings/python/pyproject.toml
@@ -33,7 +33,7 @@ classifiers = [
 name = "pyiceberg-core"
 readme = "project-description.md"
 requires-python = "~=3.9"
-version = "0.4.0"
+version = "0.5.0"
 
 [tool.maturin]
 features = ["pyo3/extension-module"]

From 0018c5497abb6cda327aae817d3a5b7e2bd5938a Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Fri, 16 May 2025 20:49:57 +0800
Subject: [PATCH 4/5] Format code

Signed-off-by: Xuanwo
---
 crates/catalog/glue/src/catalog.rs            |   2 +-
 crates/catalog/glue/src/schema.rs             |   6 +-
 crates/catalog/glue/src/utils.rs              |   9 +-
 .../catalog/glue/tests/glue_catalog_test.rs   |  10 +-
 crates/catalog/hms/src/catalog.rs             |   2 +-
 crates/catalog/hms/src/schema.rs              |   6 +-
 crates/catalog/hms/src/utils.rs               |   2 +-
 crates/catalog/hms/tests/hms_catalog_test.rs  |   8 +-
 crates/catalog/memory/src/catalog.rs          | 123 +++--
 crates/catalog/memory/src/namespace_state.rs  |   2 +-
 crates/catalog/rest/src/catalog.rs            |  91 ++--
 .../catalog/rest/tests/rest_catalog_test.rs   |  48 +-
 crates/catalog/sql/src/catalog.rs             |  85 +--
 crates/examples/src/rest_catalog_table.rs     |  12 +-
 .../iceberg/src/arrow/nan_val_cnt_visitor.rs  |   8 +-
 crates/iceberg/src/arrow/reader.rs            |  32 +-
 .../src/arrow/record_batch_projector.rs       |   2 +-
 .../src/arrow/record_batch_transformer.rs     |   2 +-
 crates/iceberg/src/arrow/schema.rs            |   8 +-
 crates/iceberg/src/arrow/value.rs             |   4 +-
 crates/iceberg/src/avro/schema.rs             | 116 ++--
 crates/iceberg/src/catalog/mod.rs             |  15 +-
 crates/iceberg/src/delete_file_index.rs       |   2 +-
 crates/iceberg/src/expr/predicate.rs          |   4 +-
 .../expr/visitors/bound_predicate_visitor.rs  |   6 +-
 .../src/expr/visitors/expression_evaluator.rs |  13 +-
 .../visitors/inclusive_metrics_evaluator.rs   |  18 +-
 .../src/expr/visitors/inclusive_projection.rs |   9 +-
.../src/expr/visitors/manifest_evaluator.rs | 4 +- .../src/expr/visitors/page_index_evaluator.rs | 12 +- .../visitors/row_group_metrics_evaluator.rs | 6 +- .../expr/visitors/strict_metrics_evaluator.rs | 9 +- .../src/expr/visitors/strict_projection.rs | 8 +- crates/iceberg/src/inspect/manifests.rs | 6 +- crates/iceberg/src/inspect/snapshots.rs | 10 +- crates/iceberg/src/io/mod.rs | 4 +- crates/iceberg/src/io/object_cache.rs | 2 +- crates/iceberg/src/io/storage_fs.rs | 2 +- crates/iceberg/src/io/storage_gcs.rs | 2 +- crates/iceberg/src/io/storage_memory.rs | 2 +- crates/iceberg/src/puffin/metadata.rs | 29 +- crates/iceberg/src/puffin/mod.rs | 4 +- crates/iceberg/src/puffin/reader.rs | 2 +- crates/iceberg/src/puffin/test_utils.rs | 2 +- crates/iceberg/src/puffin/writer.rs | 4 +- crates/iceberg/src/scan/mod.rs | 6 +- crates/iceberg/src/scan/task.rs | 2 +- crates/iceberg/src/spec/datatypes.rs | 14 +- crates/iceberg/src/spec/encrypted_key.rs | 2 +- crates/iceberg/src/spec/manifest/_serde.rs | 8 +- crates/iceberg/src/spec/manifest/data_file.rs | 4 +- crates/iceberg/src/spec/manifest/entry.rs | 4 +- crates/iceberg/src/spec/manifest/mod.rs | 2 +- crates/iceberg/src/spec/manifest/writer.rs | 22 +- crates/iceberg/src/spec/manifest_list.rs | 4 +- crates/iceberg/src/spec/name_mapping/mod.rs | 56 +- crates/iceberg/src/spec/partition.rs | 54 +- crates/iceberg/src/spec/schema/_serde.rs | 2 +- crates/iceberg/src/spec/schema/index.rs | 7 +- crates/iceberg/src/spec/schema/mod.rs | 270 +++++----- .../iceberg/src/spec/schema/prune_columns.rs | 500 +++++++++--------- crates/iceberg/src/spec/snapshot.rs | 4 +- crates/iceberg/src/spec/snapshot_summary.rs | 32 +- crates/iceberg/src/spec/sort.rs | 56 +- crates/iceberg/src/spec/table_metadata.rs | 46 +- .../src/spec/table_metadata_builder.rs | 102 ++-- crates/iceberg/src/spec/transform.rs | 14 +- crates/iceberg/src/spec/values.rs | 273 +++++----- crates/iceberg/src/spec/view_metadata.rs | 50 +- .../iceberg/src/spec/view_metadata_builder.rs | 131 ++--- crates/iceberg/src/spec/view_version.rs | 17 +- crates/iceberg/src/table.rs | 2 +- crates/iceberg/src/transaction/append.rs | 6 +- crates/iceberg/src/transaction/mod.rs | 2 +- crates/iceberg/src/transaction/snapshot.rs | 9 +- crates/iceberg/src/transaction/sort_order.rs | 2 +- crates/iceberg/src/transform/bucket.rs | 17 +- crates/iceberg/src/transform/identity.rs | 9 +- crates/iceberg/src/transform/mod.rs | 2 +- crates/iceberg/src/transform/temporal.rs | 50 +- crates/iceberg/src/transform/truncate.rs | 21 +- crates/iceberg/src/transform/void.rs | 29 +- .../writer/base_writer/data_file_writer.rs | 10 +- .../base_writer/equality_delete_writer.rs | 115 ++-- .../writer/file_writer/location_generator.rs | 2 +- crates/iceberg/src/writer/file_writer/mod.rs | 2 +- .../src/writer/file_writer/parquet_writer.rs | 213 ++++---- .../src/writer/file_writer/track_writer.rs | 4 +- crates/iceberg/src/writer/mod.rs | 6 +- crates/integration_tests/tests/shared.rs | 2 +- .../shared_tests/append_data_file_test.rs | 2 +- .../append_partition_data_file_test.rs | 2 +- .../shared_tests/conflict_commit_test.rs | 2 +- .../tests/shared_tests/scan_all_type.rs | 6 +- crates/integrations/cli/src/main.rs | 2 +- .../integrations/datafusion/src/table/mod.rs | 2 +- .../src/table/table_provider_factory.rs | 5 +- .../tests/integration_datafusion_test.rs | 22 +- crates/sqllogictest/src/engine/datafusion.rs | 2 +- 99 files changed, 1567 insertions(+), 1416 deletions(-) diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs 
index fac83e6feb..2bf423dd24 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -37,7 +37,7 @@ use crate::utils::{ create_sdk_config, get_default_table_location, get_metadata_location, validate_namespace, }; use crate::{ - with_catalog_id, AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, + AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, with_catalog_id, }; #[derive(Debug, TypedBuilder)] diff --git a/crates/catalog/glue/src/schema.rs b/crates/catalog/glue/src/schema.rs index 1b490d13d6..28093c5d74 100644 --- a/crates/catalog/glue/src/schema.rs +++ b/crates/catalog/glue/src/schema.rs @@ -25,7 +25,7 @@ pub(crate) const ICEBERG_FIELD_CURRENT: &str = "iceberg.field.current"; use std::collections::HashMap; use aws_sdk_glue::types::Column; -use iceberg::spec::{visit_schema, PrimitiveType, SchemaVisitor, TableMetadata}; +use iceberg::spec::{PrimitiveType, SchemaVisitor, TableMetadata, visit_schema}; use iceberg::{Error, ErrorKind, Result}; use crate::error::from_aws_build_error; @@ -177,7 +177,7 @@ impl SchemaVisitor for GlueSchemaBuilder { return Err(Error::new( ErrorKind::FeatureUnsupported, "Conversion from 'Timestamptz' is not supported", - )) + )); } }; @@ -187,8 +187,8 @@ impl SchemaVisitor for GlueSchemaBuilder { #[cfg(test)] mod tests { - use iceberg::spec::{Schema, TableMetadataBuilder}; use iceberg::TableCreation; + use iceberg::spec::{Schema, TableMetadataBuilder}; use super::*; diff --git a/crates/catalog/glue/src/utils.rs b/crates/catalog/glue/src/utils.rs index c43af500ba..0384e15a20 100644 --- a/crates/catalog/glue/src/utils.rs +++ b/crates/catalog/glue/src/utils.rs @@ -336,12 +336,9 @@ mod tests { let properties = HashMap::new(); let schema = Schema::builder() .with_schema_id(1) - .with_fields(vec![NestedField::required( - 1, - "foo", - Type::Primitive(PrimitiveType::Int), - ) - .into()]) + .with_fields(vec![ + NestedField::required(1, "foo", Type::Primitive(PrimitiveType::Int)).into(), + ]) .build()?; let metadata = create_metadata(schema)?; diff --git a/crates/catalog/glue/tests/glue_catalog_test.rs b/crates/catalog/glue/tests/glue_catalog_test.rs index 78481f384a..bec9494fe9 100644 --- a/crates/catalog/glue/tests/glue_catalog_test.rs +++ b/crates/catalog/glue/tests/glue_catalog_test.rs @@ -26,7 +26,7 @@ use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY use iceberg::spec::{NestedField, PrimitiveType, Schema, Type}; use iceberg::{Catalog, Namespace, NamespaceIdent, Result, TableCreation, TableIdent}; use iceberg_catalog_glue::{ - GlueCatalog, GlueCatalogConfig, AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, + AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, GlueCatalog, GlueCatalogConfig, }; use iceberg_test_utils::docker::DockerCompose; use iceberg_test_utils::{normalize_test_name, set_up}; @@ -231,9 +231,11 @@ async fn test_create_table() -> Result<()> { let result = catalog.create_table(&namespace, creation).await?; assert_eq!(result.identifier().name(), "my_table"); - assert!(result - .metadata_location() - .is_some_and(|location| location.starts_with("s3a://warehouse/hive/metadata/00000-"))); + assert!( + result + .metadata_location() + .is_some_and(|location| location.starts_with("s3a://warehouse/hive/metadata/00000-")) + ); assert!( catalog .file_io() diff --git a/crates/catalog/hms/src/catalog.rs b/crates/catalog/hms/src/catalog.rs index 89fa9f1903..97d79f85c7 100644 --- a/crates/catalog/hms/src/catalog.rs +++ 
b/crates/catalog/hms/src/catalog.rs
@@ -258,7 +258,7 @@ impl Catalog for HmsCatalog {
                 return Err(Error::new(
                     ErrorKind::DataInvalid,
                     "Database name must be specified",
-                ))
+                ));
             }
         };
 
diff --git a/crates/catalog/hms/src/schema.rs b/crates/catalog/hms/src/schema.rs
index 4012098c2e..6b7f71c6a0 100644
--- a/crates/catalog/hms/src/schema.rs
+++ b/crates/catalog/hms/src/schema.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use hive_metastore::FieldSchema;
-use iceberg::spec::{visit_schema, PrimitiveType, Schema, SchemaVisitor};
+use iceberg::spec::{PrimitiveType, Schema, SchemaVisitor, visit_schema};
 use iceberg::{Error, ErrorKind, Result};
 
 type HiveSchema = Vec<FieldSchema>;
@@ -134,7 +134,7 @@ impl SchemaVisitor for HiveSchemaBuilder {
                 return Err(Error::new(
                     ErrorKind::FeatureUnsupported,
                     "Conversion from 'Timestamptz' is not supported",
-                ))
+                ));
             }
         };
 
@@ -144,8 +144,8 @@ impl SchemaVisitor for HiveSchemaBuilder {
 
 #[cfg(test)]
 mod tests {
-    use iceberg::spec::Schema;
     use iceberg::Result;
+    use iceberg::spec::Schema;
 
     use super::*;
 
diff --git a/crates/catalog/hms/src/utils.rs b/crates/catalog/hms/src/utils.rs
index 1e48d3fbd4..432ceac833 100644
--- a/crates/catalog/hms/src/utils.rs
+++ b/crates/catalog/hms/src/utils.rs
@@ -130,7 +130,7 @@ pub(crate) fn convert_to_database(
                 return Err(Error::new(
                     ErrorKind::DataInvalid,
                     format!("Invalid value for setting 'owner_type': {}", v),
-                ))
+                ));
             }
         };
         db.owner_type = Some(owner_type);
diff --git a/crates/catalog/hms/tests/hms_catalog_test.rs b/crates/catalog/hms/tests/hms_catalog_test.rs
index 2f9adcf3d9..12d34a707f 100644
--- a/crates/catalog/hms/tests/hms_catalog_test.rs
+++ b/crates/catalog/hms/tests/hms_catalog_test.rs
@@ -207,9 +207,11 @@ async fn test_create_table() -> Result<()> {
     let result = catalog.create_table(namespace.name(), creation).await?;
 
     assert_eq!(result.identifier().name(), "my_table");
-    assert!(result
-        .metadata_location()
-        .is_some_and(|location| location.starts_with("s3a://warehouse/hive/metadata/00000-")));
+    assert!(
+        result
+            .metadata_location()
+            .is_some_and(|location| location.starts_with("s3a://warehouse/hive/metadata/00000-"))
+    );
 
     assert!(
         catalog
             .file_io()
diff --git a/crates/catalog/memory/src/catalog.rs b/crates/catalog/memory/src/catalog.rs
index cf4ad72169..7454de1267 100644
--- a/crates/catalog/memory/src/catalog.rs
+++ b/crates/catalog/memory/src/catalog.rs
@@ -174,12 +174,18 @@ impl Catalog for MemoryCatalog {
         let location_prefix = match namespace_properties.get(LOCATION) {
             Some(namespace_location) => Ok(namespace_location.clone()),
             None => match self.warehouse_location.clone() {
-                Some(warehouse_location) => Ok(format!("{}/{}", warehouse_location, namespace_ident.join("/"))),
-                None => Err(Error::new(ErrorKind::Unexpected,
+                Some(warehouse_location) => Ok(format!(
+                    "{}/{}",
+                    warehouse_location,
+                    namespace_ident.join("/")
+                )),
+                None => Err(Error::new(
+                    ErrorKind::Unexpected,
                     format!(
                         "Cannot create table {:?}. 
No default path is set, please specify a location when creating a table.", &table_ident - ))) + ), + )), }, }?; @@ -322,12 +328,9 @@ mod tests { fn simple_table_schema() -> Schema { Schema::builder() - .with_fields(vec![NestedField::required( - 1, - "foo", - Type::Primitive(PrimitiveType::Int), - ) - .into()]) + .with_fields(vec![ + NestedField::required(1, "foo", Type::Primitive(PrimitiveType::Int)).into(), + ]) .build() .unwrap() } @@ -529,10 +532,12 @@ mod tests { let namespace_ident = NamespaceIdent::new("a".into()); create_namespace(&catalog, &namespace_ident).await; - assert!(!catalog - .namespace_exists(&NamespaceIdent::new("b".into())) - .await - .unwrap()); + assert!( + !catalog + .namespace_exists(&NamespaceIdent::new("b".into())) + .await + .unwrap() + ); } #[tokio::test] @@ -676,8 +681,8 @@ mod tests { } #[tokio::test] - async fn test_create_deeply_nested_namespace_throws_error_if_intermediate_namespace_doesnt_exist( - ) { + async fn test_create_deeply_nested_namespace_throws_error_if_intermediate_namespace_doesnt_exist() + { let catalog = new_memory_catalog(); let namespace_ident_a = NamespaceIdent::new("a".into()); @@ -886,10 +891,12 @@ mod tests { catalog.drop_namespace(&namespace_ident_a_b).await.unwrap(); - assert!(!catalog - .namespace_exists(&namespace_ident_a_b) - .await - .unwrap()); + assert!( + !catalog + .namespace_exists(&namespace_ident_a_b) + .await + .unwrap() + ); assert!(catalog.namespace_exists(&namespace_ident_a).await.unwrap()); } @@ -912,15 +919,19 @@ mod tests { .await .unwrap(); - assert!(!catalog - .namespace_exists(&namespace_ident_a_b_c) - .await - .unwrap()); + assert!( + !catalog + .namespace_exists(&namespace_ident_a_b_c) + .await + .unwrap() + ); - assert!(catalog - .namespace_exists(&namespace_ident_a_b) - .await - .unwrap()); + assert!( + catalog + .namespace_exists(&namespace_ident_a_b) + .await + .unwrap() + ); assert!(catalog.namespace_exists(&namespace_ident_a).await.unwrap()); } @@ -974,10 +985,12 @@ mod tests { assert!(!catalog.namespace_exists(&namespace_ident_a).await.unwrap()); - assert!(!catalog - .namespace_exists(&namespace_ident_a_b) - .await - .unwrap()); + assert!( + !catalog + .namespace_exists(&namespace_ident_a_b) + .await + .unwrap() + ); } #[tokio::test] @@ -1010,11 +1023,13 @@ mod tests { assert_table_eq(&table, &expected_table_ident, &simple_table_schema()); - assert!(table - .metadata_location() - .unwrap() - .to_string() - .starts_with(&location)) + assert!( + table + .metadata_location() + .unwrap() + .to_string() + .starts_with(&location) + ) } #[tokio::test] @@ -1059,8 +1074,8 @@ mod tests { } #[tokio::test] - async fn test_create_table_in_nested_namespace_falls_back_to_nested_namespace_location_if_table_location_is_missing( - ) { + async fn test_create_table_in_nested_namespace_falls_back_to_nested_namespace_location_if_table_location_is_missing() + { let file_io = FileIOBuilder::new_fs_io().build().unwrap(); let warehouse_location = temp_path(); let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone())); @@ -1112,8 +1127,8 @@ mod tests { } #[tokio::test] - async fn test_create_table_falls_back_to_warehouse_location_if_both_table_location_and_namespace_location_are_missing( - ) { + async fn test_create_table_falls_back_to_warehouse_location_if_both_table_location_and_namespace_location_are_missing() + { let file_io = FileIOBuilder::new_fs_io().build().unwrap(); let warehouse_location = temp_path(); let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone())); @@ -1153,8 +1168,8 @@ mod 
tests { } #[tokio::test] - async fn test_create_table_in_nested_namespace_falls_back_to_warehouse_location_if_both_table_location_and_namespace_location_are_missing( - ) { + async fn test_create_table_in_nested_namespace_falls_back_to_warehouse_location_if_both_table_location_and_namespace_location_are_missing() + { let file_io = FileIOBuilder::new_fs_io().build().unwrap(); let warehouse_location = temp_path(); let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone())); @@ -1201,8 +1216,8 @@ mod tests { } #[tokio::test] - async fn test_create_table_throws_error_if_table_location_and_namespace_location_and_warehouse_location_are_missing( - ) { + async fn test_create_table_throws_error_if_table_location_and_namespace_location_and_warehouse_location_are_missing() + { let file_io = FileIOBuilder::new_fs_io().build().unwrap(); let catalog = MemoryCatalog::new(file_io, None); @@ -1454,10 +1469,12 @@ mod tests { create_namespace(&catalog, &namespace_ident).await; let non_existent_table_ident = TableIdent::new(namespace_ident.clone(), "tbl1".into()); - assert!(!catalog - .table_exists(&non_existent_table_ident) - .await - .unwrap()); + assert!( + !catalog + .table_exists(&non_existent_table_ident) + .await + .unwrap() + ); } #[tokio::test] @@ -1473,10 +1490,12 @@ mod tests { assert!(catalog.table_exists(&table_ident).await.unwrap()); let non_existent_table_ident = TableIdent::new(namespace_ident_a_b.clone(), "tbl2".into()); - assert!(!catalog - .table_exists(&non_existent_table_ident) - .await - .unwrap()); + assert!( + !catalog + .table_exists(&non_existent_table_ident) + .await + .unwrap() + ); } #[tokio::test] diff --git a/crates/catalog/memory/src/namespace_state.rs b/crates/catalog/memory/src/namespace_state.rs index de1532203c..e324e7a3dc 100644 --- a/crates/catalog/memory/src/namespace_state.rs +++ b/crates/catalog/memory/src/namespace_state.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use std::collections::{hash_map, HashMap}; +use std::collections::{HashMap, hash_map}; use iceberg::{Error, ErrorKind, NamespaceIdent, Result, TableIdent}; use itertools::Itertools; diff --git a/crates/catalog/rest/src/catalog.rs b/crates/catalog/rest/src/catalog.rs index 1cd8aadbc9..8518190699 100644 --- a/crates/catalog/rest/src/catalog.rs +++ b/crates/catalog/rest/src/catalog.rs @@ -36,7 +36,7 @@ use tokio::sync::OnceCell; use typed_builder::TypedBuilder; use crate::client::{ - deserialize_catalog_response, deserialize_unexpected_catalog_error, HttpClient, + HttpClient, deserialize_catalog_response, deserialize_unexpected_catalog_error, }; use crate::types::{ CatalogConfig, CommitTableRequest, CommitTableResponse, CreateTableRequest, @@ -312,7 +312,7 @@ impl RestCatalog { return Err(Error::new( ErrorKind::Unexpected, "Unable to load file io, neither warehouse nor metadata location is set!", - ))? 
+ ))?; } }; @@ -566,13 +566,13 @@ impl Catalog for RestCatalog { return Err(Error::new( ErrorKind::Unexpected, "Tried to create a table under a namespace that does not exist", - )) + )); } StatusCode::CONFLICT => { return Err(Error::new( ErrorKind::Unexpected, "The table already exists", - )) + )); } _ => return Err(deserialize_unexpected_catalog_error(http_response).await), }; @@ -628,7 +628,7 @@ impl Catalog for RestCatalog { return Err(Error::new( ErrorKind::Unexpected, "Tried to load a table that does not exist", - )) + )); } _ => return Err(deserialize_unexpected_catalog_error(http_response).await), }; @@ -743,38 +743,36 @@ impl Catalog for RestCatalog { let http_response = context.client.query_catalog(request).await?; let response: CommitTableResponse = match http_response.status() { - StatusCode::OK => { - deserialize_catalog_response(http_response).await? - } + StatusCode::OK => deserialize_catalog_response(http_response).await?, StatusCode::NOT_FOUND => { return Err(Error::new( ErrorKind::Unexpected, "Tried to update a table that does not exist", - )) + )); } StatusCode::CONFLICT => { return Err(Error::new( ErrorKind::Unexpected, "CommitFailedException, one or more requirements failed. The client may retry.", - )) + )); } StatusCode::INTERNAL_SERVER_ERROR => { return Err(Error::new( ErrorKind::Unexpected, "An unknown server-side problem occurred; the commit state is unknown.", - )) + )); } StatusCode::BAD_GATEWAY => { return Err(Error::new( ErrorKind::Unexpected, "A gateway or proxy received an invalid response from the upstream server; the commit state is unknown.", - )) + )); } StatusCode::GATEWAY_TIMEOUT => { return Err(Error::new( ErrorKind::Unexpected, "A server-side gateway timeout occurred; the commit state is unknown.", - )) + )); } _ => return Err(deserialize_unexpected_catalog_error(http_response).await), }; @@ -1379,10 +1377,12 @@ mod tests { let catalog = RestCatalog::new(RestCatalogConfig::builder().uri(server.url()).build()); - assert!(catalog - .namespace_exists(&NamespaceIdent::new("ns1".to_string())) - .await - .unwrap()); + assert!( + catalog + .namespace_exists(&NamespaceIdent::new("ns1".to_string())) + .await + .unwrap() + ); config_mock.assert_async().await; get_ns_mock.assert_async().await; @@ -1699,13 +1699,15 @@ mod tests { let catalog = RestCatalog::new(RestCatalogConfig::builder().uri(server.url()).build()); - assert!(catalog - .table_exists(&TableIdent::new( - NamespaceIdent::new("ns1".to_string()), - "table1".to_string(), - )) - .await - .unwrap()); + assert!( + catalog + .table_exists(&TableIdent::new( + NamespaceIdent::new("ns1".to_string()), + "table1".to_string(), + )) + .await + .unwrap() + ); config_mock.assert_async().await; check_table_exists_mock.assert_async().await; @@ -1768,7 +1770,10 @@ mod tests { &TableIdent::from_strs(vec!["ns1", "test1"]).unwrap(), table.identifier() ); - assert_eq!("s3://warehouse/database/table/metadata/00001-5f2f8166-244c-4eae-ac36-384ecdec81fc.gz.metadata.json", table.metadata_location().unwrap()); + assert_eq!( + "s3://warehouse/database/table/metadata/00001-5f2f8166-244c-4eae-ac36-384ecdec81fc.gz.metadata.json", + table.metadata_location().unwrap() + ); assert_eq!(FormatVersion::V1, table.metadata().format_version()); assert_eq!("s3://warehouse/database/table", table.metadata().location()); assert_eq!( @@ -1919,11 +1924,13 @@ mod tests { .properties(HashMap::from([("owner".to_string(), "testx".to_string())])) .partition_spec( UnboundPartitionSpec::builder() - 
.add_partition_fields(vec![UnboundPartitionField::builder() - .source_id(1) - .transform(Transform::Truncate(3)) - .name("id".to_string()) - .build()]) + .add_partition_fields(vec![ + UnboundPartitionField::builder() + .source_id(1) + .transform(Transform::Truncate(3)) + .name("id".to_string()) + .build(), + ]) .unwrap() .build(), ) @@ -2068,11 +2075,13 @@ mod tests { .await; assert!(table_result.is_err()); - assert!(table_result - .err() - .unwrap() - .message() - .contains("already exists")); + assert!( + table_result + .err() + .unwrap() + .message() + .contains("already exists") + ); config_mock.assert_async().await; create_table_mock.assert_async().await; @@ -2248,11 +2257,13 @@ mod tests { .await; assert!(table_result.is_err()); - assert!(table_result - .err() - .unwrap() - .message() - .contains("does not exist")); + assert!( + table_result + .err() + .unwrap() + .message() + .contains("does not exist") + ); config_mock.assert_async().await; update_table_mock.assert_async().await; diff --git a/crates/catalog/rest/tests/rest_catalog_test.rs b/crates/catalog/rest/tests/rest_catalog_test.rs index f08bfb6fa7..ab7ea3d62c 100644 --- a/crates/catalog/rest/tests/rest_catalog_test.rs +++ b/crates/catalog/rest/tests/rest_catalog_test.rs @@ -136,12 +136,14 @@ async fn test_list_namespace() { ); // Currently this namespace doesn't exist, so it should return error. - assert!(catalog - .list_namespaces(Some( - &NamespaceIdent::from_strs(["test_list_namespace"]).unwrap() - )) - .await - .is_err()); + assert!( + catalog + .list_namespaces(Some( + &NamespaceIdent::from_strs(["test_list_namespace"]).unwrap() + )) + .await + .is_err() + ); // Create namespaces catalog @@ -178,10 +180,12 @@ async fn test_list_empty_namespace() { ); // Currently this namespace doesn't exist, so it should return error. - assert!(catalog - .list_namespaces(Some(ns_apple.name())) - .await - .is_err()); + assert!( + catalog + .list_namespaces(Some(ns_apple.name())) + .await + .is_err() + ); // Create namespaces catalog @@ -218,12 +222,14 @@ async fn test_list_root_namespace() { ); // Currently this namespace doesn't exist, so it should return error. - assert!(catalog - .list_namespaces(Some( - &NamespaceIdent::from_strs(["test_list_root_namespace"]).unwrap() - )) - .await - .is_err()); + assert!( + catalog + .list_namespaces(Some( + &NamespaceIdent::from_strs(["test_list_root_namespace"]).unwrap() + )) + .await + .is_err() + ); // Create namespaces catalog @@ -375,10 +381,12 @@ async fn test_list_empty_multi_level_namespace() { ); // Currently this namespace doesn't exist, so it should return error. 
- assert!(catalog - .list_namespaces(Some(ns_apple.name())) - .await - .is_err()); + assert!( + catalog + .list_namespaces(Some(ns_apple.name())) + .await + .is_err() + ); // Create namespaces catalog diff --git a/crates/catalog/sql/src/catalog.rs b/crates/catalog/sql/src/catalog.rs index b6bff78963..78767c5b5a 100644 --- a/crates/catalog/sql/src/catalog.rs +++ b/crates/catalog/sql/src/catalog.rs @@ -26,7 +26,7 @@ use iceberg::{ Catalog, Error, ErrorKind, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, TableIdent, }; -use sqlx::any::{install_default_drivers, AnyPoolOptions, AnyQueryResult, AnyRow}; +use sqlx::any::{AnyPoolOptions, AnyQueryResult, AnyRow, install_default_drivers}; use sqlx::{Any, AnyPool, Row, Transaction}; use typed_builder::TypedBuilder; use uuid::Uuid; @@ -839,12 +839,9 @@ mod tests { fn simple_table_schema() -> Schema { Schema::builder() - .with_fields(vec![NestedField::required( - 1, - "foo", - Type::Primitive(PrimitiveType::Int), - ) - .into()]) + .with_fields(vec![ + NestedField::required(1, "foo", Type::Primitive(PrimitiveType::Int)).into(), + ]) .build() .unwrap() } @@ -1050,10 +1047,12 @@ mod tests { let namespace_ident = NamespaceIdent::new("a".into()); create_namespace(&catalog, &namespace_ident).await; - assert!(!catalog - .namespace_exists(&NamespaceIdent::new("b".into())) - .await - .unwrap()); + assert!( + !catalog + .namespace_exists(&NamespaceIdent::new("b".into())) + .await + .unwrap() + ); } #[tokio::test] @@ -1183,10 +1182,12 @@ mod tests { catalog.drop_namespace(&namespace_ident_a_b).await.unwrap(); - assert!(!catalog - .namespace_exists(&namespace_ident_a_b) - .await - .unwrap()); + assert!( + !catalog + .namespace_exists(&namespace_ident_a_b) + .await + .unwrap() + ); assert!(catalog.namespace_exists(&namespace_ident_a).await.unwrap()); } @@ -1210,15 +1211,19 @@ mod tests { .await .unwrap(); - assert!(!catalog - .namespace_exists(&namespace_ident_a_b_c) - .await - .unwrap()); + assert!( + !catalog + .namespace_exists(&namespace_ident_a_b_c) + .await + .unwrap() + ); - assert!(catalog - .namespace_exists(&namespace_ident_a_b) - .await - .unwrap()); + assert!( + catalog + .namespace_exists(&namespace_ident_a_b) + .await + .unwrap() + ); assert!(catalog.namespace_exists(&namespace_ident_a).await.unwrap()); } @@ -1275,10 +1280,12 @@ mod tests { assert!(!catalog.namespace_exists(&namespace_ident_a).await.unwrap()); - assert!(catalog - .namespace_exists(&namespace_ident_a_b) - .await - .unwrap()); + assert!( + catalog + .namespace_exists(&namespace_ident_a_b) + .await + .unwrap() + ); } #[tokio::test] @@ -1341,11 +1348,13 @@ mod tests { assert_table_eq(&table, &expected_table_ident, &simple_table_schema()); - assert!(table - .metadata_location() - .unwrap() - .to_string() - .starts_with(&location)) + assert!( + table + .metadata_location() + .unwrap() + .to_string() + .starts_with(&location) + ) } #[tokio::test] @@ -1392,8 +1401,8 @@ mod tests { } #[tokio::test] - async fn test_create_table_in_nested_namespace_falls_back_to_nested_namespace_location_if_table_location_is_missing( - ) { + async fn test_create_table_in_nested_namespace_falls_back_to_nested_namespace_location_if_table_location_is_missing() + { let warehouse_loc = temp_path(); let catalog = new_sql_catalog(warehouse_loc).await; @@ -1449,8 +1458,8 @@ mod tests { } #[tokio::test] - async fn test_create_table_falls_back_to_warehouse_location_if_both_table_location_and_namespace_location_are_missing( - ) { + async fn 
test_create_table_falls_back_to_warehouse_location_if_both_table_location_and_namespace_location_are_missing() + { let warehouse_loc = temp_path(); let catalog = new_sql_catalog(warehouse_loc.clone()).await; @@ -1489,8 +1498,8 @@ mod tests { } #[tokio::test] - async fn test_create_table_in_nested_namespace_falls_back_to_warehouse_location_if_both_table_location_and_namespace_location_are_missing( - ) { + async fn test_create_table_in_nested_namespace_falls_back_to_warehouse_location_if_both_table_location_and_namespace_location_are_missing() + { let warehouse_loc = temp_path(); let catalog = new_sql_catalog(warehouse_loc.clone()).await; diff --git a/crates/examples/src/rest_catalog_table.rs b/crates/examples/src/rest_catalog_table.rs index 25ef9d9acd..9597576580 100644 --- a/crates/examples/src/rest_catalog_table.rs +++ b/crates/examples/src/rest_catalog_table.rs @@ -83,11 +83,13 @@ async fn main() { // ANCHOR: load_table // Ensure that the table is under the correct namespace. - assert!(catalog - .list_tables(&namespace_ident) - .await - .unwrap() - .contains(&table_ident)); + assert!( + catalog + .list_tables(&namespace_ident) + .await + .unwrap() + .contains(&table_ident) + ); // Load the table back from the catalog. It should be identical to the created table. let loaded_table = catalog.load_table(&table_ident).await.unwrap(); diff --git a/crates/iceberg/src/arrow/nan_val_cnt_visitor.rs b/crates/iceberg/src/arrow/nan_val_cnt_visitor.rs index db6279d9ca..6b75c011cb 100644 --- a/crates/iceberg/src/arrow/nan_val_cnt_visitor.rs +++ b/crates/iceberg/src/arrow/nan_val_cnt_visitor.rs @@ -17,19 +17,19 @@ //! The module contains the visitor for calculating NaN values in give arrow record batch. -use std::collections::hash_map::Entry; use std::collections::HashMap; +use std::collections::hash_map::Entry; use std::sync::Arc; use arrow_array::{ArrayRef, Float32Array, Float64Array, RecordBatch, StructArray}; use arrow_schema::DataType; +use crate::Result; use crate::arrow::ArrowArrayAccessor; use crate::spec::{ - visit_struct_with_partner, ListType, MapType, NestedFieldRef, PrimitiveType, Schema, SchemaRef, - SchemaWithPartnerVisitor, StructType, + ListType, MapType, NestedFieldRef, PrimitiveType, Schema, SchemaRef, SchemaWithPartnerVisitor, + StructType, visit_struct_with_partner, }; -use crate::Result; macro_rules! 
cast_and_update_cnt_map { ($t:ty, $col:ident, $self:ident, $field_id:ident) => { diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs index 228996903c..a3462edec6 100644 --- a/crates/iceberg/src/arrow/reader.rs +++ b/crates/iceberg/src/arrow/reader.rs @@ -33,12 +33,12 @@ use arrow_string::like::starts_with; use bytes::Bytes; use fnv::FnvHashSet; use futures::future::BoxFuture; -use futures::{try_join, FutureExt, StreamExt, TryFutureExt, TryStreamExt}; +use futures::{FutureExt, StreamExt, TryFutureExt, TryStreamExt, try_join}; use parquet::arrow::arrow_reader::{ ArrowPredicateFn, ArrowReaderOptions, RowFilter, RowSelection, RowSelector, }; use parquet::arrow::async_reader::AsyncFileReader; -use parquet::arrow::{ParquetRecordBatchStreamBuilder, ProjectionMask, PARQUET_FIELD_ID_META_KEY}; +use parquet::arrow::{PARQUET_FIELD_ID_META_KEY, ParquetRecordBatchStreamBuilder, ProjectionMask}; use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader, RowGroupMetaData}; use parquet::schema::types::{SchemaDescriptor, Type as ParquetType}; @@ -47,7 +47,7 @@ use crate::arrow::record_batch_transformer::RecordBatchTransformer; use crate::arrow::{arrow_schema_to_schema, get_arrow_datum}; use crate::delete_vector::DeleteVector; use crate::error::Result; -use crate::expr::visitors::bound_predicate_visitor::{visit, BoundPredicateVisitor}; +use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit}; use crate::expr::visitors::page_index_evaluator::PageIndexEvaluator; use crate::expr::visitors::row_group_metrics_evaluator::RowGroupMetricsEvaluator; use crate::expr::{BoundPredicate, BoundReference}; @@ -1439,6 +1439,7 @@ mod tests { use roaring::RoaringTreemap; use tempfile::TempDir; + use crate::ErrorKind; use crate::arrow::reader::{CollectFieldIdVisitor, PARQUET_FIELD_ID_META_KEY}; use crate::arrow::{ArrowReader, ArrowReaderBuilder}; use crate::delete_vector::DeleteVector; @@ -1449,7 +1450,6 @@ mod tests { use crate::spec::{ DataContentType, DataFileFormat, Datum, NestedField, PrimitiveType, Schema, SchemaRef, Type, }; - use crate::ErrorKind; fn table_schema_simple() -> SchemaRef { Arc::new( @@ -1773,25 +1773,19 @@ message schema { let schema = Arc::new( Schema::builder() .with_schema_id(1) - .with_fields(vec![NestedField::optional( - 1, - "a", - Type::Primitive(PrimitiveType::String), - ) - .into()]) + .with_fields(vec![ + NestedField::optional(1, "a", Type::Primitive(PrimitiveType::String)).into(), + ]) .build() .unwrap(), ); - let arrow_schema = Arc::new(ArrowSchema::new(vec![Field::new( - "a", - col_a_type.clone(), - true, - ) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "1".to_string(), - )]))])); + let arrow_schema = Arc::new(ArrowSchema::new(vec![ + Field::new("a", col_a_type.clone(), true).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "1".to_string(), + )])), + ])); let tmp_dir = TempDir::new().unwrap(); let table_location = tmp_dir.path().to_str().unwrap().to_string(); diff --git a/crates/iceberg/src/arrow/record_batch_projector.rs b/crates/iceberg/src/arrow/record_batch_projector.rs index 878d0fe28e..7ca28c25c7 100644 --- a/crates/iceberg/src/arrow/record_batch_projector.rs +++ b/crates/iceberg/src/arrow/record_batch_projector.rs @@ -17,7 +17,7 @@ use std::sync::Arc; -use arrow_array::{make_array, ArrayRef, RecordBatch, StructArray}; +use arrow_array::{ArrayRef, RecordBatch, StructArray, make_array}; use arrow_buffer::NullBuffer; use arrow_schema::{DataType, Field, FieldRef, 
Fields, Schema, SchemaRef}; diff --git a/crates/iceberg/src/arrow/record_batch_transformer.rs b/crates/iceberg/src/arrow/record_batch_transformer.rs index 38543509bb..f231e0f2e2 100644 --- a/crates/iceberg/src/arrow/record_batch_transformer.rs +++ b/crates/iceberg/src/arrow/record_batch_transformer.rs @@ -434,7 +434,7 @@ impl RecordBatchTransformer { return Err(Error::new( ErrorKind::Unexpected, format!("unexpected target column type {}", dt), - )) + )); } }) } diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs index e6873e9584..f5e089d1cd 100644 --- a/crates/iceberg/src/arrow/schema.rs +++ b/crates/iceberg/src/arrow/schema.rs @@ -21,7 +21,7 @@ use std::collections::HashMap; use std::sync::Arc; use arrow_array::types::{ - validate_decimal_precision_and_scale, Decimal128Type, TimestampMicrosecondType, + Decimal128Type, TimestampMicrosecondType, validate_decimal_precision_and_scale, }; use arrow_array::{ BooleanArray, Date32Array, Datum as ArrowDatum, Float32Array, Float64Array, Int32Array, @@ -307,7 +307,7 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter { return Err(Error::new( ErrorKind::DataInvalid, "List type must have list data type", - )) + )); } }; @@ -830,7 +830,7 @@ pub(crate) fn get_parquet_stat_min_as_datum( (PrimitiveType::Binary, Statistics::ByteArray(stat)) => { return Ok(stat .min_bytes_opt() - .map(|bytes| Datum::binary(bytes.to_vec()))) + .map(|bytes| Datum::binary(bytes.to_vec()))); } _ => { return Ok(None); @@ -977,7 +977,7 @@ pub(crate) fn get_parquet_stat_max_as_datum( (PrimitiveType::Binary, Statistics::ByteArray(stat)) => { return Ok(stat .max_bytes_opt() - .map(|bytes| Datum::binary(bytes.to_vec()))) + .map(|bytes| Datum::binary(bytes.to_vec()))); } _ => { return Ok(None); diff --git a/crates/iceberg/src/arrow/value.rs b/crates/iceberg/src/arrow/value.rs index 84b33d3ff6..f8fd380dd0 100644 --- a/crates/iceberg/src/arrow/value.rs +++ b/crates/iceberg/src/arrow/value.rs @@ -26,8 +26,8 @@ use uuid::Uuid; use super::get_field_id; use crate::spec::{ - visit_struct_with_partner, ListType, Literal, Map, MapType, NestedField, PartnerAccessor, - PrimitiveType, SchemaWithPartnerVisitor, Struct, StructType, + ListType, Literal, Map, MapType, NestedField, PartnerAccessor, PrimitiveType, + SchemaWithPartnerVisitor, Struct, StructType, visit_struct_with_partner, }; use crate::{Error, ErrorKind, Result}; diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index 638d575d77..8c3d07a590 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -18,19 +18,19 @@ //! Conversion between iceberg and avro schema. 
use std::collections::BTreeMap; +use apache_avro::Schema as AvroSchema; use apache_avro::schema::{ ArraySchema, DecimalSchema, FixedSchema, MapSchema, Name, RecordField as AvroRecordField, RecordFieldOrder, RecordSchema, UnionSchema, }; -use apache_avro::Schema as AvroSchema; use itertools::{Either, Itertools}; use serde_json::{Number, Value}; use crate::spec::{ - visit_schema, ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, Schema, - SchemaVisitor, StructType, Type, + ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, Schema, SchemaVisitor, + StructType, Type, visit_schema, }; -use crate::{ensure_data_valid, Error, ErrorKind, Result}; +use crate::{Error, ErrorKind, Result, ensure_data_valid}; const ELEMENT_ID: &str = "element-id"; const FILED_ID_PROP: &str = "field-id"; @@ -545,7 +545,7 @@ impl AvroSchemaVisitor for AvroSchemaToSchema { format!( "Logical type {ty} is not support in iceberg primitive type.", ), - )) + )); } } } else { @@ -558,7 +558,7 @@ impl AvroSchemaVisitor for AvroSchemaToSchema { return Err(Error::new( ErrorKind::Unexpected, "Unable to convert avro {schema} to iceberg primitive type.", - )) + )); } }; @@ -636,8 +636,8 @@ mod tests { use std::fs::read_to_string; use std::sync::Arc; - use apache_avro::schema::{Namespace, UnionSchema}; use apache_avro::Schema as AvroSchema; + use apache_avro::schema::{Namespace, UnionSchema}; use super::*; use crate::avro::schema::AvroSchemaToSchema; @@ -783,20 +783,22 @@ mod tests { let iceberg_schema = { Schema::builder() - .with_fields(vec![NestedField::required( - 100, - "array_with_string", - ListType { - element_field: NestedField::list_element( - 101, - PrimitiveType::String.into(), - true, - ) + .with_fields(vec![ + NestedField::required( + 100, + "array_with_string", + ListType { + element_field: NestedField::list_element( + 101, + PrimitiveType::String.into(), + true, + ) + .into(), + } .into(), - } + ) .into(), - ) - .into()]) + ]) .build() .unwrap() }; @@ -832,20 +834,22 @@ mod tests { let iceberg_schema = { Schema::builder() - .with_fields(vec![NestedField::required( - 100, - "array_with_string", - ListType { - element_field: NestedField::list_element( - 101, - PrimitiveType::String.into(), - true, - ) + .with_fields(vec![ + NestedField::required( + 100, + "array_with_string", + ListType { + element_field: NestedField::list_element( + 101, + PrimitiveType::String.into(), + true, + ) + .into(), + } .into(), - } + ) .into(), - ) - .into()]) + ]) .build() .unwrap() }; @@ -895,34 +899,36 @@ mod tests { let iceberg_schema = { Schema::builder() - .with_fields(vec![NestedField::required( - 100, - "array_with_record", - ListType { - element_field: NestedField::list_element( - 101, - StructType::new(vec![ - NestedField::required( - 102, - "contains_null", - PrimitiveType::Boolean.into(), - ) - .into(), - NestedField::optional( - 103, - "contains_nan", - PrimitiveType::Boolean.into(), - ) + .with_fields(vec![ + NestedField::required( + 100, + "array_with_record", + ListType { + element_field: NestedField::list_element( + 101, + StructType::new(vec![ + NestedField::required( + 102, + "contains_null", + PrimitiveType::Boolean.into(), + ) + .into(), + NestedField::optional( + 103, + "contains_nan", + PrimitiveType::Boolean.into(), + ) + .into(), + ]) .into(), - ]) + true, + ) .into(), - true, - ) + } .into(), - } + ) .into(), - ) - .into()]) + ]) .build() .unwrap() }; diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index 9521cde208..3457f83611 100644 --- 
a/crates/iceberg/src/catalog/mod.rs
+++ b/crates/iceberg/src/catalog/mod.rs
@@ -41,7 +41,7 @@ use crate::{Error, ErrorKind, Result};
 pub trait Catalog: Debug + Sync + Send {
     /// List namespaces inside the catalog.
     async fn list_namespaces(&self, parent: Option<&NamespaceIdent>)
-        -> Result<Vec<NamespaceIdent>>;
+    -> Result<Vec<NamespaceIdent>>;
 
     /// Create a new namespace inside the catalog.
     async fn create_namespace(
@@ -872,17 +872,18 @@ mod tests {
     use std::collections::HashMap;
     use std::fmt::Debug;
 
-    use serde::de::DeserializeOwned;
     use serde::Serialize;
+    use serde::de::DeserializeOwned;
     use uuid::uuid;
 
     use super::ViewUpdate;
     use crate::spec::{
-        BlobMetadata, FormatVersion, NestedField, NullOrder, Operation, PartitionStatisticsFile,
-        PrimitiveType, Schema, Snapshot, SnapshotReference, SnapshotRetention, SortDirection,
-        SortField, SortOrder, SqlViewRepresentation, StatisticsFile, Summary, TableMetadata,
-        TableMetadataBuilder, Transform, Type, UnboundPartitionSpec, ViewFormatVersion,
-        ViewRepresentation, ViewRepresentations, ViewVersion, MAIN_BRANCH,
+        BlobMetadata, FormatVersion, MAIN_BRANCH, NestedField, NullOrder, Operation,
+        PartitionStatisticsFile, PrimitiveType, Schema, Snapshot, SnapshotReference,
+        SnapshotRetention, SortDirection, SortField, SortOrder, SqlViewRepresentation,
+        StatisticsFile, Summary, TableMetadata, TableMetadataBuilder, Transform, Type,
+        UnboundPartitionSpec, ViewFormatVersion, ViewRepresentation, ViewRepresentations,
+        ViewVersion,
     };
     use crate::{NamespaceIdent, TableCreation, TableIdent, TableRequirement, TableUpdate};
 
diff --git a/crates/iceberg/src/delete_file_index.rs b/crates/iceberg/src/delete_file_index.rs
index 4c9ffb6951..3f25bbda36 100644
--- a/crates/iceberg/src/delete_file_index.rs
+++ b/crates/iceberg/src/delete_file_index.rs
@@ -22,8 +22,8 @@ use std::pin::Pin;
 use std::sync::{Arc, RwLock};
 use std::task::{Context, Poll};
 
-use futures::channel::mpsc::{channel, Sender};
 use futures::StreamExt;
+use futures::channel::mpsc::{Sender, channel};
 
 use crate::runtime::spawn;
 use crate::scan::{DeleteFileContext, FileScanTaskDeleteFile};
diff --git a/crates/iceberg/src/expr/predicate.rs b/crates/iceberg/src/expr/predicate.rs
index 18b5700559..7e50e39954 100644
--- a/crates/iceberg/src/expr/predicate.rs
+++ b/crates/iceberg/src/expr/predicate.rs
@@ -393,7 +393,7 @@ impl Bind for Predicate {
                         return Err(Error::new(
                             ErrorKind::Unexpected,
                             format!("Expecting unary operator, but found {op}"),
-                        ))
+                        ));
                     }
                 }
 
@@ -476,7 +476,7 @@ impl Bind for Predicate {
                         return Err(Error::new(
                             ErrorKind::Unexpected,
                             format!("Expecting unary operator,but found {op}"),
-                        ))
+                        ));
                     }
                 }
 
diff --git a/crates/iceberg/src/expr/visitors/bound_predicate_visitor.rs b/crates/iceberg/src/expr/visitors/bound_predicate_visitor.rs
index 0858d1dcf7..22f6d08ad3 100644
--- a/crates/iceberg/src/expr/visitors/bound_predicate_visitor.rs
+++ b/crates/iceberg/src/expr/visitors/bound_predicate_visitor.rs
@@ -17,9 +17,9 @@
 
 use fnv::FnvHashSet;
 
+use crate::Result;
 use crate::expr::{BoundPredicate, BoundReference, PredicateOperator};
 use crate::spec::Datum;
-use crate::Result;
 
 /// A visitor for [`BoundPredicate`]s. Visits in post-order.
pub trait BoundPredicateVisitor {
@@ -57,7 +57,7 @@ pub trait BoundPredicateVisitor { /// Called after a predicate with an `IsNan` operator is visited fn is_nan(&mut self, reference: &BoundReference, predicate: &BoundPredicate) - -> Result<Self::T>; + -> Result<Self::T>; /// Called after a predicate with a `NotNan` operator is visited fn not_nan(
@@ -234,7 +234,7 @@ mod tests { use fnv::FnvHashSet; - use crate::expr::visitors::bound_predicate_visitor::{visit, BoundPredicateVisitor}; + use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit}; use crate::expr::{ BinaryExpression, Bind, BoundPredicate, BoundReference, Predicate, PredicateOperator, Reference, SetExpression, UnaryExpression,
diff --git a/crates/iceberg/src/expr/visitors/expression_evaluator.rs b/crates/iceberg/src/expr/visitors/expression_evaluator.rs
index 4715b164f8..4db1ad7d93 100644
--- a/crates/iceberg/src/expr/visitors/expression_evaluator.rs
+++ b/crates/iceberg/src/expr/visitors/expression_evaluator.rs
@@ -17,7 +17,7 @@ use fnv::FnvHashSet; -use super::bound_predicate_visitor::{visit, BoundPredicateVisitor}; +use super::bound_predicate_visitor::{BoundPredicateVisitor, visit}; use crate::expr::{BoundPredicate, BoundReference}; use crate::spec::{DataFile, Datum, PrimitiveLiteral, Struct}; use crate::{Error, ErrorKind, Result};
@@ -86,7 +86,10 @@ impl BoundPredicateVisitor for ExpressionEvaluatorVisitor<'_> { } fn not(&mut self, _inner: bool) -> Result<bool> { - Err(Error::new(ErrorKind::Unexpected, "The evaluation of expressions should not be performed against Predicates that contain a Not operator. Ensure that \"Rewrite Not\" gets applied to the originating Predicate before binding it.")) + Err(Error::new( + ErrorKind::Unexpected, + "The evaluation of expressions should not be performed against Predicates that contain a Not operator. Ensure that \"Rewrite Not\" gets applied to the originating Predicate before binding it.", + )) } fn is_null(&mut self, reference: &BoundReference, _predicate: &BoundPredicate) -> Result<bool> {
@@ -252,17 +255,17 @@ mod tests { use predicate::SetExpression; use super::ExpressionEvaluator; + use crate::Result; use crate::expr::visitors::inclusive_projection::InclusiveProjection; use crate::expr::{ - predicate, BinaryExpression, Bind, BoundPredicate, Predicate, PredicateOperator, Reference, - UnaryExpression, + BinaryExpression, Bind, BoundPredicate, Predicate, PredicateOperator, Reference, + UnaryExpression, predicate, }; use crate::spec::{ DataContentType, DataFile, DataFileFormat, Datum, Literal, NestedField, PartitionSpec, PartitionSpecRef, PrimitiveType, Schema, SchemaRef, Struct, Transform, Type, UnboundPartitionField, }; - use crate::Result; fn create_partition_spec(r#type: PrimitiveType) -> Result<(PartitionSpecRef, SchemaRef)> { let schema = Schema::builder()
diff --git a/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs
index 302359984a..a00376e1ac 100644
--- a/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs
+++ b/crates/iceberg/src/expr/visitors/inclusive_metrics_evaluator.rs
@@ -17,7 +17,7 @@ use fnv::FnvHashSet; -use crate::expr::visitors::bound_predicate_visitor::{visit, BoundPredicateVisitor}; +use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit}; use crate::expr::{BoundPredicate, BoundReference}; use crate::spec::{DataFile, Datum, PrimitiveLiteral}; use crate::{Error, ErrorKind};
@@ -485,11 +485,11 @@ mod test { use fnv::FnvHashSet; - use crate::expr::visitors::inclusive_metrics_evaluator::InclusiveMetricsEvaluator; use crate::expr::PredicateOperator::{ Eq, GreaterThan, GreaterThanOrEq, In, IsNan, IsNull, LessThan, LessThanOrEq, NotEq, NotIn, NotNan, NotNull, NotStartsWith, StartsWith, }; + use crate::expr::visitors::inclusive_metrics_evaluator::InclusiveMetricsEvaluator; use crate::expr::{ BinaryExpression, Bind, BoundPredicate, Predicate, Reference, SetExpression, UnaryExpression,
@@ -1659,12 +1659,14 @@ mod test { let partition_spec = PartitionSpec::builder(table_schema_ref.clone()) .with_spec_id(1) - .add_unbound_fields(vec![UnboundPartitionField::builder() - .source_id(1) - .name("a".to_string()) - .field_id(1) - .transform(Transform::Identity) - .build()]) + .add_unbound_fields(vec![ + UnboundPartitionField::builder() + .source_id(1) + .name("a".to_string()) + .field_id(1) + .transform(Transform::Identity) + .build(), + ]) .unwrap() .build() .unwrap();
diff --git a/crates/iceberg/src/expr/visitors/inclusive_projection.rs b/crates/iceberg/src/expr/visitors/inclusive_projection.rs
index 38f53c44d0..d9544e4c47 100644
--- a/crates/iceberg/src/expr/visitors/inclusive_projection.rs
+++ b/crates/iceberg/src/expr/visitors/inclusive_projection.rs
@@ -19,7 +19,7 @@ use std::collections::HashMap; use fnv::FnvHashSet; -use crate::expr::visitors::bound_predicate_visitor::{visit, BoundPredicateVisitor}; +use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit}; use crate::expr::{BoundPredicate, BoundReference, Predicate}; use crate::spec::{Datum, PartitionField, PartitionSpecRef}; use crate::{Error, ErrorKind};
@@ -102,11 +102,10 @@ impl BoundPredicateVisitor for InclusiveProjection { } fn not(&mut self, _inner: Self::T) -> crate::Result<Self::T> { - Err( - Error::new( ErrorKind::Unexpected, - "InclusiveProjection should not be 
performed against Predicates that contain a Not operator. Ensure that \"Rewrite Not\" gets applied to the originating Predicate before binding it.", ) - ) + "InclusiveProjection should not be performed against Predicates that contain a Not operator. Ensure that \"Rewrite Not\" gets applied to the originating Predicate before binding it.", + )) } fn is_null( diff --git a/crates/iceberg/src/expr/visitors/manifest_evaluator.rs b/crates/iceberg/src/expr/visitors/manifest_evaluator.rs index 3554d57a0d..8653b22a0e 100644 --- a/crates/iceberg/src/expr/visitors/manifest_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/manifest_evaluator.rs @@ -17,7 +17,7 @@ use fnv::FnvHashSet; -use crate::expr::visitors::bound_predicate_visitor::{visit, BoundPredicateVisitor}; +use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit}; use crate::expr::{BoundPredicate, BoundReference}; use crate::spec::{Datum, FieldSummary, ManifestFile, PrimitiveLiteral, Type}; use crate::{Error, ErrorKind, Result}; @@ -423,6 +423,7 @@ mod test { use fnv::FnvHashSet; + use crate::Result; use crate::expr::visitors::manifest_evaluator::ManifestEvaluator; use crate::expr::{ BinaryExpression, Bind, Predicate, PredicateOperator, Reference, SetExpression, @@ -432,7 +433,6 @@ mod test { Datum, FieldSummary, ManifestContentType, ManifestFile, NestedField, PrimitiveType, Schema, SchemaRef, Type, }; - use crate::Result; const INT_MIN_VALUE: i32 = 30; const INT_MAX_VALUE: i32 = 79; diff --git a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs index 35b03118b0..ea56c32c66 100644 --- a/crates/iceberg/src/expr/visitors/page_index_evaluator.rs +++ b/crates/iceberg/src/expr/visitors/page_index_evaluator.rs @@ -26,7 +26,7 @@ use parquet::file::metadata::RowGroupMetaData; use parquet::file::page_index::index::Index; use parquet::file::page_index::offset_index::OffsetIndexMetaData; -use crate::expr::visitors::bound_predicate_visitor::{visit, BoundPredicateVisitor}; +use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit}; use crate::expr::{BoundPredicate, BoundReference}; use crate::spec::{Datum, PrimitiveLiteral, PrimitiveType, Schema}; use crate::{Error, ErrorKind, Result}; @@ -371,13 +371,13 @@ impl<'a> PageIndexEvaluator<'a> { return Err(Error::new( ErrorKind::FeatureUnsupported, "unsupported 'FIXED_LEN_BYTE_ARRAY' index type in column_index", - )) + )); } Index::INT96(_) => { return Err(Error::new( ErrorKind::FeatureUnsupported, "unsupported 'INT96' index type in column_index", - )) + )); } }; @@ -801,7 +801,7 @@ mod tests { use parquet::schema::types::{ ColumnDescriptor, ColumnPath, SchemaDescriptor, Type as parquetSchemaType, }; - use rand::{thread_rng, Rng}; + use rand::{Rng, thread_rng}; use super::PageIndexEvaluator; use crate::expr::{Bind, Reference}; @@ -1122,8 +1122,8 @@ mod tests { } #[test] - fn eval_not_starts_with_pages_containing_value_except_pages_with_min_and_max_equal_to_prefix_and_all_null_pages( - ) -> Result<()> { + fn eval_not_starts_with_pages_containing_value_except_pages_with_min_and_max_equal_to_prefix_and_all_null_pages() + -> Result<()> { let row_group_metadata = create_row_group_metadata(4096, 1000, None, 1000, None)?; let (column_index, offset_index) = create_page_index()?; diff --git a/crates/iceberg/src/expr/visitors/row_group_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/row_group_metrics_evaluator.rs index 56f8db8b9e..0506b33af0 100644 --- 
a/crates/iceberg/src/expr/visitors/row_group_metrics_evaluator.rs
+++ b/crates/iceberg/src/expr/visitors/row_group_metrics_evaluator.rs
@@ -24,7 +24,7 @@ use parquet::file::metadata::RowGroupMetaData; use parquet::file::statistics::Statistics; use crate::arrow::{get_parquet_stat_max_as_datum, get_parquet_stat_min_as_datum}; -use crate::expr::visitors::bound_predicate_visitor::{visit, BoundPredicateVisitor}; +use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit}; use crate::expr::{BoundPredicate, BoundReference}; use crate::spec::{Datum, PrimitiveLiteral, PrimitiveType, Schema}; use crate::{Error, ErrorKind, Result};
@@ -528,12 +528,12 @@ mod tests { use parquet::schema::types::{ ColumnDescriptor, ColumnPath, SchemaDescriptor, Type as parquetSchemaType, }; - use rand::{thread_rng, Rng}; + use rand::{Rng, thread_rng}; use super::RowGroupMetricsEvaluator; + use crate::Result; use crate::expr::{Bind, Reference}; use crate::spec::{Datum, NestedField, PrimitiveType, Schema, Type}; - use crate::Result; #[test] fn eval_matches_no_rows_for_empty_row_group() -> Result<()> {
diff --git a/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs b/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs
index 841b743e5f..f74ce3a6c3 100644
--- a/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs
+++ b/crates/iceberg/src/expr/visitors/strict_metrics_evaluator.rs
@@ -17,7 +17,7 @@ use fnv::FnvHashSet; -use crate::expr::visitors::bound_predicate_visitor::{visit, BoundPredicateVisitor}; +use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit}; use crate::expr::{BoundPredicate, BoundReference}; use crate::spec::{DataFile, Datum}; use crate::{Error, ErrorKind, Result};
@@ -421,11 +421,11 @@ mod test { use fnv::FnvHashSet; - use crate::expr::visitors::strict_metrics_evaluator::StrictMetricsEvaluator; use crate::expr::PredicateOperator::{ Eq, GreaterThan, GreaterThanOrEq, In, IsNan, IsNull, LessThan, LessThanOrEq, NotEq, NotIn, NotNan, NotNull, NotStartsWith, StartsWith, }; + use crate::expr::visitors::strict_metrics_evaluator::StrictMetricsEvaluator; use crate::expr::{ BinaryExpression, Bind, BoundPredicate, Predicate, Reference, SetExpression, UnaryExpression,
@@ -1371,7 +1371,10 @@ mod test { let result = StrictMetricsEvaluator::eval(&not_equal_int("id", INT_MIN_VALUE), &file).unwrap(); - assert!(!result, "Strict eval: notEqual should be false when literal equals lower bound (but upper is different)"); + assert!( + !result, + "Strict eval: notEqual should be false when literal equals lower bound (but upper is different)" + ); let result = StrictMetricsEvaluator::eval(&not_equal_int("id", INT_MAX_VALUE - 4), &file).unwrap();
diff --git a/crates/iceberg/src/expr/visitors/strict_projection.rs b/crates/iceberg/src/expr/visitors/strict_projection.rs
index 1ac497ca15..ebc6212c76 100644
--- a/crates/iceberg/src/expr/visitors/strict_projection.rs
+++ b/crates/iceberg/src/expr/visitors/strict_projection.rs
@@ -19,7 +19,7 @@ use std::collections::HashMap; use fnv::FnvHashSet; -use crate::expr::visitors::bound_predicate_visitor::{visit, BoundPredicateVisitor}; +use crate::expr::visitors::bound_predicate_visitor::{BoundPredicateVisitor, visit}; use crate::expr::{BoundPredicate, BoundReference, Predicate}; use crate::spec::{Datum, PartitionField, PartitionSpecRef}; use crate::{Error, ErrorKind};
@@ -117,12 +117,10 @@ impl BoundPredicateVisitor for StrictProjection { } fn not(&mut self, _inner: Self::T) -> crate::Result<Self::T> { - Err( - Error::new( + 
Err(Error::new( ErrorKind::Unexpected, "StrictProjection should not be performed against Predicates that contain a Not operator. Ensure that \"Rewrite Not\" gets applied to the originating Predicate before binding it.", - ) - ) + )) } fn is_null( diff --git a/crates/iceberg/src/inspect/manifests.rs b/crates/iceberg/src/inspect/manifests.rs index 0edd82fdb0..d59d831c7f 100644 --- a/crates/iceberg/src/inspect/manifests.rs +++ b/crates/iceberg/src/inspect/manifests.rs @@ -18,19 +18,19 @@ use std::collections::HashMap; use std::sync::Arc; +use arrow_array::RecordBatch; use arrow_array::builder::{ BooleanBuilder, GenericListBuilder, ListBuilder, PrimitiveBuilder, StringBuilder, StructBuilder, }; use arrow_array::types::{Int32Type, Int64Type}; -use arrow_array::RecordBatch; use arrow_schema::{DataType, Field, Fields}; -use futures::{stream, StreamExt}; +use futures::{StreamExt, stream}; +use crate::Result; use crate::arrow::schema_to_arrow_schema; use crate::scan::ArrowRecordBatchStream; use crate::spec::{FieldSummary, ListType, NestedField, PrimitiveType, StructType, Type}; use crate::table::Table; -use crate::Result; /// Manifests table. pub struct ManifestsTable<'a> { diff --git a/crates/iceberg/src/inspect/snapshots.rs b/crates/iceberg/src/inspect/snapshots.rs index 20e9b4edcb..002cc8eb84 100644 --- a/crates/iceberg/src/inspect/snapshots.rs +++ b/crates/iceberg/src/inspect/snapshots.rs @@ -18,20 +18,20 @@ use std::collections::HashMap; use std::sync::Arc; +use arrow_array::RecordBatch; use arrow_array::builder::{MapBuilder, MapFieldNames, PrimitiveBuilder, StringBuilder}; use arrow_array::types::{Int64Type, TimestampMicrosecondType}; -use arrow_array::RecordBatch; use arrow_schema::{DataType, Field}; -use futures::{stream, StreamExt}; +use futures::{StreamExt, stream}; use parquet::arrow::PARQUET_FIELD_ID_META_KEY; -use crate::arrow::{schema_to_arrow_schema, DEFAULT_MAP_FIELD_NAME}; +use crate::Result; +use crate::arrow::{DEFAULT_MAP_FIELD_NAME, schema_to_arrow_schema}; use crate::scan::ArrowRecordBatchStream; use crate::spec::{ - MapType, NestedField, PrimitiveType, Type, MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, + MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, MapType, NestedField, PrimitiveType, Type, }; use crate::table::Table; -use crate::Result; /// Snapshots table. pub struct SnapshotsTable<'a> { diff --git a/crates/iceberg/src/io/mod.rs b/crates/iceberg/src/io/mod.rs index 8e0638257c..d442b1522e 100644 --- a/crates/iceberg/src/io/mod.rs +++ b/crates/iceberg/src/io/mod.rs @@ -22,8 +22,8 @@ //! We provided a `FileIOBuilder` to build `FileIO` from scratch. For example: //! //! ```rust -//! use iceberg::io::{FileIOBuilder, S3_REGION}; //! use iceberg::Result; +//! use iceberg::io::{FileIOBuilder, S3_REGION}; //! //! # fn test() -> Result<()> { //! // Build a memory file io. @@ -41,8 +41,8 @@ //! Or you can pass a path to ask `FileIO` to infer schema for you: //! //! ```rust -//! use iceberg::io::{FileIO, S3_REGION}; //! use iceberg::Result; +//! use iceberg::io::{FileIO, S3_REGION}; //! //! # fn test() -> Result<()> { //! // Build a memory file io. 
diff --git a/crates/iceberg/src/io/object_cache.rs b/crates/iceberg/src/io/object_cache.rs index e40af3e022..23f869b83b 100644 --- a/crates/iceberg/src/io/object_cache.rs +++ b/crates/iceberg/src/io/object_cache.rs @@ -190,13 +190,13 @@ mod tests { use uuid::Uuid; use super::*; + use crate::TableIdent; use crate::io::{FileIO, OutputFile}; use crate::spec::{ DataContentType, DataFileBuilder, DataFileFormat, Literal, ManifestEntry, ManifestListWriter, ManifestStatus, ManifestWriterBuilder, Struct, TableMetadata, }; use crate::table::Table; - use crate::TableIdent; struct TableTestFixture { table_location: String, diff --git a/crates/iceberg/src/io/storage_fs.rs b/crates/iceberg/src/io/storage_fs.rs index ff38d7613c..d3e121a085 100644 --- a/crates/iceberg/src/io/storage_fs.rs +++ b/crates/iceberg/src/io/storage_fs.rs @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -use opendal::services::FsConfig; use opendal::Operator; +use opendal::services::FsConfig; use crate::Result; diff --git a/crates/iceberg/src/io/storage_gcs.rs b/crates/iceberg/src/io/storage_gcs.rs index 1e7aab7a47..8c3d914c86 100644 --- a/crates/iceberg/src/io/storage_gcs.rs +++ b/crates/iceberg/src/io/storage_gcs.rs @@ -18,8 +18,8 @@ use std::collections::HashMap; -use opendal::services::GcsConfig; use opendal::Operator; +use opendal::services::GcsConfig; use url::Url; use crate::io::is_truthy; diff --git a/crates/iceberg/src/io/storage_memory.rs b/crates/iceberg/src/io/storage_memory.rs index ffc082d837..b8023717b6 100644 --- a/crates/iceberg/src/io/storage_memory.rs +++ b/crates/iceberg/src/io/storage_memory.rs @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -use opendal::services::MemoryConfig; use opendal::Operator; +use opendal::services::MemoryConfig; use crate::Result; diff --git a/crates/iceberg/src/puffin/metadata.rs b/crates/iceberg/src/puffin/metadata.rs index b56e4d8fd0..6039c7f820 100644 --- a/crates/iceberg/src/puffin/metadata.rs +++ b/crates/iceberg/src/puffin/metadata.rs @@ -611,7 +611,10 @@ mod tests { let input_file = input_file_with_bytes(&temp_dir, &bytes).await; assert_eq!( - FileMetadata::read(&input_file).await.unwrap_err().to_string(), + FileMetadata::read(&input_file) + .await + .unwrap_err() + .to_string(), "DataInvalid => Footer is not a valid UTF-8 string, source: invalid utf-8 sequence of 1 bytes from index 1", ) } @@ -710,7 +713,10 @@ mod tests { .await; assert_eq!( - FileMetadata::read(&input_file).await.unwrap_err().to_string(), + FileMetadata::read(&input_file) + .await + .unwrap_err() + .to_string(), format!( "DataInvalid => Given string is not valid JSON, source: missing field `blobs` at line 3 column 13" ), @@ -730,8 +736,13 @@ mod tests { .await; assert_eq!( - FileMetadata::read(&input_file).await.unwrap_err().to_string(), - format!("DataInvalid => Given string is not valid JSON, source: invalid type: map, expected a sequence at line 2 column 26"), + FileMetadata::read(&input_file) + .await + .unwrap_err() + .to_string(), + format!( + "DataInvalid => Given string is not valid JSON, source: invalid type: map, expected a sequence at line 2 column 26" + ), ) } @@ -867,7 +878,10 @@ mod tests { .await; assert_eq!( - FileMetadata::read(&input_file).await.unwrap_err().to_string(), + FileMetadata::read(&input_file) + .await + .unwrap_err() + .to_string(), format!( "DataInvalid => Given string is not valid JSON, source: invalid value: integer `{}`, expected i32 at line 5 column 51", 
out_of_i32_range_number @@ -882,7 +896,10 @@ mod tests { let input_file = input_file_with_payload(&temp_dir, r#""blobs" = []"#).await; assert_eq!( - FileMetadata::read(&input_file).await.unwrap_err().to_string(), + FileMetadata::read(&input_file) + .await + .unwrap_err() + .to_string(), "DataInvalid => Given string is not valid JSON, source: invalid type: string \"blobs\", expected struct FileMetadata at line 1 column 7", ) } diff --git a/crates/iceberg/src/puffin/mod.rs b/crates/iceberg/src/puffin/mod.rs index a1c792426c..0a03781655 100644 --- a/crates/iceberg/src/puffin/mod.rs +++ b/crates/iceberg/src/puffin/mod.rs @@ -20,13 +20,13 @@ #![deny(missing_docs)] mod blob; -pub use blob::{Blob, APACHE_DATASKETCHES_THETA_V1, DELETION_VECTOR_V1}; +pub use blob::{APACHE_DATASKETCHES_THETA_V1, Blob, DELETION_VECTOR_V1}; mod compression; pub use compression::CompressionCodec; mod metadata; -pub use metadata::{BlobMetadata, FileMetadata, CREATED_BY_PROPERTY}; +pub use metadata::{BlobMetadata, CREATED_BY_PROPERTY, FileMetadata}; mod reader; pub use reader::PuffinReader; diff --git a/crates/iceberg/src/puffin/reader.rs b/crates/iceberg/src/puffin/reader.rs index 4e6fb2fb9a..dce53d93f0 100644 --- a/crates/iceberg/src/puffin/reader.rs +++ b/crates/iceberg/src/puffin/reader.rs @@ -17,10 +17,10 @@ use tokio::sync::OnceCell; +use crate::Result; use crate::io::{FileRead, InputFile}; use crate::puffin::blob::Blob; use crate::puffin::metadata::{BlobMetadata, FileMetadata}; -use crate::Result; /// Puffin reader pub struct PuffinReader { diff --git a/crates/iceberg/src/puffin/test_utils.rs b/crates/iceberg/src/puffin/test_utils.rs index 3dfe3e61e2..ca91fa217b 100644 --- a/crates/iceberg/src/puffin/test_utils.rs +++ b/crates/iceberg/src/puffin/test_utils.rs @@ -20,7 +20,7 @@ use std::collections::HashMap; use super::blob::Blob; use crate::io::{FileIOBuilder, InputFile}; use crate::puffin::compression::CompressionCodec; -use crate::puffin::metadata::{BlobMetadata, FileMetadata, CREATED_BY_PROPERTY}; +use crate::puffin::metadata::{BlobMetadata, CREATED_BY_PROPERTY, FileMetadata}; const JAVA_TESTDATA: &str = "testdata/puffin/java-generated"; const EMPTY_UNCOMPRESSED: &str = "empty-puffin-uncompressed.bin"; diff --git a/crates/iceberg/src/puffin/writer.rs b/crates/iceberg/src/puffin/writer.rs index 7d6d0548c5..f68efda889 100644 --- a/crates/iceberg/src/puffin/writer.rs +++ b/crates/iceberg/src/puffin/writer.rs @@ -19,11 +19,11 @@ use std::collections::{HashMap, HashSet}; use bytes::Bytes; +use crate::Result; use crate::io::{FileWrite, OutputFile}; use crate::puffin::blob::Blob; use crate::puffin::compression::CompressionCodec; use crate::puffin::metadata::{BlobMetadata, FileMetadata, Flag}; -use crate::Result; /// Puffin writer pub struct PuffinWriter { @@ -148,6 +148,7 @@ mod tests { use tempfile::TempDir; + use crate::Result; use crate::io::{FileIOBuilder, InputFile, OutputFile}; use crate::puffin::blob::Blob; use crate::puffin::compression::CompressionCodec; @@ -160,7 +161,6 @@ mod tests { zstd_compressed_metric_file_metadata, }; use crate::puffin::writer::PuffinWriter; - use crate::Result; async fn write_puffin_file( temp_dir: &TempDir, diff --git a/crates/iceberg/src/scan/mod.rs b/crates/iceberg/src/scan/mod.rs index c1cedd58e0..8280d43080 100644 --- a/crates/iceberg/src/scan/mod.rs +++ b/crates/iceberg/src/scan/mod.rs @@ -26,7 +26,7 @@ mod task; use std::sync::Arc; use arrow_array::RecordBatch; -use futures::channel::mpsc::{channel, Sender}; +use futures::channel::mpsc::{Sender, channel}; use 
futures::stream::BoxStream; use futures::{SinkExt, StreamExt, TryStreamExt}; pub use task::*;
@@ -602,7 +602,7 @@ pub mod tests { use arrow_array::{ ArrayRef, BooleanArray, Float64Array, Int32Array, Int64Array, RecordBatch, StringArray, }; - use futures::{stream, TryStreamExt}; + use futures::{TryStreamExt, stream}; use parquet::arrow::{ArrowWriter, PARQUET_FIELD_ID_META_KEY}; use parquet::basic::Compression; use parquet::file::properties::WriterProperties;
@@ -610,6 +610,7 @@ use tera::{Context, Tera}; use uuid::Uuid; + use crate::TableIdent; use crate::arrow::ArrowReaderBuilder; use crate::expr::{BoundPredicate, Reference}; use crate::io::{FileIO, OutputFile};
@@ -620,7 +621,6 @@ PrimitiveType, Schema, Struct, StructType, TableMetadata, Type, }; use crate::table::Table; - use crate::TableIdent; pub struct TableTestFixture { pub table_location: String,
diff --git a/crates/iceberg/src/scan/task.rs b/crates/iceberg/src/scan/task.rs
index 3369ffdaae..447ed7ced3 100644
--- a/crates/iceberg/src/scan/task.rs
+++ b/crates/iceberg/src/scan/task.rs
@@ -18,9 +18,9 @@ use futures::stream::BoxStream; use serde::{Deserialize, Serialize}; +use crate::Result; use crate::expr::BoundPredicate; use crate::spec::{DataContentType, DataFileFormat, ManifestEntryRef, Schema, SchemaRef}; -use crate::Result; /// A stream of [`FileScanTask`]. pub type FileScanTaskStream = BoxStream<'static, Result<FileScanTask>>;
diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs
index 5b9ca6c339..7a7547967a 100644
--- a/crates/iceberg/src/spec/datatypes.rs
+++ b/crates/iceberg/src/spec/datatypes.rs
@@ -26,14 +26,14 @@ use std::sync::{Arc, OnceLock}; use ::serde::de::{MapAccess, Visitor}; use serde::de::{Error, IntoDeserializer}; -use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; +use serde::{Deserialize, Deserializer, Serialize, Serializer, de}; use serde_json::Value as JsonValue; use super::values::Literal; use crate::ensure_data_valid; use crate::error::Result; -use crate::spec::datatypes::_decimal::{MAX_PRECISION, REQUIRED_LENGTH}; use crate::spec::PrimitiveLiteral; +use crate::spec::datatypes::_decimal::{MAX_PRECISION, REQUIRED_LENGTH}; /// Field name for list type. pub const LIST_FIELD_NAME: &str = "element";
@@ -153,14 +153,20 @@ impl Type { /// Returns minimum bytes required for decimal with [`precision`]. #[inline(always)] pub fn decimal_required_bytes(precision: u32) -> Result<u32> { - ensure_data_valid!(precision > 0 && precision <= MAX_DECIMAL_PRECISION, "Decimals with precision larger than {MAX_DECIMAL_PRECISION} are not supported: {precision}",); + ensure_data_valid!( + precision > 0 && precision <= MAX_DECIMAL_PRECISION, + "Decimals with precision larger than {MAX_DECIMAL_PRECISION} are not supported: {precision}", + ); Ok(REQUIRED_LENGTH[precision as usize - 1]) } /// Creates decimal type. 
#[inline(always)] pub fn decimal(precision: u32, scale: u32) -> Result<Self> { - ensure_data_valid!(precision > 0 && precision <= MAX_DECIMAL_PRECISION, "Decimals with precision larger than {MAX_DECIMAL_PRECISION} are not supported: {precision}",); + ensure_data_valid!( + precision > 0 && precision <= MAX_DECIMAL_PRECISION, + "Decimals with precision larger than {MAX_DECIMAL_PRECISION} are not supported: {precision}", + ); Ok(Type::Primitive(PrimitiveType::Decimal { precision, scale })) }
diff --git a/crates/iceberg/src/spec/encrypted_key.rs b/crates/iceberg/src/spec/encrypted_key.rs
index 20c63f5394..db19a023b2 100644
--- a/crates/iceberg/src/spec/encrypted_key.rs
+++ b/crates/iceberg/src/spec/encrypted_key.rs
@@ -61,8 +61,8 @@ impl EncryptedKey { } pub(super) mod _serde { - use base64::engine::general_purpose::STANDARD as BASE64; use base64::Engine as _; + use base64::engine::general_purpose::STANDARD as BASE64; use super::*;
diff --git a/crates/iceberg/src/spec/manifest/_serde.rs b/crates/iceberg/src/spec/manifest/_serde.rs
index 951480e14e..97923c7a86 100644
--- a/crates/iceberg/src/spec/manifest/_serde.rs
+++ b/crates/iceberg/src/spec/manifest/_serde.rs
@@ -310,11 +310,11 @@ mod tests { use std::io::Cursor; use std::sync::Arc; - use crate::spec::manifest::_serde::{parse_i64_entry, I64Entry}; + use crate::spec::manifest::_serde::{I64Entry, parse_i64_entry}; use crate::spec::{ - read_data_files_from_avro, write_data_files_to_avro, DataContentType, DataFile, - DataFileFormat, Datum, FormatVersion, NestedField, PrimitiveType, Schema, Struct, - StructType, Type, + DataContentType, DataFile, DataFileFormat, Datum, FormatVersion, NestedField, + PrimitiveType, Schema, Struct, StructType, Type, read_data_files_from_avro, + write_data_files_to_avro, }; #[test]
diff --git a/crates/iceberg/src/spec/manifest/data_file.rs b/crates/iceberg/src/spec/manifest/data_file.rs
index cd867b10e0..1de59a3874 100644
--- a/crates/iceberg/src/spec/manifest/data_file.rs
+++ b/crates/iceberg/src/spec/manifest/data_file.rs
@@ -19,12 +19,12 @@ use std::collections::HashMap; use std::io::{Read, Write}; use std::str::FromStr; -use apache_avro::{from_value, to_value, Reader as AvroReader, Writer as AvroWriter}; +use apache_avro::{Reader as AvroReader, Writer as AvroWriter, from_value, to_value}; use serde_derive::{Deserialize, Serialize}; use serde_with::{DeserializeFromStr, SerializeDisplay}; use super::_serde::DataFileSerde; -use super::{data_file_schema_v1, data_file_schema_v2, Datum, FormatVersion, Schema}; +use super::{Datum, FormatVersion, Schema, data_file_schema_v1, data_file_schema_v2}; use crate::error::Result; use crate::spec::{Struct, StructType}; use crate::{Error, ErrorKind};
diff --git a/crates/iceberg/src/spec/manifest/entry.rs b/crates/iceberg/src/spec/manifest/entry.rs
index 85022a1121..7d2f982d0d 100644
--- a/crates/iceberg/src/spec/manifest/entry.rs
+++ b/crates/iceberg/src/spec/manifest/entry.rs
@@ -24,8 +24,8 @@ use typed_builder::TypedBuilder; use crate::avro::schema_to_avro_schema; use crate::error::Result; use crate::spec::{ - DataContentType, DataFile, ListType, ManifestFile, MapType, NestedField, NestedFieldRef, - PrimitiveType, Schema, StructType, Type, INITIAL_SEQUENCE_NUMBER, + DataContentType, DataFile, INITIAL_SEQUENCE_NUMBER, ListType, ManifestFile, MapType, + NestedField, NestedFieldRef, PrimitiveType, Schema, StructType, Type, }; use crate::{Error, ErrorKind};
diff --git a/crates/iceberg/src/spec/manifest/mod.rs b/crates/iceberg/src/spec/manifest/mod.rs
index 5b53abaef6..15342f6672 100644 
--- a/crates/iceberg/src/spec/manifest/mod.rs +++ b/crates/iceberg/src/spec/manifest/mod.rs @@ -26,7 +26,7 @@ pub use metadata::*; mod writer; use std::sync::Arc; -use apache_avro::{from_value, Reader as AvroReader}; +use apache_avro::{Reader as AvroReader, from_value}; pub use writer::*; use super::{ diff --git a/crates/iceberg/src/spec/manifest/writer.rs b/crates/iceberg/src/spec/manifest/writer.rs index 8f08803d41..c89c147f95 100644 --- a/crates/iceberg/src/spec/manifest/writer.rs +++ b/crates/iceberg/src/spec/manifest/writer.rs @@ -17,7 +17,7 @@ use std::cmp::min; -use apache_avro::{to_value, Writer as AvroWriter}; +use apache_avro::{Writer as AvroWriter, to_value}; use bytes::Bytes; use itertools::Itertools; use serde_json::to_vec; @@ -183,13 +183,13 @@ impl ManifestWriter { && data_file.content != DataContentType::PositionDeletes { return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Date file at path {} with manifest content type `deletes`, should have DataContentType `Data`, but has `{:?}`", - data_file.file_path(), - data_file.content - ), - )); + ErrorKind::DataInvalid, + format!( + "Date file at path {} with manifest content type `deletes`, should have DataContentType `Data`, but has `{:?}`", + data_file.file_path(), + data_file.content + ), + )); } } } @@ -465,11 +465,7 @@ impl PartitionFieldStats { self.summary.upper_bound = Some(self.summary.upper_bound.take().map_or( value.clone(), |original| { - if value > original { - value - } else { - original - } + if value > original { value } else { original } }, )); diff --git a/crates/iceberg/src/spec/manifest_list.rs b/crates/iceberg/src/spec/manifest_list.rs index 8cf5df8dd9..c29713cefa 100644 --- a/crates/iceberg/src/spec/manifest_list.rs +++ b/crates/iceberg/src/spec/manifest_list.rs @@ -21,7 +21,7 @@ use std::collections::HashMap; use std::str::FromStr; use apache_avro::types::Value; -use apache_avro::{from_value, Reader, Writer}; +use apache_avro::{Reader, Writer, from_value}; use bytes::Bytes; use self::_const_schema::{MANIFEST_LIST_AVRO_SCHEMA_V1, MANIFEST_LIST_AVRO_SCHEMA_V2}; @@ -703,9 +703,9 @@ pub(super) mod _serde { use serde_derive::{Deserialize, Serialize}; use super::ManifestFile; + use crate::Error; use crate::error::Result; use crate::spec::{Datum, PrimitiveType, StructType}; - use crate::Error; #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] #[serde(transparent)] diff --git a/crates/iceberg/src/spec/name_mapping/mod.rs b/crates/iceberg/src/spec/name_mapping/mod.rs index af18e05b23..db9e44c290 100644 --- a/crates/iceberg/src/spec/name_mapping/mod.rs +++ b/crates/iceberg/src/spec/name_mapping/mod.rs @@ -20,7 +20,7 @@ use std::sync::Arc; use serde::{Deserialize, Serialize}; -use serde_with::{serde_as, DefaultOnNull}; +use serde_with::{DefaultOnNull, serde_as}; /// Property name for name mapping. 
pub const DEFAULT_SCHEMA_NAME_MAPPING: &str = "schema.name-mapping.default"; @@ -275,12 +275,14 @@ mod tests { MappedField { field_id: Some(4), names: vec!["qux".to_string()], - fields: vec![MappedField { - field_id: Some(5), - names: vec!["element".to_string()], - fields: vec![], - } - .into()], + fields: vec![ + MappedField { + field_id: Some(5), + names: vec!["element".to_string()], + fields: vec![], + } + .into(), + ], }, MappedField { field_id: Some(6), @@ -316,25 +318,27 @@ mod tests { MappedField { field_id: Some(11), names: vec!["location".to_string()], - fields: vec![MappedField { - field_id: Some(12), - names: vec!["element".to_string()], - fields: vec![ - MappedField { - field_id: Some(13), - names: vec!["latitude".to_string()], - fields: vec![], - } - .into(), - MappedField { - field_id: Some(14), - names: vec!["longitude".to_string()], - fields: vec![], - } - .into(), - ], - } - .into()], + fields: vec![ + MappedField { + field_id: Some(12), + names: vec!["element".to_string()], + fields: vec![ + MappedField { + field_id: Some(13), + names: vec!["latitude".to_string()], + fields: vec![], + } + .into(), + MappedField { + field_id: Some(14), + names: vec!["longitude".to_string()], + fields: vec![], + } + .into(), + ], + } + .into(), + ], }, MappedField { field_id: Some(15), diff --git a/crates/iceberg/src/spec/partition.rs b/crates/iceberg/src/spec/partition.rs index 5ddde0e4ce..0229a97bdb 100644 --- a/crates/iceberg/src/spec/partition.rs +++ b/crates/iceberg/src/spec/partition.rs @@ -638,12 +638,14 @@ trait CorePartitionSpecValidator { if let Some(collision) = collision { Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Cannot add redundant partition with source id `{}` and transform `{}`. A partition with the same source id and transform already exists with name `{}`", - source_id, transform.dedup_name(), collision.name - ), - )) + ErrorKind::DataInvalid, + format!( + "Cannot add redundant partition with source id `{}` and transform `{}`. 
A partition with the same source id and transform already exists with name `{}`", + source_id, + transform.dedup_name(), + collision.name + ), + )) } else { Ok(()) } @@ -1198,24 +1200,20 @@ mod tests { }); assert_eq!( spec.partition_type(&schema).unwrap(), - StructType::new(vec![NestedField::optional( - 1000, - "id_bucket[16]", - Type::Primitive(PrimitiveType::Int) - ) - .into()]) + StructType::new(vec![ + NestedField::optional(1000, "id_bucket[16]", Type::Primitive(PrimitiveType::Int)) + .into() + ]) ) } #[test] fn test_collision_with_schema_name() { let schema = Schema::builder() - .with_fields(vec![NestedField::required( - 1, - "id", - Type::Primitive(crate::spec::PrimitiveType::Int), - ) - .into()]) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(crate::spec::PrimitiveType::Int)) + .into(), + ]) .build() .unwrap(); @@ -1362,12 +1360,10 @@ mod tests { #[test] fn test_builder_incompatible_transforms_disallowed() { let schema = Schema::builder() - .with_fields(vec![NestedField::required( - 1, - "id", - Type::Primitive(crate::spec::PrimitiveType::Int), - ) - .into()]) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(crate::spec::PrimitiveType::Int)) + .into(), + ]) .build() .unwrap(); @@ -1452,12 +1448,10 @@ mod tests { #[test] fn test_not_compatible_with_transform_different() { let schema = Schema::builder() - .with_fields(vec![NestedField::required( - 1, - "id", - Type::Primitive(crate::spec::PrimitiveType::Int), - ) - .into()]) + .with_fields(vec![ + NestedField::required(1, "id", Type::Primitive(crate::spec::PrimitiveType::Int)) + .into(), + ]) .build() .unwrap(); diff --git a/crates/iceberg/src/spec/schema/_serde.rs b/crates/iceberg/src/spec/schema/_serde.rs index e21f29956b..4b0011835a 100644 --- a/crates/iceberg/src/spec/schema/_serde.rs +++ b/crates/iceberg/src/spec/schema/_serde.rs @@ -26,7 +26,7 @@ use serde::Deserialize; /// [SchemaV1] and [SchemaV2] are internal struct that are only used for serialization and deserialization. 
use serde::Serialize; -use super::{Schema, DEFAULT_SCHEMA_ID}; +use super::{DEFAULT_SCHEMA_ID, Schema}; use crate::spec::StructType; use crate::{Error, Result}; diff --git a/crates/iceberg/src/spec/schema/index.rs b/crates/iceberg/src/spec/schema/index.rs index 6e12aa7df8..d4e77ab2aa 100644 --- a/crates/iceberg/src/spec/schema/index.rs +++ b/crates/iceberg/src/spec/schema/index.rs @@ -174,7 +174,12 @@ impl IndexByName { .chain(vec![name]) .join("."); if let Some(existing_field_id) = self.name_to_id.get(full_name.as_str()) { - return Err(Error::new(ErrorKind::DataInvalid, format!("Invalid schema: multiple fields for name {full_name}: {field_id} and {existing_field_id}"))); + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid schema: multiple fields for name {full_name}: {field_id} and {existing_field_id}" + ), + )); } else { self.name_to_id.insert(full_name, field_id); } diff --git a/crates/iceberg/src/spec/schema/mod.rs b/crates/iceberg/src/spec/schema/mod.rs index 23b0a4349b..b1afa62c1a 100644 --- a/crates/iceberg/src/spec/schema/mod.rs +++ b/crates/iceberg/src/spec/schema/mod.rs @@ -29,21 +29,21 @@ mod id_reassigner; mod index; mod prune_columns; use bimap::BiHashMap; -use itertools::{zip_eq, Itertools}; +use itertools::{Itertools, zip_eq}; use serde::{Deserialize, Serialize}; use self::_serde::SchemaEnum; use self::id_reassigner::ReassignFieldIds; -use self::index::{index_by_id, index_parents, IndexByName}; +use self::index::{IndexByName, index_by_id, index_parents}; pub use self::prune_columns::prune_columns; use super::NestedField; use crate::error::Result; use crate::expr::accessor::StructAccessor; use crate::spec::datatypes::{ - ListType, MapType, NestedFieldRef, PrimitiveType, StructType, Type, LIST_FIELD_NAME, - MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, + LIST_FIELD_NAME, ListType, MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, MapType, NestedFieldRef, + PrimitiveType, StructType, Type, }; -use crate::{ensure_data_valid, Error, ErrorKind}; +use crate::{Error, ErrorKind, ensure_data_valid}; /// Type alias for schema id. 
pub type SchemaId = i32; @@ -290,7 +290,12 @@ impl SchemaBuilder { field.name, parent_field ); - ensure_data_valid!(parent_field.required, "Cannot add field {} as an identifier field: must not be nested in an optional field {}", field.name, parent_field); + ensure_data_valid!( + parent_field.required, + "Cannot add field {} as an identifier field: must not be nested in an optional field {}", + field.name, + parent_field + ); cur_field_id = *parent; } } @@ -613,10 +618,11 @@ table { ]) .build(); - assert!(ret - .unwrap_err() - .message() - .contains("Invalid schema: multiple fields for name baz")); + assert!( + ret.unwrap_err() + .message() + .contains("Invalid schema: multiple fields for name baz") + ); } #[test] @@ -1072,123 +1078,151 @@ table { #[test] fn test_identifier_field_ids() { // field in map - assert!(Schema::builder() - .with_schema_id(1) - .with_identifier_field_ids(vec![2]) - .with_fields(vec![NestedField::required( - 1, - "Map", - Type::Map(MapType::new( - NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String)).into(), - NestedField::map_value_element( - 3, - Type::Primitive(PrimitiveType::Boolean), - true, + assert!( + Schema::builder() + .with_schema_id(1) + .with_identifier_field_ids(vec![2]) + .with_fields(vec![ + NestedField::required( + 1, + "Map", + Type::Map(MapType::new( + NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String)) + .into(), + NestedField::map_value_element( + 3, + Type::Primitive(PrimitiveType::Boolean), + true, + ) + .into(), + )), ) - .into(), - )), - ) - .into()]) - .build() - .is_err()); - assert!(Schema::builder() - .with_schema_id(1) - .with_identifier_field_ids(vec![3]) - .with_fields(vec![NestedField::required( - 1, - "Map", - Type::Map(MapType::new( - NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String)).into(), - NestedField::map_value_element( - 3, - Type::Primitive(PrimitiveType::Boolean), - true, + .into() + ]) + .build() + .is_err() + ); + assert!( + Schema::builder() + .with_schema_id(1) + .with_identifier_field_ids(vec![3]) + .with_fields(vec![ + NestedField::required( + 1, + "Map", + Type::Map(MapType::new( + NestedField::map_key_element(2, Type::Primitive(PrimitiveType::String)) + .into(), + NestedField::map_value_element( + 3, + Type::Primitive(PrimitiveType::Boolean), + true, + ) + .into(), + )), ) - .into(), - )), - ) - .into()]) - .build() - .is_err()); + .into() + ]) + .build() + .is_err() + ); // field in list - assert!(Schema::builder() - .with_schema_id(1) - .with_identifier_field_ids(vec![2]) - .with_fields(vec![NestedField::required( - 1, - "List", - Type::List(ListType::new( - NestedField::list_element(2, Type::Primitive(PrimitiveType::String), true) - .into(), - )), - ) - .into()]) - .build() - .is_err()); + assert!( + Schema::builder() + .with_schema_id(1) + .with_identifier_field_ids(vec![2]) + .with_fields(vec![ + NestedField::required( + 1, + "List", + Type::List(ListType::new( + NestedField::list_element( + 2, + Type::Primitive(PrimitiveType::String), + true + ) + .into(), + )), + ) + .into() + ]) + .build() + .is_err() + ); // field in optional struct - assert!(Schema::builder() - .with_schema_id(1) - .with_identifier_field_ids(vec![2]) - .with_fields(vec![NestedField::optional( - 1, - "Struct", - Type::Struct(StructType::new(vec![ - NestedField::required(2, "name", Type::Primitive(PrimitiveType::String)).into(), - NestedField::optional(3, "age", Type::Primitive(PrimitiveType::Int)).into(), - ])), - ) - .into()]) - .build() - .is_err()); + assert!( + 
Schema::builder() + .with_schema_id(1) + .with_identifier_field_ids(vec![2]) + .with_fields(vec![ + NestedField::optional( + 1, + "Struct", + Type::Struct(StructType::new(vec![ + NestedField::required( + 2, + "name", + Type::Primitive(PrimitiveType::String) + ) + .into(), + NestedField::optional(3, "age", Type::Primitive(PrimitiveType::Int)) + .into(), + ])), + ) + .into() + ]) + .build() + .is_err() + ); // float and double - assert!(Schema::builder() - .with_schema_id(1) - .with_identifier_field_ids(vec![1]) - .with_fields(vec![NestedField::required( - 1, - "Float", - Type::Primitive(PrimitiveType::Float), - ) - .into()]) - .build() - .is_err()); - assert!(Schema::builder() - .with_schema_id(1) - .with_identifier_field_ids(vec![1]) - .with_fields(vec![NestedField::required( - 1, - "Double", - Type::Primitive(PrimitiveType::Double), - ) - .into()]) - .build() - .is_err()); + assert!( + Schema::builder() + .with_schema_id(1) + .with_identifier_field_ids(vec![1]) + .with_fields(vec![ + NestedField::required(1, "Float", Type::Primitive(PrimitiveType::Float),) + .into() + ]) + .build() + .is_err() + ); + assert!( + Schema::builder() + .with_schema_id(1) + .with_identifier_field_ids(vec![1]) + .with_fields(vec![ + NestedField::required(1, "Double", Type::Primitive(PrimitiveType::Double),) + .into() + ]) + .build() + .is_err() + ); // optional field - assert!(Schema::builder() - .with_schema_id(1) - .with_identifier_field_ids(vec![1]) - .with_fields(vec![NestedField::required( - 1, - "Required", - Type::Primitive(PrimitiveType::String), - ) - .into()]) - .build() - .is_ok()); - assert!(Schema::builder() - .with_schema_id(1) - .with_identifier_field_ids(vec![1]) - .with_fields(vec![NestedField::optional( - 1, - "Optional", - Type::Primitive(PrimitiveType::String), - ) - .into()]) - .build() - .is_err()); + assert!( + Schema::builder() + .with_schema_id(1) + .with_identifier_field_ids(vec![1]) + .with_fields(vec![ + NestedField::required(1, "Required", Type::Primitive(PrimitiveType::String),) + .into() + ]) + .build() + .is_ok() + ); + assert!( + Schema::builder() + .with_schema_id(1) + .with_identifier_field_ids(vec![1]) + .with_fields(vec![ + NestedField::optional(1, "Optional", Type::Primitive(PrimitiveType::String),) + .into() + ]) + .build() + .is_err() + ); } } diff --git a/crates/iceberg/src/spec/schema/prune_columns.rs b/crates/iceberg/src/spec/schema/prune_columns.rs index b81917b1ef..5a2f0b50fc 100644 --- a/crates/iceberg/src/spec/schema/prune_columns.rs +++ b/crates/iceberg/src/spec/schema/prune_columns.rs @@ -182,9 +182,12 @@ impl SchemaVisitor for PruneColumn { return Ok(Some(Type::List(list.clone()))); } else { return Err(Error::new( - ErrorKind::DataInvalid, - format!("Cannot explicitly project List or Map types, List element {} of type {} was selected", list.element_field.id, list.element_field.field_type), - )); + ErrorKind::DataInvalid, + format!( + "Cannot explicitly project List or Map types, List element {} of type {} was selected", + list.element_field.id, list.element_field.field_type + ), + )); } } else if let Some(result) = value { Ok(Some(Type::List(PruneColumn::project_list(list, result)?))) @@ -213,9 +216,12 @@ impl SchemaVisitor for PruneColumn { return Ok(Some(Type::Map(map.clone()))); } else { return Err(Error::new( - ErrorKind::DataInvalid, - format!("Cannot explicitly project List or Map types, Map value {} of type {} was selected", map.value_field.id, map.value_field.field_type), - )); + ErrorKind::DataInvalid, + format!( + "Cannot explicitly project List or Map 
types, Map value {} of type {} was selected", + map.value_field.id, map.value_field.field_type + ), + )); } } else if let Some(value_result) = value { return Ok(Some(Type::Map(PruneColumn::project_map( @@ -245,12 +251,9 @@ mod tests { fn test_schema_prune_columns_string() { let expected_type = Type::from( Schema::builder() - .with_fields(vec![NestedField::optional( - 1, - "foo", - Type::Primitive(PrimitiveType::String), - ) - .into()]) + .with_fields(vec![ + NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(), + ]) .build() .unwrap() .as_struct() @@ -267,12 +270,9 @@ mod tests { fn test_schema_prune_columns_string_full() { let expected_type = Type::from( Schema::builder() - .with_fields(vec![NestedField::optional( - 1, - "foo", - Type::Primitive(PrimitiveType::String), - ) - .into()]) + .with_fields(vec![ + NestedField::optional(1, "foo", Type::Primitive(PrimitiveType::String)).into(), + ]) .build() .unwrap() .as_struct() @@ -289,19 +289,21 @@ mod tests { fn test_schema_prune_columns_list() { let expected_type = Type::from( Schema::builder() - .with_fields(vec![NestedField::required( - 4, - "qux", - Type::List(ListType { - element_field: NestedField::list_element( - 5, - Type::Primitive(PrimitiveType::String), - true, - ) - .into(), - }), - ) - .into()]) + .with_fields(vec![ + NestedField::required( + 4, + "qux", + Type::List(ListType { + element_field: NestedField::list_element( + 5, + Type::Primitive(PrimitiveType::String), + true, + ) + .into(), + }), + ) + .into(), + ]) .build() .unwrap() .as_struct() @@ -326,19 +328,21 @@ mod tests { fn test_schema_prune_columns_list_full() { let expected_type = Type::from( Schema::builder() - .with_fields(vec![NestedField::required( - 4, - "qux", - Type::List(ListType { - element_field: NestedField::list_element( - 5, - Type::Primitive(PrimitiveType::String), - true, - ) - .into(), - }), - ) - .into()]) + .with_fields(vec![ + NestedField::required( + 4, + "qux", + Type::List(ListType { + element_field: NestedField::list_element( + 5, + Type::Primitive(PrimitiveType::String), + true, + ) + .into(), + }), + ) + .into(), + ]) .build() .unwrap() .as_struct() @@ -355,36 +359,38 @@ mod tests { fn test_prune_columns_map() { let expected_type = Type::from( Schema::builder() - .with_fields(vec![NestedField::required( - 6, - "quux", - Type::Map(MapType { - key_field: NestedField::map_key_element( - 7, - Type::Primitive(PrimitiveType::String), - ) - .into(), - value_field: NestedField::map_value_element( - 8, - Type::Map(MapType { - key_field: NestedField::map_key_element( - 9, - Type::Primitive(PrimitiveType::String), - ) - .into(), - value_field: NestedField::map_value_element( - 10, - Type::Primitive(PrimitiveType::Int), - true, - ) - .into(), - }), - true, - ) - .into(), - }), - ) - .into()]) + .with_fields(vec![ + NestedField::required( + 6, + "quux", + Type::Map(MapType { + key_field: NestedField::map_key_element( + 7, + Type::Primitive(PrimitiveType::String), + ) + .into(), + value_field: NestedField::map_value_element( + 8, + Type::Map(MapType { + key_field: NestedField::map_key_element( + 9, + Type::Primitive(PrimitiveType::String), + ) + .into(), + value_field: NestedField::map_value_element( + 10, + Type::Primitive(PrimitiveType::Int), + true, + ) + .into(), + }), + true, + ) + .into(), + }), + ) + .into(), + ]) .build() .unwrap() .as_struct() @@ -409,36 +415,38 @@ mod tests { fn test_prune_columns_map_full() { let expected_type = Type::from( Schema::builder() - .with_fields(vec![NestedField::required( - 6, - "quux", - 
Type::Map(MapType { - key_field: NestedField::map_key_element( - 7, - Type::Primitive(PrimitiveType::String), - ) - .into(), - value_field: NestedField::map_value_element( - 8, - Type::Map(MapType { - key_field: NestedField::map_key_element( - 9, - Type::Primitive(PrimitiveType::String), - ) - .into(), - value_field: NestedField::map_value_element( - 10, - Type::Primitive(PrimitiveType::Int), - true, - ) - .into(), - }), - true, - ) - .into(), - }), - ) - .into()]) + .with_fields(vec![ + NestedField::required( + 6, + "quux", + Type::Map(MapType { + key_field: NestedField::map_key_element( + 7, + Type::Primitive(PrimitiveType::String), + ) + .into(), + value_field: NestedField::map_value_element( + 8, + Type::Map(MapType { + key_field: NestedField::map_key_element( + 9, + Type::Primitive(PrimitiveType::String), + ) + .into(), + value_field: NestedField::map_value_element( + 10, + Type::Primitive(PrimitiveType::Int), + true, + ) + .into(), + }), + true, + ) + .into(), + }), + ) + .into(), + ]) .build() .unwrap() .as_struct() @@ -455,36 +463,38 @@ mod tests { fn test_prune_columns_map_key() { let expected_type = Type::from( Schema::builder() - .with_fields(vec![NestedField::required( - 6, - "quux", - Type::Map(MapType { - key_field: NestedField::map_key_element( - 7, - Type::Primitive(PrimitiveType::String), - ) - .into(), - value_field: NestedField::map_value_element( - 8, - Type::Map(MapType { - key_field: NestedField::map_key_element( - 9, - Type::Primitive(PrimitiveType::String), - ) - .into(), - value_field: NestedField::map_value_element( - 10, - Type::Primitive(PrimitiveType::Int), - true, - ) - .into(), - }), - true, - ) - .into(), - }), - ) - .into()]) + .with_fields(vec![ + NestedField::required( + 6, + "quux", + Type::Map(MapType { + key_field: NestedField::map_key_element( + 7, + Type::Primitive(PrimitiveType::String), + ) + .into(), + value_field: NestedField::map_value_element( + 8, + Type::Map(MapType { + key_field: NestedField::map_key_element( + 9, + Type::Primitive(PrimitiveType::String), + ) + .into(), + value_field: NestedField::map_value_element( + 10, + Type::Primitive(PrimitiveType::Int), + true, + ) + .into(), + }), + true, + ) + .into(), + }), + ) + .into(), + ]) .build() .unwrap() .as_struct() @@ -501,17 +511,21 @@ mod tests { fn test_prune_columns_struct() { let expected_type = Type::from( Schema::builder() - .with_fields(vec![NestedField::optional( - 15, - "person", - Type::Struct(StructType::new(vec![NestedField::optional( - 16, - "name", - Type::Primitive(PrimitiveType::String), + .with_fields(vec![ + NestedField::optional( + 15, + "person", + Type::Struct(StructType::new(vec![ + NestedField::optional( + 16, + "name", + Type::Primitive(PrimitiveType::String), + ) + .into(), + ])), ) - .into()])), - ) - .into()]) + .into(), + ]) .build() .unwrap() .as_struct() @@ -528,17 +542,21 @@ mod tests { fn test_prune_columns_struct_full() { let expected_type = Type::from( Schema::builder() - .with_fields(vec![NestedField::optional( - 15, - "person", - Type::Struct(StructType::new(vec![NestedField::optional( - 16, - "name", - Type::Primitive(PrimitiveType::String), + .with_fields(vec![ + NestedField::optional( + 15, + "person", + Type::Struct(StructType::new(vec![ + NestedField::optional( + 16, + "name", + Type::Primitive(PrimitiveType::String), + ) + .into(), + ])), ) - .into()])), - ) - .into()]) + .into(), + ]) .build() .unwrap() .as_struct() @@ -554,22 +572,17 @@ mod tests { #[test] fn test_prune_columns_empty_struct() { let schema_with_empty_struct_field = 
Schema::builder() - .with_fields(vec![NestedField::optional( - 15, - "person", - Type::Struct(StructType::new(vec![])), - ) - .into()]) + .with_fields(vec![ + NestedField::optional(15, "person", Type::Struct(StructType::new(vec![]))).into(), + ]) .build() .unwrap(); let expected_type = Type::from( Schema::builder() - .with_fields(vec![NestedField::optional( - 15, - "person", - Type::Struct(StructType::new(vec![])), - ) - .into()]) + .with_fields(vec![ + NestedField::optional(15, "person", Type::Struct(StructType::new(vec![]))) + .into(), + ]) .build() .unwrap() .as_struct() @@ -584,22 +597,17 @@ mod tests { #[test] fn test_prune_columns_empty_struct_full() { let schema_with_empty_struct_field = Schema::builder() - .with_fields(vec![NestedField::optional( - 15, - "person", - Type::Struct(StructType::new(vec![])), - ) - .into()]) + .with_fields(vec![ + NestedField::optional(15, "person", Type::Struct(StructType::new(vec![]))).into(), + ]) .build() .unwrap(); let expected_type = Type::from( Schema::builder() - .with_fields(vec![NestedField::optional( - 15, - "person", - Type::Struct(StructType::new(vec![])), - ) - .into()]) + .with_fields(vec![ + NestedField::optional(15, "person", Type::Struct(StructType::new(vec![]))) + .into(), + ]) .build() .unwrap() .as_struct() @@ -615,30 +623,8 @@ mod tests { fn test_prune_columns_struct_in_map() { let schema_with_struct_in_map_field = Schema::builder() .with_schema_id(1) - .with_fields(vec![NestedField::required( - 6, - "id_to_person", - Type::Map(MapType { - key_field: NestedField::map_key_element(7, Type::Primitive(PrimitiveType::Int)) - .into(), - value_field: NestedField::map_value_element( - 8, - Type::Struct(StructType::new(vec![ - NestedField::optional(10, "name", Primitive(PrimitiveType::String)) - .into(), - NestedField::required(11, "age", Primitive(PrimitiveType::Int)).into(), - ])), - true, - ) - .into(), - }), - ) - .into()]) - .build() - .unwrap(); - let expected_type = Type::from( - Schema::builder() - .with_fields(vec![NestedField::required( + .with_fields(vec![ + NestedField::required( 6, "id_to_person", Type::Map(MapType { @@ -649,18 +635,46 @@ mod tests { .into(), value_field: NestedField::map_value_element( 8, - Type::Struct(StructType::new(vec![NestedField::required( - 11, - "age", - Primitive(PrimitiveType::Int), - ) - .into()])), + Type::Struct(StructType::new(vec![ + NestedField::optional(10, "name", Primitive(PrimitiveType::String)) + .into(), + NestedField::required(11, "age", Primitive(PrimitiveType::Int)) + .into(), + ])), true, ) .into(), }), ) - .into()]) + .into(), + ]) + .build() + .unwrap(); + let expected_type = Type::from( + Schema::builder() + .with_fields(vec![ + NestedField::required( + 6, + "id_to_person", + Type::Map(MapType { + key_field: NestedField::map_key_element( + 7, + Type::Primitive(PrimitiveType::Int), + ) + .into(), + value_field: NestedField::map_value_element( + 8, + Type::Struct(StructType::new(vec![ + NestedField::required(11, "age", Primitive(PrimitiveType::Int)) + .into(), + ])), + true, + ) + .into(), + }), + ) + .into(), + ]) .build() .unwrap() .as_struct() @@ -675,30 +689,8 @@ mod tests { fn test_prune_columns_struct_in_map_full() { let schema = Schema::builder() .with_schema_id(1) - .with_fields(vec![NestedField::required( - 6, - "id_to_person", - Type::Map(MapType { - key_field: NestedField::map_key_element(7, Type::Primitive(PrimitiveType::Int)) - .into(), - value_field: NestedField::map_value_element( - 8, - Type::Struct(StructType::new(vec![ - NestedField::optional(10, "name", 
Primitive(PrimitiveType::String)) - .into(), - NestedField::required(11, "age", Primitive(PrimitiveType::Int)).into(), - ])), - true, - ) - .into(), - }), - ) - .into()]) - .build() - .unwrap(); - let expected_type = Type::from( - Schema::builder() - .with_fields(vec![NestedField::required( + .with_fields(vec![ + NestedField::required( 6, "id_to_person", Type::Map(MapType { @@ -709,18 +701,46 @@ mod tests { .into(), value_field: NestedField::map_value_element( 8, - Type::Struct(StructType::new(vec![NestedField::required( - 11, - "age", - Primitive(PrimitiveType::Int), - ) - .into()])), + Type::Struct(StructType::new(vec![ + NestedField::optional(10, "name", Primitive(PrimitiveType::String)) + .into(), + NestedField::required(11, "age", Primitive(PrimitiveType::Int)) + .into(), + ])), true, ) .into(), }), ) - .into()]) + .into(), + ]) + .build() + .unwrap(); + let expected_type = Type::from( + Schema::builder() + .with_fields(vec![ + NestedField::required( + 6, + "id_to_person", + Type::Map(MapType { + key_field: NestedField::map_key_element( + 7, + Type::Primitive(PrimitiveType::Int), + ) + .into(), + value_field: NestedField::map_value_element( + 8, + Type::Struct(StructType::new(vec![ + NestedField::required(11, "age", Primitive(PrimitiveType::Int)) + .into(), + ])), + true, + ) + .into(), + }), + ) + .into(), + ]) .build() .unwrap() .as_struct() diff --git a/crates/iceberg/src/spec/snapshot.rs b/crates/iceberg/src/spec/snapshot.rs index 922e7bab95..a2716ad97e 100644 --- a/crates/iceberg/src/spec/snapshot.rs +++ b/crates/iceberg/src/spec/snapshot.rs @@ -27,7 +27,7 @@ use serde::{Deserialize, Serialize}; use typed_builder::TypedBuilder; use super::table_metadata::SnapshotLog; -use crate::error::{timestamp_ms_to_utc, Result}; +use crate::error::{Result, timestamp_ms_to_utc}; use crate::io::FileIO; use crate::spec::{ManifestList, SchemaId, SchemaRef, StructType, TableMetadata}; use crate::{Error, ErrorKind}; @@ -226,8 +226,8 @@ pub(super) mod _serde { use serde::{Deserialize, Serialize}; use super::{Operation, Snapshot, Summary}; - use crate::spec::SchemaId; use crate::Error; + use crate::spec::SchemaId; #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "kebab-case")] diff --git a/crates/iceberg/src/spec/snapshot_summary.rs b/crates/iceberg/src/spec/snapshot_summary.rs index 3e12322979..556c5435eb 100644 --- a/crates/iceberg/src/spec/snapshot_summary.rs +++ b/crates/iceberg/src/spec/snapshot_summary.rs @@ -729,11 +729,13 @@ mod tests { let partition_spec = Arc::new( PartitionSpec::builder(schema.clone()) - .add_unbound_fields(vec![UnboundPartitionField::builder() - .source_id(2) - .name("year".to_string()) - .transform(Transform::Identity) - .build()]) + .add_unbound_fields(vec![ + UnboundPartitionField::builder() + .source_id(2) + .name("year".to_string()) + .transform(Transform::Identity) + .build(), + ]) .unwrap() .with_spec_id(1) .build() @@ -876,11 +878,13 @@ mod tests { let partition_spec = Arc::new( PartitionSpec::builder(schema.clone()) - .add_unbound_fields(vec![UnboundPartitionField::builder() - .source_id(2) - .name("year".to_string()) - .transform(Transform::Identity) - .build()]) + .add_unbound_fields(vec![ + UnboundPartitionField::builder() + .source_id(2) + .name("year".to_string()) + .transform(Transform::Identity) + .build(), + ]) .unwrap() .with_spec_id(1) .build() @@ -1005,8 +1009,10 @@ mod tests { assert_eq!(props.get(ADDED_DATA_FILES).unwrap(), "2"); assert_eq!(props.get(ADDED_RECORDS).unwrap(), "6"); - assert!(props - .iter() - .all(|(k, _)| 
!k.starts_with(CHANGED_PARTITION_PREFIX))); + assert!( + props + .iter() + .all(|(k, _)| !k.starts_with(CHANGED_PARTITION_PREFIX)) + ); } } diff --git a/crates/iceberg/src/spec/sort.rs b/crates/iceberg/src/spec/sort.rs index 29d5ac9a94..379d44cc2d 100644 --- a/crates/iceberg/src/spec/sort.rs +++ b/crates/iceberg/src/spec/sort.rs @@ -182,7 +182,7 @@ impl SortOrderBuilder { return Err(Error::new( ErrorKind::Unexpected, format!("Cannot find source column for sort field: {sort_field}"), - )) + )); } Some(source_field) => { let source_type = source_field.field_type.as_ref(); @@ -278,8 +278,8 @@ mod tests { } #[test] - fn test_build_unbound_should_return_err_if_order_id_equals_zero_is_used_for_anything_other_than_unsorted_order( - ) { + fn test_build_unbound_should_return_err_if_order_id_equals_zero_is_used_for_anything_other_than_unsorted_order() + { assert_eq!( SortOrder::builder() .with_order_id(SortOrder::UNSORTED_ORDER_ID) @@ -361,8 +361,8 @@ mod tests { } #[test] - fn test_build_unbound_should_return_sort_order_with_given_sort_fields_and_defaults_to_1_if_missing_an_order_id( - ) { + fn test_build_unbound_should_return_sort_order_with_given_sort_fields_and_defaults_to_1_if_missing_an_order_id() + { let sort_field = SortField::builder() .source_id(2) .direction(SortDirection::Ascending) @@ -386,12 +386,9 @@ mod tests { fn test_build_should_return_err_if_sort_order_field_is_not_present_in_schema() { let schema = Schema::builder() .with_schema_id(1) - .with_fields(vec![NestedField::required( - 1, - "foo", - Type::Primitive(PrimitiveType::Int), - ) - .into()]) + .with_fields(vec![ + NestedField::required(1, "foo", Type::Primitive(PrimitiveType::Int)).into(), + ]) .build() .unwrap(); @@ -418,19 +415,21 @@ mod tests { fn test_build_should_return_err_if_source_field_is_not_a_primitive_type() { let schema = Schema::builder() .with_schema_id(1) - .with_fields(vec![NestedField::required( - 1, - "foo", - Type::List(ListType { - element_field: NestedField::list_element( - 2, - Type::Primitive(PrimitiveType::String), - true, - ) - .into(), - }), - ) - .into()]) + .with_fields(vec![ + NestedField::required( + 1, + "foo", + Type::List(ListType { + element_field: NestedField::list_element( + 2, + Type::Primitive(PrimitiveType::String), + true, + ) + .into(), + }), + ) + .into(), + ]) .build() .unwrap(); @@ -457,12 +456,9 @@ mod tests { fn test_build_should_return_err_if_source_field_type_is_not_supported_by_transform() { let schema = Schema::builder() .with_schema_id(1) - .with_fields(vec![NestedField::required( - 1, - "foo", - Type::Primitive(PrimitiveType::Int), - ) - .into()]) + .with_fields(vec![ + NestedField::required(1, "foo", Type::Primitive(PrimitiveType::Int)).into(), + ]) .build() .unwrap(); diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index d335597ab0..fcd5109203 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -33,10 +33,10 @@ use uuid::Uuid; use super::snapshot::SnapshotReference; pub use super::table_metadata_builder::{TableMetadataBuildResult, TableMetadataBuilder}; use super::{ - PartitionSpecRef, PartitionStatisticsFile, SchemaId, SchemaRef, SnapshotRef, SnapshotRetention, - SortOrder, SortOrderRef, StatisticsFile, StructType, DEFAULT_PARTITION_SPEC_ID, + DEFAULT_PARTITION_SPEC_ID, PartitionSpecRef, PartitionStatisticsFile, SchemaId, SchemaRef, + SnapshotRef, SnapshotRetention, SortOrder, SortOrderRef, StatisticsFile, StructType, }; -use crate::error::{timestamp_ms_to_utc, 
Result}; +use crate::error::{Result, timestamp_ms_to_utc}; use crate::{Error, ErrorKind}; static MAIN_BRANCH: &str = "main"; @@ -512,12 +512,12 @@ impl TableMetadata { self.current_snapshot_id = None; } else if self.snapshot_by_id(current_snapshot_id).is_none() { return Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Snapshot for current snapshot id {} does not exist in the existing snapshots list", - current_snapshot_id - ), - )); + ErrorKind::DataInvalid, + format!( + "Snapshot for current snapshot id {} does not exist in the existing snapshots list", + current_snapshot_id + ), + )); } } Ok(()) @@ -671,8 +671,8 @@ pub(super) mod _serde { use uuid::Uuid; use super::{ - FormatVersion, MetadataLog, SnapshotLog, TableMetadata, DEFAULT_PARTITION_SPEC_ID, - MAIN_BRANCH, + DEFAULT_PARTITION_SPEC_ID, FormatVersion, MAIN_BRANCH, MetadataLog, SnapshotLog, + TableMetadata, }; use crate::spec::schema::_serde::{SchemaV1, SchemaV2}; use crate::spec::snapshot::_serde::{SnapshotV1, SnapshotV2}; @@ -1326,13 +1326,13 @@ mod tests { use uuid::Uuid; use super::{FormatVersion, MetadataLog, SnapshotLog, TableMetadataBuilder}; + use crate::TableCreation; use crate::spec::table_metadata::TableMetadata; use crate::spec::{ BlobMetadata, NestedField, NullOrder, Operation, PartitionSpec, PartitionStatisticsFile, PrimitiveType, Schema, Snapshot, SnapshotReference, SnapshotRetention, SortDirection, SortField, SortOrder, StatisticsFile, Summary, Transform, Type, UnboundPartitionField, }; - use crate::TableCreation; fn check_table_metadata_serde(json: &str, expected_type: TableMetadata) { let desered_type: TableMetadata = serde_json::from_str(json).unwrap(); @@ -1861,9 +1861,10 @@ mod tests { "#; let err = serde_json::from_str::(data).unwrap_err(); - assert!(err - .to_string() - .contains("Current snapshot id does not match main branch")); + assert!( + err.to_string() + .contains("Current snapshot id does not match main branch") + ); } #[test] @@ -1952,9 +1953,10 @@ mod tests { "#; let err = serde_json::from_str::(data).unwrap_err(); - assert!(err - .to_string() - .contains("Current snapshot is not set, but main branch exists")); + assert!( + err.to_string() + .contains("Current snapshot is not set, but main branch exists") + ); } #[test] @@ -2047,9 +2049,11 @@ mod tests { "#; let err = serde_json::from_str::(data).unwrap_err(); - assert!(err - .to_string() - .contains("Snapshot for reference foo does not exist in the existing snapshots list")); + assert!( + err.to_string().contains( + "Snapshot for reference foo does not exist in the existing snapshots list" + ) + ); } #[test] diff --git a/crates/iceberg/src/spec/table_metadata_builder.rs b/crates/iceberg/src/spec/table_metadata_builder.rs index 408061019f..1f3f89533b 100644 --- a/crates/iceberg/src/spec/table_metadata_builder.rs +++ b/crates/iceberg/src/spec/table_metadata_builder.rs @@ -21,12 +21,12 @@ use std::sync::Arc; use uuid::Uuid; use super::{ - FormatVersion, MetadataLog, PartitionSpec, PartitionSpecBuilder, PartitionStatisticsFile, - Schema, SchemaRef, Snapshot, SnapshotLog, SnapshotReference, SnapshotRetention, SortOrder, - SortOrderRef, StatisticsFile, StructType, TableMetadata, UnboundPartitionSpec, - DEFAULT_PARTITION_SPEC_ID, DEFAULT_SCHEMA_ID, MAIN_BRANCH, ONE_MINUTE_MS, - PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX, PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT, - RESERVED_PROPERTIES, UNPARTITIONED_LAST_ASSIGNED_ID, + DEFAULT_PARTITION_SPEC_ID, DEFAULT_SCHEMA_ID, FormatVersion, MAIN_BRANCH, MetadataLog, + ONE_MINUTE_MS, 
PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX, + PROPERTY_METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT, PartitionSpec, PartitionSpecBuilder, + PartitionStatisticsFile, RESERVED_PROPERTIES, Schema, SchemaRef, Snapshot, SnapshotLog, + SnapshotReference, SnapshotRetention, SortOrder, SortOrderRef, StatisticsFile, StructType, + TableMetadata, UNPARTITIONED_LAST_ASSIGNED_ID, UnboundPartitionSpec, }; use crate::error::{Error, ErrorKind, Result}; use crate::{TableCreation, TableUpdate}; @@ -352,7 +352,7 @@ impl TableMetadataBuilder { "Cannot add snapshot with sequence number {} older than last sequence number {}", snapshot.sequence_number(), self.metadata.last_sequence_number - ) + ), )); } @@ -762,17 +762,19 @@ impl TableMetadataBuilder { )); } - let schemaless_spec = - self.metadata - .partition_specs - .get(&spec_id) - .ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - format!("Cannot set default partition spec to unknown spec with id: '{spec_id}'",), - ) - })? - .clone(); + let schemaless_spec = self + .metadata + .partition_specs + .get(&spec_id) + .ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + format!( + "Cannot set default partition spec to unknown spec with id: '{spec_id}'", + ), + ) + })? + .clone(); let spec = Arc::unwrap_or_clone(schemaless_spec); let spec_type = spec.partition_type(self.get_current_schema()?)?; self.metadata.default_spec = Arc::new(spec); @@ -1257,6 +1259,7 @@ mod tests { use std::thread::sleep; use super::*; + use crate::TableIdent; use crate::io::FileIOBuilder; use crate::spec::{ BlobMetadata, NestedField, NullOrder, Operation, PartitionSpec, PrimitiveType, Schema, @@ -1264,7 +1267,6 @@ mod tests { UnboundPartitionField, }; use crate::table::Table; - use crate::TableIdent; const TEST_LOCATION: &str = "s3://bucket/test/location"; const LAST_ASSIGNED_COLUMN_ID: i32 = 3; @@ -1409,12 +1411,10 @@ mod tests { NestedField::required( 13, "struct", - Type::Struct(StructType::new(vec![NestedField::required( - 14, - "nested", - Type::Primitive(PrimitiveType::Long), - ) - .into()])), + Type::Struct(StructType::new(vec![ + NestedField::required(14, "nested", Type::Primitive(PrimitiveType::Long)) + .into(), + ])), ) .into(), NestedField::required(15, "c", Type::Primitive(PrimitiveType::Long)).into(), @@ -1450,12 +1450,10 @@ mod tests { NestedField::required( 3, "struct", - Type::Struct(StructType::new(vec![NestedField::required( - 5, - "nested", - Type::Primitive(PrimitiveType::Long), - ) - .into()])), + Type::Struct(StructType::new(vec![ + NestedField::required(5, "nested", Type::Primitive(PrimitiveType::Long)) + .into(), + ])), ) .into(), NestedField::required(4, "c", Type::Primitive(PrimitiveType::Long)).into(), @@ -1967,19 +1965,21 @@ mod tests { let builder = builder.add_snapshot(snapshot.clone()).unwrap(); - assert!(builder - .clone() - .set_ref(MAIN_BRANCH, SnapshotReference { - snapshot_id: 10, - retention: SnapshotRetention::Branch { - min_snapshots_to_keep: Some(10), - max_snapshot_age_ms: None, - max_ref_age_ms: None, - }, - }) - .unwrap_err() - .to_string() - .contains("Cannot set 'main' to unknown snapshot: '10'")); + assert!( + builder + .clone() + .set_ref(MAIN_BRANCH, SnapshotReference { + snapshot_id: 10, + retention: SnapshotRetention::Branch { + min_snapshots_to_keep: Some(10), + max_snapshot_age_ms: None, + max_ref_age_ms: None, + }, + }) + .unwrap_err() + .to_string() + .contains("Cannot set 'main' to unknown snapshot: '10'") + ); let build_result = builder .set_ref(MAIN_BRANCH, SnapshotReference { @@ -2161,9 +2161,10 @@ mod tests { .build() 
.unwrap_err(); - assert!(err - .to_string() - .contains("Cannot find partition source field")); + assert!( + err.to_string() + .contains("Cannot find partition source field") + ); } #[test] @@ -2282,9 +2283,10 @@ mod tests { let err = builder .set_branch_snapshot(snapshot, MAIN_BRANCH) .unwrap_err(); - assert!(err - .to_string() - .contains("Cannot add snapshot with sequence number")); + assert!( + err.to_string() + .contains("Cannot add snapshot with sequence number") + ); } #[test] diff --git a/crates/iceberg/src/spec/transform.rs b/crates/iceberg/src/spec/transform.rs index 201b5a66dd..2a290ea77c 100644 --- a/crates/iceberg/src/spec/transform.rs +++ b/crates/iceberg/src/spec/transform.rs @@ -25,15 +25,15 @@ use fnv::FnvHashSet; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use super::{Datum, PrimitiveLiteral}; +use crate::ErrorKind; use crate::error::{Error, Result}; use crate::expr::{ BinaryExpression, BoundPredicate, BoundReference, Predicate, PredicateOperator, Reference, SetExpression, UnaryExpression, }; -use crate::spec::datatypes::{PrimitiveType, Type}; use crate::spec::Literal; -use crate::transform::{create_transform_function, BoxedTransformFunction}; -use crate::ErrorKind; +use crate::spec::datatypes::{PrimitiveType, Type}; +use crate::transform::{BoxedTransformFunction, create_transform_function}; /// Transform is used to transform predicates to partition predicates, /// in addition to transforming data values. @@ -384,7 +384,7 @@ impl Transform { "Expected a string or binary literal, got: {:?}", expr.literal() ), - )) + )); } }; match len.cmp(&(*width as usize)) { @@ -411,7 +411,7 @@ impl Transform { "Expected a string or binary literal, got: {:?}", expr.literal() ), - )) + )); } }; match len.cmp(&(*width as usize)) { @@ -770,7 +770,7 @@ impl Transform { // An ugly hack to fix. Refine the increment and decrement logic later. match self { Transform::Day => { - return Some(AdjustedProjection::Single(Datum::date(v + 1))) + return Some(AdjustedProjection::Single(Datum::date(v + 1))); } _ => { return Some(AdjustedProjection::Single(Datum::int(v + 1))); @@ -1031,7 +1031,7 @@ impl FromStr for Transform { return Err(Error::new( ErrorKind::DataInvalid, format!("transform {v:?} is invalid"), - )) + )); } }; diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index afa7752d2b..fff347ce51 100644 --- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -30,8 +30,8 @@ pub use _serde::RawLiteral; use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc}; use num_bigint::BigInt; use ordered_float::OrderedFloat; -use rust_decimal::prelude::ToPrimitive; use rust_decimal::Decimal; +use rust_decimal::prelude::ToPrimitive; use serde::de::{ MapAccess, {self}, }; @@ -44,12 +44,12 @@ use uuid::Uuid; use super::datatypes::{PrimitiveType, Type}; use crate::error::Result; +use crate::spec::MAX_DECIMAL_PRECISION; use crate::spec::values::date::{date_from_naive_date, days_to_date, unix_epoch}; use crate::spec::values::time::microseconds_to_time; use crate::spec::values::timestamp::microseconds_to_datetime; use crate::spec::values::timestamptz::{microseconds_to_datetimetz, nanoseconds_to_datetimetz}; -use crate::spec::MAX_DECIMAL_PRECISION; -use crate::{ensure_data_valid, Error, ErrorKind}; +use crate::{Error, ErrorKind, ensure_data_valid}; /// Maximum value for [`PrimitiveType::Time`] type in microseconds, e.g. 23 hours 59 minutes 59 seconds 999999 microseconds. 
const MAX_TIME_VALUE: i64 = 24 * 60 * 60 * 1_000_000i64 - 1; @@ -2204,7 +2204,7 @@ mod _serde { use serde_derive::{Deserialize as DeserializeDerive, Serialize as SerializeDerive}; use super::{Literal, Map, PrimitiveLiteral}; - use crate::spec::{PrimitiveType, Type, MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME}; + use crate::spec::{MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, PrimitiveType, Type}; use crate::{Error, ErrorKind}; #[derive(SerializeDerive, DeserializeDerive, Debug)] @@ -2633,160 +2633,163 @@ mod _serde { "bytes", "todo: rust avro doesn't support deserialize any bytes representation now", )), - RawLiteralEnum::List(v) => { - match ty { - Type::List(ty) => Ok(Some(Literal::List( - v.list - .into_iter() - .map(|v| { - if let Some(v) = v { - v.try_into(&ty.element_field.field_type) - } else { - Ok(None) - } - }) - .collect::>()?, - ))), - Type::Map(map_ty) => { - let key_ty = map_ty.key_field.field_type.as_ref(); - let value_ty = map_ty.value_field.field_type.as_ref(); - let mut map = Map::new(); - for k_v in v.list { - let k_v = k_v.ok_or_else(|| invalid_err_with_reason("list","In deserialize, None will be represented as Some(RawLiteral::Null), all element in list must be valid"))?; - if let RawLiteralEnum::Record(Record { - required, - optional: _, - }) = k_v - { - if required.len() != 2 { - return Err(invalid_err_with_reason("list","Record must contains two element(key and value) of array")); + RawLiteralEnum::List(v) => match ty { + Type::List(ty) => Ok(Some(Literal::List( + v.list + .into_iter() + .map(|v| { + if let Some(v) = v { + v.try_into(&ty.element_field.field_type) + } else { + Ok(None) + } + }) + .collect::>()?, + ))), + Type::Map(map_ty) => { + let key_ty = map_ty.key_field.field_type.as_ref(); + let value_ty = map_ty.value_field.field_type.as_ref(); + let mut map = Map::new(); + for k_v in v.list { + let k_v = k_v.ok_or_else(|| invalid_err_with_reason("list","In deserialize, None will be represented as Some(RawLiteral::Null), all element in list must be valid"))?; + if let RawLiteralEnum::Record(Record { + required, + optional: _, + }) = k_v + { + if required.len() != 2 { + return Err(invalid_err_with_reason( + "list", + "Record must contains two element(key and value) of array", + )); + } + let mut key = None; + let mut value = None; + required.into_iter().for_each(|(k, v)| { + if k == MAP_KEY_FIELD_NAME { + key = Some(v); + } else if k == MAP_VALUE_FIELD_NAME { + value = Some(v); } - let mut key = None; - let mut value = None; - required.into_iter().for_each(|(k, v)| { - if k == MAP_KEY_FIELD_NAME { - key = Some(v); - } else if k == MAP_VALUE_FIELD_NAME { - value = Some(v); - } - }); - match (key, value) { - (Some(k), Some(v)) => { - let key = k.try_into(key_ty)?.ok_or_else(|| { - invalid_err_with_reason( - "list", - "Key element in Map must be valid", - ) - })?; - let value = v.try_into(value_ty)?; - if map_ty.value_field.required && value.is_none() { - return Err(invalid_err_with_reason( - "list", - "Value element is required in this Map", - )); - } - map.insert(key, value); + }); + match (key, value) { + (Some(k), Some(v)) => { + let key = k.try_into(key_ty)?.ok_or_else(|| { + invalid_err_with_reason( + "list", + "Key element in Map must be valid", + ) + })?; + let value = v.try_into(value_ty)?; + if map_ty.value_field.required && value.is_none() { + return Err(invalid_err_with_reason( + "list", + "Value element is required in this Map", + )); } - _ => return Err(invalid_err_with_reason( + map.insert(key, value); + } + _ => { + return Err(invalid_err_with_reason( 
"list", "The elements of record in list are not key and value", - )), + )); } - } else { - return Err(invalid_err_with_reason( - "list", - "Map should represented as record array.", - )); } + } else { + return Err(invalid_err_with_reason( + "list", + "Map should represented as record array.", + )); } - Ok(Some(Literal::Map(map))) } - Type::Primitive(PrimitiveType::Uuid) => { - if v.list.len() != 16 { + Ok(Some(Literal::Map(map))) + } + Type::Primitive(PrimitiveType::Uuid) => { + if v.list.len() != 16 { + return Err(invalid_err_with_reason( + "list", + "The length of list should be 16", + )); + } + let mut bytes = [0u8; 16]; + for (i, v) in v.list.iter().enumerate() { + if let Some(RawLiteralEnum::Long(v)) = v { + bytes[i] = *v as u8; + } else { return Err(invalid_err_with_reason( "list", - "The length of list should be 16", + "The element of list should be int", )); } - let mut bytes = [0u8; 16]; - for (i, v) in v.list.iter().enumerate() { - if let Some(RawLiteralEnum::Long(v)) = v { - bytes[i] = *v as u8; - } else { - return Err(invalid_err_with_reason( - "list", - "The element of list should be int", - )); - } - } - Ok(Some(Literal::uuid(uuid::Uuid::from_bytes(bytes)))) } - Type::Primitive(PrimitiveType::Decimal { - precision: _, - scale: _, - }) => { - if v.list.len() != 16 { + Ok(Some(Literal::uuid(uuid::Uuid::from_bytes(bytes)))) + } + Type::Primitive(PrimitiveType::Decimal { + precision: _, + scale: _, + }) => { + if v.list.len() != 16 { + return Err(invalid_err_with_reason( + "list", + "The length of list should be 16", + )); + } + let mut bytes = [0u8; 16]; + for (i, v) in v.list.iter().enumerate() { + if let Some(RawLiteralEnum::Long(v)) = v { + bytes[i] = *v as u8; + } else { return Err(invalid_err_with_reason( "list", - "The length of list should be 16", + "The element of list should be int", )); } - let mut bytes = [0u8; 16]; - for (i, v) in v.list.iter().enumerate() { + } + Ok(Some(Literal::decimal(i128::from_be_bytes(bytes)))) + } + Type::Primitive(PrimitiveType::Binary) => { + let bytes = v + .list + .into_iter() + .map(|v| { if let Some(RawLiteralEnum::Long(v)) = v { - bytes[i] = *v as u8; + Ok(v as u8) } else { - return Err(invalid_err_with_reason( + Err(invalid_err_with_reason( "list", "The element of list should be int", - )); + )) } - } - Ok(Some(Literal::decimal(i128::from_be_bytes(bytes)))) - } - Type::Primitive(PrimitiveType::Binary) => { - let bytes = v - .list - .into_iter() - .map(|v| { - if let Some(RawLiteralEnum::Long(v)) = v { - Ok(v as u8) - } else { - Err(invalid_err_with_reason( - "list", - "The element of list should be int", - )) - } - }) - .collect::, Error>>()?; - Ok(Some(Literal::binary(bytes))) - } - Type::Primitive(PrimitiveType::Fixed(size)) => { - if v.list.len() != *size as usize { - return Err(invalid_err_with_reason( - "list", - "The length of list should be equal to size", - )); - } - let bytes = v - .list - .into_iter() - .map(|v| { - if let Some(RawLiteralEnum::Long(v)) = v { - Ok(v as u8) - } else { - Err(invalid_err_with_reason( - "list", - "The element of list should be int", - )) - } - }) - .collect::, Error>>()?; - Ok(Some(Literal::fixed(bytes))) + }) + .collect::, Error>>()?; + Ok(Some(Literal::binary(bytes))) + } + Type::Primitive(PrimitiveType::Fixed(size)) => { + if v.list.len() != *size as usize { + return Err(invalid_err_with_reason( + "list", + "The length of list should be equal to size", + )); } - _ => Err(invalid_err("list")), + let bytes = v + .list + .into_iter() + .map(|v| { + if let Some(RawLiteralEnum::Long(v)) = v { + 
Ok(v as u8) + } else { + Err(invalid_err_with_reason( + "list", + "The element of list should be int", + )) + } + }) + .collect::, Error>>()?; + Ok(Some(Literal::fixed(bytes))) } - } + _ => Err(invalid_err("list")), + }, RawLiteralEnum::Record(Record { required, optional: _, @@ -2844,9 +2847,9 @@ mod tests { use super::*; use crate::avro::schema_to_avro_schema; - use crate::spec::datatypes::{ListType, MapType, NestedField, StructType}; use crate::spec::Schema; use crate::spec::Type::Primitive; + use crate::spec::datatypes::{ListType, MapType, NestedField, StructType}; fn check_json_serde(json: &str, expected_literal: Literal, expected_type: &Type) { let raw_json_value = serde_json::from_str::(json).unwrap(); diff --git a/crates/iceberg/src/spec/view_metadata.rs b/crates/iceberg/src/spec/view_metadata.rs index eaebc63efd..dafca4190e 100644 --- a/crates/iceberg/src/spec/view_metadata.rs +++ b/crates/iceberg/src/spec/view_metadata.rs @@ -32,7 +32,7 @@ use uuid::Uuid; pub use super::view_metadata_builder::ViewMetadataBuilder; use super::view_version::{ViewVersionId, ViewVersionRef}; use super::{SchemaId, SchemaRef}; -use crate::error::{timestamp_ms_to_utc, Result}; +use crate::error::{Result, timestamp_ms_to_utc}; use crate::{Error, ErrorKind}; /// Reference to [`ViewMetadata`]. @@ -248,11 +248,11 @@ pub(super) mod _serde { use uuid::Uuid; use super::{ViewFormatVersion, ViewVersionId, ViewVersionLog}; + use crate::Error; use crate::spec::schema::_serde::SchemaV2; use crate::spec::table_metadata::_serde::VersionNumber; use crate::spec::view_version::_serde::ViewVersionV1; use crate::spec::{ViewMetadata, ViewVersion}; - use crate::Error; #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] #[serde(untagged)] @@ -410,8 +410,8 @@ pub(crate) mod tests { use super::{ViewFormatVersion, ViewMetadataBuilder, ViewVersionLog}; use crate::spec::{ - NestedField, PrimitiveType, Schema, SqlViewRepresentation, Type, ViewMetadata, - ViewRepresentations, ViewVersion, INITIAL_VIEW_VERSION_ID, + INITIAL_VIEW_VERSION_ID, NestedField, PrimitiveType, Schema, SqlViewRepresentation, Type, + ViewMetadata, ViewRepresentations, ViewVersion, }; use crate::{NamespaceIdent, ViewCreation}; @@ -495,12 +495,14 @@ pub(crate) mod tests { ("engineVersion".to_string(), "3.3.2".to_string()), ("engine-name".to_string(), "Spark".to_string()), ])) - .with_representations(ViewRepresentations(vec![SqlViewRepresentation { - sql: "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2" - .to_string(), - dialect: "spark".to_string(), - } - .into()])) + .with_representations(ViewRepresentations(vec![ + SqlViewRepresentation { + sql: "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2" + .to_string(), + dialect: "spark".to_string(), + } + .into(), + ])) .build(); let expected = ViewMetadata { @@ -537,12 +539,14 @@ pub(crate) mod tests { #[test] fn test_view_builder_from_view_creation() { - let representations = ViewRepresentations(vec![SqlViewRepresentation { - sql: "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2" - .to_string(), - dialect: "spark".to_string(), - } - .into()]); + let representations = ViewRepresentations(vec![ + SqlViewRepresentation { + sql: "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2" + .to_string(), + dialect: "spark".to_string(), + } + .into(), + ]); let creation = ViewCreation::builder() .location("s3://bucket/warehouse/default.db/event_agg".to_string()) .name("view".to_string()) @@ -598,12 +602,14 @@ pub(crate) mod tests { 
("engineVersion".to_string(), "3.3.2".to_string()), ("engine-name".to_string(), "Spark".to_string()), ])) - .with_representations(ViewRepresentations(vec![SqlViewRepresentation { - sql: "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2" - .to_string(), - dialect: "spark".to_string(), - } - .into()])) + .with_representations(ViewRepresentations(vec![ + SqlViewRepresentation { + sql: "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2" + .to_string(), + dialect: "spark".to_string(), + } + .into(), + ])) .build(); let expected = ViewMetadata { diff --git a/crates/iceberg/src/spec/view_metadata_builder.rs b/crates/iceberg/src/spec/view_metadata_builder.rs index 796da66a1f..dc5c104029 100644 --- a/crates/iceberg/src/spec/view_metadata_builder.rs +++ b/crates/iceberg/src/spec/view_metadata_builder.rs @@ -23,16 +23,16 @@ use itertools::Itertools; use uuid::Uuid; use super::{ - Schema, SchemaId, TableMetadataBuilder, ViewFormatVersion, ViewMetadata, ViewRepresentation, - ViewVersion, ViewVersionLog, ViewVersionRef, DEFAULT_SCHEMA_ID, INITIAL_VIEW_VERSION_ID, - ONE_MINUTE_MS, VIEW_PROPERTY_REPLACE_DROP_DIALECT_ALLOWED, + DEFAULT_SCHEMA_ID, INITIAL_VIEW_VERSION_ID, ONE_MINUTE_MS, Schema, SchemaId, + TableMetadataBuilder, VIEW_PROPERTY_REPLACE_DROP_DIALECT_ALLOWED, VIEW_PROPERTY_REPLACE_DROP_DIALECT_ALLOWED_DEFAULT, VIEW_PROPERTY_VERSION_HISTORY_SIZE, - VIEW_PROPERTY_VERSION_HISTORY_SIZE_DEFAULT, + VIEW_PROPERTY_VERSION_HISTORY_SIZE_DEFAULT, ViewFormatVersion, ViewMetadata, + ViewRepresentation, ViewVersion, ViewVersionLog, ViewVersionRef, }; +use crate::ViewCreation; use crate::catalog::ViewUpdate; use crate::error::{Error, ErrorKind, Result}; use crate::io::is_truthy; -use crate::ViewCreation; /// Manipulating view metadata. 
/// @@ -595,7 +595,9 @@ fn require_no_dialect_dropped(previous: &ViewVersion, current: &ViewVersion) -> ErrorKind::DataInvalid, format!( "Cannot replace view due to loss of view dialects: \nPrevious dialects: {:?}\nNew dialects: {:?}\nSet {} to true to allow dropping dialects.", - Vec::from_iter(base_dialects), Vec::from_iter(updated_dialects), VIEW_PROPERTY_REPLACE_DROP_DIALECT_ALLOWED + Vec::from_iter(base_dialects), + Vec::from_iter(updated_dialects), + VIEW_PROPERTY_REPLACE_DROP_DIALECT_ALLOWED ), )); } @@ -637,10 +639,10 @@ pub(super) fn require_unique_dialects(view_version: &ViewVersion) -> Result<()> mod test { use super::super::view_metadata::tests::get_test_view_metadata; use super::*; + use crate::NamespaceIdent; use crate::spec::{ NestedField, PrimitiveType, SqlViewRepresentation, Type, ViewRepresentations, }; - use crate::NamespaceIdent; fn new_view_version(id: usize, schema_id: SchemaId, sql: &str) -> ViewVersion { new_view_version_with_dialect(id, schema_id, sql, vec!["spark"]) @@ -723,11 +725,13 @@ mod test { let changes = build_result.changes; assert_eq!(changes.len(), 5); assert!(changes.contains(&ViewUpdate::SetLocation { location })); - assert!(changes.contains(&ViewUpdate::AddViewVersion { - view_version: version - .with_version_id(INITIAL_VIEW_VERSION_ID) - .with_schema_id(-1) - })); + assert!( + changes.contains(&ViewUpdate::AddViewVersion { + view_version: version + .with_version_id(INITIAL_VIEW_VERSION_ID) + .with_schema_id(-1) + }) + ); assert!(changes.contains(&ViewUpdate::SetCurrentViewVersion { view_version_id: -1 })); @@ -1132,32 +1136,23 @@ mod test { fn test_view_version_and_schema_deduplication() { let schema_one = Schema::builder() .with_schema_id(5) - .with_fields(vec![NestedField::required( - 1, - "x", - Type::Primitive(PrimitiveType::Long), - ) - .into()]) + .with_fields(vec![ + NestedField::required(1, "x", Type::Primitive(PrimitiveType::Long)).into(), + ]) .build() .unwrap(); let schema_two = Schema::builder() .with_schema_id(7) - .with_fields(vec![NestedField::required( - 1, - "y", - Type::Primitive(PrimitiveType::Long), - ) - .into()]) + .with_fields(vec![ + NestedField::required(1, "y", Type::Primitive(PrimitiveType::Long)).into(), + ]) .build() .unwrap(); let schema_three = Schema::builder() .with_schema_id(9) - .with_fields(vec![NestedField::required( - 1, - "z", - Type::Primitive(PrimitiveType::Long), - ) - .into()]) + .with_fields(vec![ + NestedField::required(1, "z", Type::Primitive(PrimitiveType::Long)).into(), + ]) .build() .unwrap(); @@ -1222,20 +1217,24 @@ mod test { fn test_error_on_missing_schema() { let builder = builder_without_changes(); // Missing schema - assert!(builder - .clone() - .add_version(new_view_version(0, 10, "SELECT * FROM foo")) - .unwrap_err() - .to_string() - .contains("Cannot add version with unknown schema: 10")); + assert!( + builder + .clone() + .add_version(new_view_version(0, 10, "SELECT * FROM foo")) + .unwrap_err() + .to_string() + .contains("Cannot add version with unknown schema: 10") + ); // Missing last added schema - assert!(builder - .clone() - .add_version(new_view_version(0, -1, "SELECT * FROM foo")) - .unwrap_err() - .to_string() - .contains("Cannot set last added schema: no schema has been added")); + assert!( + builder + .clone() + .add_version(new_view_version(0, -1, "SELECT * FROM foo")) + .unwrap_err() + .to_string() + .contains("Cannot set last added schema: no schema has been added") + ); } #[test] @@ -1249,12 +1248,14 @@ mod test { .contains( "Cannot set current version id to last added 
version: no version has been added." )); - assert!(builder - .clone() - .set_current_version_id(10) - .unwrap_err() - .to_string() - .contains("Cannot set current version to unknown version with id: 10")); + assert!( + builder + .clone() + .set_current_version_id(10) + .unwrap_err() + .to_string() + .contains("Cannot set current version to unknown version with id: 10") + ); } #[test] @@ -1278,15 +1279,17 @@ mod test { #[test] fn test_error_when_setting_negative_version_history_size() { let builder = builder_without_changes(); - assert!(builder - .clone() - .set_properties(HashMap::from_iter(vec![( - VIEW_PROPERTY_VERSION_HISTORY_SIZE.to_string(), - "-1".to_string(), - )])) - .unwrap_err() - .to_string() - .contains("version.history.num-entries must be positive but was -1")); + assert!( + builder + .clone() + .set_properties(HashMap::from_iter(vec![( + VIEW_PROPERTY_VERSION_HISTORY_SIZE.to_string(), + "-1".to_string(), + )])) + .unwrap_err() + .to_string() + .contains("version.history.num-entries must be positive but was -1") + ); } #[test] @@ -1342,9 +1345,10 @@ mod test { .build() .unwrap_err(); - assert!(err - .to_string() - .contains("Cannot replace view due to loss of view dialects")); + assert!( + err.to_string() + .contains("Cannot replace view due to loss of view dialects") + ); } #[test] @@ -1494,9 +1498,10 @@ mod test { .build() .unwrap_err(); - assert!(err - .to_string() - .contains("Cannot replace view due to loss of view dialects")); + assert!( + err.to_string() + .contains("Cannot replace view due to loss of view dialects") + ); } #[test] diff --git a/crates/iceberg/src/spec/view_version.rs b/crates/iceberg/src/spec/view_version.rs index b13d87a9ec..849fecb0c2 100644 --- a/crates/iceberg/src/spec/view_version.rs +++ b/crates/iceberg/src/spec/view_version.rs @@ -26,10 +26,10 @@ use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use typed_builder::TypedBuilder; -use super::view_metadata::ViewVersionLog; use super::INITIAL_VIEW_VERSION_ID; +use super::view_metadata::ViewVersionLog; use crate::catalog::NamespaceIdent; -use crate::error::{timestamp_ms_to_utc, Result}; +use crate::error::{Result, timestamp_ms_to_utc}; use crate::spec::{SchemaId, SchemaRef, ViewMetadata}; use crate::{Error, ErrorKind}; @@ -279,10 +279,10 @@ impl From for ViewRepresentation { mod tests { use chrono::{TimeZone, Utc}; - use crate::spec::view_version::ViewVersion; - use crate::spec::view_version::_serde::ViewVersionV1; - use crate::spec::ViewRepresentations; use crate::NamespaceIdent; + use crate::spec::ViewRepresentations; + use crate::spec::view_version::_serde::ViewVersionV1; + use crate::spec::view_version::ViewVersion; #[test] fn view_version() { @@ -335,10 +335,9 @@ mod tests { }, )]) ); - assert_eq!( - result.default_namespace.inner(), - vec!["default".to_string()] - ); + assert_eq!(result.default_namespace.inner(), vec![ + "default".to_string() + ]); } #[test] diff --git a/crates/iceberg/src/table.rs b/crates/iceberg/src/table.rs index d910b5c8fc..d94c2b1a73 100644 --- a/crates/iceberg/src/table.rs +++ b/crates/iceberg/src/table.rs @@ -21,8 +21,8 @@ use std::sync::Arc; use crate::arrow::ArrowReaderBuilder; use crate::inspect::MetadataTable; -use crate::io::object_cache::ObjectCache; use crate::io::FileIO; +use crate::io::object_cache::ObjectCache; use crate::scan::TableScanBuilder; use crate::spec::{TableMetadata, TableMetadataRef}; use crate::{Error, ErrorKind, Result, TableIdent}; diff --git a/crates/iceberg/src/transaction/append.rs b/crates/iceberg/src/transaction/append.rs 
index 2fbe9fca60..574904b283 100644 --- a/crates/iceberg/src/transaction/append.rs +++ b/crates/iceberg/src/transaction/append.rs @@ -23,10 +23,10 @@ use uuid::Uuid; use crate::error::Result; use crate::spec::{DataFile, ManifestEntry, ManifestFile, Operation}; +use crate::transaction::Transaction; use crate::transaction::snapshot::{ DefaultManifestProcess, SnapshotProduceAction, SnapshotProduceOperation, }; -use crate::transaction::Transaction; use crate::writer::file_writer::ParquetWriter; use crate::{Error, ErrorKind}; @@ -213,10 +213,10 @@ impl SnapshotProduceOperation for FastAppendOperation { mod tests { use crate::scan::tests::TableTestFixture; use crate::spec::{ - DataContentType, DataFileBuilder, DataFileFormat, Literal, Struct, MAIN_BRANCH, + DataContentType, DataFileBuilder, DataFileFormat, Literal, MAIN_BRANCH, Struct, }; - use crate::transaction::tests::make_v2_minimal_table; use crate::transaction::Transaction; + use crate::transaction::tests::make_v2_minimal_table; use crate::{TableRequirement, TableUpdate}; #[tokio::test] diff --git a/crates/iceberg/src/transaction/mod.rs b/crates/iceberg/src/transaction/mod.rs index 108ad10595..ba79d60bbd 100644 --- a/crates/iceberg/src/transaction/mod.rs +++ b/crates/iceberg/src/transaction/mod.rs @@ -28,12 +28,12 @@ use std::sync::Arc; use uuid::Uuid; +use crate::TableUpdate::UpgradeFormatVersion; use crate::error::Result; use crate::spec::FormatVersion; use crate::table::Table; use crate::transaction::append::FastAppendAction; use crate::transaction::sort_order::ReplaceSortOrderAction; -use crate::TableUpdate::UpgradeFormatVersion; use crate::{Catalog, Error, ErrorKind, TableCommit, TableRequirement, TableUpdate}; /// Table transaction. diff --git a/crates/iceberg/src/transaction/snapshot.rs b/crates/iceberg/src/transaction/snapshot.rs index 0bded91354..a15e17f1d0 100644 --- a/crates/iceberg/src/transaction/snapshot.rs +++ b/crates/iceberg/src/transaction/snapshot.rs @@ -24,11 +24,10 @@ use uuid::Uuid; use crate::error::Result; use crate::io::OutputFile; use crate::spec::{ - update_snapshot_summaries, DataFile, DataFileFormat, FormatVersion, ManifestEntry, - ManifestFile, ManifestListWriter, ManifestWriterBuilder, Operation, Snapshot, - SnapshotReference, SnapshotRetention, SnapshotSummaryCollector, Struct, StructType, Summary, - MAIN_BRANCH, PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT, - PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT, + DataFile, DataFileFormat, FormatVersion, MAIN_BRANCH, ManifestEntry, ManifestFile, + ManifestListWriter, ManifestWriterBuilder, Operation, PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT, + PROPERTY_WRITE_PARTITION_SUMMARY_LIMIT_DEFAULT, Snapshot, SnapshotReference, SnapshotRetention, + SnapshotSummaryCollector, Struct, StructType, Summary, update_snapshot_summaries, }; use crate::transaction::Transaction; use crate::{Error, ErrorKind, TableRequirement, TableUpdate}; diff --git a/crates/iceberg/src/transaction/sort_order.rs b/crates/iceberg/src/transaction/sort_order.rs index 51012dca10..f925e602a1 100644 --- a/crates/iceberg/src/transaction/sort_order.rs +++ b/crates/iceberg/src/transaction/sort_order.rs @@ -107,8 +107,8 @@ impl<'a> ReplaceSortOrderAction<'a> { #[cfg(test)] mod tests { - use crate::transaction::tests::make_v2_table; use crate::transaction::Transaction; + use crate::transaction::tests::make_v2_table; use crate::{TableRequirement, TableUpdate}; #[test] diff --git a/crates/iceberg/src/transform/bucket.rs b/crates/iceberg/src/transform/bucket.rs index b64a4631d0..8807fb1f79 100644 --- 
a/crates/iceberg/src/transform/bucket.rs +++ b/crates/iceberg/src/transform/bucket.rs @@ -224,7 +224,7 @@ impl TransformFunction for Bucket { "Unsupported data type for bucket transform: {:?}", input.data_type() ), - )) + )); } }; Ok(Arc::new(res)) @@ -258,7 +258,7 @@ impl TransformFunction for Bucket { "Unsupported data type for bucket transform: {:?}", input.data_type() ), - )) + )); } }; Ok(Some(Datum::int(val))) @@ -273,6 +273,7 @@ mod test { use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime}; use super::Bucket; + use crate::Result; use crate::expr::PredicateOperator; use crate::spec::PrimitiveType::{ Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, @@ -280,9 +281,8 @@ mod test { }; use crate::spec::Type::{Primitive, Struct}; use crate::spec::{Datum, NestedField, PrimitiveType, StructType, Transform, Type}; - use crate::transform::test::{TestProjectionFixture, TestTransformFixture}; use crate::transform::TransformFunction; - use crate::Result; + use crate::transform::test::{TestProjectionFixture, TestTransformFixture}; #[test] fn test_bucket_transform() { @@ -320,12 +320,9 @@ mod test { (Primitive(TimestampNs), Some(Primitive(Int))), (Primitive(TimestamptzNs), Some(Primitive(Int))), ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), + Struct(StructType::new(vec![ + NestedField::optional(1, "a", Primitive(Timestamp)).into(), + ])), None, ), ], diff --git a/crates/iceberg/src/transform/identity.rs b/crates/iceberg/src/transform/identity.rs index 68e5a0b1ae..dd096337ac 100644 --- a/crates/iceberg/src/transform/identity.rs +++ b/crates/iceberg/src/transform/identity.rs @@ -84,12 +84,9 @@ mod test { (Primitive(TimestampNs), Some(Primitive(TimestampNs))), (Primitive(TimestamptzNs), Some(Primitive(TimestamptzNs))), ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), + Struct(StructType::new(vec![ + NestedField::optional(1, "a", Primitive(Timestamp)).into(), + ])), None, ), ], diff --git a/crates/iceberg/src/transform/mod.rs b/crates/iceberg/src/transform/mod.rs index 509ba6be91..4cc0d1fe8f 100644 --- a/crates/iceberg/src/transform/mod.rs +++ b/crates/iceberg/src/transform/mod.rs @@ -74,12 +74,12 @@ mod test { use std::collections::HashSet; use std::sync::Arc; + use crate::Result; use crate::expr::accessor::StructAccessor; use crate::expr::{ BinaryExpression, BoundPredicate, BoundReference, PredicateOperator, SetExpression, }; use crate::spec::{Datum, NestedField, NestedFieldRef, PrimitiveType, Transform, Type}; - use crate::Result; /// A utitily struct, test fixture /// used for testing the projection on `Transform` diff --git a/crates/iceberg/src/transform/temporal.rs b/crates/iceberg/src/transform/temporal.rs index 51be7c70b0..3b333d7bad 100644 --- a/crates/iceberg/src/transform/temporal.rs +++ b/crates/iceberg/src/transform/temporal.rs @@ -18,7 +18,7 @@ use std::sync::Arc; use arrow_arith::arity::binary; -use arrow_arith::temporal::{date_part, DatePart}; +use arrow_arith::temporal::{DatePart, date_part}; use arrow_array::types::Date32Type; use arrow_array::{ Array, ArrayRef, Date32Array, Int32Array, TimestampMicrosecondArray, TimestampNanosecondArray, @@ -102,7 +102,7 @@ impl TransformFunction for Year { "Unsupported data type for year transform: {:?}", input.data_type() ), - )) + )); } }; Ok(Some(Datum::int(val))) @@ -200,7 +200,7 @@ impl TransformFunction for Month { "Unsupported data type for month transform: {:?}", input.data_type() 
), - )) + )); } }; Ok(Some(Datum::int(val))) @@ -297,7 +297,7 @@ impl TransformFunction for Day { "Should not call internally for unsupported data type {:?}", input.data_type() ), - )) + )); } }; Ok(Arc::new(res)) @@ -323,7 +323,7 @@ impl TransformFunction for Day { "Unsupported data type for day transform: {:?}", input.data_type() ), - )) + )); } }; Ok(Some(Datum::date(val))) @@ -386,7 +386,7 @@ impl TransformFunction for Hour { "Unsupported data type for hour transform: {:?}", input.data_type() ), - )) + )); } }; Ok(Some(Datum::int(val))) @@ -400,6 +400,7 @@ mod test { use arrow_array::{ArrayRef, Date32Array, Int32Array, TimestampMicrosecondArray}; use chrono::{NaiveDate, NaiveDateTime}; + use crate::Result; use crate::expr::PredicateOperator; use crate::spec::PrimitiveType::{ Binary, Date, Decimal, Fixed, Int, Long, String as StringType, Time, Timestamp, @@ -409,7 +410,6 @@ mod test { use crate::spec::{Datum, NestedField, PrimitiveType, StructType, Transform, Type}; use crate::transform::test::{TestProjectionFixture, TestTransformFixture}; use crate::transform::{BoxedTransformFunction, TransformFunction}; - use crate::Result; #[test] fn test_year_transform() { @@ -449,12 +449,9 @@ mod test { (Primitive(TimestampNs), Some(Primitive(Int))), (Primitive(TimestamptzNs), Some(Primitive(Int))), ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), + Struct(StructType::new(vec![ + NestedField::optional(1, "a", Primitive(Timestamp)).into(), + ])), None, ), ], @@ -501,12 +498,9 @@ mod test { (Primitive(TimestampNs), Some(Primitive(Int))), (Primitive(TimestamptzNs), Some(Primitive(Int))), ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), + Struct(StructType::new(vec![ + NestedField::optional(1, "a", Primitive(Timestamp)).into(), + ])), None, ), ], @@ -553,12 +547,9 @@ mod test { (Primitive(TimestampNs), Some(Primitive(Date))), (Primitive(TimestamptzNs), Some(Primitive(Date))), ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), + Struct(StructType::new(vec![ + NestedField::optional(1, "a", Primitive(Timestamp)).into(), + ])), None, ), ], @@ -605,12 +596,9 @@ mod test { (Primitive(TimestampNs), Some(Primitive(Int))), (Primitive(TimestamptzNs), Some(Primitive(Int))), ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), + Struct(StructType::new(vec![ + NestedField::optional(1, "a", Primitive(Timestamp)).into(), + ])), None, ), ], diff --git a/crates/iceberg/src/transform/truncate.rs b/crates/iceberg/src/transform/truncate.rs index 5e14188442..d712df518e 100644 --- a/crates/iceberg/src/transform/truncate.rs +++ b/crates/iceberg/src/transform/truncate.rs @@ -21,8 +21,8 @@ use arrow_array::ArrayRef; use arrow_schema::DataType; use super::TransformFunction; -use crate::spec::{Datum, PrimitiveLiteral}; use crate::Error; +use crate::spec::{Datum, PrimitiveLiteral}; #[derive(Debug)] pub struct Truncate { @@ -44,11 +44,7 @@ impl Truncate { #[inline] fn truncate_binary(s: &[u8], width: usize) -> &[u8] { - if s.len() > width { - &s[0..width] - } else { - s - } + if s.len() > width { &s[0..width] } else { s } } #[inline] @@ -192,6 +188,7 @@ mod test { use arrow_array::types::Decimal128Type; use arrow_array::{Decimal128Array, Int32Array, Int64Array}; + use crate::Result; use crate::expr::PredicateOperator; use crate::spec::PrimitiveType::{ Binary, Date, Decimal, Fixed, Int, Long, String as 
StringType, Time, Timestamp, @@ -199,9 +196,8 @@ mod test { }; use crate::spec::Type::{Primitive, Struct}; use crate::spec::{Datum, NestedField, PrimitiveType, StructType, Transform, Type}; - use crate::transform::test::{TestProjectionFixture, TestTransformFixture}; use crate::transform::TransformFunction; - use crate::Result; + use crate::transform::test::{TestProjectionFixture, TestTransformFixture}; #[test] fn test_truncate_transform() { @@ -243,12 +239,9 @@ mod test { (Primitive(TimestampNs), None), (Primitive(TimestamptzNs), None), ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), + Struct(StructType::new(vec![ + NestedField::optional(1, "a", Primitive(Timestamp)).into(), + ])), None, ), ], diff --git a/crates/iceberg/src/transform/void.rs b/crates/iceberg/src/transform/void.rs index 5d429a593b..9473184c24 100644 --- a/crates/iceberg/src/transform/void.rs +++ b/crates/iceberg/src/transform/void.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -use arrow_array::{new_null_array, ArrayRef}; +use arrow_array::{ArrayRef, new_null_array}; use super::TransformFunction; use crate::Result; @@ -84,18 +84,12 @@ mod test { (Primitive(TimestampNs), Some(Primitive(TimestampNs))), (Primitive(TimestamptzNs), Some(Primitive(TimestamptzNs))), ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), - Some(Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()]))), + Struct(StructType::new(vec![ + NestedField::optional(1, "a", Primitive(Timestamp)).into(), + ])), + Some(Struct(StructType::new(vec![ + NestedField::optional(1, "a", Primitive(Timestamp)).into(), + ]))), ), ], }; @@ -140,12 +134,9 @@ mod test { (Primitive(Timestamp), Some(Primitive(StringType))), (Primitive(Timestamptz), Some(Primitive(StringType))), ( - Struct(StructType::new(vec![NestedField::optional( - 1, - "a", - Primitive(Timestamp), - ) - .into()])), + Struct(StructType::new(vec![ + NestedField::optional(1, "a", Primitive(Timestamp)).into(), + ])), Some(Primitive(StringType)), ), ], diff --git a/crates/iceberg/src/writer/base_writer/data_file_writer.rs b/crates/iceberg/src/writer/base_writer/data_file_writer.rs index dea8fd423c..ddd20eac85 100644 --- a/crates/iceberg/src/writer/base_writer/data_file_writer.rs +++ b/crates/iceberg/src/writer/base_writer/data_file_writer.rs @@ -20,10 +20,10 @@ use arrow_array::RecordBatch; use itertools::Itertools; +use crate::Result; use crate::spec::{DataContentType, DataFile, Struct}; use crate::writer::file_writer::{FileWriter, FileWriterBuilder}; use crate::writer::{CurrentFileStatus, IcebergWriter, IcebergWriterBuilder}; -use crate::Result; /// Builder for `DataFileWriter`. 
#[derive(Clone, Debug)] @@ -108,21 +108,21 @@ mod test { use arrow_array::{Int32Array, StringArray}; use arrow_schema::{DataType, Field}; - use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions}; use parquet::arrow::PARQUET_FIELD_ID_META_KEY; + use parquet::arrow::arrow_reader::{ArrowReaderMetadata, ArrowReaderOptions}; use parquet::file::properties::WriterProperties; use tempfile::TempDir; + use crate::Result; use crate::io::FileIOBuilder; use crate::spec::{ DataContentType, DataFileFormat, Literal, NestedField, PrimitiveType, Schema, Struct, Type, }; use crate::writer::base_writer::data_file_writer::DataFileWriterBuilder; - use crate::writer::file_writer::location_generator::test::MockLocationGenerator; - use crate::writer::file_writer::location_generator::DefaultFileNameGenerator; use crate::writer::file_writer::ParquetWriterBuilder; + use crate::writer::file_writer::location_generator::DefaultFileNameGenerator; + use crate::writer::file_writer::location_generator::test::MockLocationGenerator; use crate::writer::{IcebergWriter, IcebergWriterBuilder, RecordBatch}; - use crate::Result; #[tokio::test] async fn test_parquet_writer() -> Result<()> { diff --git a/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs b/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs index 0749138e85..cf5e996e8d 100644 --- a/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs +++ b/crates/iceberg/src/writer/base_writer/equality_delete_writer.rs @@ -182,8 +182,8 @@ mod test { use arrow_schema::{DataType, Field, Fields}; use arrow_select::concat::concat_batches; use itertools::Itertools; - use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use parquet::arrow::PARQUET_FIELD_ID_META_KEY; + use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; use parquet::file::properties::WriterProperties; use tempfile::TempDir; use uuid::Uuid; @@ -197,9 +197,9 @@ mod test { use crate::writer::base_writer::equality_delete_writer::{ EqualityDeleteFileWriterBuilder, EqualityDeleteWriterConfig, }; - use crate::writer::file_writer::location_generator::test::MockLocationGenerator; - use crate::writer::file_writer::location_generator::DefaultFileNameGenerator; use crate::writer::file_writer::ParquetWriterBuilder; + use crate::writer::file_writer::location_generator::DefaultFileNameGenerator; + use crate::writer::file_writer::location_generator::test::MockLocationGenerator; use crate::writer::{IcebergWriter, IcebergWriterBuilder}; async fn check_parquet_data_file_with_equality_delete_write( @@ -296,12 +296,10 @@ mod test { NestedField::required( 1, "col1", - Type::Struct(StructType::new(vec![NestedField::required( - 5, - "sub_col", - Type::Primitive(PrimitiveType::Int), - ) - .into()])), + Type::Struct(StructType::new(vec![ + NestedField::required(5, "sub_col", Type::Primitive(PrimitiveType::Int)) + .into(), + ])), ) .into(), NestedField::required(2, "col2", Type::Primitive(PrimitiveType::String)).into(), @@ -317,17 +315,21 @@ mod test { NestedField::required( 4, "col4", - Type::Struct(StructType::new(vec![NestedField::required( - 7, - "sub_col", - Type::Struct(StructType::new(vec![NestedField::required( - 8, - "sub_sub_col", - Type::Primitive(PrimitiveType::Int), + Type::Struct(StructType::new(vec![ + NestedField::required( + 7, + "sub_col", + Type::Struct(StructType::new(vec![ + NestedField::required( + 8, + "sub_sub_col", + Type::Primitive(PrimitiveType::Int), + ) + .into(), + ])), ) - .into()])), - ) - .into()])), + .into(), + ])), ) .into(), ]) 
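The hunks in this file, like most of this patch, are mechanical reformatting rather than behavior changes, presumably the output of `cargo fmt` under the 2024 style edition that accompanies the toolchain bump to Rust 1.85: `use` declarations are version-sorted (ASCII uppercase sorts before lowercase, so `crate::Result` now precedes `crate::spec::...` and `std::sync::Arc` precedes `std::sync::atomic`), overlong `vec![...]` literals put each element on its own line inside the brackets, long `assert!` conditions are parenthesized and indented as a unit, short `if`/`else` expressions collapse to one line (cf. `truncate_binary` below), and diverging `return Err(...)` expressions in match arms gain trailing semicolons. A minimal, self-contained sketch of these patterns (placeholder code, not part of the patch):

// Sketch of the rustfmt 2024-style layout applied throughout this patch;
// the bindings below are placeholders, not project code.

use std::sync::Arc; // version sort: `Arc` (uppercase A)...
use std::sync::atomic::{AtomicI64, Ordering}; // ...precedes `atomic`

fn main() {
    // Overlong vec literals: elements are laid out vertically with a
    // trailing comma instead of one element hanging past the bracket.
    let counters = vec![
        Arc::new(AtomicI64::new(0)),
    ];

    // Mirrors the parenthesized `assert!` layout in the hunks above: the
    // condition is wrapped in parentheses and indented as a unit rather
    // than chaining the receiver across lines.
    assert!(
        counters
            .iter()
            .all(|c| c.load(Ordering::Relaxed) == 0)
    );

    // Short if/else collapses to a single line under the 2024 style.
    let n = counters.len();
    let capped = if n > 8 { 8 } else { n };
    assert_eq!(capped, 1);
}

Because every change here is layout-only, the result should be verifiable by running `cargo fmt --check` against the patched tree.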
@@ -439,23 +441,27 @@ mod test { NestedField::required( 3, "col3", - Type::Struct(StructType::new(vec![NestedField::required( - 4, - "sub_col", - Type::Primitive(PrimitiveType::Int), - ) - .into()])), + Type::Struct(StructType::new(vec![ + NestedField::required( + 4, + "sub_col", + Type::Primitive(PrimitiveType::Int), + ) + .into(), + ])), ) .into(), NestedField::optional( 5, "col4", - Type::Struct(StructType::new(vec![NestedField::required( - 6, - "sub_col2", - Type::Primitive(PrimitiveType::Int), - ) - .into()])), + Type::Struct(StructType::new(vec![ + NestedField::required( + 6, + "sub_col2", + Type::Primitive(PrimitiveType::Int), + ) + .into(), + ])), ) .into(), NestedField::required( @@ -674,28 +680,30 @@ mod test { NestedField::optional( 1, "col1", - Type::Struct(StructType::new(vec![NestedField::optional( - 2, - "sub_col", - Type::Primitive(PrimitiveType::Int), - ) - .into()])), + Type::Struct(StructType::new(vec![ + NestedField::optional(2, "sub_col", Type::Primitive(PrimitiveType::Int)) + .into(), + ])), ) .into(), NestedField::optional( 3, "col2", - Type::Struct(StructType::new(vec![NestedField::optional( - 4, - "sub_struct_col", - Type::Struct(StructType::new(vec![NestedField::optional( - 5, - "sub_sub_col", - Type::Primitive(PrimitiveType::Int), + Type::Struct(StructType::new(vec![ + NestedField::optional( + 4, + "sub_struct_col", + Type::Struct(StructType::new(vec![ + NestedField::optional( + 5, + "sub_sub_col", + Type::Primitive(PrimitiveType::Int), + ) + .into(), + ])), ) - .into()])), - ) - .into()])), + .into(), + ])), ) .into(), ]) @@ -722,11 +730,14 @@ mod test { let inner_col = { let nulls = NullBuffer::from(vec![true, false, true]); Arc::new(StructArray::new( - Fields::from(vec![Field::new("sub_sub_col", DataType::Int32, true) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "5".to_string(), - )]))]), + Fields::from(vec![ + Field::new("sub_sub_col", DataType::Int32, true).with_metadata( + HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "5".to_string(), + )]), + ), + ]), vec![Arc::new(Int32Array::from(vec![Some(1), Some(2), None]))], Some(nulls), )) diff --git a/crates/iceberg/src/writer/file_writer/location_generator.rs b/crates/iceberg/src/writer/file_writer/location_generator.rs index dddf097b4c..43609743aa 100644 --- a/crates/iceberg/src/writer/file_writer/location_generator.rs +++ b/crates/iceberg/src/writer/file_writer/location_generator.rs @@ -17,8 +17,8 @@ //! This module contains the location generator and file name generator for generating path of data file. 
-use std::sync::atomic::AtomicU64; use std::sync::Arc; +use std::sync::atomic::AtomicU64; use crate::spec::{DataFileFormat, TableMetadata}; use crate::{Error, ErrorKind, Result}; diff --git a/crates/iceberg/src/writer/file_writer/mod.rs b/crates/iceberg/src/writer/file_writer/mod.rs index 4a0fffcc1e..dbf747ec12 100644 --- a/crates/iceberg/src/writer/file_writer/mod.rs +++ b/crates/iceberg/src/writer/file_writer/mod.rs @@ -21,8 +21,8 @@ use arrow_array::RecordBatch; use futures::Future; use super::CurrentFileStatus; -use crate::spec::DataFileBuilder; use crate::Result; +use crate::spec::DataFileBuilder; mod parquet_writer; pub use parquet_writer::{ParquetWriter, ParquetWriterBuilder}; diff --git a/crates/iceberg/src/writer/file_writer/parquet_writer.rs b/crates/iceberg/src/writer/file_writer/parquet_writer.rs index c4c199a46e..75b3d9244a 100644 --- a/crates/iceberg/src/writer/file_writer/parquet_writer.rs +++ b/crates/iceberg/src/writer/file_writer/parquet_writer.rs @@ -18,16 +18,16 @@ //! The module contains the file writer for parquet file format. use std::collections::HashMap; -use std::sync::atomic::AtomicI64; use std::sync::Arc; +use std::sync::atomic::AtomicI64; use arrow_schema::SchemaRef as ArrowSchemaRef; use bytes::Bytes; use futures::future::BoxFuture; use itertools::Itertools; +use parquet::arrow::AsyncArrowWriter; use parquet::arrow::async_reader::AsyncFileReader; use parquet::arrow::async_writer::AsyncFileWriter as ArrowAsyncFileWriter; -use parquet::arrow::AsyncArrowWriter; use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader}; use parquet::file::properties::WriterProperties; use parquet::file::statistics::Statistics; @@ -39,14 +39,14 @@ use super::location_generator::{FileNameGenerator, LocationGenerator}; use super::track_writer::TrackWriter; use super::{FileWriter, FileWriterBuilder}; use crate::arrow::{ - get_parquet_stat_max_as_datum, get_parquet_stat_min_as_datum, ArrowFileReader, - NanValueCountVisitor, DEFAULT_MAP_FIELD_NAME, + ArrowFileReader, DEFAULT_MAP_FIELD_NAME, NanValueCountVisitor, get_parquet_stat_max_as_datum, + get_parquet_stat_min_as_datum, }; use crate::io::{FileIO, FileWrite, OutputFile}; use crate::spec::{ - visit_schema, DataContentType, DataFileBuilder, DataFileFormat, Datum, ListType, Literal, - MapType, NestedFieldRef, PartitionSpec, PrimitiveType, Schema, SchemaRef, SchemaVisitor, - Struct, StructType, TableMetadata, Type, + DataContentType, DataFileBuilder, DataFileFormat, Datum, ListType, Literal, MapType, + NestedFieldRef, PartitionSpec, PrimitiveType, Schema, SchemaRef, SchemaVisitor, Struct, + StructType, TableMetadata, Type, visit_schema, }; use crate::transform::create_transform_function; use crate::writer::{CurrentFileStatus, DataFile}; @@ -209,7 +209,12 @@ impl SchemaVisitor for IndexByParquetPathName { let full_name = self.field_names.iter().map(String::as_str).join("."); let field_id = self.field_id; if let Some(existing_field_id) = self.name_to_id.get(full_name.as_str()) { - return Err(Error::new(ErrorKind::DataInvalid, format!("Invalid schema: multiple fields for name {full_name}: {field_id} and {existing_field_id}"))); + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid schema: multiple fields for name {full_name}: {field_id} and {existing_field_id}" + ), + )); } else { self.name_to_id.insert(full_name, field_id); } @@ -667,8 +672,8 @@ mod tests { use crate::arrow::schema_to_arrow_schema; use crate::io::FileIOBuilder; use crate::spec::{PrimitiveLiteral, Struct, *}; - use 
crate::writer::file_writer::location_generator::test::MockLocationGenerator; use crate::writer::file_writer::location_generator::DefaultFileNameGenerator; + use crate::writer::file_writer::location_generator::test::MockLocationGenerator; use crate::writer::tests::check_parquet_data_file; fn schema_for_all_type() -> Schema { @@ -762,17 +767,21 @@ mod tests { NestedField::required( 4, "col4", - Type::Struct(StructType::new(vec![NestedField::required( - 8, - "col_4_8", - Type::Struct(StructType::new(vec![NestedField::required( - 9, - "col_4_8_9", - Type::Primitive(PrimitiveType::Long), + Type::Struct(StructType::new(vec![ + NestedField::required( + 8, + "col_4_8", + Type::Struct(StructType::new(vec![ + NestedField::required( + 9, + "col_4_8_9", + Type::Primitive(PrimitiveType::Long), + ) + .into(), + ])), ) - .into()])), - ) - .into()])), + .into(), + ])), ) .into(), NestedField::required( @@ -1257,10 +1266,12 @@ mod tests { // check data file assert_eq!(data_file.record_count(), 4); assert!(data_file.value_counts().iter().all(|(_, &v)| { v == 4 })); - assert!(data_file - .null_value_counts() - .iter() - .all(|(_, &v)| { v == 1 })); + assert!( + data_file + .null_value_counts() + .iter() + .all(|(_, &v)| { v == 1 }) + ); assert_eq!( *data_file.lower_bounds(), HashMap::from([ @@ -1363,15 +1374,17 @@ mod tests { // test 1.1 and 2.2 let schema = Arc::new( Schema::builder() - .with_fields(vec![NestedField::optional( - 0, - "decimal", - Type::Primitive(PrimitiveType::Decimal { - precision: 28, - scale: 10, - }), - ) - .into()]) + .with_fields(vec![ + NestedField::optional( + 0, + "decimal", + Type::Primitive(PrimitiveType::Decimal { + precision: 28, + scale: 10, + }), + ) + .into(), + ]) .build() .unwrap(), ); @@ -1416,15 +1429,17 @@ mod tests { // test -1.1 and -2.2 let schema = Arc::new( Schema::builder() - .with_fields(vec![NestedField::optional( - 0, - "decimal", - Type::Primitive(PrimitiveType::Decimal { - precision: 28, - scale: 10, - }), - ) - .into()]) + .with_fields(vec![ + NestedField::optional( + 0, + "decimal", + Type::Primitive(PrimitiveType::Decimal { + precision: 28, + scale: 10, + }), + ) + .into(), + ]) .build() .unwrap(), ); @@ -1472,15 +1487,17 @@ mod tests { assert_eq!(decimal_max.scale(), decimal_min.scale()); let schema = Arc::new( Schema::builder() - .with_fields(vec![NestedField::optional( - 0, - "decimal", - Type::Primitive(PrimitiveType::Decimal { - precision: 38, - scale: decimal_max.scale(), - }), - ) - .into()]) + .with_fields(vec![ + NestedField::optional( + 0, + "decimal", + Type::Primitive(PrimitiveType::Decimal { + precision: 38, + scale: decimal_max.scale(), + }), + ) + .into(), + ]) .build() .unwrap(), ); @@ -1744,29 +1761,31 @@ mod tests { let file_name_gen = DefaultFileNameGenerator::new("test".to_string(), None, DataFileFormat::Parquet); - let schema_struct_float_fields = - Fields::from(vec![Field::new("col4", DataType::Float32, false) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "4".to_string(), - )]))]); + let schema_struct_float_fields = Fields::from(vec![ + Field::new("col4", DataType::Float32, false).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + "4".to_string(), + )])), + ]); - let schema_struct_nested_float_fields = - Fields::from(vec![Field::new("col7", DataType::Float32, false) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "7".to_string(), - )]))]); + let schema_struct_nested_float_fields = Fields::from(vec![ + Field::new("col7", DataType::Float32, 
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                "7".to_string(),
+            )])),
+        ]);
 
-        let schema_struct_nested_fields = Fields::from(vec![Field::new(
-            "col6",
-            arrow_schema::DataType::Struct(schema_struct_nested_float_fields.clone()),
-            false,
-        )
-        .with_metadata(HashMap::from([(
-            PARQUET_FIELD_ID_META_KEY.to_string(),
-            "6".to_string(),
-        )]))]);
+        let schema_struct_nested_fields = Fields::from(vec![
+            Field::new(
+                "col6",
+                arrow_schema::DataType::Struct(schema_struct_nested_float_fields.clone()),
+                false,
+            )
+            .with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                "6".to_string(),
+            )])),
+        ]);
 
         // prepare data
         let arrow_schema = {
@@ -1891,15 +1910,11 @@ mod tests {
             "4".to_string(),
         )]));
 
-        let schema_struct_list_field = Fields::from(vec![Field::new_list(
-            "col2",
-            schema_struct_list_float_field.clone(),
-            true,
-        )
-        .with_metadata(HashMap::from([(
-            PARQUET_FIELD_ID_META_KEY.to_string(),
-            "3".to_string(),
-        )]))]);
+        let schema_struct_list_field = Fields::from(vec![
+            Field::new_list("col2", schema_struct_list_float_field.clone(), true).with_metadata(
+                HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "3".to_string())]),
+            ),
+        ]);
 
         let arrow_schema = {
             let fields = vec![
@@ -2106,18 +2121,20 @@ mod tests {
             [(PARQUET_FIELD_ID_META_KEY.to_string(), "7".to_string())],
         ));
 
-        let schema_struct_map_field = Fields::from(vec![Field::new_map(
-            "col3",
-            DEFAULT_MAP_FIELD_NAME,
-            struct_map_key_field_schema.clone(),
-            struct_map_value_field_schema.clone(),
-            false,
-            false,
-        )
-        .with_metadata(HashMap::from([(
-            PARQUET_FIELD_ID_META_KEY.to_string(),
-            "5".to_string(),
-        )]))]);
+        let schema_struct_map_field = Fields::from(vec![
+            Field::new_map(
+                "col3",
+                DEFAULT_MAP_FIELD_NAME,
+                struct_map_key_field_schema.clone(),
+                struct_map_value_field_schema.clone(),
+                false,
+                false,
+            )
+            .with_metadata(HashMap::from([(
+                PARQUET_FIELD_ID_META_KEY.to_string(),
+                "5".to_string(),
+            )])),
+        ]);
 
         let arrow_schema = {
             let fields = vec![
@@ -2245,13 +2262,11 @@ mod tests {
         Arc::new(
             Schema::builder()
                 .with_schema_id(1)
-                .with_fields(vec![NestedField::required(
-                    0,
-                    "col",
-                    Type::Primitive(PrimitiveType::Long),
-                )
-                .with_id(0)
-                .into()])
+                .with_fields(vec![
+                    NestedField::required(0, "col", Type::Primitive(PrimitiveType::Long))
+                        .with_id(0)
+                        .into(),
+                ])
                 .build()
                 .expect("Failed to create schema"),
         ),
diff --git a/crates/iceberg/src/writer/file_writer/track_writer.rs b/crates/iceberg/src/writer/file_writer/track_writer.rs
index 7b916aeb58..d6b12cde0a 100644
--- a/crates/iceberg/src/writer/file_writer/track_writer.rs
+++ b/crates/iceberg/src/writer/file_writer/track_writer.rs
@@ -15,13 +15,13 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::sync::atomic::AtomicI64;
 use std::sync::Arc;
+use std::sync::atomic::AtomicI64;
 
 use bytes::Bytes;
 
-use crate::io::FileWrite;
 use crate::Result;
+use crate::io::FileWrite;
 
 /// `TrackWriter` is used to track the written size.
 pub(crate) struct TrackWriter {
diff --git a/crates/iceberg/src/writer/mod.rs b/crates/iceberg/src/writer/mod.rs
index 174890e890..2a6aa802bf 100644
--- a/crates/iceberg/src/writer/mod.rs
+++ b/crates/iceberg/src/writer/mod.rs
@@ -47,10 +47,10 @@
 //! use iceberg::spec::DataFile;
 //! use iceberg::transaction::Transaction;
 //! use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder;
+//! use iceberg::writer::file_writer::ParquetWriterBuilder;
 //! use iceberg::writer::file_writer::location_generator::{
 //!     DefaultFileNameGenerator, DefaultLocationGenerator,
 //! };
-//! use iceberg::writer::file_writer::ParquetWriterBuilder;
 //! use iceberg::writer::{IcebergWriter, IcebergWriterBuilder};
 //! use iceberg::{Catalog, Result, TableIdent};
 //! use iceberg_catalog_memory::MemoryCatalog;
@@ -102,10 +102,10 @@
 //! use iceberg::io::FileIOBuilder;
 //! use iceberg::spec::DataFile;
 //! use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder;
+//! use iceberg::writer::file_writer::ParquetWriterBuilder;
 //! use iceberg::writer::file_writer::location_generator::{
 //!     DefaultFileNameGenerator, DefaultLocationGenerator,
 //! };
-//! use iceberg::writer::file_writer::ParquetWriterBuilder;
 //! use iceberg::writer::{IcebergWriter, IcebergWriterBuilder};
 //! use iceberg::{Catalog, Result, TableIdent};
 //! use iceberg_catalog_memory::MemoryCatalog;
@@ -198,8 +198,8 @@ pub mod file_writer;
 
 use arrow_array::RecordBatch;
 
-use crate::spec::DataFile;
 use crate::Result;
+use crate::spec::DataFile;
 
 type DefaultInput = RecordBatch;
 type DefaultOutput = Vec<DataFile>;
diff --git a/crates/integration_tests/tests/shared.rs b/crates/integration_tests/tests/shared.rs
index 500c63cabd..6bdddaa6cf 100644
--- a/crates/integration_tests/tests/shared.rs
+++ b/crates/integration_tests/tests/shared.rs
@@ -18,7 +18,7 @@
 use std::sync::{Arc, OnceLock};
 
 use ctor::dtor;
-use iceberg_integration_tests::{set_test_fixture, TestFixture};
+use iceberg_integration_tests::{TestFixture, set_test_fixture};
 
 pub mod shared_tests;
diff --git a/crates/integration_tests/tests/shared_tests/append_data_file_test.rs b/crates/integration_tests/tests/shared_tests/append_data_file_test.rs
index 20a6f9d099..f3ee17c75c 100644
--- a/crates/integration_tests/tests/shared_tests/append_data_file_test.rs
+++ b/crates/integration_tests/tests/shared_tests/append_data_file_test.rs
@@ -23,10 +23,10 @@ use arrow_array::{ArrayRef, BooleanArray, Int32Array, RecordBatch, StringArray};
 use futures::TryStreamExt;
 use iceberg::transaction::Transaction;
 use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder;
+use iceberg::writer::file_writer::ParquetWriterBuilder;
 use iceberg::writer::file_writer::location_generator::{
     DefaultFileNameGenerator, DefaultLocationGenerator,
 };
-use iceberg::writer::file_writer::ParquetWriterBuilder;
 use iceberg::writer::{IcebergWriter, IcebergWriterBuilder};
 use iceberg::{Catalog, TableCreation};
 use iceberg_catalog_rest::RestCatalog;
diff --git a/crates/integration_tests/tests/shared_tests/append_partition_data_file_test.rs b/crates/integration_tests/tests/shared_tests/append_partition_data_file_test.rs
index 2a1c634f39..c5c029a45a 100644
--- a/crates/integration_tests/tests/shared_tests/append_partition_data_file_test.rs
+++ b/crates/integration_tests/tests/shared_tests/append_partition_data_file_test.rs
@@ -25,10 +25,10 @@ use iceberg::spec::{Literal, PrimitiveLiteral, Struct, Transform, UnboundPartiti
 use iceberg::table::Table;
 use iceberg::transaction::Transaction;
 use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder;
+use iceberg::writer::file_writer::ParquetWriterBuilder;
 use iceberg::writer::file_writer::location_generator::{
     DefaultFileNameGenerator, DefaultLocationGenerator,
 };
-use iceberg::writer::file_writer::ParquetWriterBuilder;
 use iceberg::writer::{IcebergWriter, IcebergWriterBuilder};
 use iceberg::{Catalog, TableCreation};
 use iceberg_catalog_rest::RestCatalog;
diff --git a/crates/integration_tests/tests/shared_tests/conflict_commit_test.rs b/crates/integration_tests/tests/shared_tests/conflict_commit_test.rs
index 636020c3e7..d277e12e5a 100644
--- a/crates/integration_tests/tests/shared_tests/conflict_commit_test.rs
+++ b/crates/integration_tests/tests/shared_tests/conflict_commit_test.rs
@@ -23,10 +23,10 @@ use arrow_array::{ArrayRef, BooleanArray, Int32Array, RecordBatch, StringArray};
 use futures::TryStreamExt;
 use iceberg::transaction::Transaction;
 use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder;
+use iceberg::writer::file_writer::ParquetWriterBuilder;
 use iceberg::writer::file_writer::location_generator::{
     DefaultFileNameGenerator, DefaultLocationGenerator,
 };
-use iceberg::writer::file_writer::ParquetWriterBuilder;
 use iceberg::writer::{IcebergWriter, IcebergWriterBuilder};
 use iceberg::{Catalog, TableCreation};
 use iceberg_catalog_rest::RestCatalog;
diff --git a/crates/integration_tests/tests/shared_tests/scan_all_type.rs b/crates/integration_tests/tests/shared_tests/scan_all_type.rs
index a014330110..5ff982720b 100644
--- a/crates/integration_tests/tests/shared_tests/scan_all_type.rs
+++ b/crates/integration_tests/tests/shared_tests/scan_all_type.rs
@@ -30,15 +30,15 @@ use arrow_schema::{DataType, Field, Fields};
 use futures::TryStreamExt;
 use iceberg::arrow::{DEFAULT_MAP_FIELD_NAME, UTC_TIME_ZONE};
 use iceberg::spec::{
-    ListType, MapType, NestedField, PrimitiveType, Schema, StructType, Type, LIST_FIELD_NAME,
-    MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME,
+    LIST_FIELD_NAME, ListType, MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME, MapType, NestedField,
+    PrimitiveType, Schema, StructType, Type,
 };
 use iceberg::transaction::Transaction;
 use iceberg::writer::base_writer::data_file_writer::DataFileWriterBuilder;
+use iceberg::writer::file_writer::ParquetWriterBuilder;
 use iceberg::writer::file_writer::location_generator::{
     DefaultFileNameGenerator, DefaultLocationGenerator,
 };
-use iceberg::writer::file_writer::ParquetWriterBuilder;
 use iceberg::writer::{IcebergWriter, IcebergWriterBuilder};
 use iceberg::{Catalog, TableCreation};
 use iceberg_catalog_rest::RestCatalog;
diff --git a/crates/integrations/cli/src/main.rs b/crates/integrations/cli/src/main.rs
index d21edb4b85..3d4f7fc817 100644
--- a/crates/integrations/cli/src/main.rs
+++ b/crates/integrations/cli/src/main.rs
@@ -26,7 +26,7 @@ use datafusion::prelude::{SessionConfig, SessionContext};
 use datafusion_cli::exec;
 use datafusion_cli::print_format::PrintFormat;
 use datafusion_cli::print_options::{MaxRows, PrintOptions};
-use iceberg_cli::{IcebergCatalogList, ICEBERG_CLI_VERSION};
+use iceberg_cli::{ICEBERG_CLI_VERSION, IcebergCatalogList};
 
 #[derive(Debug, Parser, PartialEq)]
 #[clap(author, version, about, long_about= None)]
diff --git a/crates/integrations/datafusion/src/table/mod.rs b/crates/integrations/datafusion/src/table/mod.rs
index 00c9e13229..df81688d7e 100644
--- a/crates/integrations/datafusion/src/table/mod.rs
+++ b/crates/integrations/datafusion/src/table/mod.rs
@@ -153,9 +153,9 @@ impl TableProvider for IcebergTableProvider {
 mod tests {
     use datafusion::common::Column;
     use datafusion::prelude::SessionContext;
+    use iceberg::TableIdent;
     use iceberg::io::FileIO;
     use iceberg::table::{StaticTable, Table};
-    use iceberg::TableIdent;
 
     use super::*;
diff --git a/crates/integrations/datafusion/src/table/table_provider_factory.rs b/crates/integrations/datafusion/src/table/table_provider_factory.rs
index 15a3fef688..a6d3146e54 100644
--- a/crates/integrations/datafusion/src/table/table_provider_factory.rs
+++ b/crates/integrations/datafusion/src/table/table_provider_factory.rs
@@ -151,7 +151,10 @@ fn check_cmd(cmd: &CreateExternalTable) -> Result<()> {
         || !column_defaults.is_empty();
 
     if is_invalid {
-        return Err(Error::new(ErrorKind::FeatureUnsupported, "Currently we only support reading existing icebergs tables in external table command. To create new table, please use catalog provider."));
+        return Err(Error::new(
+            ErrorKind::FeatureUnsupported,
+            "Currently we only support reading existing icebergs tables in external table command. To create new table, please use catalog provider.",
+        ));
     }
 
     Ok(())
diff --git a/crates/integrations/datafusion/tests/integration_datafusion_test.rs b/crates/integrations/datafusion/tests/integration_datafusion_test.rs
index 715635e062..c21b72b3ac 100644
--- a/crates/integrations/datafusion/tests/integration_datafusion_test.rs
+++ b/crates/integrations/datafusion/tests/integration_datafusion_test.rs
@@ -125,11 +125,12 @@ async fn test_provider_plan_stream_schema() -> Result<()> {
     assert_eq!(plan.schema(), stream.schema());
     assert_eq!(
         stream.schema().as_ref(),
-        &ArrowSchema::new(vec![Field::new("foo2", DataType::Utf8, false)
-            .with_metadata(HashMap::from([(
+        &ArrowSchema::new(vec![
+            Field::new("foo2", DataType::Utf8, false).with_metadata(HashMap::from([(
                 PARQUET_FIELD_ID_META_KEY.to_string(),
                 "2".to_string(),
-            )]))]),
+            )]))
+        ]),
     );
 
     Ok(())
@@ -178,9 +179,11 @@ async fn test_provider_list_schema_names() -> Result<()> {
     let expected = ["test_provider_list_schema_names"];
     let result = provider.schema_names();
 
-    assert!(expected
-        .iter()
-        .all(|item| result.contains(&item.to_string())));
+    assert!(
+        expected
+            .iter()
+            .all(|item| result.contains(&item.to_string()))
+    );
 
     Ok(())
 }
@@ -244,9 +247,10 @@ async fn test_table_projection() -> Result<()> {
         .downcast_ref::<StringArray>()
         .unwrap();
     assert_eq!(2, s.len());
-    assert!(s
-        .value(1)
-        .contains("IcebergTableScan projection:[foo1,foo3]"));
+    assert!(
+        s.value(1)
+            .contains("IcebergTableScan projection:[foo1,foo3]")
+    );
 
     Ok(())
 }
diff --git a/crates/sqllogictest/src/engine/datafusion.rs b/crates/sqllogictest/src/engine/datafusion.rs
index bddc59d901..f95cfb247d 100644
--- a/crates/sqllogictest/src/engine/datafusion.rs
+++ b/crates/sqllogictest/src/engine/datafusion.rs
@@ -18,7 +18,7 @@
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 
-use anyhow::{anyhow, Context};
+use anyhow::{Context, anyhow};
 use datafusion::catalog::CatalogProvider;
 use datafusion::prelude::{SessionConfig, SessionContext};
 use datafusion_sqllogictest::DataFusion;

From d35a9a6671a1998cdb1a895cd5f979efdc02eb8c Mon Sep 17 00:00:00 2001
From: Xuanwo
Date: Fri, 16 May 2025 20:50:13 +0800
Subject: [PATCH 5/5] Format code

Signed-off-by: Xuanwo
---
 bindings/python/src/datafusion_table_provider.rs | 2 +-
 bindings/python/src/error.rs                     | 2 +-
 bindings/python/src/transform.rs                 | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/bindings/python/src/datafusion_table_provider.rs b/bindings/python/src/datafusion_table_provider.rs
index f8701a2fd6..b5e1bf952e 100644
--- a/bindings/python/src/datafusion_table_provider.rs
+++ b/bindings/python/src/datafusion_table_provider.rs
@@ -20,9 +20,9 @@ use std::ffi::CString;
 use std::sync::Arc;
 
 use datafusion_ffi::table_provider::FFI_TableProvider;
+use iceberg::TableIdent;
 use iceberg::io::FileIO;
 use iceberg::table::StaticTable;
-use iceberg::TableIdent;
 use iceberg_datafusion::table::IcebergTableProvider;
 use pyo3::exceptions::PyRuntimeError;
 use pyo3::prelude::*;
diff --git a/bindings/python/src/error.rs b/bindings/python/src/error.rs
index a2d1424ccc..def2abf1e4 100644
--- a/bindings/python/src/error.rs
+++ b/bindings/python/src/error.rs
@@ -15,8 +15,8 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use pyo3::exceptions::PyValueError;
 use pyo3::PyErr;
+use pyo3::exceptions::PyValueError;
 
 /// Convert an iceberg error to a python error
 pub fn to_py_err(err: iceberg::Error) -> PyErr {
diff --git a/bindings/python/src/transform.rs b/bindings/python/src/transform.rs
index fa05eb095d..24e9f061dd 100644
--- a/bindings/python/src/transform.rs
+++ b/bindings/python/src/transform.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow::array::{make_array, Array, ArrayData};
+use arrow::array::{Array, ArrayData, make_array};
 use arrow::pyarrow::{FromPyArrow, ToPyArrow};
 use iceberg::spec::Transform;
 use iceberg::transform::create_transform_function;