From 010c44688f3b8fa392111496aa5e8b3bab6d7177 Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Wed, 4 Mar 2026 18:33:42 +0800 Subject: [PATCH 1/6] Fix SigV4 auth to use base64-encoded content SHA256 and custom canonical request --- pyiceberg/catalog/rest/__init__.py | 32 ++++++++++--- tests/catalog/test_rest.py | 72 ++++++++++++++++++++++++++++-- 2 files changed, 94 insertions(+), 10 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 7fa81312d1..945d65ec1d 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -754,6 +754,8 @@ def _split_identifier_for_json(self, identifier: str | Identifier) -> dict[str, return {"namespace": identifier_tuple[:-1], "name": identifier_tuple[-1]} def _init_sigv4(self, session: Session) -> None: + import base64 + import hashlib from urllib import parse import boto3 @@ -762,6 +764,12 @@ def _init_sigv4(self, session: Session) -> None: from requests import PreparedRequest from requests.adapters import HTTPAdapter + class _IcebergSigV4Auth(SigV4Auth): + def canonical_request(self, request: Any) -> str: + cr = super().canonical_request(request) + # Replace the last line (body_checksum) with hex-encoded payload hash. + return cr.rsplit("\n", 1)[0] + "\n" + self.payload(request) + class SigV4Adapter(HTTPAdapter): def __init__(self, **properties: str): self._properties = properties @@ -788,17 +796,27 @@ def add_headers(self, request: PreparedRequest, **kwargs: Any) -> None: # pylin # remove the connection header as it will be updated after signing if "connection" in request.headers: del request.headers["connection"] - # For empty bodies, explicitly set the content hash header to the SHA256 of an empty string - if not request.body: - request.headers["x-amz-content-sha256"] = EMPTY_BODY_SHA256 + + # Compute the x-amz-content-sha256 header to match Iceberg Java SDK: + # - empty body → hex (EMPTY_BODY_SHA256) + # - non-empty body → base64 + if request.body: + body_bytes = request.body.encode("utf-8") if isinstance(request.body, str) else request.body + content_sha256_header = base64.b64encode(hashlib.sha256(body_bytes).digest()).decode() + else: + content_sha256_header = EMPTY_BODY_SHA256 + + signing_headers = dict(request.headers) + signing_headers["x-amz-content-sha256"] = content_sha256_header aws_request = AWSRequest( - method=request.method, url=url, params=params, data=request.body, headers=dict(request.headers) + method=request.method, url=url, params=params, data=request.body, headers=signing_headers ) - SigV4Auth(credentials, service, region).add_auth(aws_request) - original_header = request.headers - signed_headers = aws_request.headers + _IcebergSigV4Auth(credentials, service, region).add_auth(aws_request) + + original_header = dict(request.headers) + signed_headers = dict(aws_request.headers) relocated_headers = {} # relocate headers if there is a conflict with signed headers diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 2adfe9f06e..63c4661b9b 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -514,9 +514,10 @@ def test_sigv4_sign_request_without_body(rest_mock: Mocker) -> None: assert isinstance(adapter, HTTPAdapter) adapter.add_headers(prepared) - assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256") + assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=") assert prepared.headers["Original-Authorization"] == f"Bearer {existing_token}" assert prepared.headers["x-amz-content-sha256"] == EMPTY_BODY_SHA256 + assert "SignedHeaders=" in prepared.headers["Authorization"] def test_sigv4_sign_request_with_body(rest_mock: Mocker) -> None: @@ -545,9 +546,74 @@ def test_sigv4_sign_request_with_body(rest_mock: Mocker) -> None: assert isinstance(adapter, HTTPAdapter) adapter.add_headers(prepared) - assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256") + assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=") + assert "SignedHeaders=" in prepared.headers["Authorization"] + # Conflicting Authorization header is relocated assert prepared.headers["Original-Authorization"] == f"Bearer {existing_token}" - assert prepared.headers.get("x-amz-content-sha256") != EMPTY_BODY_SHA256 + assert prepared.headers["x-amz-content-sha256"] == "nhKdVGKGU3IMGjYlod9xKUVc7/H5K6zTWj60yJOM80k=" + + +def test_sigv4_content_sha256_with_bytes_body(rest_mock: Mocker) -> None: + existing_token = "existing_token" + + catalog = RestCatalog( + "rest", + **{ + "uri": TEST_URI, + "token": existing_token, + "rest.sigv4-enabled": "true", + "rest.signing-region": "us-west-2", + "client.access-key-id": "id", + "client.secret-access-key": "secret", + }, + ) + + body_content = b'{"namespace": "test_namespace"}' + prepared = catalog._session.prepare_request( + Request( + "POST", + f"{TEST_URI}v1/namespaces", + data=body_content, + ) + ) + adapter = catalog._session.adapters[catalog.uri] + assert isinstance(adapter, HTTPAdapter) + adapter.add_headers(prepared) + + assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=") + assert "SignedHeaders=" in prepared.headers["Authorization"] + assert prepared.headers["x-amz-content-sha256"] == "sD20bEQP+WnwKPT7jxn7PIACGciAeWjQPlzFCK5Fifo=" + + +def test_sigv4_conflicting_sigv4_headers(rest_mock: Mocker) -> None: + catalog = RestCatalog( + "rest", + **{ + "uri": TEST_URI, + "rest.sigv4-enabled": "true", + "rest.signing-region": "us-west-2", + "client.access-key-id": "id", + "client.secret-access-key": "secret", + }, + ) + + prepared = catalog._session.prepare_request(Request("GET", f"{TEST_URI}v1/config")) + adapter = catalog._session.adapters[catalog.uri] + assert isinstance(adapter, HTTPAdapter) + + # Inject conflicting SigV4 headers before signing + prepared.headers["x-amz-content-sha256"] = "fake" + prepared.headers["X-Amz-Date"] = "fake" + + adapter.add_headers(prepared) + + # Matching Java SDK: conflicting headers are relocated with "Original-" prefix + assert prepared.headers.get("Original-x-amz-content-sha256") == "fake" + assert prepared.headers.get("Original-X-Amz-Date") == "fake" + # SigV4 headers are set correctly after signing + assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=") + assert prepared.headers["x-amz-content-sha256"] == EMPTY_BODY_SHA256 + assert "X-Amz-Date" in prepared.headers def test_sigv4_adapter_default_retry_config(rest_mock: Mocker) -> None: From 0fc62de57b46d1646d02d6748598f36e2a11cd94 Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Sat, 21 Mar 2026 16:43:35 +0800 Subject: [PATCH 2/6] Refactor _IcebergSigV4Auth to reuse canonical_request logic instead of rsplit --- pyiceberg/catalog/rest/__init__.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 945d65ec1d..5257a6c373 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -766,9 +766,20 @@ def _init_sigv4(self, session: Session) -> None: class _IcebergSigV4Auth(SigV4Auth): def canonical_request(self, request: Any) -> str: - cr = super().canonical_request(request) - # Replace the last line (body_checksum) with hex-encoded payload hash. - return cr.rsplit("\n", 1)[0] + "\n" + self.payload(request) + # Reuses the logic from botocore's SigV4Auth.canonical_request + # (https://github.com/boto/botocore/blob/develop/botocore/auth.py) + # but always uses self.payload(request) for the body checksum. + cr = [request.method.upper()] + path = self._normalize_url_path(parse.urlsplit(request.url).path) + cr.append(path) + cr.append(self.canonical_query_string(request)) + headers_to_sign = self.headers_to_sign(request) + cr.append(self.canonical_headers(headers_to_sign) + "\n") + cr.append(self.signed_headers(headers_to_sign)) + # Always use hex-encoded payload hash per SigV4 spec, + # regardless of the x-amz-content-sha256 header value (which may be base64). + cr.append(self.payload(request)) + return "\n".join(cr) class SigV4Adapter(HTTPAdapter): def __init__(self, **properties: str): From c42198e677cfefb15988dc552f53f14a83d19e9c Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Sat, 21 Mar 2026 17:19:09 +0800 Subject: [PATCH 3/6] update test --- tests/catalog/test_rest.py | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 63c4661b9b..3ee3feb5d7 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -18,6 +18,7 @@ from __future__ import annotations import base64 +import hashlib import os from collections.abc import Callable from typing import Any, cast @@ -514,10 +515,16 @@ def test_sigv4_sign_request_without_body(rest_mock: Mocker) -> None: assert isinstance(adapter, HTTPAdapter) adapter.add_headers(prepared) - assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=") + auth_header = prepared.headers["Authorization"] + assert auth_header.startswith("AWS4-HMAC-SHA256 Credential=") assert prepared.headers["Original-Authorization"] == f"Bearer {existing_token}" assert prepared.headers["x-amz-content-sha256"] == EMPTY_BODY_SHA256 - assert "SignedHeaders=" in prepared.headers["Authorization"] + # Verify the signature format: Credential, SignedHeaders, Signature + assert "Credential=" in auth_header + assert "SignedHeaders=" in auth_header + assert "Signature=" in auth_header + # x-amz-content-sha256 should be in signed headers + assert "x-amz-content-sha256" in auth_header def test_sigv4_sign_request_with_body(rest_mock: Mocker) -> None: @@ -546,11 +553,19 @@ def test_sigv4_sign_request_with_body(rest_mock: Mocker) -> None: assert isinstance(adapter, HTTPAdapter) adapter.add_headers(prepared) - assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=") - assert "SignedHeaders=" in prepared.headers["Authorization"] + auth_header = prepared.headers["Authorization"] + assert auth_header.startswith("AWS4-HMAC-SHA256 Credential=") + assert "SignedHeaders=" in auth_header # Conflicting Authorization header is relocated assert prepared.headers["Original-Authorization"] == f"Bearer {existing_token}" - assert prepared.headers["x-amz-content-sha256"] == "nhKdVGKGU3IMGjYlod9xKUVc7/H5K6zTWj60yJOM80k=" + # Non-empty body should have base64-encoded SHA256 + content_sha256 = prepared.headers["x-amz-content-sha256"] + assert content_sha256 == "nhKdVGKGU3IMGjYlod9xKUVc7/H5K6zTWj60yJOM80k=" + # Verify it's valid base64 and matches the body + decoded = base64.b64decode(content_sha256) + assert len(decoded) == 32 # SHA256 produces 32 bytes + # x-amz-content-sha256 should be in signed headers + assert "x-amz-content-sha256" in auth_header def test_sigv4_content_sha256_with_bytes_body(rest_mock: Mocker) -> None: @@ -582,7 +597,12 @@ def test_sigv4_content_sha256_with_bytes_body(rest_mock: Mocker) -> None: assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=") assert "SignedHeaders=" in prepared.headers["Authorization"] - assert prepared.headers["x-amz-content-sha256"] == "sD20bEQP+WnwKPT7jxn7PIACGciAeWjQPlzFCK5Fifo=" + content_sha256 = prepared.headers["x-amz-content-sha256"] + assert content_sha256 == "sD20bEQP+WnwKPT7jxn7PIACGciAeWjQPlzFCK5Fifo=" + # Verify it's valid base64 and matches the body + decoded = base64.b64decode(content_sha256) + assert len(decoded) == 32 # SHA256 produces 32 bytes + assert decoded == hashlib.sha256(body_content).digest() def test_sigv4_conflicting_sigv4_headers(rest_mock: Mocker) -> None: From c90823f8b92e80616c9caab1ca5e7bc352741bfd Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Wed, 8 Apr 2026 10:54:23 +0800 Subject: [PATCH 4/6] Improve SigV4 tests and add botocore version reference --- pyiceberg/catalog/rest/__init__.py | 2 + tests/catalog/test_rest.py | 65 +++++++++++++++++++++++++----- 2 files changed, 58 insertions(+), 9 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index 5257a6c373..cc036a33b7 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -769,6 +769,8 @@ def canonical_request(self, request: Any) -> str: # Reuses the logic from botocore's SigV4Auth.canonical_request # (https://github.com/boto/botocore/blob/develop/botocore/auth.py) # but always uses self.payload(request) for the body checksum. + # Validated against botocore <= 1.42.x + # (https://github.com/boto/botocore/blob/1.42.85/botocore/auth.py#L622-L637) cr = [request.method.upper()] path = self._normalize_url_path(parse.urlsplit(request.url).path) cr.append(path) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 3ee3feb5d7..8690cadd03 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -560,10 +560,9 @@ def test_sigv4_sign_request_with_body(rest_mock: Mocker) -> None: assert prepared.headers["Original-Authorization"] == f"Bearer {existing_token}" # Non-empty body should have base64-encoded SHA256 content_sha256 = prepared.headers["x-amz-content-sha256"] - assert content_sha256 == "nhKdVGKGU3IMGjYlod9xKUVc7/H5K6zTWj60yJOM80k=" - # Verify it's valid base64 and matches the body - decoded = base64.b64decode(content_sha256) - assert len(decoded) == 32 # SHA256 produces 32 bytes + body_bytes = prepared.body.encode("utf-8") if isinstance(prepared.body, str) else prepared.body + expected_sha256 = base64.b64encode(hashlib.sha256(body_bytes).digest()).decode() + assert content_sha256 == expected_sha256 # x-amz-content-sha256 should be in signed headers assert "x-amz-content-sha256" in auth_header @@ -598,11 +597,8 @@ def test_sigv4_content_sha256_with_bytes_body(rest_mock: Mocker) -> None: assert prepared.headers["Authorization"].startswith("AWS4-HMAC-SHA256 Credential=") assert "SignedHeaders=" in prepared.headers["Authorization"] content_sha256 = prepared.headers["x-amz-content-sha256"] - assert content_sha256 == "sD20bEQP+WnwKPT7jxn7PIACGciAeWjQPlzFCK5Fifo=" - # Verify it's valid base64 and matches the body - decoded = base64.b64decode(content_sha256) - assert len(decoded) == 32 # SHA256 produces 32 bytes - assert decoded == hashlib.sha256(body_content).digest() + expected_sha256 = base64.b64encode(hashlib.sha256(body_content).digest()).decode() + assert content_sha256 == expected_sha256 def test_sigv4_conflicting_sigv4_headers(rest_mock: Mocker) -> None: @@ -636,6 +632,57 @@ def test_sigv4_conflicting_sigv4_headers(rest_mock: Mocker) -> None: assert "X-Amz-Date" in prepared.headers +def test_sigv4_canonical_request_uses_hex_payload(rest_mock: Mocker) -> None: + """Verify that the canonical request uses hex-encoded payload hash, not the base64 header value.""" + from unittest.mock import patch + + from botocore.auth import SigV4Auth + + catalog = RestCatalog( + "rest", + **{ + "uri": TEST_URI, + "token": "token", + "rest.sigv4-enabled": "true", + "rest.signing-region": "us-west-2", + "client.access-key-id": "id", + "client.secret-access-key": "secret", + }, + ) + + body_content = b'{"namespace": "test"}' + prepared = catalog._session.prepare_request( + Request( + "POST", + f"{TEST_URI}v1/namespaces", + data=body_content, + ) + ) + adapter = catalog._session.adapters[catalog.uri] + assert isinstance(adapter, HTTPAdapter) + + # Capture the canonical request string during signing + captured_canonical = [] + original_add_auth = SigV4Auth.add_auth + + def capturing_add_auth(self: Any, request: Any) -> None: + captured_canonical.append(self.canonical_request(request)) + original_add_auth(self, request) + + with patch.object(SigV4Auth, "add_auth", capturing_add_auth): + adapter.add_headers(prepared) + + assert len(captured_canonical) == 1 + canonical_lines = captured_canonical[0].split("\n") + # Last line of canonical request is the payload hash + payload_hash = canonical_lines[-1] + # Must be hex-encoded (64 hex chars), not base64 + assert len(payload_hash) == 64 + assert payload_hash == hashlib.sha256(body_content).hexdigest() + # Meanwhile the header is base64-encoded + assert prepared.headers["x-amz-content-sha256"] == base64.b64encode(hashlib.sha256(body_content).digest()).decode() + + def test_sigv4_adapter_default_retry_config(rest_mock: Mocker) -> None: catalog = RestCatalog( "rest", From 64c1042c3cb5428bba4fe0d4db2423ab2df3bcec Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Wed, 8 Apr 2026 15:35:35 +0800 Subject: [PATCH 5/6] Fix mypy error: assert prepared.body is not None before hashing --- tests/catalog/test_rest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 8690cadd03..9a58c3e296 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -560,6 +560,7 @@ def test_sigv4_sign_request_with_body(rest_mock: Mocker) -> None: assert prepared.headers["Original-Authorization"] == f"Bearer {existing_token}" # Non-empty body should have base64-encoded SHA256 content_sha256 = prepared.headers["x-amz-content-sha256"] + assert prepared.body is not None body_bytes = prepared.body.encode("utf-8") if isinstance(prepared.body, str) else prepared.body expected_sha256 = base64.b64encode(hashlib.sha256(body_bytes).digest()).decode() assert content_sha256 == expected_sha256 From 9a7f80374a54b41a8c94a0a164646acceed03d24 Mon Sep 17 00:00:00 2001 From: Li Jiajia Date: Sun, 17 May 2026 15:50:27 +0800 Subject: [PATCH 6/6] add Java cross-language reference vector, harden body type handling --- pyiceberg/catalog/rest/__init__.py | 30 ++++++++++------- pyproject.toml | 2 +- tests/catalog/test_rest.py | 53 ++++++++++++++++++++++++++++++ uv.lock | 2 ++ 4 files changed, 74 insertions(+), 13 deletions(-) diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py index cc036a33b7..fc5965c52b 100644 --- a/pyiceberg/catalog/rest/__init__.py +++ b/pyiceberg/catalog/rest/__init__.py @@ -765,12 +765,11 @@ def _init_sigv4(self, session: Session) -> None: from requests.adapters import HTTPAdapter class _IcebergSigV4Auth(SigV4Auth): - def canonical_request(self, request: Any) -> str: - # Reuses the logic from botocore's SigV4Auth.canonical_request - # (https://github.com/boto/botocore/blob/develop/botocore/auth.py) - # but always uses self.payload(request) for the body checksum. - # Validated against botocore <= 1.42.x - # (https://github.com/boto/botocore/blob/1.42.85/botocore/auth.py#L622-L637) + def canonical_request(self, request: AWSRequest) -> str: + # Override forces hex payload hash in the canonical request even when + # x-amz-content-sha256 header is base64 (see body-hash block below). + # Mirrors botocore <=1.42.x SigV4Auth.canonical_request layout: + # https://github.com/boto/botocore/blob/1.42.85/botocore/auth.py#L622-L637 cr = [request.method.upper()] path = self._normalize_url_path(parse.urlsplit(request.url).path) cr.append(path) @@ -778,8 +777,6 @@ def canonical_request(self, request: Any) -> str: headers_to_sign = self.headers_to_sign(request) cr.append(self.canonical_headers(headers_to_sign) + "\n") cr.append(self.signed_headers(headers_to_sign)) - # Always use hex-encoded payload hash per SigV4 spec, - # regardless of the x-amz-content-sha256 header value (which may be base64). cr.append(self.payload(request)) return "\n".join(cr) @@ -810,11 +807,20 @@ def add_headers(self, request: PreparedRequest, **kwargs: Any) -> None: # pylin if "connection" in request.headers: del request.headers["connection"] - # Compute the x-amz-content-sha256 header to match Iceberg Java SDK: - # - empty body → hex (EMPTY_BODY_SHA256) - # - non-empty body → base64 + # Match Iceberg Java's AWS SDK v2 flexible-checksum signing: + # x-amz-content-sha256 header is base64 for non-empty bodies, hex for empty. + # The SigV4 canonical request still uses hex (enforced in _IcebergSigV4Auth above). + # Ref: https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/RESTSigV4AuthSession.java if request.body: - body_bytes = request.body.encode("utf-8") if isinstance(request.body, str) else request.body + if isinstance(request.body, str): + body_bytes = request.body.encode("utf-8") + elif isinstance(request.body, (bytes, bytearray)): + body_bytes = request.body + else: + raise TypeError( + f"Unsupported request body type for SigV4 signing: " + f"{type(request.body).__name__}; expected str or bytes." + ) content_sha256_header = base64.b64encode(hashlib.sha256(body_bytes).digest()).decode() else: content_sha256_header = EMPTY_BODY_SHA256 diff --git a/pyproject.toml b/pyproject.toml index 96118f8451..4293e057a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -92,7 +92,7 @@ sql-postgres = [ ] sql-sqlite = ["sqlalchemy>=2.0.18,<3"] gcsfs = ["gcsfs>=2023.1.0"] -rest-sigv4 = ["boto3>=1.24.59"] +rest-sigv4 = ["boto3>=1.24.59", "botocore<2"] hf = ["huggingface-hub>=0.24.0"] pyiceberg-core = ["pyiceberg-core>=0.5.1,<0.10.0"] datafusion = ["datafusion>=52,<53"] diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py index 9a58c3e296..0e5404029a 100644 --- a/tests/catalog/test_rest.py +++ b/tests/catalog/test_rest.py @@ -684,6 +684,59 @@ def capturing_add_auth(self: Any, request: Any) -> None: assert prepared.headers["x-amz-content-sha256"] == base64.b64encode(hashlib.sha256(body_content).digest()).decode() +def test_sigv4_content_sha256_matches_iceberg_java_reference(rest_mock: Mocker) -> None: + """Pin byte-for-byte equivalence with Iceberg Java TestRESTSigV4AuthSession (L121, L177).""" + java_reference_body = b'{"namespace":["ns"],"properties":{}}' + java_reference_base64 = "yc5oAKPWjHY4sW8XQq0l/3aNrrXJKBycVFNnDEGMfww=" + java_reference_empty_hex = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + + catalog = RestCatalog( + "rest", + **{ + "uri": TEST_URI, + "rest.sigv4-enabled": "true", + "rest.signing-region": "us-east-1", + "client.access-key-id": "id", + "client.secret-access-key": "secret", + }, + ) + adapter = catalog._session.adapters[catalog.uri] + assert isinstance(adapter, HTTPAdapter) + + # Non-empty body: must match Java's base64 reference value exactly + prepared_with_body = catalog._session.prepare_request(Request("POST", f"{TEST_URI}v1/namespaces", data=java_reference_body)) + adapter.add_headers(prepared_with_body) + assert prepared_with_body.headers["x-amz-content-sha256"] == java_reference_base64 + + # Empty body: must match Java's hex reference value exactly + prepared_empty = catalog._session.prepare_request(Request("GET", f"{TEST_URI}v1/config")) + adapter.add_headers(prepared_empty) + assert prepared_empty.headers["x-amz-content-sha256"] == java_reference_empty_hex + + +def test_sigv4_unsupported_body_type_raises(rest_mock: Mocker) -> None: + """Unsupported body types (e.g. file-like) raise a clear error rather than crashing in hashlib.""" + catalog = RestCatalog( + "rest", + **{ + "uri": TEST_URI, + "rest.sigv4-enabled": "true", + "rest.signing-region": "us-east-1", + "client.access-key-id": "id", + "client.secret-access-key": "secret", + }, + ) + adapter = catalog._session.adapters[catalog.uri] + assert isinstance(adapter, HTTPAdapter) + + prepared = catalog._session.prepare_request(Request("POST", f"{TEST_URI}v1/namespaces")) + # Inject an unsupported body type (a list — not str/bytes) + prepared.body = ["not", "a", "valid", "body"] # type: ignore[assignment] + + with pytest.raises(TypeError, match="Unsupported request body type for SigV4 signing"): + adapter.add_headers(prepared) + + def test_sigv4_adapter_default_retry_config(rest_mock: Mocker) -> None: catalog = RestCatalog( "rest", diff --git a/uv.lock b/uv.lock index abf58a41a5..fcccbc5e9a 100644 --- a/uv.lock +++ b/uv.lock @@ -4688,6 +4688,7 @@ ray = [ ] rest-sigv4 = [ { name = "boto3" }, + { name = "botocore" }, ] s3fs = [ { name = "s3fs" }, @@ -4754,6 +4755,7 @@ requires-dist = [ { name = "boto3", marker = "extra == 'dynamodb'", specifier = ">=1.24.59" }, { name = "boto3", marker = "extra == 'glue'", specifier = ">=1.24.59" }, { name = "boto3", marker = "extra == 'rest-sigv4'", specifier = ">=1.24.59" }, + { name = "botocore", marker = "extra == 'rest-sigv4'", specifier = "<2" }, { name = "cachetools", specifier = ">=5.5,<8.0" }, { name = "click", specifier = ">=7.1.1,<9.0.0" }, { name = "daft", marker = "extra == 'daft'", specifier = ">=0.7.10" },