From 1c7e550c359e83e006913fdf229f687ac7680055 Mon Sep 17 00:00:00 2001 From: Helmi Aziz Date: Wed, 16 Apr 2025 09:38:04 +0700 Subject: [PATCH 1/4] Fixed force_virtual_addressing problem --- pyiceberg/io/pyarrow.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 99223f1253..0c216cc712 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -408,7 +408,7 @@ def _initialize_oss_fs(self) -> FileSystem: "access_key": get_first_property_value(self.properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID), "secret_key": get_first_property_value(self.properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), "session_token": get_first_property_value(self.properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN), - "region": get_first_property_value(self.properties, S3_REGION, AWS_REGION), + "force_virtual_addressing": property_as_bool(self.properties, S3_FORCE_VIRTUAL_ADDRESSING, True) } if proxy_uri := self.properties.get(S3_PROXY_URI): @@ -426,9 +426,6 @@ def _initialize_oss_fs(self) -> FileSystem: if session_name := get_first_property_value(self.properties, S3_ROLE_SESSION_NAME, AWS_ROLE_SESSION_NAME): client_kwargs["session_name"] = session_name - if force_virtual_addressing := self.properties.get(S3_FORCE_VIRTUAL_ADDRESSING): - client_kwargs["force_virtual_addressing"] = property_as_bool(self.properties, force_virtual_addressing, False) - return S3FileSystem(**client_kwargs) def _initialize_s3_fs(self, netloc: Optional[str]) -> FileSystem: @@ -472,8 +469,8 @@ def _initialize_s3_fs(self, netloc: Optional[str]) -> FileSystem: if session_name := get_first_property_value(self.properties, S3_ROLE_SESSION_NAME, AWS_ROLE_SESSION_NAME): client_kwargs["session_name"] = session_name - if force_virtual_addressing := self.properties.get(S3_FORCE_VIRTUAL_ADDRESSING): - client_kwargs["force_virtual_addressing"] = property_as_bool(self.properties, force_virtual_addressing, False) + if self.properties.get(S3_FORCE_VIRTUAL_ADDRESSING) != None: + client_kwargs["force_virtual_addressing"] = property_as_bool(self.properties, S3_FORCE_VIRTUAL_ADDRESSING, False) return S3FileSystem(**client_kwargs) From f878fb8a4d419067c80d56eac2db9d2a96e86e88 Mon Sep 17 00:00:00 2001 From: Helmi Aziz Date: Wed, 16 Apr 2025 09:50:29 +0700 Subject: [PATCH 2/4] Linter fix --- pyiceberg/io/pyarrow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 0c216cc712..866065303e 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -408,7 +408,7 @@ def _initialize_oss_fs(self) -> FileSystem: "access_key": get_first_property_value(self.properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID), "secret_key": get_first_property_value(self.properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), "session_token": get_first_property_value(self.properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN), - "force_virtual_addressing": property_as_bool(self.properties, S3_FORCE_VIRTUAL_ADDRESSING, True) + "force_virtual_addressing": property_as_bool(self.properties, S3_FORCE_VIRTUAL_ADDRESSING, True), } if proxy_uri := self.properties.get(S3_PROXY_URI): @@ -469,7 +469,7 @@ def _initialize_s3_fs(self, netloc: Optional[str]) -> FileSystem: if session_name := get_first_property_value(self.properties, S3_ROLE_SESSION_NAME, AWS_ROLE_SESSION_NAME): client_kwargs["session_name"] = session_name - if self.properties.get(S3_FORCE_VIRTUAL_ADDRESSING) != None: + if self.properties.get(S3_FORCE_VIRTUAL_ADDRESSING) is not None: client_kwargs["force_virtual_addressing"] = property_as_bool(self.properties, S3_FORCE_VIRTUAL_ADDRESSING, False) return S3FileSystem(**client_kwargs) From 1d0fe0796221da64ff17b08c94609dfcd4b2d931 Mon Sep 17 00:00:00 2001 From: Helmi Aziz Date: Thu, 17 Apr 2025 09:27:38 +0700 Subject: [PATCH 3/4] Added region on _oss_fs for testing sake --- pyiceberg/io/pyarrow.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index 866065303e..88ef70ee9f 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -408,6 +408,7 @@ def _initialize_oss_fs(self) -> FileSystem: "access_key": get_first_property_value(self.properties, S3_ACCESS_KEY_ID, AWS_ACCESS_KEY_ID), "secret_key": get_first_property_value(self.properties, S3_SECRET_ACCESS_KEY, AWS_SECRET_ACCESS_KEY), "session_token": get_first_property_value(self.properties, S3_SESSION_TOKEN, AWS_SESSION_TOKEN), + "region": get_first_property_value(self.properties, S3_REGION, AWS_REGION), "force_virtual_addressing": property_as_bool(self.properties, S3_FORCE_VIRTUAL_ADDRESSING, True), } From 4497836c3143b9e683eb00b3a18c8ea7adab164b Mon Sep 17 00:00:00 2001 From: Helmi Aziz Date: Fri, 18 Apr 2025 00:19:02 +0700 Subject: [PATCH 4/4] Slight change on the documentation to inform that force-virtual-addressing is set to True by default for OSS authentication --- mkdocs/docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs/docs/configuration.md b/mkdocs/docs/configuration.md index 5758628f48..1e364a11fe 100644 --- a/mkdocs/docs/configuration.md +++ b/mkdocs/docs/configuration.md @@ -189,7 +189,7 @@ PyIceberg uses [S3FileSystem](https://arrow.apache.org/docs/python/generated/pya | s3.access-key-id | admin | Configure the static access key id used to access the FileIO. | | s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. | | s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. | -| s3.force-virtual-addressing | True | Whether to use virtual addressing of buckets. This must be set to True as OSS can only be accessed with virtual hosted style address. | +| s3.force-virtual-addressing | True | Whether to use virtual addressing of buckets. This is set to `True` by default as OSS can only be accessed with virtual hosted style address. |