diff --git a/pyiceberg/catalog/hive.py b/pyiceberg/catalog/hive.py index 5a9387577b..0fd6b9d79e 100644 --- a/pyiceberg/catalog/hive.py +++ b/pyiceberg/catalog/hive.py @@ -211,11 +211,18 @@ def _construct_hive_storage_descriptor( DEFAULT_PROPERTIES = {TableProperties.PARQUET_COMPRESSION: TableProperties.PARQUET_COMPRESSION_DEFAULT} -def _construct_parameters(metadata_location: str, previous_metadata_location: Optional[str] = None) -> Dict[str, Any]: +def _construct_parameters( + metadata_location: str, previous_metadata_location: Optional[str] = None, metadata_properties: Optional[Properties] = None +) -> Dict[str, Any]: properties = {PROP_EXTERNAL: "TRUE", PROP_TABLE_TYPE: "ICEBERG", PROP_METADATA_LOCATION: metadata_location} if previous_metadata_location: properties[PROP_PREVIOUS_METADATA_LOCATION] = previous_metadata_location + if metadata_properties: + for key, value in metadata_properties.items(): + if key not in properties: + properties[key] = str(value) + return properties @@ -360,7 +367,7 @@ def _convert_iceberg_into_hive(self, table: Table) -> HiveTable: property_as_bool(self.properties, HIVE2_COMPATIBLE, HIVE2_COMPATIBLE_DEFAULT), ), tableType=EXTERNAL_TABLE, - parameters=_construct_parameters(table.metadata_location), + parameters=_construct_parameters(metadata_location=table.metadata_location, metadata_properties=table.properties), ) def _create_hive_table(self, open_client: Client, hive_table: HiveTable) -> None: @@ -541,6 +548,7 @@ def commit_table( hive_table.parameters = _construct_parameters( metadata_location=updated_staged_table.metadata_location, previous_metadata_location=current_table.metadata_location, + metadata_properties=updated_staged_table.properties, ) open_client.alter_table_with_environment_context( dbname=database_name, diff --git a/tests/catalog/test_hive.py b/tests/catalog/test_hive.py index 497ff99924..fef0d6acc6 100644 --- a/tests/catalog/test_hive.py +++ b/tests/catalog/test_hive.py @@ -342,7 +342,13 @@ def test_create_table( storedAsSubDirectories=None, ), partitionKeys=None, - parameters={"EXTERNAL": "TRUE", "table_type": "ICEBERG", "metadata_location": metadata_location}, + parameters={ + "EXTERNAL": "TRUE", + "table_type": "ICEBERG", + "metadata_location": metadata_location, + "write.parquet.compression-codec": "zstd", + "owner": "javaberg", + }, viewOriginalText=None, viewExpandedText=None, tableType="EXTERNAL_TABLE", @@ -517,7 +523,13 @@ def test_create_table_with_given_location_removes_trailing_slash( storedAsSubDirectories=None, ), partitionKeys=None, - parameters={"EXTERNAL": "TRUE", "table_type": "ICEBERG", "metadata_location": metadata_location}, + parameters={ + "EXTERNAL": "TRUE", + "table_type": "ICEBERG", + "metadata_location": metadata_location, + "write.parquet.compression-codec": "zstd", + "owner": "javaberg", + }, viewOriginalText=None, viewExpandedText=None, tableType="EXTERNAL_TABLE", diff --git a/tests/integration/test_reads.py b/tests/integration/test_reads.py index 5ac5162f8e..b417a43616 100644 --- a/tests/integration/test_reads.py +++ b/tests/integration/test_reads.py @@ -112,6 +112,27 @@ def test_table_properties(catalog: Catalog) -> None: assert "None type is not a supported value in properties: property_name" in str(exc_info.value) +@pytest.mark.integration +@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive")]) +def test_hive_properties(catalog: Catalog) -> None: + table = create_table(catalog) + table.transaction().set_properties({"abc": "def", "p1": "123"}).commit_transaction() + + hive_client: _HiveClient = _HiveClient(catalog.properties["uri"]) + + with hive_client as open_client: + hive_table = open_client.get_table(*TABLE_NAME) + assert hive_table.parameters.get("abc") == "def" + assert hive_table.parameters.get("p1") == "123" + assert hive_table.parameters.get("not_exist_parameter") is None + + table.transaction().remove_properties("abc").commit_transaction() + + with hive_client as open_client: + hive_table = open_client.get_table(*TABLE_NAME) + assert hive_table.parameters.get("abc") is None + + @pytest.mark.integration @pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")]) def test_table_properties_dict(catalog: Catalog) -> None: