3030
3131import boto3
3232from botocore .config import Config
33- from mypy_boto3_glue .client import GlueClient
34- from mypy_boto3_glue .type_defs import (
35- ColumnTypeDef ,
36- DatabaseInputTypeDef ,
37- DatabaseTypeDef ,
38- StorageDescriptorTypeDef ,
39- TableInputTypeDef ,
40- TableTypeDef ,
41- )
4233
4334from pyiceberg .catalog import (
4435 BOTOCORE_SESSION ,
10192
10293if TYPE_CHECKING :
10394 import pyarrow as pa
95+ from mypy_boto3_glue .client import GlueClient
96+ from mypy_boto3_glue .type_defs import (
97+ ColumnTypeDef ,
98+ DatabaseInputTypeDef ,
99+ DatabaseTypeDef ,
100+ StorageDescriptorTypeDef ,
101+ TableInputTypeDef ,
102+ TableTypeDef ,
103+ )
104104
105105
106106# There is a unique Glue metastore in each AWS account and each AWS region. By default, GlueCatalog chooses the Glue
140140
141141
142142def _construct_parameters (
143- metadata_location : str , glue_table : Optional [TableTypeDef ] = None , prev_metadata_location : Optional [str ] = None
143+ metadata_location : str , glue_table : Optional [" TableTypeDef" ] = None , prev_metadata_location : Optional [str ] = None
144144) -> Properties :
145145 new_parameters = glue_table .get ("Parameters" , {}) if glue_table else {}
146146 new_parameters .update ({TABLE_TYPE : ICEBERG .upper (), METADATA_LOCATION : metadata_location })
@@ -190,15 +190,15 @@ def primitive(self, primitive: PrimitiveType) -> str:
190190 return GLUE_PRIMITIVE_TYPES [primitive_type ]
191191
192192
193- def _to_columns (metadata : TableMetadata ) -> List [ColumnTypeDef ]:
194- results : Dict [str , ColumnTypeDef ] = {}
193+ def _to_columns (metadata : TableMetadata ) -> List [" ColumnTypeDef" ]:
194+ results : Dict [str , " ColumnTypeDef" ] = {}
195195
196196 def _append_to_results (field : NestedField , is_current : bool ) -> None :
197197 if field .name in results :
198198 return
199199
200200 results [field .name ] = cast (
201- ColumnTypeDef ,
201+ " ColumnTypeDef" ,
202202 {
203203 "Name" : field .name ,
204204 "Type" : visit (field .field_type , _IcebergSchemaToGlueType ()),
@@ -230,10 +230,10 @@ def _construct_table_input(
230230 metadata_location : str ,
231231 properties : Properties ,
232232 metadata : TableMetadata ,
233- glue_table : Optional [TableTypeDef ] = None ,
233+ glue_table : Optional [" TableTypeDef" ] = None ,
234234 prev_metadata_location : Optional [str ] = None ,
235- ) -> TableInputTypeDef :
236- table_input : TableInputTypeDef = {
235+ ) -> " TableInputTypeDef" :
236+ table_input : " TableInputTypeDef" = {
237237 "Name" : table_name ,
238238 "TableType" : EXTERNAL_TABLE ,
239239 "Parameters" : _construct_parameters (metadata_location , glue_table , prev_metadata_location ),
@@ -249,10 +249,12 @@ def _construct_table_input(
249249 return table_input
250250
251251
252- def _construct_rename_table_input (to_table_name : str , glue_table : TableTypeDef ) -> TableInputTypeDef :
253- rename_table_input : TableInputTypeDef = {"Name" : to_table_name }
252+ def _construct_rename_table_input (to_table_name : str , glue_table : " TableTypeDef" ) -> " TableInputTypeDef" :
253+ rename_table_input : " TableInputTypeDef" = {"Name" : to_table_name }
254254 # use the same Glue info to create the new table, pointing to the old metadata
255- assert glue_table ["TableType" ]
255+ if not glue_table ["TableType" ]:
256+ raise ValueError ("Glue table type is missing, cannot rename table" )
257+
256258 rename_table_input ["TableType" ] = glue_table ["TableType" ]
257259 if "Owner" in glue_table :
258260 rename_table_input ["Owner" ] = glue_table ["Owner" ]
@@ -264,16 +266,16 @@ def _construct_rename_table_input(to_table_name: str, glue_table: TableTypeDef)
264266 # It turns out the output of StorageDescriptor is not the same as the input type
265267 # because the Column can have a different type, but for now it seems to work, so
266268 # silence the type error.
267- rename_table_input ["StorageDescriptor" ] = cast (StorageDescriptorTypeDef , glue_table ["StorageDescriptor" ])
269+ rename_table_input ["StorageDescriptor" ] = cast (" StorageDescriptorTypeDef" , glue_table ["StorageDescriptor" ])
268270
269271 if "Description" in glue_table :
270272 rename_table_input ["Description" ] = glue_table ["Description" ]
271273
272274 return rename_table_input
273275
274276
275- def _construct_database_input (database_name : str , properties : Properties ) -> DatabaseInputTypeDef :
276- database_input : DatabaseInputTypeDef = {"Name" : database_name }
277+ def _construct_database_input (database_name : str , properties : Properties ) -> " DatabaseInputTypeDef" :
278+ database_input : " DatabaseInputTypeDef" = {"Name" : database_name }
277279 parameters = {}
278280 for k , v in properties .items ():
279281 if k == "Description" :
@@ -286,7 +288,7 @@ def _construct_database_input(database_name: str, properties: Properties) -> Dat
286288 return database_input
287289
288290
289- def _register_glue_catalog_id_with_glue_client (glue : GlueClient , glue_catalog_id : str ) -> None :
291+ def _register_glue_catalog_id_with_glue_client (glue : " GlueClient" , glue_catalog_id : str ) -> None :
290292 """
291293 Register the Glue Catalog ID (AWS Account ID) as a parameter on all Glue client methods.
292294
@@ -303,9 +305,9 @@ def add_glue_catalog_id(params: Dict[str, str], **kwargs: Any) -> None:
303305
304306
305307class GlueCatalog (MetastoreCatalog ):
306- glue : GlueClient
308+ glue : " GlueClient"
307309
308- def __init__ (self , name : str , client : Optional [GlueClient ] = None , ** properties : Any ):
310+ def __init__ (self , name : str , client : Optional [" GlueClient" ] = None , ** properties : Any ):
309311 """Glue Catalog.
310312
311313 You either need to provide a boto3 glue client, or one will be constructed from the properties.
@@ -317,7 +319,7 @@ def __init__(self, name: str, client: Optional[GlueClient] = None, **properties:
317319 """
318320 super ().__init__ (name , ** properties )
319321
320- if client :
322+ if client is not None :
321323 self .glue = client
322324 else :
323325 retry_mode_prop_value = get_first_property_value (properties , GLUE_RETRY_MODE )
@@ -344,12 +346,17 @@ def __init__(self, name: str, client: Optional[GlueClient] = None, **properties:
344346 if glue_catalog_id := properties .get (GLUE_ID ):
345347 _register_glue_catalog_id_with_glue_client (self .glue , glue_catalog_id )
346348
347- def _convert_glue_to_iceberg (self , glue_table : TableTypeDef ) -> Table :
349+ def _convert_glue_to_iceberg (self , glue_table : " TableTypeDef" ) -> Table :
348350 properties : Properties = glue_table ["Parameters" ]
349351
350- assert glue_table ["DatabaseName" ]
351- assert glue_table ["Parameters" ]
352- database_name = glue_table ["DatabaseName" ]
352+ database_name = glue_table .get ("DatabaseName" , None )
353+ if database_name is None :
354+ raise ValueError ("Glue table is missing DatabaseName property" )
355+
356+ parameters = glue_table .get ("Parameters" , None )
357+ if parameters is None :
358+ raise ValueError ("Glue table is missing Parameters property" )
359+
353360 table_name = glue_table ["Name" ]
354361
355362 if TABLE_TYPE not in properties :
@@ -380,15 +387,15 @@ def _convert_glue_to_iceberg(self, glue_table: TableTypeDef) -> Table:
380387 catalog = self ,
381388 )
382389
def _create_glue_table(self, database_name: str, table_name: str, table_input: "TableInputTypeDef") -> None:
    """Create a table through the Glue client, translating Glue errors.

    Args:
        database_name: Glue database that should hold the new table.
        table_name: Name of the table; used here only in the error message.
        table_input: Glue ``TableInput`` payload forwarded to ``create_table``.

    Raises:
        TableAlreadyExistsError: If Glue raises ``AlreadyExistsException``.
        NoSuchNamespaceError: If Glue raises ``EntityNotFoundException``.
    """
    client = self.glue
    try:
        client.create_table(DatabaseName=database_name, TableInput=table_input)
    except client.exceptions.AlreadyExistsException as err:
        raise TableAlreadyExistsError(f"Table {database_name}.{table_name} already exists") from err
    except client.exceptions.EntityNotFoundException as err:
        raise NoSuchNamespaceError(f"Database {database_name} does not exist") from err
390397
391- def _update_glue_table (self , database_name : str , table_name : str , table_input : TableInputTypeDef , version_id : str ) -> None :
398+ def _update_glue_table (self , database_name : str , table_name : str , table_input : " TableInputTypeDef" , version_id : str ) -> None :
392399 try :
393400 self .glue .update_table (
394401 DatabaseName = database_name ,
@@ -403,7 +410,7 @@ def _update_glue_table(self, database_name: str, table_name: str, table_input: T
403410 f"Cannot commit { database_name } .{ table_name } because Glue detected concurrent update to table version { version_id } "
404411 ) from e
405412
406- def _get_glue_table (self , database_name : str , table_name : str ) -> TableTypeDef :
413+ def _get_glue_table (self , database_name : str , table_name : str ) -> " TableTypeDef" :
407414 try :
408415 load_table_response = self .glue .get_table (DatabaseName = database_name , Name = table_name )
409416 return load_table_response ["Table" ]
@@ -496,7 +503,7 @@ def commit_table(
496503 table_identifier = table .name ()
497504 database_name , table_name = self .identifier_to_database_and_table (table_identifier , NoSuchTableError )
498505
499- current_glue_table : Optional [TableTypeDef ]
506+ current_glue_table : Optional [" TableTypeDef" ]
500507 glue_table_version_id : Optional [str ]
501508 current_table : Optional [Table ]
502509 try :
@@ -680,13 +687,19 @@ def drop_namespace(self, namespace: Union[str, Identifier]) -> None:
680687 """
681688 database_name = self .identifier_to_database (namespace , NoSuchNamespaceError )
682689 try :
683- table_list = self .list_tables (namespace = database_name )
684- except NoSuchNamespaceError as e :
690+ table_list_response = self .glue .get_tables (DatabaseName = database_name )
691+ table_list = table_list_response ["TableList" ]
692+ except self .glue .exceptions .EntityNotFoundException as e :
685693 raise NoSuchNamespaceError (f"Database does not exist: { database_name } " ) from e
686694
687695 if len (table_list ) > 0 :
688- raise NamespaceNotEmptyError (f"Database { database_name } is not empty" )
689-
696+ first_table = table_list [0 ]
697+ if self .__is_iceberg_table (first_table ):
698+ raise NamespaceNotEmptyError (f"Cannot drop namespace { database_name } because it still contains Iceberg tables" )
699+ else :
700+ raise NamespaceNotEmptyError (
701+ f"Cannot drop namespace { database_name } because it still contains non-Iceberg tables"
702+ )
690703 self .glue .delete_database (Name = database_name )
691704
692705 def list_tables (self , namespace : Union [str , Identifier ]) -> List [Identifier ]:
@@ -702,7 +715,7 @@ def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]:
702715 NoSuchNamespaceError: If a namespace with the given name does not exist, or the identifier is invalid.
703716 """
704717 database_name = self .identifier_to_database (namespace , NoSuchNamespaceError )
705- table_list : List [TableTypeDef ] = []
718+ table_list : List [" TableTypeDef" ] = []
706719 next_token : Optional [str ] = None
707720 try :
708721 while True :
@@ -730,7 +743,7 @@ def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identi
730743 if namespace :
731744 return []
732745
733- database_list : List [DatabaseTypeDef ] = []
746+ database_list : List [" DatabaseTypeDef" ] = []
734747 next_token : Optional [str ] = None
735748
736749 while True :
@@ -806,5 +819,5 @@ def view_exists(self, identifier: Union[str, Identifier]) -> bool:
806819 raise NotImplementedError
807820
@staticmethod
def __is_iceberg_table(table: "TableTypeDef") -> bool:
    """Return whether a Glue table's parameters mark it as an Iceberg table.

    The value stored under ``TABLE_TYPE`` in the table's ``Parameters`` is
    lower-cased and compared with ``ICEBERG``. A table with no ``Parameters``
    entry, or without that key, yields ``False`` as long as ``ICEBERG`` is
    not the empty string.
    """
    table_parameters = table.get("Parameters", {})
    declared_type = table_parameters.get(TABLE_TYPE, "")
    return declared_type.lower() == ICEBERG
0 commit comments