@@ -335,6 +335,175 @@ def assign_fresh_partition_spec_ids(spec: PartitionSpec, old_schema: Schema, fre
335335 return PartitionSpec (* partition_fields , spec_id = INITIAL_PARTITION_SPEC_ID )
336336
337337
def assign_fresh_partition_spec_ids_for_replace(
    spec: PartitionSpec,
    old_schema: Schema,
    fresh_schema: Schema,
    existing_specs: list[PartitionSpec],
    last_partition_id: int | None,
    format_version: int = 2,
    current_spec: PartitionSpec | None = None,
) -> tuple[PartitionSpec, int]:
    """Assign partition field IDs for a replace operation, reusing IDs from existing specs.

    - For v2+, reuse partition field IDs by `(source_id, transform)` across all existing specs.
      New fields get IDs starting after the highest partition field ID already in use.
    - For v1, the current spec's fields must be preserved (v1 specs are append-only). Fields
      absent from the new spec are carried forward with a `VoidTransform`. Matching new fields
      reuse the existing partition field ID; remaining new fields are appended with fresh IDs.

    Fresh IDs never start below the highest field ID found in `existing_specs` (and, for v1,
    in `current_spec`), even when `last_partition_id` is `None` or lower than that value, so
    a newly assigned ID cannot duplicate one that is already in use.

    Args:
        spec: The new partition spec to assign IDs to. Its `source_id`s reference `old_schema`.
        old_schema: The schema that the new spec's `source_id`s reference.
        fresh_schema: The schema with freshly assigned field IDs.
        existing_specs: All partition specs from the existing table metadata.
        last_partition_id: The current table's `last_partition_id`, or `None` if unknown.
        format_version: Table format version. Required to be set to 1 for v1 carry-forward.
        current_spec: The current default partition spec. Required when `format_version <= 1`.

    Returns:
        A tuple of `(fresh_spec, new_last_partition_id)`.

    Raises:
        ValueError: If `current_spec` is missing for a v1 table, or if a partition field's
            source column cannot be resolved in `old_schema` or `fresh_schema`.
    """
    declared_last_id = last_partition_id if last_partition_id is not None else PARTITION_FIELD_ID_START - 1
    # Guard against a missing or stale `last_partition_id`: never hand out an ID that is
    # less than or equal to one already present in the table's specs.
    effective_last_partition_id = max(
        declared_last_id,
        max(
            (existing_field.field_id for existing_spec in existing_specs for existing_field in existing_spec.fields),
            default=declared_last_id,
        ),
    )

    if format_version <= 1:
        if current_spec is None:
            raise ValueError("current_spec is required for v1 replace_table")
        # `current_spec` is normally one of `existing_specs`; include it explicitly in case
        # the caller passed a spec that is not in that list.
        effective_last_partition_id = max(
            [effective_last_partition_id, *(cur_field.field_id for cur_field in current_spec.fields)]
        )
        return _assign_fresh_partition_spec_ids_for_replace_v1(
            spec, old_schema, fresh_schema, current_spec, effective_last_partition_id
        )

    # v2+: reuse field IDs by (source_id, transform) across all specs. When the same
    # (source_id, transform) appears in multiple specs, prefer the highest field_id.
    # NOTE(review): lookups below use fresh-schema field IDs while these keys use the source
    # IDs stored in the existing specs; this presumes the fresh schema keeps the IDs of
    # columns that survive the replace - confirm against the caller.
    transform_to_field_id: dict[tuple[int, str], int] = {}
    for existing_spec in existing_specs:
        for existing_field in existing_spec.fields:
            key = (existing_field.source_id, str(existing_field.transform))
            if key not in transform_to_field_id or existing_field.field_id > transform_to_field_id[key]:
                transform_to_field_id[key] = existing_field.field_id

    next_id = effective_last_partition_id
    partition_fields = []
    for field in spec.fields:
        # Resolve the source column by name: old schema ID -> column name -> fresh schema field.
        original_column_name = old_schema.find_column_name(field.source_id)
        if original_column_name is None:
            raise ValueError(f"Could not find in old schema: {field}")
        fresh_field = fresh_schema.find_field(original_column_name)
        if fresh_field is None:
            raise ValueError(f"Could not find field in fresh schema: {original_column_name}")

        validate_partition_name(field.name, field.transform, fresh_field.field_id, fresh_schema, set())

        key = (fresh_field.field_id, str(field.transform))
        if key in transform_to_field_id:
            partition_field_id = transform_to_field_id[key]
        else:
            next_id += 1
            partition_field_id = next_id
            # Remember the assignment so a repeated key within the new spec reuses it.
            transform_to_field_id[key] = partition_field_id

        partition_fields.append(
            PartitionField(
                name=field.name,
                source_id=fresh_field.field_id,
                field_id=partition_field_id,
                transform=field.transform,
            )
        )

    # `next_id` starts at `effective_last_partition_id` and only increments, so it is the
    # new last partition id.
    return PartitionSpec(*partition_fields, spec_id=INITIAL_PARTITION_SPEC_ID), next_id
417+
418+
def _assign_fresh_partition_spec_ids_for_replace_v1(
    spec: PartitionSpec,
    old_schema: Schema,
    fresh_schema: Schema,
    current_spec: PartitionSpec,
    effective_last_partition_id: int,
) -> tuple[PartitionSpec, int]:
    """Build the replacement spec for a v1 table, preserving every field of `current_spec`.

    The result lists the fields of `current_spec` first, in their existing order and with
    their existing field IDs. A current field whose `(source_id, transform)` matches a field
    of the new spec takes that new field's name and fresh source ID; any other current field
    is kept as a `VoidTransform` under a name that does not clash with names already used.
    New fields without a match are appended afterwards, in declaration order, with fresh IDs.

    Args:
        spec: The new partition spec. Its `source_id`s reference `old_schema`.
        old_schema: The schema that the new spec's `source_id`s reference.
        fresh_schema: The schema with freshly assigned field IDs.
        current_spec: The table's current default partition spec.
        effective_last_partition_id: The highest partition field ID assumed to be in use.
            Fresh IDs start after this value or after the highest field ID in
            `current_spec`, whichever is larger.

    Returns:
        A tuple of `(fresh_spec, new_last_partition_id)`.

    Raises:
        ValueError: If a new field's source column cannot be resolved in `old_schema` or
            `fresh_schema`.
    """
    # Build (fresh_source_id, transform) -> (new_field, fresh_source_id) for the new spec,
    # in insertion order so leftover fields keep their declared order on append. If the new
    # spec repeats a key, the later field replaces the earlier one.
    new_field_by_key: dict[tuple[int, str], tuple[PartitionField, int]] = {}
    for new_field in spec.fields:
        col_name = old_schema.find_column_name(new_field.source_id)
        if col_name is None:
            raise ValueError(f"Could not find in old schema: {new_field}")
        fresh_field = fresh_schema.find_field(col_name)
        if fresh_field is None:
            raise ValueError(f"Could not find field in fresh schema: {col_name}")
        validate_partition_name(new_field.name, new_field.transform, fresh_field.field_id, fresh_schema, set())
        key = (fresh_field.field_id, str(new_field.transform))
        new_field_by_key[key] = (new_field, fresh_field.field_id)

    # Every new field name is reserved up front so a voided field never takes one of them.
    used_names: set[str] = {new_field.name for new_field in spec.fields}

    # Walk current spec, carrying forward each field. Matching new fields consume their key;
    # missing fields become void transforms.
    partition_fields = []
    # Fresh IDs must not collide with carried-forward IDs, so the counter is floored at the
    # highest field ID seen in the current spec.
    next_id = effective_last_partition_id
    for cur_field in current_spec.fields:
        next_id = max(next_id, cur_field.field_id)
        key = (cur_field.source_id, str(cur_field.transform))
        match = new_field_by_key.pop(key, None)
        if match is not None:
            new_field, fresh_source_id = match
            partition_fields.append(
                PartitionField(
                    name=new_field.name,
                    source_id=fresh_source_id,
                    field_id=cur_field.field_id,
                    transform=new_field.transform,
                )
            )
        else:
            void_name = _unique_void_name(cur_field.name, cur_field.field_id, used_names)
            used_names.add(void_name)
            partition_fields.append(
                PartitionField(
                    name=void_name,
                    source_id=cur_field.source_id,
                    field_id=cur_field.field_id,
                    transform=VoidTransform(),
                )
            )

    # Append remaining new fields at the end with fresh partition IDs.
    for new_field, fresh_source_id in new_field_by_key.values():
        next_id += 1
        partition_fields.append(
            PartitionField(
                name=new_field.name,
                source_id=fresh_source_id,
                field_id=next_id,
                transform=new_field.transform,
            )
        )

    # `next_id` never decreases from its starting point, so it is the new last partition id.
    return PartitionSpec(*partition_fields, spec_id=INITIAL_PARTITION_SPEC_ID), next_id
489+
490+
491+ def _unique_void_name (base_name : str , field_id : int , used_names : set [str ]) -> str :
492+ """Pick a void-transform name that does not collide with already-used names.
493+
494+ First tries `base_name`; if taken, tries `base_name_{field_id}`; if still taken,
495+ appends `_2`, `_3`, ... until unique.
496+ """
497+ if base_name not in used_names :
498+ return base_name
499+ candidate = f"{ base_name } _{ field_id } "
500+ suffix = 2
501+ while candidate in used_names :
502+ candidate = f"{ base_name } _{ field_id } _{ suffix } "
503+ suffix += 1
504+ return candidate
505+
506+
338507T = TypeVar ("T" )
339508
340509
0 commit comments