Fix Delta to Iceberg not working on column mapping enabled Delta source table #766
```diff
@@ -20,9 +20,12 @@
 import java.time.Instant;
 import java.time.temporal.ChronoUnit;
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import java.util.Optional;
+import java.util.Set;

 import lombok.extern.log4j.Log4j2;
@@ -39,6 +42,11 @@
 import org.apache.iceberg.catalog.Namespace;
 import org.apache.iceberg.catalog.TableIdentifier;
 import org.apache.iceberg.exceptions.NotFoundException;
+import org.apache.iceberg.mapping.MappedField;
+import org.apache.iceberg.mapping.MappedFields;
+import org.apache.iceberg.mapping.MappingUtil;
+import org.apache.iceberg.mapping.NameMapping;
+import org.apache.iceberg.mapping.NameMappingParser;

 import org.apache.xtable.conversion.TargetTable;
 import org.apache.xtable.model.InternalTable;
```
```diff
@@ -161,12 +169,42 @@ private void initializeTableIfRequired(InternalTable internalTable) {
     }
   }

+  private MappedFields updateNameMapping(MappedFields mapping, Map<Integer, String> updates) {
+    if (mapping == null) {
+      return null;
+    }
+    List<MappedField> fieldResults = new ArrayList<>();
+    for (MappedField field : mapping.fields()) {
+      Set<String> fieldNames = new HashSet<>(field.names());
+      if (updates.containsKey(field.id())) {
+        fieldNames.add(updates.get(field.id()));
+      }
+      MappedFields nestedMapping = updateNameMapping(field.nestedMapping(), updates);
+      fieldResults.add(MappedField.of(field.id(), fieldNames, nestedMapping));
+    }
+    return MappedFields.of(fieldResults);
+  }
+
   @Override
   public void syncSchema(InternalSchema schema) {
     Schema latestSchema = schemaExtractor.toIceberg(schema);
+    String mappingJson = transaction.table().properties().get(TableProperties.DEFAULT_NAME_MAPPING);
+    boolean hasFieldIds =
+        schema.getAllFields().stream().anyMatch(field -> field.getFieldId() != null);
+    // Recreate name mapping when field IDs were provided in the source schema to ensure every
+    // field in the mapping was assigned the same ID as what is in the source schema
+    NameMapping mapping =
+        mappingJson == null || hasFieldIds
+            ? MappingUtil.create(latestSchema)
+            : NameMappingParser.fromJson(mappingJson);
+    mapping =
+        NameMapping.of(
+            updateNameMapping(mapping.asMappedFields(), schemaExtractor.getIdToStorageName()));
+    transaction
+        .updateProperties()
+        .set(TableProperties.DEFAULT_NAME_MAPPING, NameMappingParser.toJson(mapping))
+        .commit();
     if (!transaction.table().schema().sameSchema(latestSchema)) {
-      boolean hasFieldIds =
-          schema.getAllFields().stream().anyMatch(field -> field.getFieldId() != null);
       if (hasFieldIds) {
         // There is no clean way to sync the schema with the provided field IDs using the
         // transaction API so we commit the current transaction and interact directly with
```

Contributor: Can mapping ever be null? If null is returned, will…

Author: It's mainly used to end the recursive call; all nested fields of a given field are processed through this function as well. In that case, having…
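To make the mapping structure concrete, here is a minimal standalone sketch (not part of the PR; the field IDs and column names are hypothetical) of the Iceberg name-mapping APIs that syncSchema relies on. The key idea is that one field ID can carry several names, so adding Delta's physical column name as an extra alias lets Iceberg readers resolve columns in data files written by Delta.

```java
import java.util.Arrays;

import org.apache.iceberg.mapping.MappedField;
import org.apache.iceberg.mapping.NameMapping;
import org.apache.iceberg.mapping.NameMappingParser;

public class NameMappingRoundTrip {
  public static void main(String[] args) {
    // Field 1 is resolvable both by its logical name and by a hypothetical
    // Delta physical name; both alias the same Iceberg field ID.
    NameMapping mapping =
        NameMapping.of(
            MappedField.of(1, Arrays.asList("double_field", "col-173b4db0")),
            MappedField.of(2, "long_field"));
    // This JSON is what gets stored under TableProperties.DEFAULT_NAME_MAPPING
    // ("schema.name-mapping.default") on the Iceberg table.
    String json = NameMappingParser.toJson(mapping);
    System.out.println(json);
    // The mapping parses back from JSON, as syncSchema does on later syncs.
    NameMapping parsed = NameMappingParser.fromJson(json);
    System.out.println(parsed);
  }
}
```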
```diff
@@ -32,6 +32,7 @@
 import java.util.stream.Collectors;

 import lombok.AccessLevel;
+import lombok.Getter;
 import lombok.NoArgsConstructor;
 import lombok.extern.log4j.Log4j2;
```
```diff
@@ -53,18 +54,63 @@
 @Log4j2
 @NoArgsConstructor(access = AccessLevel.PRIVATE)
 public class IcebergSchemaExtractor {
-  private static final IcebergSchemaExtractor INSTANCE = new IcebergSchemaExtractor();
   private static final String MAP_KEY_FIELD_NAME = "key";
   private static final String MAP_VALUE_FIELD_NAME = "value";
   private static final String LIST_ELEMENT_FIELD_NAME = "element";
+  @Getter private final Map<Integer, String> idToStorageName = new HashMap<>();

   public static IcebergSchemaExtractor getInstance() {
-    return INSTANCE;
+    return new IcebergSchemaExtractor();
   }

+  private void initializeFieldIdTracker(InternalSchema schema, AtomicInteger fieldIdTracker) {
+    schema.getFields().stream()
+        .forEach(
+            field -> {
+              if (field.getFieldId() != null)
+                fieldIdTracker.accumulateAndGet(field.getFieldId(), Math::max);
+              initializeFieldIdTracker(field, fieldIdTracker);
+            });
+  }
+
+  private void initializeFieldIdTracker(InternalField field, AtomicInteger fieldIdTracker) {
+    switch (field.getSchema().getDataType()) {
+      case RECORD:
+        initializeFieldIdTracker(field.getSchema(), fieldIdTracker);
+        return;
+      case MAP:
+        field.getSchema().getFields().stream()
+            .filter(
+                mapField ->
+                    InternalField.Constants.MAP_KEY_FIELD_NAME.equals(mapField.getName())
+                        || InternalField.Constants.MAP_VALUE_FIELD_NAME.equals(mapField.getName()))
+            .forEach(
+                mapField -> {
+                  if (mapField.getFieldId() != null)
+                    fieldIdTracker.accumulateAndGet(mapField.getFieldId(), Math::max);
+                  initializeFieldIdTracker(mapField, fieldIdTracker);
+                });
+        return;
+      case LIST:
+        field.getSchema().getFields().stream()
+            .filter(
+                arrayField ->
+                    InternalField.Constants.ARRAY_ELEMENT_FIELD_NAME.equals(arrayField.getName()))
+            .forEach(
+                arrayField -> {
+                  if (arrayField.getFieldId() != null)
+                    fieldIdTracker.accumulateAndGet(arrayField.getFieldId(), Math::max);
+                  initializeFieldIdTracker(arrayField, fieldIdTracker);
+                });
+    }
+  }
+
   public Schema toIceberg(InternalSchema internalSchema) {
     // if field IDs are not assigned in the source, just use an incrementing integer
     AtomicInteger fieldIdTracker = new AtomicInteger(0);
+    // traverse the schema before conversion to ensure fieldIdTracker won't return any
+    // fieldIds that are already present in the schema
+    initializeFieldIdTracker(internalSchema, fieldIdTracker);
     List<Types.NestedField> nestedFields = convertFields(internalSchema, fieldIdTracker);
     List<InternalField> recordKeyFields = internalSchema.getRecordKeyFields();
     boolean recordKeyFieldsAreNotRequired =
```

Contributor: @xr-chen This adds state to the class, so we have to decide whether we want to make an instance of this class per conversion or remove this state. Removing the state would require you to return the map as part of the response for the… I don't have a strong opinion either way, but I would prefer that over the…

Author: Good suggestion; this will avoid unexpected outcomes caused by adding state to a singleton object. I will update the code.
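Returning to initializeFieldIdTracker: the reason for pre-seeding the counter is that IDs generated for fields without one must never collide with IDs copied from the source schema. The trick can be shown in isolation with plain JDK types (a sketch with hypothetical IDs, not XTable code):

```java
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.IntStream;

public class FieldIdTrackerDemo {
  public static void main(String[] args) {
    AtomicInteger fieldIdTracker = new AtomicInteger(0);
    // Suppose the traversal found that the source schema already uses IDs 1, 2, and 7.
    IntStream.of(1, 2, 7).forEach(id -> fieldIdTracker.accumulateAndGet(id, Math::max));
    // A field arriving without an ID now gets 8 rather than 1, so generated
    // IDs can never collide with IDs taken from the source schema.
    System.out.println(fieldIdTracker.incrementAndGet()); // prints 8
  }
}
```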
```diff
@@ -154,6 +200,9 @@ private List<Types.NestedField> convertFields(
     List<Types.NestedField> nestedFields = new ArrayList<>(schema.getFields().size());
     for (int i = 0; i < schema.getFields().size(); i++) {
       InternalField field = schema.getFields().get(i);
+      if (field.getStorageName() != null) {
+        idToStorageName.put(ids.get(i), field.getStorageName());
+      }
       nestedFields.add(
           Types.NestedField.of(
               ids.get(i),
```
```diff
@@ -746,6 +746,48 @@ public void testIcebergCorruptedSnapshotRecovery() throws Exception {
     }
   }

+  @Test
+  public void testColumnMappingEnabledDeltaToIceberg() {
+    String tableName = getTableName();
+    ConversionSourceProvider<?> conversionSourceProvider = getConversionSourceProvider(DELTA);
+    try (TestSparkDeltaTable table =
+        TestSparkDeltaTable.forColumnMappingEnabled(tableName, tempDir, sparkSession, null)) {
+      table.insertRows(20);
+      ConversionController conversionController =
+          new ConversionController(jsc.hadoopConfiguration());
+      ConversionConfig conversionConfig =
+          getTableSyncConfig(
+              DELTA,
+              SyncMode.INCREMENTAL,
+              tableName,
+              table,
+              Collections.singletonList(ICEBERG),
+              null,
+              null);
+      conversionController.sync(conversionConfig, conversionSourceProvider);
+      table.insertRows(10);
+      conversionController.sync(conversionConfig, conversionSourceProvider);
+      table.insertRows(10);
+      conversionController.sync(conversionConfig, conversionSourceProvider);
+      checkDatasetEquivalence(DELTA, table, Collections.singletonList(ICEBERG), 40);
+
+      table.dropColumn("long_field");
+      table.insertRows(10);
+      conversionController.sync(conversionConfig, conversionSourceProvider);
+      checkDatasetEquivalence(DELTA, table, Collections.singletonList(ICEBERG), 50);
+
+      table.renameColumn("double_field", "scores");
+      table.insertRows(10);
+      conversionController.sync(conversionConfig, conversionSourceProvider);
+      checkDatasetEquivalence(DELTA, table, Collections.singletonList(ICEBERG), 60);
+
+      table.addColumn();
+      table.insertRows(10);
+      conversionController.sync(conversionConfig, conversionSourceProvider);
+      checkDatasetEquivalence(DELTA, table, Collections.singletonList(ICEBERG), 70);
+    }
+  }
+
   @Test
   public void testMetadataRetention() throws Exception {
     String tableName = getTableName();
```

Contributor: @xr-chen to answer your question, yes, this is exactly what I was looking for. Do you think we should also do some minor schema evolution in this case?

Author: @the-other-tim-brown Yes, I think so, but the code actually can't pass this test case now, so it probably won't work on any rename-column type of schema change. It seems to me that only populating fieldId doesn't work: the converted Iceberg table doesn't know which 'physical' column in the data file to read from for a 'logical' column name in the table schema, and it returns null values for all columns if we read from the generated Iceberg table. This issue is probably due to…

Author: I added Delta's physical column names into Iceberg's name mapping…

Contributor: The filesystem is shared between the tests, along with the Hadoop and Spark configurations.

Author: Ahh, it's due to the…

Contributor: Good catch on this, thanks for digging into the issue. I added a comment on that class.

Contributor: Now that you have this working, should we add some schema evolution here? I think we should at least add a second commit as a sanity check that everything works as expected.

Author: Is there an existing function I can use to change the schema of the source Delta table for testing, or should I implement it myself? By a second commit, do you mean inserting more records by…

Contributor: We already have some helpers for this. Earlier in this test class you will see a test case that uses GenericTable.getInstanceWithAdditionalColumns. This has some helpers for creating the evolved Delta table schema under the hood that you should be able to build off of. And yes, just inserting or updating some more records is fine. I just want to ensure there isn't some unexpected side-effect when we set this table property multiple times for Iceberg.
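For background on the source-side feature under test: column mapping is turned on for a Delta table via table properties, after which data files store columns under stable physical names that survive renames and drops. A minimal sketch, assuming a Spark session with Delta Lake configured and a hypothetical table name (TestSparkDeltaTable.forColumnMappingEnabled presumably arranges something equivalent inside the test fixture):

```java
import org.apache.spark.sql.SparkSession;

public class EnableDeltaColumnMapping {
  public static void main(String[] args) {
    SparkSession spark =
        SparkSession.builder().appName("column-mapping-demo").master("local[*]").getOrCreate();
    // 'name' mode records a physical column name per field in the Delta schema
    // metadata; renames and drops then become metadata-only operations.
    spark.sql(
        "ALTER TABLE my_delta_table SET TBLPROPERTIES ("
            + "'delta.columnMapping.mode' = 'name', "
            + "'delta.minReaderVersion' = '2', "
            + "'delta.minWriterVersion' = '5')");
    spark.stop();
  }
}
```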
Contributor: Can you add a comment for this field?

Author: Sure, will add.

Author: Comment added.

Contributor: @xr-chen "name mapping" is a Delta-specific concept. The comment should describe more generally what is happening here. Something like: "The name of the column in the data file used to store this field when it differs from the name in the table's definition." The comment should also describe whether this will be null when the names are the same or if the string is expected to be populated.

Author: Updated.

Contributor: @xr-chen will this value be null if it is not different?

Author: Yes, it will be null.

Contributor: Can you add that detail to the comment?
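Putting the thread together, the javadoc on the field (storageName on InternalField, judging by the extractor diff above) would presumably end up along these lines; a hedged sketch rather than the exact XTable source:

```java
public class InternalFieldSketch {
  /**
   * The name of the column in the data file used to store this field when it
   * differs from the field's name in the table definition; null when the
   * physical and logical names are the same.
   */
  private String storageName;
}
```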