Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
6eb7161
HIVE-29413: Avoid code duplication by updating getPartCols method for…
ramitg254 Apr 8, 2026
28714b6
commit-2
ramitg254 Apr 9, 2026
bfaa7a4
corrected bucket-map-join test
ramitg254 Apr 9, 2026
8c24b55
corrected update statements
ramitg254 Apr 10, 2026
366827d
corrected load, partition evolution tests
ramitg254 Apr 11, 2026
7c01714
refractored
ramitg254 Apr 11, 2026
44d2a6c
addressed sonar issues
ramitg254 Apr 12, 2026
428a52c
updated table api and usage
ramitg254 Apr 26, 2026
5ac2561
introduced index optimization
ramitg254 May 24, 2026
332d4f1
corrected implementation
ramitg254 May 24, 2026
2aa5517
updated describe implementation and outputs
ramitg254 May 24, 2026
659e75c
updated api and test
ramitg254 May 25, 2026
9439739
updated update implementation
ramitg254 May 25, 2026
a0a992e
updated partition pruning and query rewriting
ramitg254 May 25, 2026
4559494
changes related to metatable
ramitg254 May 25, 2026
2e20703
corrected alter and semantic analyzer implementation
ramitg254 May 26, 2026
b5a5060
updated merge implementation and test output
ramitg254 May 26, 2026
a4a9ce6
updated ctas create and tests output
ramitg254 May 26, 2026
c58da98
updated stats autogather and test output
ramitg254 May 26, 2026
4ddced3
updated getPartitionKeys
ramitg254 May 27, 2026
19b47ee
removed getStorageSchemaCols part-1
ramitg254 May 30, 2026
565e1f6
removed getStorageSchemaCols part-2
ramitg254 May 31, 2026
dd870bb
removed getStorageSchemaCols part-3
ramitg254 May 31, 2026
350541d
removed workaround
ramitg254 May 31, 2026
1de739d
addressed sonar issues
ramitg254 Jun 1, 2026
664b31f
non part cols retrieval made lazy
ramitg254 Jun 1, 2026
001f82e
reviewed required changes
ramitg254 Jun 9, 2026
b4f8f3a
corrected partition.getCols for iceberg table
ramitg254 Jun 9, 2026
5f3d8cd
added wrapper for lineage
ramitg254 Jun 9, 2026
b42b0dc
reverted getPartitionKeys
ramitg254 Jun 9, 2026
57d16b7
refractor-1
ramitg254 Jun 10, 2026
261bb88
refractor-2
ramitg254 Jun 10, 2026
850275e
correction for rebased iceberg view commit recently merged to master
ramitg254 Jun 11, 2026
f19f7af
removed isTableTypeSet and merged partition column comments
ramitg254 Jun 11, 2026
ebbc002
updated conflicts for rebase
ramitg254 Jun 11, 2026
cb23165
reverted to user comment override and refractored
ramitg254 Jun 11, 2026
3829bcb
moved helpers to MetaStoreUtils
ramitg254 Jun 12, 2026
2d4ab11
moved to HiveTableUtil
ramitg254 Jun 18, 2026
095807e
updated method name
ramitg254 Jun 19, 2026
0cf7727
refractor
ramitg254 Jun 22, 2026
f1fbeba
refractor-2
ramitg254 Jun 22, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.CreateTableRequest;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
Expand Down Expand Up @@ -118,16 +117,6 @@ public BaseHiveIcebergMetaHook(Configuration conf) {
this.conf = conf;
}

public static boolean isIcebergView(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
if (hmsTable == null ||
hmsTable.getParameters() == null ||
!TableType.VIRTUAL_VIEW.toString().equals(hmsTable.getTableType())) {
return false;
}
String storageHandler = hmsTable.getParameters().get(hive_metastoreConstants.META_TABLE_STORAGE);
return HiveMetaHook.HIVE_ICEBERG_STORAGE_HANDLER.equals(storageHandler);
}

@Override
public void preCreateTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
CreateTableRequest request = new CreateTableRequest(hmsTable);
Expand All @@ -140,7 +129,7 @@ public void preCreateTable(CreateTableRequest request) {
if (hmsTable.isTemporary()) {
throw new UnsupportedOperationException("Creation of temporary iceberg tables is not supported.");
}
if (isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
preCreateIcebergView(request);
return;
}
Expand Down Expand Up @@ -533,7 +522,7 @@ protected void setWriteModeDefaults(Table icebergTbl, Map<String, String> newPro
public void postGetTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
if (hmsTable != null) {
try {
if (isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
IcebergViewSupport.enrichHmsTableFromIcebergView(hmsTable, conf);
return;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ public HiveIcebergMetaHook(Configuration conf) {

@Override
public void commitCreateTable(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
if (isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
tableProperties = IcebergTableProperties.getTableProperties(hmsTable, conf);
Map<String, String> tblProps =
hmsTable.getParameters() == null ? Maps.newHashMap() : Maps.newHashMap(hmsTable.getParameters());
Expand Down Expand Up @@ -266,7 +266,7 @@ public void commitDropTable(org.apache.hadoop.hive.metastore.api.Table hmsTable,
@Override
public void preAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable, EnvironmentContext context)
throws MetaException {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
currentAlterTableOp = null;
if (commitLock == null) {
commitLock = new NoLock();
Expand Down Expand Up @@ -503,7 +503,7 @@ public void commitAlterTable(org.apache.hadoop.hive.metastore.api.Table hmsTable
if (commitLock == null) {
throw new IllegalStateException("Hive commit lock should already be set");
}
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable)) {
if (HiveTableUtil.isIcebergView(hmsTable)) {
tableProperties = IcebergTableProperties.getTableProperties(hmsTable, conf);
Map<String, String> tblProps =
hmsTable.getParameters() == null ? Maps.newHashMap() : Maps.newHashMap(hmsTable.getParameters());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ public boolean canProvidePartitionStatistics(org.apache.hadoop.hive.ql.metadata.
if (!getStatsSource().equals(HiveMetaHook.ICEBERG)) {
return false;
}
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
if (HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return false;
}
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
Expand Down Expand Up @@ -899,7 +899,7 @@ public boolean supportsPartitionTransform() {

@Override
public List<TransformSpec> getPartitionTransformSpec(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
if (HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return Collections.emptyList();
}
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
Expand All @@ -916,7 +916,7 @@ public List<TransformSpec> getPartitionTransformSpec(org.apache.hadoop.hive.ql.m
@Override
public Map<Integer, List<TransformSpec>> getPartitionTransformSpecs(
org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
if (HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return Collections.emptyMap();
}
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
Expand Down Expand Up @@ -1550,10 +1550,10 @@ public List<FieldSchema> acidSelectColumns(org.apache.hadoop.hive.ql.metadata.Ta
case DELETE ->
// TODO: make it configurable whether we want to include the table columns in the select query.
// It might make delete writes faster if we don't have to write out the row object
ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getCols());
ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getAllCols());
Comment thread
deniskuzZ marked this conversation as resolved.
case UPDATE -> shouldOverwrite(table, operation) ?
ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA :
ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getCols());
ListUtils.union(ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA, table.getAllCols());
case MERGE -> ACID_VIRTUAL_COLS_AS_FIELD_SCHEMA;
default -> ImmutableList.of();
};
Expand Down Expand Up @@ -1584,7 +1584,7 @@ public boolean supportsSortColumns() {

@Override
public List<FieldSchema> sortColumns(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
if (HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return Collections.emptyList();
}
TableDesc tableDesc = Utilities.getTableDesc(hmsTable);
Expand Down Expand Up @@ -1989,8 +1989,9 @@ public void setTableParametersForCTLT(org.apache.hadoop.hive.ql.metadata.Table t
desc.setIsExternal(true);
}

// If source is Iceberg table set the schema and the partition spec
if (MetaStoreUtils.isIcebergTable(origParams)) {
// parameter table_type is set to "ICEBERG" in case of Iceberg tables
// set the schema and the partition spec accordingly
if (HiveTableUtil.isTableTypeSet(origParams)) {
tbl.getParameters()
.put(InputFormatConfig.TABLE_SCHEMA, origParams.get(InputFormatConfig.TABLE_SCHEMA));
tbl.getParameters()
Expand Down Expand Up @@ -2147,13 +2148,12 @@ public List<Partition> getPartitions(org.apache.hadoop.hive.ql.metadata.Table hm
}

public boolean isPartitioned(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
List<FieldSchema> partCols = hmsTable.getPartCols();
return partCols != null && !partCols.isEmpty();
}
if (!hmsTable.getTTable().isSetId()) {
if (hmsTable.getMetaTable() != null) {
return false;
}
if (!HiveTableUtil.isRegistered(hmsTable) || HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return !hmsTable.getPartitionKeys().isEmpty();
}
Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
Snapshot snapshot = IcebergTableUtil.getTableSnapshot(table, hmsTable);

Expand Down Expand Up @@ -2296,13 +2296,12 @@ public boolean canPerformMetadataDelete(org.apache.hadoop.hive.ql.metadata.Table

@Override
public List<FieldSchema> getPartitionKeys(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
if (BaseHiveIcebergMetaHook.isIcebergView(hmsTable.getTTable())) {
List<FieldSchema> partCols = hmsTable.getPartCols();
return partCols != null ? partCols : Collections.emptyList();
}
if (!hmsTable.getTTable().isSetId()) {
if (hmsTable.getMetaTable() != null) {
return Collections.emptyList();
}
if (!HiveTableUtil.isRegistered(hmsTable) || HiveTableUtil.isIcebergView(hmsTable.getTTable())) {
return hmsTable.getPartitionKeys();
Comment thread
deniskuzZ marked this conversation as resolved.
}
Table icebergTable = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
return MetastoreUtil.getPartitionKeys(icebergTable, icebergTable.spec().specId());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
Expand Down Expand Up @@ -380,4 +382,23 @@ public static boolean isCtas(Properties properties) {
return Boolean.parseBoolean(properties.getProperty(hive_metastoreConstants.TABLE_IS_CTAS));
}

public static boolean isTableTypeSet(Map<String, String> params) {
Comment thread
deniskuzZ marked this conversation as resolved.
return params != null &&
HiveMetaHook.ICEBERG.equalsIgnoreCase(params.get(HiveMetaHook.TABLE_TYPE));
}

public static boolean isIcebergView(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
if (hmsTable == null ||
hmsTable.getParameters() == null ||
!TableType.VIRTUAL_VIEW.toString().equals(hmsTable.getTableType())) {
return false;
}
String storageHandler = hmsTable.getParameters().get(hive_metastoreConstants.META_TABLE_STORAGE);
return HiveMetaHook.HIVE_ICEBERG_STORAGE_HANDLER.equals(storageHandler);
}

public static boolean isRegistered(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
return hmsTable.getTTable().isSetId() && isTableTypeSet(hmsTable.getParameters());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -946,7 +946,8 @@ public void testCreateTableWithoutColumnComments() {
@Test
public void testCreatePartitionedTableWithColumnComments() {
TableIdentifier identifier = TableIdentifier.of("default", "partitioned_with_comment_table");
String[] expectedDoc = new String[] {"int column", "string column", null, "partition column", null};
String[] expectedDoc = new String[] {"int column", "string column", null, "partition column",
"Transform: identity"};
shell.executeStatement("CREATE EXTERNAL TABLE partitioned_with_comment_table (" +
"t_int INT COMMENT 'int column', " +
"t_string STRING COMMENT 'string column', " +
Expand All @@ -959,13 +960,18 @@ public void testCreatePartitionedTableWithColumnComments() {

List<Object[]> rows = shell.executeStatement("DESCRIBE default.partitioned_with_comment_table");
List<Types.NestedField> columns = icebergTable.schema().columns();
List<String> partitionColumns = List.of("t_string_3", "t_string_4");
// The partition transform information and partition information is 6 extra lines, and 4 more line for the columns
Assert.assertEquals(columns.size() + 10, rows.size());
for (int i = 0; i < columns.size(); i++) {
Types.NestedField field = columns.get(i);
Assert.assertArrayEquals(new Object[] {field.name(), HiveSchemaUtil.convert(field.type()).getTypeName(),
field.doc() != null ? field.doc() : ""}, rows.get(i));
Assert.assertEquals(expectedDoc[i], field.doc());
String fieldDoc = field.doc();
if (fieldDoc == null && partitionColumns.contains(field.name())) {
fieldDoc = "Transform: identity";
}
Assert.assertArrayEquals(new Object[]{field.name(), HiveSchemaUtil.convert(field.type()).getTypeName(),
fieldDoc != null ? fieldDoc : ""}, rows.get(i));
Assert.assertEquals(expectedDoc[i], fieldDoc);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_orc
# col_name data_type comment
a int
b string
c string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -453,8 +451,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet
# col_name data_type comment
a int
b string
c string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -729,8 +725,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_avro
# col_name data_type comment
a int
b string
c string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1066,9 +1060,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_orc_mixed
# col_name data_type comment
a int
b double
c int
d string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1513,9 +1504,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet_mixed
# col_name data_type comment
a int
b double
c int
d string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1960,9 +1948,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_avro_mixed
# col_name data_type comment
a int
b double
c int
d string

# Partition Information
# col_name data_type comment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_orc
# col_name data_type comment
a int
b string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -415,7 +414,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet
# col_name data_type comment
a int
b string

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -770,7 +768,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet_int
# col_name data_type comment
a int
b int

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1125,7 +1122,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_parquet_double
# col_name data_type comment
a int
b double

# Partition Information
# col_name data_type comment
Expand Down Expand Up @@ -1426,7 +1422,6 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_avro
# col_name data_type comment
a int
b string

# Partition Information
# col_name data_type comment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,21 @@ Stage-0
limit:-1
Stage-1
Map 1 vectorized
File Output Operator [FS_53]
Map Join Operator [MAPJOIN_52] (rows=2 width=530)
BucketMapJoin:true,Conds:SEL_51._col1, _col2=RS_49._col1, _col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
File Output Operator [FS_23]
Map Join Operator [MAPJOIN_22] (rows=2 width=530)
BucketMapJoin:true,Conds:SEL_21._col1, _col2=RS_19._col1, _col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
<-Map 2 [CUSTOM_EDGE] vectorized
MULTICAST [RS_49]
MULTICAST [RS_19]
PartitionCols:_col2, _col1
Select Operator [SEL_48] (rows=2 width=265)
Select Operator [SEL_18] (rows=2 width=265)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_47] (rows=2 width=265)
Filter Operator [FIL_17] (rows=2 width=265)
predicate:(id is not null and part is not null)
TableScan [TS_3] (rows=2 width=265)
default@tbl,tbl2,Tbl:COMPLETE,Col:COMPLETE,Output:["foid","part","id"]
<-Select Operator [SEL_51] (rows=2 width=265)
<-Select Operator [SEL_21] (rows=2 width=265)
Output:["_col0","_col1","_col2"]
Filter Operator [FIL_50] (rows=2 width=265)
Filter Operator [FIL_20] (rows=2 width=265)
predicate:(id is not null and part is not null)
TableScan [TS_0] (rows=2 width=265)
default@tbl,tbl,Tbl:COMPLETE,Col:COMPLETE,Grouping Num Buckets:100,Grouping Partition Columns:["id","part"],Output:["foid","part","id"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ PREHOOK: Input: default@tbl_ice_puffin
POSTHOOK: query: desc formatted tbl_ice_puffin C
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_ice_puffin
col_name C
col_name c
data_type int
min 52
max 56
Expand All @@ -358,7 +358,7 @@ max_col_len
num_trues
num_falses
bit_vector HL
comment
comment Transform: identity
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
PREHOOK: query: EXPLAIN select count(*) from src_ice t1 join tbl_ice_puffin t2 on (t1.a = t2.a)
PREHOOK: type: QUERY
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,6 @@ POSTHOOK: query: describe formatted tbl_ice
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@tbl_ice
# col_name data_type comment
a int
b string
c int

# Partition Information
Expand Down
Loading
Loading