From 4c2658c1bd2e6b21b5c9d8c754750f3f9a195f83 Mon Sep 17 00:00:00 2001 From: Mahesh Raju Somalaraju Date: Wed, 24 Jun 2026 19:10:30 +0530 Subject: [PATCH 1/2] HIVE-29490: Add tests for HIVE-26758 (hive.use.scratchdir.for.staging) --- .../resources/testconfiguration.properties | 1 + .../TestGenMapRedUtilsStagingPath.java | 387 ++++++++++++++++++ .../hive_use_scratchdir_for_staging.q | 79 ++++ .../tez/hive_use_scratchdir_for_staging.q.out | 211 ++++++++++ 4 files changed, 678 insertions(+) create mode 100644 ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsStagingPath.java create mode 100644 ql/src/test/queries/clientpositive/hive_use_scratchdir_for_staging.q create mode 100644 ql/src/test/results/clientpositive/tez/hive_use_scratchdir_for_staging.q.out diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index bfefc3b5f023..2ac3b5d7d367 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -27,6 +27,7 @@ minitez.query.files=\ explainanalyze_6.q,\ explainuser_3.q,\ flatten_union_subdir.q,\ + hive_use_scratchdir_for_staging.q,\ limit_bailout.q,\ mapjoin_addjar.q,\ non_strict_numeric_to_timestamp_conversion.q,\ diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsStagingPath.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsStagingPath.java new file mode 100644 index 000000000000..d8f6345c6b75 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsStagingPath.java @@ -0,0 +1,387 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.exec.FileSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.mr.MapRedTask; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.io.AcidUtils; +import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; +import org.apache.hadoop.hive.ql.plan.FileSinkDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.Collections; +import java.util.Properties; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + 
+/** + * Unit tests for HIVE-26758: staging directory path selection controlled by + * hive.use.scratchdir.for.staging. + */ +public class TestGenMapRedUtilsStagingPath { + + private static final Path DEST_PATH = + new Path("hdfs://namenode/warehouse/mydb.db/target_tbl"); + private static final Path FINAL_JOB_STAGING = + new Path("hdfs://namenode/warehouse/mydb.db/target_tbl/.hive-staging_job0"); + private static final Path INTERIM_JOB_STAGING = + new Path("hdfs://namenode/tmp/hive-scratch/.hive-staging_job0"); + + @BeforeClass + public static void initializeSessionState() { + HiveConf conf = new HiveConf(); + conf.set("_hive.hdfs.session.path", "file:///tmp/hive-test/hdfs-session"); + conf.set("_hive.local.session.path", "file:///tmp/hive-test/local-session"); + conf.set(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname, "mr"); + SessionState.start(conf); + } + + private static HiveConf buildConf() { + HiveConf conf = new HiveConf(); + conf.set("_hive.hdfs.session.path", "file:///tmp/hive-test/hdfs-session"); + conf.set("_hive.local.session.path", "file:///tmp/hive-test/local-session"); + conf.set(ConfVars.STAGING_DIR.varname, ".hive-staging"); + return conf; + } + + /** + * Builds a minimal FileSinkDesc whose isMmTable and + * isDirectInsert flags are both false, so that + * GenMapRedUtils#createMoveTask enters the staging-path branch. 
+ */ + private static FileSinkDesc buildFileSinkDesc(Path destPath) { + TableDesc tableDesc = new TableDesc(null, null, new Properties()); + FileSinkDesc fsd = new FileSinkDesc( + destPath, + tableDesc, + false, + 0, + false, + false, + 1, + 1, + null, + null, + destPath, + false, + false, + false, + false, + false, + AcidUtils.Operation.NOT_ACID, + false); + return fsd; + } + + private static FileSinkOperator buildFileSinkOperator(FileSinkDesc fsd) { + FileSinkOperator fsOp = mock(FileSinkOperator.class); + when(fsOp.getConf()).thenReturn(fsd); + when(fsOp.getSchema()).thenReturn(mock(RowSchema.class)); + when(fsOp.getCompilationOpContext()).thenReturn(mock(CompilationOpContext.class)); + return fsOp; + } + + private static Context buildMockContext() { + Context mockCtx = mock(Context.class); + when(mockCtx.getTempDirForFinalJobPath(any(Path.class))).thenReturn(FINAL_JOB_STAGING); + when(mockCtx.getTempDirForInterimJobPath(any(Path.class))).thenReturn(INTERIM_JOB_STAGING); + return mockCtx; + } + + private static ParseContext buildMockParseContext(Context ctx) { + ParseContext mockParseCtx = mock(ParseContext.class); + when(mockParseCtx.getContext()).thenReturn(ctx); + return mockParseCtx; + } + + @Test + public void testCreateMoveTaskUsesFinalJobPathWhenConfigFalse() { + HiveConf conf = buildConf(); + conf.setBoolVar(ConfVars.HIVE_USE_SCRATCHDIR_FOR_STAGING, false); + Context mockCtx = buildMockContext(); + FileSinkDesc fsd = buildFileSinkDesc(DEST_PATH); + FileSinkOperator fsOp = buildFileSinkOperator(fsd); + + GenMapRedUtils.createMoveTask( + new MapRedTask(), true, fsOp, + buildMockParseContext(mockCtx), + Collections.emptyList(), conf, null); + + verify(mockCtx).getTempDirForFinalJobPath(DEST_PATH); + verify(mockCtx, never()).getTempDirForInterimJobPath(any()); + + Path dirName = fsd.getDirName(); + + assertNotNull("FileSinkDesc.getDirName() must not be null", dirName); + assertEquals( + "With scratchdir.for.staging=false, FileSinkDesc.getDirName() must equal 
" + + "the value returned by getTempDirForFinalJobPath (FINAL_JOB_STAGING).", + FINAL_JOB_STAGING.toString(), dirName.toString()); + assertNotEquals( + "With scratchdir.for.staging=false, getDirName() must NOT be the interim staging path.", + INTERIM_JOB_STAGING.toString(), dirName.toString()); + } + + @Test + public void testCreateMoveTaskUsesInterimJobPathWhenConfigTrue() { + HiveConf conf = buildConf(); + conf.setBoolVar(ConfVars.HIVE_USE_SCRATCHDIR_FOR_STAGING, true); + Context mockCtx = buildMockContext(); + FileSinkDesc fsd = buildFileSinkDesc(DEST_PATH); + FileSinkOperator fsOp = buildFileSinkOperator(fsd); + + GenMapRedUtils.createMoveTask( + new MapRedTask(), true, fsOp, + buildMockParseContext(mockCtx), + Collections.emptyList(), conf, null); + + verify(mockCtx).getTempDirForInterimJobPath(DEST_PATH); + verify(mockCtx, never()).getTempDirForFinalJobPath(any()); + + Path dirName = fsd.getDirName(); + + assertNotNull("FileSinkDesc.getDirName() must not be null", dirName); + assertEquals( + "With scratchdir.for.staging=true, FileSinkDesc.getDirName() must equal " + + "the value returned by getTempDirForInterimJobPath (INTERIM_JOB_STAGING).", + INTERIM_JOB_STAGING.toString(), dirName.toString()); + assertNotEquals( + "With scratchdir.for.staging=true, getDirName() must NOT be the dest-relative staging path.", + FINAL_JOB_STAGING.toString(), dirName.toString()); + } + + /** + * Flipping the config flag must produce a different staging path, so that the two paths do not collide. 
+ */ + @Test + public void testConfigSwitchChangesStagingLocation() { + HiveConf confFalse = buildConf(); + confFalse.setBoolVar(ConfVars.HIVE_USE_SCRATCHDIR_FOR_STAGING, false); + Context mockCtxFalse = buildMockContext(); + FileSinkDesc fsdFalse = buildFileSinkDesc(DEST_PATH); + + GenMapRedUtils.createMoveTask( + new MapRedTask(), true, buildFileSinkOperator(fsdFalse), + buildMockParseContext(mockCtxFalse), + Collections.emptyList(), confFalse, null); + + HiveConf confTrue = buildConf(); + confTrue.setBoolVar(ConfVars.HIVE_USE_SCRATCHDIR_FOR_STAGING, true); + Context mockCtxTrue = buildMockContext(); + FileSinkDesc fsdTrue = buildFileSinkDesc(DEST_PATH); + + GenMapRedUtils.createMoveTask( + new MapRedTask(), true, buildFileSinkOperator(fsdTrue), + buildMockParseContext(mockCtxTrue), + Collections.emptyList(), confTrue, null); + + assertNotEquals( + "Flipping hive.use.scratchdir.for.staging must produce a different " + + "FileSinkDesc.getDirName(). false=" + fsdFalse.getDirName() + + " true=" + fsdTrue.getDirName(), + fsdFalse.getDirName().toString(), + fsdTrue.getDirName().toString()); + assertEquals("false-branch must use FINAL_JOB_STAGING", + FINAL_JOB_STAGING.toString(), fsdFalse.getDirName().toString()); + assertEquals("true-branch must use INTERIM_JOB_STAGING", + INTERIM_JOB_STAGING.toString(), fsdTrue.getDirName().toString()); + } + + /** + * The HIVE-26758 staging path change applies ONLY to: + * native table, no micro-managed (MM), no direct-insert, no full-ACID + */ + @Test + public void testNoStagingForMmTable() { + HiveConf conf = buildConf(); + conf.setBoolVar(ConfVars.HIVE_USE_SCRATCHDIR_FOR_STAGING, false); + Properties mmProps = new Properties(); + mmProps.setProperty(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, "insert_only"); + TableDesc mmTableDesc = new TableDesc(null, null, mmProps); + + FileSinkDesc fsd = new FileSinkDesc( + DEST_PATH, mmTableDesc, false, 0, false, false, + 1, 1, null, null, + DEST_PATH, false, false, false, false, 
false, + AcidUtils.Operation.NOT_ACID, false); + + Context mockCtx = buildMockContext(); + GenMapRedUtils.createMoveTask( + new MapRedTask(), true, buildFileSinkOperator(fsd), + buildMockParseContext(mockCtx), + Collections.emptyList(), conf, null); + + // Neither staging method must be called for an MM table. + verify(mockCtx, never()).getTempDirForFinalJobPath(any()); + verify(mockCtx, never()).getTempDirForInterimJobPath(any()); + + // getDirName() must remain at its initial value – no staging redirection. + assertEquals( + "MM table: getDirName() must stay at DEST_PATH; staging must be skipped.", + DEST_PATH.toString(), fsd.getDirName().toString()); + } + + /** + * Full-ACID tables with direct-insert must bypass the staging path selection; + */ + @Test + public void testNoStagingForDirectInsert() { + HiveConf conf = buildConf(); + conf.setBoolVar(ConfVars.HIVE_USE_SCRATCHDIR_FOR_STAGING, false); + + FileSinkDesc fsd = new FileSinkDesc( + DEST_PATH, new TableDesc(null, null, new Properties()), + false, 0, false, false, 1, 1, + null, null, + DEST_PATH, false, false, false, false, + true, // isDirectInsert = true + AcidUtils.Operation.INSERT, false); + + Context mockCtx = buildMockContext(); + GenMapRedUtils.createMoveTask( + new MapRedTask(), true, buildFileSinkOperator(fsd), + buildMockParseContext(mockCtx), + Collections.emptyList(), conf, null); + + // Neither staging method must be called for a direct-insert table. + verify(mockCtx, never()).getTempDirForFinalJobPath(any()); + verify(mockCtx, never()).getTempDirForInterimJobPath(any()); + + assertEquals( + "Direct-insert table: getDirName() must stay at DEST_PATH; staging must be skipped.", + DEST_PATH.toString(), fsd.getDirName().toString()); + } + + /** + * With the default config hive.use.scratchdir.for.staging=false; + * GenMapRedUtils#createMoveTask must produce the new HIVE-26758 layout: + * / — staging directory appears before the static partition segment. 
+ * Example (year=2001 is the static partition) + * table_path/.hive-staging_job0/year=2001 + */ + @Test + public void testDynamicPartitionStagingLayoutWithDefaultConfig() { + HiveConf conf = buildConf(); + conf.setBoolVar(ConfVars.HIVE_USE_SCRATCHDIR_FOR_STAGING, false); + // Static partition segment: year=2001 (dynamic partition is season) + String spPath = "year=2001"; + DynamicPartitionCtx dpCtx = new DynamicPartitionCtx(); + dpCtx.setSPPath(spPath); + FileSinkDesc fsd = buildFileSinkDesc(DEST_PATH); + fsd.setDynPartCtx(dpCtx); + Context mockCtx = buildMockContext(); + GenMapRedUtils.createMoveTask( + new MapRedTask(), true, buildFileSinkOperator(fsd), + buildMockParseContext(mockCtx), + Collections.emptyList(), conf, null); + Path expected = new Path(FINAL_JOB_STAGING, spPath); + + assertEquals( + "With scratchdir.for.staging=false, getDirName() must be /year=2001 " + + "(staging dir BEFORE static partition).", + expected.toString(), fsd.getDirName().toString()); + + String dirStr = fsd.getDirName().toString(); + int stagingIdx = dirStr.indexOf(".hive-staging"); + int spIdx = dirStr.indexOf(spPath); + + assertTrue( + "Staging dir must precede static partition. stagingIdx=" + stagingIdx + + " spIdx=" + spIdx + " path=" + dirStr, + stagingIdx >= 0 && stagingIdx < spIdx); + + // Explicitly confirm that the PRE HIVE-26758 behavior (/year=2001/) is NOT produced. + assertFalse( + "getDirName() must NOT follow the pre-patch layout (/year=2001/). Got: " + dirStr, + dirStr.contains(DEST_PATH + "/" + spPath)); + } + + /** + * With hive.use.scratchdir.for.staging=true, + * GenMapRedUtils#createMoveTask must place staging under the + * scratch directory and still append the static partition after it: + * // + * This lets a subsequent MoveTask relocate data from scratchdir to the + * final table path without any intermediate moves under the table location. 
+ * Example (year=2001 is the static partition) + * expected: hive.exec.scratchdir/.hive-staging_job0/year=2001 + */ + @Test + public void testDynamicPartitionStagingLayoutWithScratchdirConfig() { + HiveConf conf = buildConf(); + conf.setBoolVar(ConfVars.HIVE_USE_SCRATCHDIR_FOR_STAGING, true); + String spPath = "year=2001"; + DynamicPartitionCtx dpCtx = new DynamicPartitionCtx(); + dpCtx.setSPPath(spPath); + FileSinkDesc fsd = buildFileSinkDesc(DEST_PATH); + fsd.setDynPartCtx(dpCtx); + Context mockCtx = buildMockContext(); + GenMapRedUtils.createMoveTask( + new MapRedTask(), true, buildFileSinkOperator(fsd), + buildMockParseContext(mockCtx), + Collections.emptyList(), conf, null); + Path expected = new Path(INTERIM_JOB_STAGING, spPath); + + assertEquals( + "With scratchdir.for.staging=true, getDirName() must be " + + "/year=2001.", + expected.toString(), fsd.getDirName().toString()); + + String dirStr = fsd.getDirName().toString(); + int stagingIdx = dirStr.indexOf(".hive-staging"); + int spIdx = dirStr.indexOf(spPath); + + assertTrue( + "Staging dir must precede static partition. stagingIdx=" + stagingIdx + + " spIdx=" + spIdx + " path=" + dirStr, + stagingIdx >= 0 && stagingIdx < spIdx); + assertTrue( + "With config=true, staging must be rooted at INTERIM_JOB_STAGING (scratchdir). Got: " + dirStr, + dirStr.startsWith(INTERIM_JOB_STAGING.toString())); + assertFalse( + "With config=true, staging must NOT be rooted at FINAL_JOB_STAGING (table path). 
Got: " + dirStr, + dirStr.startsWith(FINAL_JOB_STAGING.toString())); + } + + @Test + public void testDefaultConfigValueIsFalse() { + assertFalse( + "hive.use.scratchdir.for.staging must default to false", + buildConf().getBoolVar(ConfVars.HIVE_USE_SCRATCHDIR_FOR_STAGING)); + } +} diff --git a/ql/src/test/queries/clientpositive/hive_use_scratchdir_for_staging.q b/ql/src/test/queries/clientpositive/hive_use_scratchdir_for_staging.q new file mode 100644 index 000000000000..cb50e3988d1d --- /dev/null +++ b/ql/src/test/queries/clientpositive/hive_use_scratchdir_for_staging.q @@ -0,0 +1,79 @@ +-- HIVE-29490: Test hive.use.scratchdir.for.staging configuration and +-- dynamic partition staging directory layout change for native non-acid, +-- non-mm, non-direct-insert external tables. + +-- SORT_QUERY_RESULTS + +--! qt:replace:/(COLUMN_STATS_ACCURATE\s+)\{.*/$1#Masked#/ +--! qt:replace:/(transient_lastDdlTime\s+)[0-9]+/$1#Masked#/ + +set hive.exec.dynamic.partition=true; +set hive.exec.dynamic.partition.mode=nonstrict; +set hive.stats.autogather=true; +set hive.exec.max.dynamic.partitions=500; +set hive.exec.max.dynamic.partitions.pernode=500; + +create database mydb; +create external table mydb.source_test_tbl (a string) stored as orc; +create external table mydb.target_test_tbl (a string) partitioned by (created string, day string) stored as orc; + +insert into table mydb.source_test_tbl values ('a'); +insert into table mydb.source_test_tbl values ('b'); + +-- Staging dir sits above the static partition in the table directory: +-- /// + +set hive.use.scratchdir.for.staging=false; +set hive.exec.stagingdir=.hive-staging; +insert into table mydb.target_test_tbl partition (created='20250101', day) +select a, '20250101' as day +from mydb.source_test_tbl; + +-- Verify data is correctly written to the final partition. +-- If staging were broken the partition would be empty or missing. 
+select * from mydb.target_test_tbl; +show partitions mydb.target_test_tbl; + +-- hive.use.scratchdir.for.staging=true +-- Staging uses ${hive.exec.scratchdir} entirely outside the table path: +-- /// +-- MoveTask then relocates data from scratchdir staging into . +-- If the scratchdir-based staging or MoveTask were broken, the data +-- would NOT appear in the final partition, making this test fail. + +set hive.use.scratchdir.for.staging=true; +set hive.exec.stagingdir=/tmp/hive-staging-dir/hive-staging; + +insert overwrite table mydb.target_test_tbl partition (created='20250101', day) +select a, '20250101' as day +from mydb.source_test_tbl; + +-- Verify data was correctly moved from scratchdir staging to the final partition. +select * from mydb.target_test_tbl; +show partitions mydb.target_test_tbl; + +-- File merge validation with hive.merge.tezfiles=true & hive.use.scratchdir.for.staging=true +set tez.grouping.split-count=2; +set hive.merge.tezfiles=true; +set hive.merge.smallfiles.avgsize=1000000; + +insert overwrite table mydb.target_test_tbl partition (created='20250101', day) +select a, '20250101' as day +from mydb.source_test_tbl; + +-- Verify data is still correct after merge +select * from mydb.target_test_tbl; +show partitions mydb.target_test_tbl; + +-- verify the file count. it should be merged into 1 final file by the merge task. 
+analyze table mydb.target_test_tbl partition(created='20250101', day='20250101') compute statistics noscan; +desc formatted mydb.target_test_tbl partition(created='20250101', day='20250101'); + +-- Cleanup +set hive.use.scratchdir.for.staging=false; +set hive.merge.tezfiles=false; + +drop table mydb.source_test_tbl; +drop table mydb.target_test_tbl; +drop database mydb; + diff --git a/ql/src/test/results/clientpositive/tez/hive_use_scratchdir_for_staging.q.out b/ql/src/test/results/clientpositive/tez/hive_use_scratchdir_for_staging.q.out new file mode 100644 index 000000000000..85bf603e7b70 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/hive_use_scratchdir_for_staging.q.out @@ -0,0 +1,211 @@ +PREHOOK: query: create database mydb +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:mydb +POSTHOOK: query: create database mydb +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:mydb +PREHOOK: query: create external table mydb.source_test_tbl (a string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:mydb +PREHOOK: Output: mydb@source_test_tbl +POSTHOOK: query: create external table mydb.source_test_tbl (a string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:mydb +POSTHOOK: Output: mydb@source_test_tbl +PREHOOK: query: create external table mydb.target_test_tbl (a string) partitioned by (created string, day string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:mydb +PREHOOK: Output: mydb@target_test_tbl +POSTHOOK: query: create external table mydb.target_test_tbl (a string) partitioned by (created string, day string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:mydb +POSTHOOK: Output: mydb@target_test_tbl +PREHOOK: query: insert into table mydb.source_test_tbl values ('a') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: mydb@source_test_tbl +POSTHOOK: query: insert into table mydb.source_test_tbl values ('a') 
+POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: mydb@source_test_tbl +POSTHOOK: Lineage: source_test_tbl.a SCRIPT [] +PREHOOK: query: insert into table mydb.source_test_tbl values ('b') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: mydb@source_test_tbl +POSTHOOK: query: insert into table mydb.source_test_tbl values ('b') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: mydb@source_test_tbl +POSTHOOK: Lineage: source_test_tbl.a SCRIPT [] +PREHOOK: query: insert into table mydb.target_test_tbl partition (created='20250101', day) +select a, '20250101' as day +from mydb.source_test_tbl +PREHOOK: type: QUERY +PREHOOK: Input: mydb@source_test_tbl +PREHOOK: Output: mydb@target_test_tbl@created=20250101 +POSTHOOK: query: insert into table mydb.target_test_tbl partition (created='20250101', day) +select a, '20250101' as day +from mydb.source_test_tbl +POSTHOOK: type: QUERY +POSTHOOK: Input: mydb@source_test_tbl +POSTHOOK: Output: mydb@target_test_tbl@created=20250101/day=20250101 +POSTHOOK: Lineage: target_test_tbl PARTITION(created=20250101,day=20250101).a SIMPLE [(source_test_tbl)source_test_tbl.FieldSchema(name:a, type:string, comment:null), ] +PREHOOK: query: select * from mydb.target_test_tbl +PREHOOK: type: QUERY +PREHOOK: Input: mydb@target_test_tbl +PREHOOK: Input: mydb@target_test_tbl@created=20250101/day=20250101 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from mydb.target_test_tbl +POSTHOOK: type: QUERY +POSTHOOK: Input: mydb@target_test_tbl +POSTHOOK: Input: mydb@target_test_tbl@created=20250101/day=20250101 +POSTHOOK: Output: hdfs://### HDFS PATH ### +a 20250101 20250101 +b 20250101 20250101 +PREHOOK: query: show partitions mydb.target_test_tbl +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: mydb@target_test_tbl +POSTHOOK: query: show partitions mydb.target_test_tbl +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: 
mydb@target_test_tbl +created=20250101/day=20250101 +PREHOOK: query: insert overwrite table mydb.target_test_tbl partition (created='20250101', day) +select a, '20250101' as day +from mydb.source_test_tbl +PREHOOK: type: QUERY +PREHOOK: Input: mydb@source_test_tbl +PREHOOK: Output: mydb@target_test_tbl@created=20250101 +POSTHOOK: query: insert overwrite table mydb.target_test_tbl partition (created='20250101', day) +select a, '20250101' as day +from mydb.source_test_tbl +POSTHOOK: type: QUERY +POSTHOOK: Input: mydb@source_test_tbl +POSTHOOK: Output: mydb@target_test_tbl@created=20250101/day=20250101 +POSTHOOK: Lineage: target_test_tbl PARTITION(created=20250101,day=20250101).a SIMPLE [(source_test_tbl)source_test_tbl.FieldSchema(name:a, type:string, comment:null), ] +PREHOOK: query: select * from mydb.target_test_tbl +PREHOOK: type: QUERY +PREHOOK: Input: mydb@target_test_tbl +PREHOOK: Input: mydb@target_test_tbl@created=20250101/day=20250101 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from mydb.target_test_tbl +POSTHOOK: type: QUERY +POSTHOOK: Input: mydb@target_test_tbl +POSTHOOK: Input: mydb@target_test_tbl@created=20250101/day=20250101 +POSTHOOK: Output: hdfs://### HDFS PATH ### +a 20250101 20250101 +b 20250101 20250101 +PREHOOK: query: show partitions mydb.target_test_tbl +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: mydb@target_test_tbl +POSTHOOK: query: show partitions mydb.target_test_tbl +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: mydb@target_test_tbl +created=20250101/day=20250101 +PREHOOK: query: insert overwrite table mydb.target_test_tbl partition (created='20250101', day) +select a, '20250101' as day +from mydb.source_test_tbl +PREHOOK: type: QUERY +PREHOOK: Input: mydb@source_test_tbl +PREHOOK: Output: mydb@target_test_tbl@created=20250101 +POSTHOOK: query: insert overwrite table mydb.target_test_tbl partition (created='20250101', day) +select a, '20250101' as day +from mydb.source_test_tbl +POSTHOOK: type: QUERY 
+POSTHOOK: Input: mydb@source_test_tbl +POSTHOOK: Output: mydb@target_test_tbl@created=20250101/day=20250101 +POSTHOOK: Lineage: target_test_tbl PARTITION(created=20250101,day=20250101).a SIMPLE [(source_test_tbl)source_test_tbl.FieldSchema(name:a, type:string, comment:null), ] +PREHOOK: query: select * from mydb.target_test_tbl +PREHOOK: type: QUERY +PREHOOK: Input: mydb@target_test_tbl +PREHOOK: Input: mydb@target_test_tbl@created=20250101/day=20250101 +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select * from mydb.target_test_tbl +POSTHOOK: type: QUERY +POSTHOOK: Input: mydb@target_test_tbl +POSTHOOK: Input: mydb@target_test_tbl@created=20250101/day=20250101 +POSTHOOK: Output: hdfs://### HDFS PATH ### +a 20250101 20250101 +b 20250101 20250101 +PREHOOK: query: show partitions mydb.target_test_tbl +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: mydb@target_test_tbl +POSTHOOK: query: show partitions mydb.target_test_tbl +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: mydb@target_test_tbl +created=20250101/day=20250101 +PREHOOK: query: analyze table mydb.target_test_tbl partition(created='20250101', day='20250101') compute statistics noscan +PREHOOK: type: QUERY +PREHOOK: Input: mydb@target_test_tbl +PREHOOK: Output: mydb@target_test_tbl +PREHOOK: Output: mydb@target_test_tbl@created=20250101/day=20250101 +POSTHOOK: query: analyze table mydb.target_test_tbl partition(created='20250101', day='20250101') compute statistics noscan +POSTHOOK: type: QUERY +POSTHOOK: Input: mydb@target_test_tbl +POSTHOOK: Output: mydb@target_test_tbl +POSTHOOK: Output: mydb@target_test_tbl@created=20250101/day=20250101 +PREHOOK: query: desc formatted mydb.target_test_tbl partition(created='20250101', day='20250101') +PREHOOK: type: DESCTABLE +PREHOOK: Input: mydb@target_test_tbl +POSTHOOK: query: desc formatted mydb.target_test_tbl partition(created='20250101', day='20250101') +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: mydb@target_test_tbl +# col_name data_type comment 
+a string + +# Partition Information +# col_name data_type comment +created string +day string + +# Detailed Partition Information +Partition Value: [20250101, 20250101] +Database: mydb +Table: target_test_tbl +#### A masked pattern was here #### +Partition Parameters: + COLUMN_STATS_ACCURATE #Masked# + numFiles 1 + numRows 2 + rawDataSize 178 + totalSize #Masked# +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: drop table mydb.source_test_tbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: mydb@source_test_tbl +PREHOOK: Output: database:mydb +PREHOOK: Output: mydb@source_test_tbl +POSTHOOK: query: drop table mydb.source_test_tbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: mydb@source_test_tbl +POSTHOOK: Output: database:mydb +POSTHOOK: Output: mydb@source_test_tbl +PREHOOK: query: drop table mydb.target_test_tbl +PREHOOK: type: DROPTABLE +PREHOOK: Input: mydb@target_test_tbl +PREHOOK: Output: database:mydb +PREHOOK: Output: mydb@target_test_tbl +POSTHOOK: query: drop table mydb.target_test_tbl +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: mydb@target_test_tbl +POSTHOOK: Output: database:mydb +POSTHOOK: Output: mydb@target_test_tbl +PREHOOK: query: drop database mydb +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:mydb +PREHOOK: Output: database:mydb +POSTHOOK: query: drop database mydb +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:mydb +POSTHOOK: Output: database:mydb From 360e0dc06e5d26d0f0193c428593a9a55fe513ff Mon Sep 17 00:00:00 2001 From: Mahesh Raju Somalaraju Date: Thu, 25 Jun 2026 12:15:42 +0530 Subject: [PATCH 2/2] sonarqube issues fix --- .../optimizer/TestGenMapRedUtilsStagingPath.java | 16 ++++++++-------- 1 file 
changed, 8 insertions(+), 8 deletions(-) diff --git a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsStagingPath.java b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsStagingPath.java index d8f6345c6b75..0aded7010e68 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsStagingPath.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsStagingPath.java @@ -86,7 +86,7 @@ private static HiveConf buildConf() { */ private static FileSinkDesc buildFileSinkDesc(Path destPath) { TableDesc tableDesc = new TableDesc(null, null, new Properties()); - FileSinkDesc fsd = new FileSinkDesc( + return new FileSinkDesc( destPath, tableDesc, false, @@ -105,7 +105,6 @@ private static FileSinkDesc buildFileSinkDesc(Path destPath) { false, AcidUtils.Operation.NOT_ACID, false); - return fsd; } private static FileSinkOperator buildFileSinkOperator(FileSinkDesc fsd) { @@ -223,8 +222,8 @@ public void testConfigSwitchChangesStagingLocation() { } /** - * The HIVE-26758 staging path change applies ONLY to: - * native table, no micro-managed (MM), no direct-insert, no full-ACID + * The HIVE-26758 staging path change applies ONLY to native, non-MM, non-direct-insert tables. + * Micro-managed (insert-only) tables must bypass the staging path selection entirely. */ @Test public void testNoStagingForMmTable() { @@ -257,7 +256,8 @@ public void testNoStagingForMmTable() { } /** - * Full-ACID tables with direct-insert must bypass the staging path selection; + * Full-ACID tables with direct-insert must bypass the staging path selection entirely. + * Neither Context staging method should be called and getDirName() must remain unchanged. */ @Test public void testNoStagingForDirectInsert() { @@ -288,9 +288,9 @@ DEST_PATH, new TableDesc(null, null, new Properties()), } /** - * With the default config hive.use.scratchdir.for.staging=false; + * With the default config hive.use.scratchdir.for.staging=false. 
* GenMapRedUtils#createMoveTask must produce the new HIVE-26758 layout: - * / — staging directory appears before the static partition segment. + * / where the staging directory appears before the static partition segment. * Example (year=2001 is the static partition) * table_path/.hive-staging_job0/year=2001 */ @@ -332,7 +332,7 @@ public void testDynamicPartitionStagingLayoutWithDefaultConfig() { } /** - * With hive.use.scratchdir.for.staging=true, + * With hive.use.scratchdir.for.staging=true. * GenMapRedUtils#createMoveTask must place staging under the * scratch directory and still append the static partition after it: * //