From e4fce0292133f8808c71b2db63f425c3bfdde47c Mon Sep 17 00:00:00 2001 From: kevxmsft <61517905+kevxmsft@users.noreply.github.com> Date: Wed, 22 Apr 2020 19:14:49 -0700 Subject: [PATCH 1/4] Fixes errors in export script and adds support for copying Views and Constraints --- hive/hive-export-import-metastore.md | 116 +++++++++++++++------------ 1 file changed, 65 insertions(+), 51 deletions(-) diff --git a/hive/hive-export-import-metastore.md b/hive/hive-export-import-metastore.md index e2a62f4..e1c7afe 100644 --- a/hive/hive-export-import-metastore.md +++ b/hive/hive-export-import-metastore.md @@ -23,67 +23,81 @@ ms.author: dkakadia #### Issue: -Need to export Hive metastore and import it on another HDInsight cluster. +Need to export Hive metastore and import it on another HDInsight cluster. + +* For migration of external metastore, follow steps to make a copy of the SQL Database in [apache-hive-migrate-workloads](https://docs.microsoft.com/en-us/azure/hdinsight/interactive-query/apache-hive-migrate-workloads.) If migrating from 3.6 to 4.0 cluster, follow steps to upgrade the schema. + +* If we plan to export from an internal metastore, or if we plan to import from a 4.0 to 3.6 cluster, then use the script described below to export/import metadata objects as HQL. + +* Import works only if destination cluster shares the same Storage Account as the source cluster. #### Resolution Steps: +If migrating from an external metastore, follow steps in [apache-hive-migrate-workloads](https://docs.microsoft.com/en-us/azure/hdinsight/interactive-query/apache-hive-migrate-workloads.). Otherwise, follow steps below. + 1) Connect to the HDInsight cluster with a Secure Shell (SSH) client (check Further Reading section below). 
-2) Run the following command on the HDInsight cluster where from you want to export the metastore: - -~~~ -for d in `beeline -u "jdbc:hive2://localhost:10001/;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show databases;"`; -do - echo "Scanning Database: $d" - echo "create database if not exists $d; use $d;" >> alltables.hql; - for t in `beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show tables;"`; - do - echo "Copying Table: $t" - ddl=`beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table $t;"`; - - echo "$ddl;" >> alltables.hql; - lowerddl=$(echo $ddl | awk '{print tolower($0)}') - if [[ $lowerddl == *"'transactional'='true'"* ]]; then - if [[ $lowerddl == *"partitioned by"* ]]; then - # partitioned - raw_cols=$(beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table $t;" | tr '\n' ' ' | grep -io "CREATE TABLE .*" | cut -d"(" -f2- | cut -f1 -d")" | sed 's/`//g'); - ptn_cols=$(beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table $t;" | tr '\n' ' ' | grep -io "PARTITIONED BY .*" | cut -f1 -d")" | cut -d"(" -f2- | sed 's/`//g'); - final_cols=$(echo "(" $raw_cols "," $ptn_cols ")") - - beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "create external table ext_$t $final_cols TBLPROPERTIES ('transactional'='false');"; - beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "insert into ext_$t select * from $t;"; - staging_ddl=`beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table ext_$t;"`; - 
dir=$(echo $staging_ddl | grep -io " LOCATION .*" | grep -m1 -o "'.*" | sed "s/'[^-]*//2g" | cut -c2-); - - parsed_ptn_cols=$(echo $ptn_cols| sed 's/ [a-z]*,/,/g' | sed '$s/\w*$//g'); - echo "create table flattened_$t $final_cols;" >> alltables.hql; - echo "load data inpath '$dir' into table flattened_$t;" >> alltables.hql; - echo "insert into $t partition($parsed_ptn_cols) select * from flattened_$t;" >> alltables.hql; - echo "drop table flattened_$t;" >> alltables.hql; - beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "drop table ext_$t"; - else - # not partitioned - beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "create external table ext_$t like $t TBLPROPERTIES ('transactional'='false');"; - staging_ddl=`beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "show create table ext_$t;"`; - dir=$(echo $staging_ddl | grep -io " LOCATION .*" | grep -m1 -o "'.*" | sed "s/'[^-]*//2g" | cut -c2-); - - beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "insert into ext_$t select * from $t;"; - echo "load data inpath '$dir' into table $t;" >> alltables.hql; - beeline -u "jdbc:hive2://localhost:10001/$d;transportMode=http" --showHeader=false --silent=true --outputformat=tsv2 -e "drop table ext_$t"; - fi - fi - echo "$ddl" | grep -q "PARTITIONED\s*BY" && echo "MSCK REPAIR TABLE $t;" >> alltables.hql; - done; -done -~~~ +1) Download the export script to the cluster: + + ```bash + wget "https://hdiconfigactions.blob.core.windows.net/hivemetastoreschemaupgrade/$SCRIPT" + chmod 755 "$SCRIPT" + ``` + + where `SCRIPT="exporthive_hdi_3_6.sh"` for HDInsight 3.6 or `SCRIPT="exporthive_hdi_4_0.sh"` for HDInsight 4.0. 
+ +1) Run the script from the cluster: + +* For HDInsight 4.0, follow these additional steps: + + a. Additionally, download a helper script used by `exporthive_hdi_4_0.sh`. + + ```bash + wget "https://hdiconfigactions.blob.core.windows.net/hivemetastoreschemaupgrade/constraints2altertable.py" + chmod 755 constraints2altertable.py + ``` + + b. set hive.security.authorization.sqlstd.confwhitelist.append=hive.ddl.output.format in Custom hive-site via Ambari. + +* For a non-ESP cluster, simply execute the script. +* For an ESP cluster, kinit with user with full Hive permissions, and then execute the script with modified beeline arguments: + + ```bash + USER="USER" # replace USER + DOMAIN="DOMAIN" # replace DOMAIN + DOMAIN_UPPER=$(printf "%s" "$DOMAIN" | awk '{ print toupper($0) }') + kinit "$USER@$DOMAIN_UPPER" + ``` + + ```bash + hn0=$(grep hn0- /etc/hosts | xargs | cut -d' ' -f4) + BEE_CMD="beeline -u 'jdbc:hive2://$hn0:10001/default;principal=hive/_HOST@$DOMAIN_UPPER;auth-kerberos;transportMode=http' -n "$USER@$DOMAIN" --showHeader=false --silent=true --outputformat=tsv2 -e" + ./exporthive_hdi_3_6.sh "$BEE_CMD" # replace script with exporthive_hdi_4_0.sh for 4.0 + ``` This will generate a file named `alltables.hql`. 
3) Copy the file `alltables.hql` to the new HDInsight cluster and run the following command: -~~~ -beeline -u "jdbc:hive2://localhost:10001/;transportMode=http" -f alltables.hql -~~~ +* For non-ESP: + + ```bash + beeline -u "jdbc:hive2://localhost:10001/;transportMode=http" -f alltables.hql + ``` + +* for ESP: + + ```bash + USER="USER" # replace USER + DOMAIN="DOMAIN" # replace DOMAIN + DOMAIN_UPPER=$(printf "%s" "$DOMAIN" | awk '{ print toupper($0) }') + kinit "$USER@$DOMAIN_UPPER" + ``` + + ```bash + hn0=$(grep hn0- /etc/hosts | xargs | cut -d' ' -f4) + beeline -u "jdbc:hive2://$hn0:10001/default;principal=hive/_HOST@$DOMAIN_UPPER;auth-kerberos;transportMode=http" -n "$USER@$DOMAIN" -f alltables.hql + ``` Note: This assumes that data paths on new cluster are same as on old. If not, you can manually edit the generated `alltables.hql` file to reflect any changes. *For ACID tables, a new copy of the data will be created* From ef45b33893a21b9c0dfc14066446677d635d8d80 Mon Sep 17 00:00:00 2001 From: kevxmsft <61517905+kevxmsft@users.noreply.github.com> Date: Wed, 29 Apr 2020 15:49:51 -0700 Subject: [PATCH 2/4] Fixes link to other doc and adds notes --- hive/hive-export-import-metastore.md | 34 ++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/hive/hive-export-import-metastore.md b/hive/hive-export-import-metastore.md index e1c7afe..3af02a9 100644 --- a/hive/hive-export-import-metastore.md +++ b/hive/hive-export-import-metastore.md @@ -25,15 +25,31 @@ ms.author: dkakadia Need to export Hive metastore and import it on another HDInsight cluster. -* For migration of external metastore, follow steps to make a copy of the SQL Database in [apache-hive-migrate-workloads](https://docs.microsoft.com/en-us/azure/hdinsight/interactive-query/apache-hive-migrate-workloads.) If migrating from 3.6 to 4.0 cluster, follow steps to upgrade the schema. 
- -* If we plan to export from an internal metastore, or if we plan to import from a 4.0 to 3.6 cluster, then use the script described below to export/import metadata objects as HQL. - -* Import works only if destination cluster shares the same Storage Account as the source cluster. - -#### Resolution Steps: - -If migrating from an external metastore, follow steps in [apache-hive-migrate-workloads](https://docs.microsoft.com/en-us/azure/hdinsight/interactive-query/apache-hive-migrate-workloads.). Otherwise, follow steps below. +* See [apache-hive-migrate-workloads](https://docs.microsoft.com/en-us/azure/hdinsight/interactive-query/apache-hive-migrate-workloads) for migration of an external metastore, or for upgrading Hive from HDInsight 3.6 to 4.0. + +* This article gives instructions on using a script to export/import contents of an internal Hive metastore. + +* Import works only if the destination cluster shares the same Storage Account as the source cluster. + +> [!NOTE] +> +> * The script supports copying of only tables and partitions. For HDInsight 4.0, it additionally covers constraints, views, and materialized views. Other metadata objects, like UDFs, must be copied manually. +> +> * All managed tables will become transactional in output cluster. Optionally, keep the table non-transactional by exporting the data to an external table with the property 'external.table.purge'='true'. 
For example, +> +> ```SQL +> create table tablename_backup like tablename; +> insert overwrite table tablename_backup select * from tablename; +> create external table tablename_tmp like tablename; +> insert overwrite table tablename_tmp select * from tablename; +> alter table tablename_tmp set tblproperties('external.table.purge'='true'); +> drop table tablename; +> alter table tablename_tmp rename to tablename; +> + +#### Resolution Steps: + +If migrating from an external metastore, follow steps in [apache-hive-migrate-workloads](https://docs.microsoft.com/en-us/azure/hdinsight/interactive-query/apache-hive-migrate-workloads). Otherwise, follow steps below. 1) Connect to the HDInsight cluster with a Secure Shell (SSH) client (check Further Reading section below). From 9a45726aca3f54a0fceb0d80b0f8c6c2ef95e174 Mon Sep 17 00:00:00 2001 From: kevxmsft <61517905+kevxmsft@users.noreply.github.com> Date: Mon, 8 Jun 2020 18:56:18 -0700 Subject: [PATCH 3/4] Update hive-export-import-metastore.md combines notes, fixes indent and punctuation, adds some clarification --- hive/hive-export-import-metastore.md | 97 +++++++++++++--------------- 1 file changed, 46 insertions(+), 51 deletions(-) diff --git a/hive/hive-export-import-metastore.md b/hive/hive-export-import-metastore.md index 3af02a9..1ac4546 100644 --- a/hive/hive-export-import-metastore.md +++ b/hive/hive-export-import-metastore.md @@ -27,15 +27,15 @@ Need to export Hive metastore and import it on another HDInsight cluster. * See [apache-hive-migrate-workloads](https://docs.microsoft.com/en-us/azure/hdinsight/interactive-query/apache-hive-migrate-workloads) for migration of an external metastore, or for upgrading Hive from HDInsight 3.6 to 4.0. -* This article gives instructions on using a script to export/import contents of an internal Hive metastore. +* This current article gives instructions on using a script to export/import contents of an internal Hive metastore. 
* Import works only if the destination cluster shares the same Storage Account as the source cluster. > [!NOTE] > -> * The script supports copying of only tables and partitions. For HDInsight 4.0, it additionally covers constraints, views, and materialized views. Other metadata objects, like UDFs, must be copied manually. +> * The script supports copying of tables and partitions. For HDInsight 4.0, it additionally covers constraints, views, and materialized views. Other metadata objects, like UDFs, must be copied manually. > -> * All managed tables will become transactional in output cluster. Optionally, keep the table non-transactional by exporting the data to an external table with the property 'external.table.purge'='true'. For example, +> * All managed tables will become transactional if the output HDInsight version is 4.0. Optionally, make the table non-transactional by exporting the data to an external table with the property 'external.table.purge'='true'. For example, > > ```SQL > create table tablename_backup like tablename; @@ -46,14 +46,18 @@ Need to export Hive metastore and import it on another HDInsight cluster. > drop table tablename; > alter table tablename_tmp rename to tablename; > +> * This procedure preserves non-ACID table locations. You can manually edit the DDL in +`alltables.hql`, generated from the script, to reflect any location changes. *For ACID tables, a new copy of the data will be created*. +> +> * The procedure assumes that after completion, the old cluster will **not** be used any longer. #### Resolution Steps: If migrating from an external metastore, follow steps in [apache-hive-migrate-workloads](https://docs.microsoft.com/en-us/azure/hdinsight/interactive-query/apache-hive-migrate-workloads). Otherwise, follow steps below. -1) Connect to the HDInsight cluster with a Secure Shell (SSH) client (check Further Reading section below). 
+1) [Connect to the HDInsight Cluster using SSH](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-linux-use-ssh-unix), to the primary headnode. -1) Download the export script to the cluster: +1) Download the export script from an ssh session to the cluster: ```bash wget "https://hdiconfigactions.blob.core.windows.net/hivemetastoreschemaupgrade/$SCRIPT" @@ -64,62 +68,53 @@ If migrating from an external metastore, follow steps in [apache-hive-migrate-wo 1) Run the script from the cluster: -* For HDInsight 4.0, follow these additional steps: + * For HDInsight 4.0, follow these additional steps, first: - a. Additionally, download a helper script used by `exporthive_hdi_4_0.sh`. + a. Download a helper script used by `exporthive_hdi_4_0.sh`. - ```bash - wget "https://hdiconfigactions.blob.core.windows.net/hivemetastoreschemaupgrade/constraints2altertable.py" - chmod 755 constraints2altertable.py - ``` + ```bash + wget "https://hdiconfigactions.blob.core.windows.net/hivemetastoreschemaupgrade/constraints2altertable.py" + chmod 755 constraints2altertable.py + ``` - b. set hive.security.authorization.sqlstd.confwhitelist.append=hive.ddl.output.format in Custom hive-site via Ambari. + b. set hive.security.authorization.sqlstd.confwhitelist.append=hive.ddl.output.format in Custom hive-site via Ambari. -* For a non-ESP cluster, simply execute the script. -* For an ESP cluster, kinit with user with full Hive permissions, and then execute the script with modified beeline arguments: + * For a non-ESP cluster, simply execute the script. 
+ * For an ESP cluster, kinit with user with full Hive permissions, and then execute the script with modified beeline arguments: - ```bash - USER="USER" # replace USER - DOMAIN="DOMAIN" # replace DOMAIN - DOMAIN_UPPER=$(printf "%s" "$DOMAIN" | awk '{ print toupper($0) }') - kinit "$USER@$DOMAIN_UPPER" - ``` + ```bash + USER="USER" # replace USER + DOMAIN="DOMAIN" # replace DOMAIN + DOMAIN_UPPER=$(printf "%s" "$DOMAIN" | awk '{ print toupper($0) }') + kinit "$USER@$DOMAIN_UPPER" + ``` - ```bash - hn0=$(grep hn0- /etc/hosts | xargs | cut -d' ' -f4) - BEE_CMD="beeline -u 'jdbc:hive2://$hn0:10001/default;principal=hive/_HOST@$DOMAIN_UPPER;auth-kerberos;transportMode=http' -n "$USER@$DOMAIN" --showHeader=false --silent=true --outputformat=tsv2 -e" - ./exporthive_hdi_3_6.sh "$BEE_CMD" # replace script with exporthive_hdi_4_0.sh for 4.0 - ``` + ```bash + hn0=$(grep hn0- /etc/hosts | xargs | cut -d' ' -f4) + BEE_CMD="beeline -u 'jdbc:hive2://$hn0:10001/default;principal=hive/_HOST@$DOMAIN_UPPER;auth-kerberos;transportMode=http' -n "$USER@$DOMAIN" --showHeader=false --silent=true --outputformat=tsv2 -e" + ./exporthive_hdi_3_6.sh "$BEE_CMD" # replace script with exporthive_hdi_4_0.sh for 4.0 + ``` -This will generate a file named `alltables.hql`. + This will generate a file named `alltables.hql`. 
-3) Copy the file `alltables.hql` to the new HDInsight cluster and run the following command: +1) Copy the file `alltables.hql` to the new HDInsight cluster and run the following command: -* For non-ESP: - - ```bash - beeline -u "jdbc:hive2://localhost:10001/;transportMode=http" -f alltables.hql - ``` - -* for ESP: - - ```bash - USER="USER" # replace USER - DOMAIN="DOMAIN" # replace DOMAIN - DOMAIN_UPPER=$(printf "%s" "$DOMAIN" | awk '{ print toupper($0) }') - kinit "$USER@$DOMAIN_UPPER" - ``` - - ```bash - hn0=$(grep hn0- /etc/hosts | xargs | cut -d' ' -f4) - beeline -u "jdbc:hive2://$hn0:10001/default;principal=hive/_HOST@$DOMAIN_UPPER;auth-kerberos;transportMode=http" -n "$USER@$DOMAIN" -f alltables.hql - ``` + * For non-ESP: -Note: This assumes that data paths on new cluster are same as on old. If not, you can manually edit the generated -`alltables.hql` file to reflect any changes. *For ACID tables, a new copy of the data will be created* + ```bash + beeline -u "jdbc:hive2://localhost:10001/;transportMode=http" -f alltables.hql + ``` -Note: This script also assumes that once the script is complete, the old cluster will **not** be used any longer + * for ESP: -#### Further Reading: + ```bash + USER="USER" # replace USER + DOMAIN="DOMAIN" # replace DOMAIN + DOMAIN_UPPER=$(printf "%s" "$DOMAIN" | awk '{ print toupper($0) }') + kinit "$USER@$DOMAIN_UPPER" + ``` -1) [Connect to HDInsight Cluster using SSH](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-linux-use-ssh-unix) + ```bash + hn0=$(grep hn0- /etc/hosts | xargs | cut -d' ' -f4) + beeline -u "jdbc:hive2://$hn0:10001/default;principal=hive/_HOST@$DOMAIN_UPPER;auth-kerberos;transportMode=http" -n "$USER@$DOMAIN" -f alltables.hql + ``` From 70bada9654b6ce9e0c632734d5d932f976c3e28b Mon Sep 17 00:00:00 2001 From: kevxmsft <61517905+kevxmsft@users.noreply.github.com> Date: Tue, 9 Jun 2020 19:45:59 -0700 Subject: [PATCH 4/4] Update hive-export-import-metastore.md Addresses feedback from 
collaborators. Primary changes are 1. Make it read more like human and less like robot TSG style. 2. Reduce required steps. --- hive/hive-export-import-metastore.md | 76 +++++++++------------------- 1 file changed, 24 insertions(+), 52 deletions(-) diff --git a/hive/hive-export-import-metastore.md b/hive/hive-export-import-metastore.md index 1ac4546..fbd01d9 100644 --- a/hive/hive-export-import-metastore.md +++ b/hive/hive-export-import-metastore.md @@ -19,22 +19,14 @@ ms.author: dkakadia --- -### How do I export Hive metastore and import it on another HDInsight cluster? +### Export metadata from internal Hive metastore on HDInsight -#### Issue: +This article shows how to export Apache Hive and LLAP workloads from an HDInsight cluster with an internal Hive metastore and import them to an external metastore. This is useful for scaling up the SQL Database or for [migrating workloads from HDInsight 3.6 to HDInsight 4.0](https://docs.microsoft.com/en-us/azure/hdinsight/interactive-query/apache-hive-migrate-workloads). -Need to export Hive metastore and import it on another HDInsight cluster. - -* See [apache-hive-migrate-workloads](https://docs.microsoft.com/en-us/azure/hdinsight/interactive-query/apache-hive-migrate-workloads) for migration of an external metastore, or for upgrading Hive from HDInsight 3.6 to 4.0. - -* This current article gives instructions on using a script to export/import contents of an internal Hive metastore. - -* Import works only if the destination cluster shares the same Storage Account as the source cluster. +To export data from an external Hive metastore, we could copy the SQL Database and, optionally, upgrade the schema for HDInsight 4.0 compatibility. For an internal Hive metastore, however, restricted access to the SQL resource requires us to use Hive. This article provides a script that generates an HQL script to recreate Hive databases, tables, and partitions in another cluster. 
For HDInsight 4.0, the script also covers constraints, views, and materialized views. Other metadata objects, like UDFs, must be copied manually. > [!NOTE] > -> * The script supports copying of tables and partitions. For HDInsight 4.0, it additionally covers constraints, views, and materialized views. Other metadata objects, like UDFs, must be copied manually. -> > * All managed tables will become transactional if the output HDInsight version is 4.0. Optionally, make the table non-transactional by exporting the data to an external table with the property 'external.table.purge'='true'. For example, > > ```SQL @@ -47,57 +39,33 @@ Need to export Hive metastore and import it on another HDInsight cluster. > alter table tablename_tmp rename to tablename; > > * This procedure preserves non-ACID table locations. You can manually edit the DDL in -`alltables.hql`, generated from the script, to reflect any location changes. *For ACID tables, a new copy of the data will be created*. +`alltables.hql`, generated from the script, to reflect any location changes. +> +> Note: *For ACID tables, a new copy of the data will be created*. > > * The procedure assumes that after completion, the old cluster will **not** be used any longer. -#### Resolution Steps: +#### Prerequisites -If migrating from an external metastore, follow steps in [apache-hive-migrate-workloads](https://docs.microsoft.com/en-us/azure/hdinsight/interactive-query/apache-hive-migrate-workloads). Otherwise, follow steps below. +* If exporting from an HDInsight 4.0 cluster, set `hive.security.authorization.sqlstd.confwhitelist.append=hive.ddl.output.format` in Custom hive-site via Ambari and restart Hive. + +* Prepare a new Hadoop or Interactive Query HDInsight cluster, attached to an external Hive metastore and to the same Storage Account as the source cluster. The new HDInsight version must be 4.0 if the source version is 4.0. 
+ +#### Migrate from internal metastore 1) [Connect to the HDInsight Cluster using SSH](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-linux-use-ssh-unix), to the primary headnode. -1) Download the export script from an ssh session to the cluster: +1) As root user and from a new directory, download and run the script, which generates a file named `alltables.hql`: ```bash + sudo su + SCRIPT="exporthive.sh" wget "https://hdiconfigactions.blob.core.windows.net/hivemetastoreschemaupgrade/$SCRIPT" chmod 755 "$SCRIPT" + exec "./$SCRIPT" ``` - where `SCRIPT="exporthive_hdi_3_6.sh"` for HDInsight 3.6 or `SCRIPT="exporthive_hdi_4_0.sh"` for HDInsight 4.0. - -1) Run the script from the cluster: - - * For HDInsight 4.0, follow these additional steps, first: - - a. Download a helper script used by `exporthive_hdi_4_0.sh`. - - ```bash - wget "https://hdiconfigactions.blob.core.windows.net/hivemetastoreschemaupgrade/constraints2altertable.py" - chmod 755 constraints2altertable.py - ``` - - b. set hive.security.authorization.sqlstd.confwhitelist.append=hive.ddl.output.format in Custom hive-site via Ambari. - - * For a non-ESP cluster, simply execute the script. - * For an ESP cluster, kinit with user with full Hive permissions, and then execute the script with modified beeline arguments: - - ```bash - USER="USER" # replace USER - DOMAIN="DOMAIN" # replace DOMAIN - DOMAIN_UPPER=$(printf "%s" "$DOMAIN" | awk '{ print toupper($0) }') - kinit "$USER@$DOMAIN_UPPER" - ``` - - ```bash - hn0=$(grep hn0- /etc/hosts | xargs | cut -d' ' -f4) - BEE_CMD="beeline -u 'jdbc:hive2://$hn0:10001/default;principal=hive/_HOST@$DOMAIN_UPPER;auth-kerberos;transportMode=http' -n "$USER@$DOMAIN" --showHeader=false --silent=true --outputformat=tsv2 -e" - ./exporthive_hdi_3_6.sh "$BEE_CMD" # replace script with exporthive_hdi_4_0.sh for 4.0 - ``` - - This will generate a file named `alltables.hql`. 
- -1) Copy the file `alltables.hql` to the new HDInsight cluster and run the following command: +1) Copy the file `alltables.hql` to the new HDInsight cluster and from the new cluster, run the following command: * For non-ESP: @@ -112,9 +80,13 @@ If migrating from an external metastore, follow steps in [apache-hive-migrate-wo DOMAIN="DOMAIN" # replace DOMAIN DOMAIN_UPPER=$(printf "%s" "$DOMAIN" | awk '{ print toupper($0) }') kinit "$USER@$DOMAIN_UPPER" - ``` - - ```bash hn0=$(grep hn0- /etc/hosts | xargs | cut -d' ' -f4) beeline -u "jdbc:hive2://$hn0:10001/default;principal=hive/_HOST@$DOMAIN_UPPER;auth-kerberos;transportMode=http" -n "$USER@$DOMAIN" -f alltables.hql ``` + +#### Further Reading + +1) [Connect to HDInsight using SSH](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-linux-use-ssh-unix) +1) [Migrate workloads from HDInsight 3.6 to 4.0](https://docs.microsoft.com/en-us/azure/hdinsight/interactive-query/apache-hive-migrate-workloads) +1) [Use external metastore with HDInsight](https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-use-external-metadata-stores) +1) [Connect to Beeline on HDInsight](https://docs.microsoft.com/en-us/azure/hdinsight/hadoop/connect-install-beeline)