resolved dependency on AKV backed secrets
hwang-db committed Nov 10, 2021
1 parent ecaca88 commit 53231b2
Showing 6 changed files with 120 additions and 11 deletions.
27 changes: 20 additions & 7 deletions adb-external-hive-metastore/akv.tf
@@ -15,39 +15,52 @@ resource "azurerm_key_vault_access_policy" "example" {
object_id = data.azurerm_client_config.current.object_id
# must use lowercase letters in permission
key_permissions = [
"get", "list", "update", "create", "import", "delete", "recover", "backup", "restore"
"get", "list", "update", "create", "import", "delete", "recover", "backup", "restore", "purge"
]

secret_permissions = [
"get", "list", "delete", "recover", "backup", "restore", "set"
"get", "list", "delete", "recover", "backup", "restore", "set", "purge"
]
}


resource "databricks_secret_scope" "kv" {
# AKV-backed secret scope
name = "hive"

keyvault_metadata {
resource_id = azurerm_key_vault.akv1.id
dns_name = azurerm_key_vault.akv1.vault_uri
}
depends_on = [
azurerm_key_vault.akv1,
]
}

resource "azurerm_key_vault_secret" "hiveurl" {
name = "HIVE-URL"
value = "test1"
value = local.db_url
key_vault_id = azurerm_key_vault.akv1.id
depends_on = [
azurerm_key_vault.akv1,
azurerm_key_vault_access_policy.example, # need dependency on policy or else destroy can't clean up
]
}

resource "azurerm_key_vault_secret" "hiveuser" {
name = "HIVE-USER"
value = "test2"
value = local.db_username_local # use local group instead of var
key_vault_id = azurerm_key_vault.akv1.id
depends_on = [
azurerm_key_vault.akv1,
azurerm_key_vault_access_policy.example,
]
}

resource "azurerm_key_vault_secret" "hivepwd" {
name = "HIVE-PASSWORD"
value = "test3"
value = local.db_password_local
key_vault_id = azurerm_key_vault.akv1.id
depends_on = [
azurerm_key_vault.akv1,
azurerm_key_vault_access_policy.example,
]
}
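
For reference, clusters consume these Key Vault entries through the AKV-backed "hive" scope using Databricks secret references; a minimal sketch of the pattern, taken from the cluster definition in workspace.tf below, looks like this:

# Sketch: fragment of a databricks_cluster resource; the {{secrets/<scope>/<key>}} references
# are resolved by Databricks at cluster launch rather than hard-coding the values
spark_env_vars = {
  "HIVE_USER"     = "{{secrets/hive/HIVE-USER}}"
  "HIVE_PASSWORD" = "{{secrets/hive/HIVE-PASSWORD}}"
  "HIVE_URL"      = "{{secrets/hive/HIVE-URL}}"
}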
17 changes: 17 additions & 0 deletions adb-external-hive-metastore/cold_start_metastore.tf
@@ -0,0 +1,17 @@
# Automated job that initializes the database schema so it can serve as the external Hive metastore
data "databricks_current_user" "me" {
depends_on = [azurerm_databricks_workspace.this]
}

resource "databricks_notebook" "ddl" {
source = "./coldstart/metastore_coldstart.py" #local notebook
path = "${data.databricks_current_user.me.home}/coldstart" #remote notebook
}

resource "databricks_job" "metastoresetup" {
name = "Initialize external hive metastore"
existing_cluster_id = databricks_cluster.coldstart[0].id
notebook_task {
notebook_path = databricks_notebook.ddl.path
}
}
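
Because the job pins existing_cluster_id to databricks_cluster.coldstart[0], this file only plans cleanly when var.cold_start is true. An optional addition, not part of this commit, is an output that surfaces the job ID so the one-time schema initialization can be triggered after apply (for example with the Databricks CLI's jobs run-now command); a sketch:

# Sketch (assumed helper, not in this commit): expose the schema-init job ID after apply
output "metastore_setup_job_id" {
  value = databricks_job.metastoresetup.id
}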
65 changes: 65 additions & 0 deletions adb-external-hive-metastore/coldstart/metastore_coldstart.py
@@ -0,0 +1,65 @@
# Databricks notebook source
# MAGIC %md
# MAGIC
# MAGIC - Hive JDBC URL
# MAGIC `jdbc:sqlserver://bk-sqlserver.database.windows.net:1433;database=hive3;user=abc@bk-sqlserver;password={your_password_here};encrypt=true;trustServerCertificate=false;hostNameInCertificate=*.database.windows.net;loginTimeout=30;`

# COMMAND ----------

# DBTITLE 1,Download hive and hadoop tools lib
# MAGIC %sh
# MAGIC wget https://archive.apache.org/dist/hadoop/common/hadoop-2.7.2/hadoop-2.7.2.tar.gz
# MAGIC tar -xvzf hadoop-2.7.2.tar.gz --directory /opt
# MAGIC wget https://archive.apache.org/dist/hive/hive-3.1.0/apache-hive-3.1.0-bin.tar.gz
# MAGIC tar -xvzf apache-hive-3.1.0-bin.tar.gz --directory /opt
# MAGIC ## https://www.microsoft.com/en-us/download/details.aspx?id=11774
# MAGIC wget https://download.microsoft.com/download/0/2/A/02AAE597-3865-456C-AE7F-613F99F850A8/sqljdbc_6.0.8112.200_enu.tar.gz
# MAGIC tar -xvzf sqljdbc_6.0.8112.200_enu.tar.gz --directory /opt
# MAGIC cp /opt/sqljdbc_6.0/enu/jre8/sqljdbc42.jar /opt/apache-hive-3.1.0-bin/lib/sqljdbc42.jar

# COMMAND ----------

# MAGIC %sh
# MAGIC mkdir -p /dbfs/tmp/hive/3-1-0/lib/
# MAGIC cp -r /opt/apache-hive-3.1.0-bin/lib/. /dbfs/tmp/hive/3-1-0/lib/
# MAGIC cp -r /opt/hadoop-2.7.2/share/hadoop/common/lib/. /dbfs/tmp/hive/3-1-0/lib/

# COMMAND ----------

# DBTITLE 1,Test Hive Environment Variables - set these vars on the cluster UI --> Advanced options tab
# MAGIC %sh
# MAGIC echo $HIVE_URL
# MAGIC echo $HIVE_USER
# MAGIC echo $HIVE_PASSWORD

# COMMAND ----------

# DBTITLE 1,Initialize hive schema
# MAGIC %sh
# MAGIC export HIVE_HOME="/opt/apache-hive-3.1.0-bin"
# MAGIC export HADOOP_HOME="/opt/hadoop-2.7.2"
# MAGIC export SQLDB_DRIVER="com.microsoft.sqlserver.jdbc.SQLServerDriver"
# MAGIC
# MAGIC # run the following line once to initialize the schema
# MAGIC /opt/apache-hive-3.1.0-bin/bin/schematool -dbType mssql -url $HIVE_URL -passWord $HIVE_PASSWORD -userName $HIVE_USER -driver $SQLDB_DRIVER -initSchema

# COMMAND ----------

# MAGIC %sh
# MAGIC # validate that schema is initialized
# MAGIC /opt/apache-hive-3.1.0-bin/bin/schematool -dbType mssql -url $HIVE_URL -passWord $HIVE_PASSWORD -userName $HIVE_USER -driver $SQLDB_DRIVER -info

# COMMAND ----------

# DBTITLE 1,Validate hive is initialized
# MAGIC %sh
# MAGIC export HIVE_HOME="/opt/apache-hive-3.1.0-bin"
# MAGIC export HADOOP_HOME="/opt/hadoop-2.7.2"
# MAGIC export SQLDB_DRIVER="com.microsoft.sqlserver.jdbc.SQLServerDriver"
# MAGIC
# MAGIC /opt/apache-hive-3.1.0-bin/bin/schematool -dbType mssql -url $HIVE_URL -passWord $HIVE_PASSWORD -userName $HIVE_USER -driver $SQLDB_DRIVER -info

# COMMAND ----------

# MAGIC %sql
# MAGIC show databases;
3 changes: 3 additions & 0 deletions adb-external-hive-metastore/main.tf
@@ -40,7 +40,10 @@ locals {
sqlcidr = var.sqlvnetcidr
dbfsname = join("", [var.dbfs_prefix, "${random_string.naming.result}"]) // dbfs name must not have special chars

db_url = "jdbc:sqlserver://${azurerm_mssql_server.metastoreserver.name}.database.windows.net:1433;database=${azurerm_mssql_database.sqlmetastore.name};user=${var.db_username}@${azurerm_mssql_server.metastoreserver.name};password={${var.db_password}};encrypt=true;trustServerCertificate=false;hostNameInCertificate=*.database.windows.net;loginTimeout=30;"
// tags that are propagated down to all resources
db_username_local = var.db_username
db_password_local = var.db_password
tags = {
Environment = "Testing"
Owner = lookup(data.external.me.result, "name")
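
The composed local.db_url embeds var.db_password, so anything that prints it should be treated as secret. If you want to inspect the assembled JDBC URL during development, a hedged sketch (not part of this commit) would mark the output sensitive:

# Sketch (assumed debug helper): inspect the assembled JDBC URL without echoing it in plan output
output "hive_jdbc_url" {
  value     = local.db_url
  sensitive = true
}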
6 changes: 6 additions & 0 deletions adb-external-hive-metastore/variables.tf
@@ -39,3 +39,9 @@ variable "db_password" {
type = string
sensitive = true
}

variable "cold_start" {
description = "if true, will spin up a cluster to download hive jars to dbfs"
type = bool
default = true # set to true for development of logic, should be false by default
}
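
For illustration, a minimal terraform.tfvars sketch for this module; the variable names are the ones this diff confirms, the values are placeholders, and other module variables (for example dbfs_prefix and sqlvnetcidr) are omitted:

# Sketch: example terraform.tfvars with placeholder values
db_username = "hiveadmin"
db_password = "ChangeMe-Str0ngPassw0rd"
cold_start  = true # flip to false once the metastore schema has been initialized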
13 changes: 9 additions & 4 deletions adb-external-hive-metastore/workspace.tf
@@ -42,14 +42,15 @@ data "databricks_spark_version" "latest_lts" {
]
}

resource "databricks_cluster" "shared_autoscaling" {
cluster_name = "Shared Autoscaling"
resource "databricks_cluster" "coldstart" {
count = var.cold_start ? 1 : 0
cluster_name = "coldstart_cluster"
spark_version = data.databricks_spark_version.latest_lts.id
node_type_id = data.databricks_node_type.smallest.id
autotermination_minutes = 20
autoscale {
min_workers = 1
max_workers = 5
max_workers = 2
}

spark_conf = {
@@ -71,6 +71,10 @@
"HIVE_URL" = "{{secrets/hive/HIVE-URL}}",
}
depends_on = [
azurerm_databricks_workspace.this
azurerm_databricks_workspace.this,
databricks_secret_scope.kv, # need this to be able to access the secrets
azurerm_key_vault_secret.hiveuser,
azurerm_key_vault_secret.hivepwd,
azurerm_key_vault_secret.hiveurl
]
}
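
The spark_conf block itself is collapsed in this diff. For orientation only, a typical external Hive metastore configuration on Databricks (an assumption about intent, not the literal collapsed contents of this file) wires the secret-backed JDBC URL into the standard javax.jdo connection keys:

# Sketch (assumed shape, not the collapsed contents of this file): external Hive metastore spark_conf
spark_conf = {
  # HIVE-URL already embeds the SQL user and password (see local.db_url in main.tf)
  "spark.hadoop.javax.jdo.option.ConnectionURL"        = "{{secrets/hive/HIVE-URL}}"
  "spark.hadoop.javax.jdo.option.ConnectionDriverName" = "com.microsoft.sqlserver.jdbc.SQLServerDriver"
  "spark.sql.hive.metastore.version"                   = "3.1.0"
  # "maven" downloads matching client jars at launch; alternatively point this at the jars the
  # coldstart notebook stages under /dbfs/tmp/hive/3-1-0/lib (typically via an init script)
  "spark.sql.hive.metastore.jars"                      = "maven"
}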
