diff --git a/adb-external-hive-metastore/README.md b/adb-external-hive-metastore/README.md
index a1ea8f3..560991f 100644
--- a/adb-external-hive-metastore/README.md
+++ b/adb-external-hive-metastore/README.md
@@ -3,4 +3,12 @@ This template creates:
 
 1. SQL Server
 2. SQL database
-3. ADB workspace
\ No newline at end of file
+3. ADB workspace
+
+Overall Architecture:
+![Overall architecture](../charts/adb-external-hive-metastore.png?raw=true)
+
+Prepare the environment variables: Terraform automatically reads environment variables named in the format `TF_VAR_xxxxx`.
+In your local environment, run:
+`export TF_VAR_db_username=yoursqlserveradminuser`
+and `export TF_VAR_db_password=yoursqlserveradminpassword`
diff --git a/adb-external-hive-metastore/main.tf b/adb-external-hive-metastore/main.tf
index 96451ef..c463480 100644
--- a/adb-external-hive-metastore/main.tf
+++ b/adb-external-hive-metastore/main.tf
@@ -33,6 +33,7 @@ locals {
   prefix   = join("-", [var.workspace_prefix, "${random_string.naming.result}"])
   location = var.rglocation
   cidr     = var.spokecidr
+  sqlcidr  = var.sqlvnetcidr
   dbfsname = join("", [var.dbfs_prefix, "${random_string.naming.result}"]) // dbfs name must not have special chars
 
   // tags that are propagated down to all resources
diff --git a/adb-external-hive-metastore/sqlserver.tf b/adb-external-hive-metastore/sqlserver.tf
new file mode 100644
index 0000000..69d990b
--- /dev/null
+++ b/adb-external-hive-metastore/sqlserver.tf
@@ -0,0 +1,88 @@
+// Storage account that receives the SQL server's extended auditing logs.
+resource "azurerm_storage_account" "sqlserversa" {
+  name                     = "${random_string.naming.result}sqlserversa"
+  resource_group_name      = azurerm_resource_group.this.name
+  location                 = azurerm_resource_group.this.location
+  account_tier             = "Standard"
+  account_replication_type = "LRS" // LRS 3 copies of data in the region
+  min_tls_version          = "TLS1_2"
+  tags                     = local.tags
+}
+
+// Logical SQL server that hosts the external Hive metastore database.
+resource "azurerm_mssql_server" "metastoreserver" {
+  name                          = "${random_string.naming.result}mssqlserver"
+  resource_group_name           = azurerm_resource_group.this.name
+  location                      = azurerm_resource_group.this.location
+  version                       = "12.0"
+  administrator_login           = var.db_username // sensitive data stored as env variables locally
+  administrator_login_password  = var.db_password
+  minimum_tls_version           = "1.2"
+  public_network_access_enabled = true // consider setting to false for private-endpoint-only access; the service-endpoint vnet rule below would then no longer apply
+  tags                          = local.tags
+}
+
+// Database used as the external Hive metastore.
+resource "azurerm_mssql_database" "sqlmetastore" {
+  name           = "${random_string.naming.result}metastore"
+  server_id      = azurerm_mssql_server.metastoreserver.id
+  collation      = "SQL_Latin1_General_CP1_CI_AS"
+  license_type   = "LicenseIncluded"
+  read_scale     = true
+  max_size_gb    = 4
+  sku_name       = "BC_Gen5_2"
+  zone_redundant = true
+  tags           = local.tags
+}
+
+// Server-level extended auditing, written to the sqlserversa storage account.
+resource "azurerm_mssql_server_extended_auditing_policy" "mssqlpolicy" {
+  server_id                               = azurerm_mssql_server.metastoreserver.id
+  storage_endpoint                        = azurerm_storage_account.sqlserversa.primary_blob_endpoint
+  storage_account_access_key              = azurerm_storage_account.sqlserversa.primary_access_key
+  storage_account_access_key_is_secondary = false
+  retention_in_days                       = 6
+}
+
+resource "azurerm_mssql_virtual_network_rule" "sqlservervnetrule" {
+  // associate sql server to a subnet
+  name      = "sql-server-vnet-rule"
+  server_id = azurerm_mssql_server.metastoreserver.id
+  subnet_id = azurerm_subnet.sqlsubnet.id
+}
+
+// add private endpoint connection to sql server / metastore
+resource "azurerm_private_endpoint" "sqlserverpe" {
+  name                = "sqlserverpvtendpoint"
+  location            = azurerm_resource_group.this.location
+  resource_group_name = azurerm_resource_group.this.name
+  subnet_id           = azurerm_subnet.plsubnet.id //private link subnet, in databricks vnet
+  tags                = local.tags
+
+  private_service_connection {
+    name                           = "ple-${var.workspace_prefix}-metastore"
+    private_connection_resource_id = azurerm_mssql_server.metastoreserver.id
+    is_manual_connection           = false
+    subresource_names              = ["sqlServer"]
+  }
+
+  private_dns_zone_group {
+    name                 = "private-dns-zone-metastore"
+    private_dns_zone_ids = [azurerm_private_dns_zone.dnsmetastore.id]
+  }
+}
+
+// Private DNS zone referenced by the private endpoint's DNS zone group.
+resource "azurerm_private_dns_zone" "dnsmetastore" {
+  name                = "privatelink.database.windows.net"
+  resource_group_name = azurerm_resource_group.this.name
+  tags                = local.tags
+}
+
+resource "azurerm_private_dns_zone_virtual_network_link" "metastorednszonevnetlink" {
+  name                  = "metastorednsvnetconnection"
+  resource_group_name   = azurerm_resource_group.this.name
+  private_dns_zone_name = azurerm_private_dns_zone.dnsmetastore.name
+  virtual_network_id    = azurerm_virtual_network.this.id // connect to databricks vnet
+  tags                  = local.tags
+}
diff --git a/adb-external-hive-metastore/variables.tf b/adb-external-hive-metastore/variables.tf
index e69de29..150af91 100644
--- a/adb-external-hive-metastore/variables.tf
+++ b/adb-external-hive-metastore/variables.tf
@@ -0,0 +1,47 @@
+variable "spokecidr" {
+  description = "CIDR range of the spoke virtual network (exposed as local.cidr)"
+  type        = string
+  default     = "10.179.0.0/20"
+}
+
+variable "sqlvnetcidr" {
+  description = "CIDR range of the SQL server virtual network (exposed as local.sqlcidr)"
+  type        = string
+  default     = "10.178.0.0/20"
+}
+
+variable "no_public_ip" {
+  description = "Value passed to the Databricks workspace custom_parameters.no_public_ip"
+  type        = bool
+  default     = true
+}
+
+variable "rglocation" {
+  description = "Azure region used for the deployment (exposed as local.location)"
+  type        = string
+  default     = "southeastasia"
+}
+
+variable "dbfs_prefix" {
+  description = "Prefix of the DBFS name; must not contain special characters"
+  type        = string
+  default     = "dbfs"
+}
+
+variable "workspace_prefix" {
+  description = "Prefix used to build resource names (see local.prefix)"
+  type        = string
+  default     = "adb"
+}
+
+variable "db_username" {
+  description = "Database administrator username"
+  type        = string
+  sensitive   = true
+}
+
+variable "db_password" {
+  description = "Database administrator password"
+  type        = string
+  sensitive   = true
+}
diff --git a/adb-external-hive-metastore/versions.tf b/adb-external-hive-metastore/versions.tf
index 1be66df..c16e21a 100644
--- a/adb-external-hive-metastore/versions.tf
+++ b/adb-external-hive-metastore/versions.tf
@@ -5,9 +5,9 @@ terraform {
       source  = "databrickslabs/databricks"
       version = "0.3.10"
     }
-    
+
     azurerm = {
-      source = "hashicorp/azurerm"
+      source  = "hashicorp/azurerm"
       version = "=2.83.0"
     }
   }
diff --git a/adb-external-hive-metastore/vnet.tf b/adb-external-hive-metastore/vnet.tf
index eff880e..081a034 100644
--- a/adb-external-hive-metastore/vnet.tf
+++ b/adb-external-hive-metastore/vnet.tf
@@ -13,33 +13,6 @@ resource "azurerm_network_security_group" "this" {
   tags = local.tags
 }
 
-resource "azurerm_network_security_rule" "aad" {
-  name                        = "AllowAAD"
-  priority                    = 200
-  direction                   = "Outbound"
-  access                      = "Allow"
-  protocol                    = "Tcp"
-  source_port_range           = "*"
-  destination_port_range      = "443"
-  source_address_prefix       = "VirtualNetwork"
-  destination_address_prefix  = "AzureActiveDirectory"
-  resource_group_name         = azurerm_resource_group.this.name
-  network_security_group_name = azurerm_network_security_group.this.name
-}
-
-resource "azurerm_network_security_rule" "azfrontdoor" {
-  name                        = "AllowAzureFrontDoor"
-  priority                    = 201
-  direction                   = "Outbound"
-  access                      = "Allow"
-  protocol                    = "Tcp"
-  source_port_range           = "*"
-  destination_port_range      = "443"
-  source_address_prefix       = "VirtualNetwork"
-  destination_address_prefix  = "AzureFrontDoor.Frontend"
-  resource_group_name         = azurerm_resource_group.this.name
-  network_security_group_name = azurerm_network_security_group.this.name
-}
 resource "azurerm_subnet" "public" {
   name                = "${local.prefix}-public"
   resource_group_name = azurerm_resource_group.this.name
@@ -105,33 +78,18 @@ resource "azurerm_subnet" "plsubnet" {
 }
 
 
-resource "azurerm_virtual_network" "hubvnet" {
-  name                = "${local.prefix}-hub-vnet"
+resource "azurerm_virtual_network" "sqlvnet" {
+  name                = "${local.prefix}-sql-vnet"
   location            = azurerm_resource_group.this.location
   resource_group_name = azurerm_resource_group.this.name
-  address_space       = [var.hubcidr]
+  address_space       = [local.sqlcidr]
   tags                = local.tags
 }
 
-resource "azurerm_subnet" "hubfw" {
-  //name must be fixed as AzureFirewallSubnet
-  name                 = "AzureFirewallSubnet"
+resource "azurerm_subnet" "sqlsubnet" {
+  name                 = "sql-server-subnet"
   resource_group_name  = azurerm_resource_group.this.name
-  virtual_network_name = azurerm_virtual_network.hubvnet.name
-  address_prefixes     = [cidrsubnet(var.hubcidr, 3, 0)]
-}
-
-
-resource "azurerm_virtual_network_peering" "hubvnet" {
-  name                      = "peerhubtospoke"
-  resource_group_name       = azurerm_resource_group.this.name
-  virtual_network_name      = azurerm_virtual_network.hubvnet.name
-  remote_virtual_network_id = azurerm_virtual_network.this.id
-}
-
-resource "azurerm_virtual_network_peering" "spokevnet" {
-  name                      = "peerspoketohub"
-  resource_group_name       = azurerm_resource_group.this.name
-  virtual_network_name      = azurerm_virtual_network.this.name
-  remote_virtual_network_id = azurerm_virtual_network.hubvnet.id
+  virtual_network_name = azurerm_virtual_network.sqlvnet.name
+  address_prefixes     = [cidrsubnet(local.sqlcidr, 3, 2)]
+  service_endpoints    = ["Microsoft.Sql"]
 }
diff --git a/adb-external-hive-metastore/workspace.tf b/adb-external-hive-metastore/workspace.tf
index 8631cfe..88f1abd 100644
--- a/adb-external-hive-metastore/workspace.tf
+++ b/adb-external-hive-metastore/workspace.tf
@@ -1,10 +1,10 @@
 resource "azurerm_databricks_workspace" "this" {
-  name                              = "${local.prefix}-workspace"
-  resource_group_name               = azurerm_resource_group.this.name
-  location                          = azurerm_resource_group.this.location
-  sku                               = "premium"
-  tags                              = local.tags
-  customer_managed_key_enabled      = true
+  name                         = "${local.prefix}-workspace"
+  resource_group_name          = azurerm_resource_group.this.name
+  location                     = azurerm_resource_group.this.location
+  sku                          = "premium"
+  tags                         = local.tags
+  customer_managed_key_enabled = true
   //infrastructure_encryption_enabled = true
   custom_parameters {
     no_public_ip = var.no_public_ip