feat: merge aws and azure databricks runtime modules
MyroslavLevchyk authored and MyroslavLevchyk committed Jan 7, 2025
1 parent 2be3dfc commit 3cfacc1
Showing 11 changed files with 963 additions and 3 deletions.
239 changes: 236 additions & 3 deletions README.md

Large diffs are not rendered by default.

192 changes: 192 additions & 0 deletions cluster.tf
@@ -0,0 +1,192 @@
locals {
spark_conf_single_node = var.cloud_name == "azure" ? {
"spark.master" = "local[*]",
"spark.databricks.cluster.profile" = "singleNode"
} : {}

default_node_type_ids = {
azure_node_type_id = "Standard_D4ds_v5"
aws_node_type_id = "m5d.large"
# gcp_node_type_id = "gcp-default-node-type-id"
}
}

resource "databricks_cluster" "this" {
for_each = { for cluster in var.clusters : cluster.cluster_name => cluster }

cluster_name = each.value.cluster_name
spark_version = each.value.spark_version
node_type_id = coalesce(each.value.node_type_id, local.default_node_type_ids["${var.cloud_name}_node_type_id"])
autotermination_minutes = each.value.autotermination_minutes
data_security_mode = each.value.data_security_mode
custom_tags = var.cloud_name == "azure" && each.value.single_node_enable ? merge({ "ResourceClass" = "SingleNode" }, each.value.custom_tags) : each.value.custom_tags

# Azure conditional configuration for Spark Conf
spark_conf = var.cloud_name == "azure" ? merge(
each.value.single_node_enable == true ? local.spark_conf_single_node : {},
each.value.spark_conf
) : each.value.spark_conf

# Autoscaling block (always applied on AWS; on Azure only when single node is not enabled)
dynamic "autoscale" {
for_each = var.cloud_name == "aws" || !each.value.single_node_enable ? [1] : []
content {
min_workers = each.value.min_workers
max_workers = each.value.max_workers
}
}

# Specific attributes for AWS
dynamic "aws_attributes" {
for_each = var.cloud_name == "aws" ? [each.value] : []
content {
availability = each.value.aws_attributes.availability
zone_id = each.value.aws_attributes.zone_id
first_on_demand = each.value.aws_attributes.first_on_demand
spot_bid_price_percent = each.value.aws_attributes.spot_bid_price_percent
ebs_volume_count = each.value.aws_attributes.ebs_volume_count
ebs_volume_size = each.value.aws_attributes.ebs_volume_size
ebs_volume_type = each.value.aws_attributes.ebs_volume_type
}
}

# Specific attributes for Azure
dynamic "azure_attributes" {
for_each = var.cloud_name == "azure" ? [each.value] : []
content {
availability = each.value.azure_attributes.availability
first_on_demand = each.value.azure_attributes.first_on_demand
spot_bid_max_price = each.value.azure_attributes.spot_bid_max_price
}
}

# Cluster log delivery configuration (currently a DBFS destination on Azure only)
dynamic "cluster_log_conf" {
for_each = var.cloud_name == "azure" && each.value.cluster_log_conf_destination != null ? [each.value.cluster_log_conf_destination] : []
content {
dynamic "dbfs" {
for_each = var.cloud_name == "azure" ? [1] : []
content {
destination = cluster_log_conf.value
}
}

# TODO
# dynamic "s3" {
# for_each = var.cloud_name == "aws" ? [1] : []
# content {
# destination = "s3://acmecorp-main/cluster-logs"
# region = var.region
# }
# }
}
}

dynamic "init_scripts" {
for_each = each.value.init_scripts_workspace != null ? each.value.init_scripts_workspace : []
content {
workspace {
destination = init_scripts.value
}
}
}

dynamic "init_scripts" {
for_each = each.value.init_scripts_volumes != null ? each.value.init_scripts_volumes : []
content {
volumes {
destination = init_scripts.value
}
}
}

dynamic "init_scripts" {
for_each = var.cloud_name == "azure" && each.value.init_scripts_dbfs != null ? each.value.init_scripts_dbfs : []
content {
dbfs {
destination = init_scripts.value
}
}
}

dynamic "init_scripts" {
for_each = var.cloud_name == "azure" && each.value.init_scripts_abfss != null ? each.value.init_scripts_abfss : []
content {
abfss {
destination = init_scripts.value
}
}
}

# Library configurations
dynamic "library" {
for_each = each.value.pypi_library_repository != null ? each.value.pypi_library_repository : []
content {
pypi {
package = library.value
}
}
}

dynamic "library" {
for_each = each.value.maven_library_repository != null ? each.value.maven_library_repository : []
content {
maven {
coordinates = library.value.coordinates
exclusions = library.value.exclusions
}
}
}
}

resource "databricks_cluster_policy" "this" {
for_each = { for param in var.custom_cluster_policies : (param.name) => param.definition
if param.definition != null
}

name = each.key
definition = jsonencode(each.value)
}

resource "databricks_cluster_policy" "overrides" {
for_each = { for param in var.default_cluster_policies_override : (param.name) => param
if param.definition != null
}

policy_family_id = each.value.family_id
policy_family_definition_overrides = jsonencode(each.value.definition)
name = each.key
}

resource "databricks_permissions" "policy" {
for_each = { for param in var.custom_cluster_policies : param.name => param.can_use
if param.can_use != null
}

cluster_policy_id = databricks_cluster_policy.this[each.key].id

dynamic "access_control" {
for_each = each.value
content {
group_name = access_control.value
permission_level = "CAN_USE"
}
}
}

resource "databricks_permissions" "clusters" {
for_each = {
for v in var.clusters : (v.cluster_name) => v
if length(v.permissions) != 0
}

cluster_id = databricks_cluster.this[each.key].id

dynamic "access_control" {
for_each = each.value.permissions
content {
group_name = access_control.value.group_name
permission_level = access_control.value.permission_level
}
}
}
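
For reference, a caller could wire the inputs consumed by cluster.tf along the following lines. This is a minimal sketch, not part of the commit: the module source path, group names, runtime version, and policy definition are illustrative, and it assumes the module's variables.tf (not shown in this diff) declares the remaining cluster attributes as optional.

module "databricks_runtime" {
  source = "../modules/databricks-runtime" # hypothetical path

  cloud_name = "azure"

  clusters = [
    {
      # Attribute names mirror the references in cluster.tf above; values are examples only.
      cluster_name            = "shared-autoscaling"
      spark_version           = "15.4.x-scala2.12"
      node_type_id            = null # falls back to local.default_node_type_ids for the selected cloud
      autotermination_minutes = 30
      data_security_mode      = "USER_ISOLATION"
      single_node_enable      = false
      min_workers             = 1
      max_workers             = 4
      permissions = [
        { group_name = "data-engineers", permission_level = "CAN_RESTART" }
      ]
    }
  ]

  custom_cluster_policies = [
    {
      name    = "limit-node-types" # hypothetical policy name
      can_use = ["data-engineers"]
      definition = {
        "node_type_id" = { "type" = "allowlist", "values" = ["Standard_D4ds_v5"] }
      }
    }
  ]
}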
11 changes: 11 additions & 0 deletions data.tf
@@ -0,0 +1,11 @@
data "databricks_group" "account_groups" {
for_each = local.iam_account_map

display_name = each.key
}

data "databricks_current_metastore" "this" {
}

data "databricks_sql_warehouses" "all" {
}
26 changes: 26 additions & 0 deletions iam.tf
@@ -0,0 +1,26 @@
locals {
iam_account_map = tomap({
for group in var.iam_account_groups : group.group_name => group.entitlements
if group.group_name != null
})
}

resource "databricks_group" "this" {
count = var.cloud_name == "azure" && length(local.iam_account_map) == 0 ? length(toset(keys(var.iam_workspace_groups))) : 0

display_name = keys(var.iam_workspace_groups)[count.index]

lifecycle {
ignore_changes = [external_id, allow_cluster_create, allow_instance_pool_create, databricks_sql_access, workspace_access]
}
}

resource "databricks_entitlements" "this" {
for_each = local.iam_account_map

group_id = data.databricks_group.account_groups[each.key].id
allow_cluster_create = contains(coalesce(each.value, ["none"]), "allow_cluster_create")
allow_instance_pool_create = contains(coalesce(each.value, ["none"]), "allow_instance_pool_create")
databricks_sql_access = contains(coalesce(each.value, ["none"]), "databricks_sql_access")
workspace_access = true
}
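
The contains() checks above expect literal entitlement names in each group's entitlements list. A minimal caller-side sketch (the group name is hypothetical, and the account group must already exist so the databricks_group data source can resolve it):

iam_account_groups = [
  {
    group_name   = "account-data-engineers"
    entitlements = ["databricks_sql_access", "allow_cluster_create"]
  }
]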
23 changes: 23 additions & 0 deletions main.tf
@@ -0,0 +1,23 @@
resource "databricks_workspace_conf" "this" {
custom_config = var.custom_config
}

resource "databricks_ip_access_list" "allowed_list" {
label = "allow_in"
list_type = "ALLOW"
ip_addresses = flatten([for v in values(var.ip_addresses) : v])

depends_on = [databricks_workspace_conf.this]
}

resource "databricks_token" "pat" {
count = var.workspace_admin_token_enabled ? 1 : 0
comment = "Terraform Provisioning"
lifetime_seconds = var.pat_token_lifetime_seconds
}

resource "databricks_system_schema" "this" {
for_each = var.system_schemas_enabled ? var.system_schemas : toset([])

schema = each.value
}
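
A minimal sketch of the inputs main.tf consumes; enableIpAccessLists is the documented workspace conf toggle that IP access lists rely on, while the CIDR range, token lifetime, and schema names are illustrative:

custom_config = {
  "enableIpAccessLists" = "true"
}

ip_addresses = {
  offices = ["203.0.113.0/24"] # example range
}

workspace_admin_token_enabled = true
pat_token_lifetime_seconds    = 2592000 # 30 days

system_schemas_enabled = true
system_schemas         = ["access", "billing"]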
15 changes: 15 additions & 0 deletions mount.tf
@@ -0,0 +1,15 @@
resource "databricks_mount" "adls" {
for_each = var.mount_enabled && var.cloud_name == "azure" ? var.mountpoints : {}

name = each.key
cluster_id = var.mount_cluster_name != null ? databricks_cluster.this[var.mount_cluster_name].id : null
uri = "abfss://${each.value["container_name"]}@${each.value["storage_account_name"]}.dfs.core.windows.net"
extra_configs = {
"fs.azure.account.auth.type" : "OAuth",
"fs.azure.account.oauth.provider.type" : "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
"fs.azure.account.oauth2.client.id" : var.mount_service_principal_client_id,
"fs.azure.account.oauth2.client.secret" : databricks_secret.main["mount-sp-secret"].config_reference,

"fs.azure.account.oauth2.client.endpoint" : "https://login.microsoftonline.com/${var.mount_service_principal_tenant_id}/oauth2/token",
"fs.azure.createRemoteFileSystemDuringInitialization" : "false",
}
}
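
A minimal sketch of the mount-related inputs for an Azure deployment; the storage account, container, and service principal identifiers are placeholders, and the client secret should come from a securely supplied variable rather than be hard-coded:

mount_enabled      = true
mount_cluster_name = "shared-autoscaling" # must match a cluster defined in var.clusters

mountpoints = {
  raw = {
    container_name       = "raw"
    storage_account_name = "stexampledatalake"
  }
}

mount_service_principal_client_id = "00000000-0000-0000-0000-000000000000"
mount_service_principal_secret    = var.mount_sp_secret # hypothetical root-level variable
mount_service_principal_tenant_id = "00000000-0000-0000-0000-000000000000"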
33 changes: 33 additions & 0 deletions outputs.tf
@@ -0,0 +1,33 @@
output "sql_endpoint_jdbc_url" {
value = [for n in databricks_sql_endpoint.this : n.jdbc_url]
description = "JDBC connection string of SQL Endpoint"
}

output "sql_endpoint_data_source_id" {
value = [for n in databricks_sql_endpoint.this : n.data_source_id]
description = "ID of the data source for this endpoint"
}

output "token" {
value = length(databricks_token.pat) > 0 ? databricks_token.pat[0].token_value : null
description = "Databricks Personal Authorization Token"
sensitive = true
}

output "clusters" {
value = [for param in var.clusters : {
  name = param.cluster_name
  id   = databricks_cluster.this[param.cluster_name].id
}]
description = "Provides name and unique identifier for the clusters"
}

output "sql_warehouses_list" {
value = data.databricks_sql_warehouses.all.ids
description = "List of IDs of all SQL warehouses in the Databricks workspace."
}

output "metastore_id" {
value = data.databricks_current_metastore.this.id
description = "The ID of the current metastore in the Databricks workspace."
}
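
Assuming the module instance is named databricks_runtime as in the earlier sketch, these outputs can be re-exported or consumed from the calling configuration, for example:

output "databricks_clusters" {
  value       = module.databricks_runtime.clusters
  description = "Cluster names and IDs created by the module"
}

output "databricks_metastore_id" {
  value = module.databricks_runtime.metastore_id
}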
83 changes: 83 additions & 0 deletions secrets.tf
@@ -0,0 +1,83 @@
locals {
mount_sp_secrets = var.cloud_name == "azure" ? {
mount-sp-client-id = { value = var.mount_service_principal_client_id }
mount-sp-secret = { value = var.mount_service_principal_secret }
} : {}

secrets_acl_objects_list = flatten([for param in var.secret_scope : [
for permission in param.acl : {
scope = param.scope_name, principal = permission.principal, permission = permission.permission
}] if param.scope_acl != null
])

secret_scope_config = { for object in var.secret_scope : object.scope_name => object }

secret_scope_config_secrets = { for object in flatten([for k, v in local.secret_scope_config : [for secret in v.secrets : {
scope_name = k,
secret_key = secret.key,
secret_value = secret.string_value,
}]]) : "${object.scope_name}:${object.secret_key}" => object }
}

# Secret Scope with SP secrets for mounting Azure Data Lake Storage
resource "databricks_secret_scope" "main" {
count = var.cloud_name == "azure" && var.mount_enabled ? 1 : 0

name = "main"
initial_manage_principal = null
}

resource "databricks_secret" "main" {
for_each = var.cloud_name == "azure" && var.mount_enabled ? local.mount_sp_secrets : {}

key = each.key
string_value = each.value["value"]
scope = databricks_secret_scope.main[0].id

lifecycle {
precondition {
condition = var.cloud_name == "azure" && var.mount_enabled ? length(compact([var.mount_service_principal_client_id, var.mount_service_principal_secret, var.mount_service_principal_tenant_id])) == 3 : true
error_message = "To mount ADLS Storage, please provide prerequisite Service Principal values - 'mount_service_principal_object_id', 'mount_service_principal_secret', 'mount_service_principal_tenant_id'."
}
}
}

# Custom additional Databricks Secret Scope
resource "databricks_secret_scope" "this" {
for_each = {
for param in var.secret_scope : (param.scope_name) => param
if param.scope_name != null
}

name = each.key

# Key Vault metadata block only for Azure
dynamic "keyvault_metadata" {
for_each = var.cloud_name == "azure" ? [for kv in var.key_vault_secret_scope : kv] : []
content {
resource_id = keyvault_metadata.value.key_vault_id
dns_name = keyvault_metadata.value.dns_name
}
}

# This property is only relevant for Azure
initial_manage_principal = null
}

resource "databricks_secret" "this" {
for_each = local.secret_scope_config_secrets

key = each.value.secret_key
string_value = each.value.secret_value
scope = databricks_secret_scope.this[each.value.scope_name].id
}

resource "databricks_secret_acl" "this" {
for_each = var.cloud_name == "azure" && length(local.secrets_acl_objects_list) > 0 ? {
  for entry in local.secrets_acl_objects_list : "${entry.scope}.${entry.principal}.${entry.permission}" => entry
} : {}

scope = databricks_secret_scope.this[each.value.scope].name
principal = length(var.iam_account_groups) != 0 ? data.databricks_group.account_groups[each.value.principal].display_name : databricks_group.this[each.value.principal].display_name
permission = each.value.permission
}
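
A minimal caller-side sketch of var.secret_scope; the scope, key, and principal are placeholders, and the secret value should come from a securely supplied variable. Note that secrets_acl_objects_list in the locals also requires scope_acl to be non-null before ACLs are created, but that attribute's exact shape is defined in variables.tf, which is not part of this diff.

secret_scope = [
  {
    scope_name = "app-secrets" # hypothetical scope
    secrets = [
      { key = "db-password", string_value = var.db_password } # hypothetical root-level variable
    ]
    acl = [
      { principal = "account-data-engineers", permission = "READ" }
    ]
  }
]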