Skip to content

Commit

Permalink
Merge pull request #1 from kabisa/renovate/configure
Browse files Browse the repository at this point in the history
Configure Renovate
  • Loading branch information
obeleh authored Sep 26, 2022
2 parents cbd0dbc + 2aaba56 commit 36e13e9
Show file tree
Hide file tree
Showing 14 changed files with 829 additions and 5 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/documentation.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Runs the repository's pre-commit hooks on branch pushes and commits whatever
# the hooks changed back to the branch.
name: Generate terraform docs

on:
  push:
    # don't run when we push a tag
    tags-ignore:
      - '*'
    # don't run when we merge to main
    # the action should have run already
    branches-ignore:
      - 'main'
jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: terraform-linters/setup-tflint@v2
        name: Setup TFLint
        with:
          tflint_version: v0.38.1
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v4
      # NOTE(review): the action reference was obfuscated in the rendered page;
      # pre-commit/action@v3.0.0 is assumed - confirm against the repository.
      - uses: pre-commit/action@v3.0.0
        # pre-commit fails if it changed files
        # we want to go on
        continue-on-error: true
      # Second run is not allowed to fail; presumably it verifies that the
      # hooks pass on the tree the first run just fixed up.
      - uses: pre-commit/action@v3.0.0
      - uses: EndBug/add-and-commit@v9
        with:
          default_author: github_actions
9 changes: 7 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
# pre-commit hook configuration: gruntwork Terraform/shell hooks plus the
# kabisa Datadog docs generator.
# NOTE(review): this is a rendered diff whose +/- markers were lost. The two
# consecutive `rev:` lines below are the removed and the added version of the
# same key - only one belongs in the real file. The hunk summary (7 additions,
# 2 deletions) implies one more line in this block was also removed; confirm
# both against the repository before using this text as a config file.
repos:
- repo: https://github.com/gruntwork-io/pre-commit
rev: v0.1.14
rev: v0.1.12
hooks:
- id: terraform-fmt
- id: terraform-validate
- id: tflint
- id: shellcheck
- repo: https://github.com/kabisa/terraform-datadog-pre-commit-hook
rev: "1.3.6"
hooks:
- id: terraform-datadog-docs
# the hook receives "." as its only argument
args:
- "."
25 changes: 25 additions & 0 deletions .terraform.lock.hcl

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

133 changes: 130 additions & 3 deletions README.md

Large diffs are not rendered by default.

116 changes: 116 additions & 0 deletions errors-slo-variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Master switch for the error SLO: errors-slo.tf creates the SLO resource only
# when this is true, and the burn-rate monitor additionally requires it.
variable "error_slo_enabled" {
  type    = bool
  default = true
}

# Optional string, empty by default.
# NOTE(review): errors-slo.tf as shown does not read this variable - confirm
# where (or whether) it is consumed.
variable "error_slo_note" {
  type    = string
  default = ""
}

# Optional string, empty by default.
# NOTE(review): errors-slo.tf as shown does not read this variable - confirm
# where (or whether) it is consumed.
variable "error_slo_docs" {
  type    = string
  default = ""
}

# When non-empty, used instead of local.filter_str as the tag filter inside the
# generated SLO numerator and denominator queries (see errors-slo.tf).
variable "error_slo_filter_override" {
  type    = string
  default = ""
}

# Passed as the `warning` value of the SLO thresholds block; null by default.
variable "error_slo_warning" {
  type    = number
  default = null
}

# Passed as the `target` value of the SLO thresholds block.
variable "error_slo_critical" {
  type    = number
  default = 99.9
}

# Boolean flag, true by default.
# NOTE(review): errors-slo.tf as shown does not read this variable (the
# burn-rate monitor uses error_slo_burn_rate_alerting_enabled) - confirm usage.
variable "error_slo_alerting_enabled" {
  type    = bool
  default = true
}

# Appended verbatim after the base filter inside the braces of the numerator
# query in errors-slo.tf, which is why the value must carry its own separator.
variable "error_slo_error_filter" {
  type        = string
  description = "Filter string appended to the base filter to select the non-error requests for the SLO numerator. Don't forget to include the leading comma or the AND/OR keyword."
  default     = ",!status:error"
}

# Timeframe of the SLO thresholds block; restricted to 7d, 30d or 90d.
variable "error_slo_timeframe" {
  type    = string
  default = "30d"

  validation {
    condition     = contains(["7d", "30d", "90d"], var.error_slo_timeframe)
    error_message = "SLO Timeframe can be 7,30,90 days. Example: 7d."
  }
}

# When non-empty, replaces the generated requests.count numerator query of the
# error SLO.
variable "error_slo_numerator_override" {
  type    = string
  default = ""
}

# When non-empty, replaces the generated requests.count denominator query of
# the error SLO.
variable "error_slo_denominator_override" {
  type    = string
  default = ""
}

# When non-empty, takes precedence over var.notification_channel for the
# burn-rate monitor.
variable "error_slo_burn_rate_notification_channel_override" {
  type    = string
  default = ""
}

# Enables the burn-rate monitor; it only takes effect while error_slo_enabled
# is also true.
variable "error_slo_burn_rate_enabled" {
  type    = bool
  default = true
}

# Forwarded as `alerting_enabled` to the burn-rate monitor module.
variable "error_slo_burn_rate_alerting_enabled" {
  type    = bool
  default = true
}

# Forwarded as `priority` to the burn-rate monitor module.
variable "error_slo_burn_rate_priority" {
  description = "Number from 1 (high) to 5 (low)."
  type        = number
  default     = 3
}

# Forwarded as `warning_threshold` to the burn-rate monitor module; null by
# default.
variable "error_slo_burn_rate_warning" {
  type    = number
  default = null
}

# Burn-rate multiple used both as the comparison value in the monitor query and
# as the monitor's `critical_threshold`.
variable "error_slo_burn_rate_critical" {
  type    = number
  default = 10 # i.e. a 10x burn rate
}

# Forwarded as `note` to the burn-rate monitor module.
variable "error_slo_burn_rate_note" {
  type    = string
  default = ""
}

# Forwarded as `docs` to the burn-rate monitor module; the default is a general
# explanation of burn-rate alerts.
variable "error_slo_burn_rate_docs" {
  type    = string
  default = "Use burn rates alerts to measure how fast your error budget is being depleted relative to the time window of your SLO. For example, for a 30 day SLO if a burn rate of 1 is sustained, that means the error budget will be fully depleted in exactly 30 days, a burn rate of 2 means in exactly 15 days, etc. Therefore, you could use a burn rate alert to notify you if a burn rate of 10 is measured in the past hour. Burn rate alerts evaluate two time windows: a long window which you specify and a short window that is automatically calculated as 1/12 of your long window. The long window's purpose is to reduce alert flappiness, while the short window's purpose is to improve recovery time. If your threshold is violated in both windows, you will receive an alert."
}

# Interpolated into the `.over("...")` clause of the burn-rate monitor query.
variable "error_slo_burn_rate_evaluation_period" {
  type    = string
  default = "30d"
}

# Interpolated into the `.short_window("...")` clause of the burn-rate monitor
# query.
variable "error_slo_burn_rate_short_window" {
  type    = string
  default = "5m"
}

# Interpolated into the `.long_window("...")` clause of the burn-rate monitor
# query.
variable "error_slo_burn_rate_long_window" {
  type    = string
  default = "1h"
}
72 changes: 72 additions & 0 deletions errors-slo.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Derived values for the error SLO and its burn-rate monitor.
locals {
  # Tag filter used inside the metric queries; the override wins when non-empty.
  error_slo_filter = coalesce(var.error_slo_filter_override, local.filter_str)

  # Numerator: the request count narrowed by the extra filter that
  # var.error_slo_error_filter appends, unless a full override is supplied.
  error_slo_numerator = coalesce(
    var.error_slo_numerator_override,
    "sum:${var.slo_metric_prefix}requests.count{${local.error_slo_filter}${var.error_slo_error_filter}}.as_count()"
  )

  # Denominator: the request count under the base filter, unless overridden.
  error_slo_denominator = coalesce(
    var.error_slo_denominator_override,
    "sum:${var.slo_metric_prefix}requests.count{${local.error_slo_filter}}.as_count()"
  )

  # Falls back to "" when coalesce() fails, i.e. when neither the override nor
  # the general channel yields a usable value.
  error_slo_burn_rate_notification_channel = try(
    coalesce(var.error_slo_burn_rate_notification_channel_override, var.notification_channel),
    ""
  )

  # The burn-rate monitor needs both the SLO and the monitor toggles on.
  error_slo_burn_rate_enabled = var.error_slo_enabled && var.error_slo_burn_rate_enabled

  # The SLO resource uses count, so it is only indexed when the burn-rate
  # monitor is enabled (which implies the SLO itself is enabled).
  error_slo_id = local.error_slo_burn_rate_enabled ? datadog_service_level_objective.error_slo[0].id : ""
}

# Metric-based Datadog SLO for errors; created only when var.error_slo_enabled
# is true.
resource "datadog_service_level_objective" "error_slo" {
  count = var.error_slo_enabled ? 1 : 0

  type        = "metric"
  name        = "${local.service_display_name} - ${var.log_source_name} - Error SLO"
  description = "Errors SLO for ${local.service_display_name}"
  tags        = local.normalized_tags

  # Numerator and denominator queries are assembled in the locals block.
  query {
    numerator   = local.error_slo_numerator
    denominator = local.error_slo_denominator
  }

  thresholds {
    timeframe = var.error_slo_timeframe
    target    = var.error_slo_critical
    warning   = var.error_slo_warning
  }
}

# Burn-rate "slo alert" monitor for the error SLO, built with the shared
# kabisa generic-monitor module.
module "error_slo_burn_rate" {
  source  = "kabisa/generic-monitor/datadog"
  version = "1.0.0"

  name = "${var.log_source_name} - Error SLO - Burn Rate"
  type = "slo alert"

  # burn_rate(<slo id>) over the evaluation period, checked on a long and a
  # short window, compared against the critical burn-rate multiple.
  query = "burn_rate(\"${local.error_slo_id}\").over(\"${var.error_slo_burn_rate_evaluation_period}\").long_window(\"${var.error_slo_burn_rate_long_window}\").short_window(\"${var.error_slo_burn_rate_short_window}\") > ${var.error_slo_burn_rate_critical}"

  alert_message    = "${local.service_display_name} service is burning through its Error Budget. The percentage of 5XX status codes is {{threshold}}x higher than expected"
  recovery_message = "${local.service_display_name} service burn rate has recovered"

  # monitor level vars
  # local.error_slo_burn_rate_enabled is defined in this file's locals block as
  # var.error_slo_enabled && var.error_slo_burn_rate_enabled.
  enabled            = local.error_slo_burn_rate_enabled
  alerting_enabled   = var.error_slo_burn_rate_alerting_enabled
  critical_threshold = var.error_slo_burn_rate_critical
  warning_threshold  = var.error_slo_burn_rate_warning
  priority           = var.error_slo_burn_rate_priority
  note               = var.error_slo_burn_rate_note
  docs               = var.error_slo_burn_rate_docs

  # module level vars
  env                  = var.env
  service              = var.service
  service_display_name = var.service_display_name
  notification_channel = local.error_slo_burn_rate_notification_channel
  additional_tags      = var.additional_tags
  locked               = var.locked
  name_prefix          = var.name_prefix
  name_suffix          = var.name_suffix
}
116 changes: 116 additions & 0 deletions latency-slo-variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Boolean toggle for the latency SLO, on by default.
# NOTE(review): the code that reads this variable is outside this view.
variable "latency_slo_enabled" {
  type        = bool
  description = "Note that this monitor requires custom metrics to be present. Those can unfortunately not be created with Terraform yet"
  default     = true
}

# Optional string, empty by default.
# NOTE(review): consumer not visible here; presumably a note attached to the
# latency SLO - confirm.
variable "latency_slo_note" {
  type    = string
  default = ""
}

# Optional string, empty by default.
# NOTE(review): consumer not visible here; presumably documentation text for
# the latency SLO - confirm.
variable "latency_slo_docs" {
  type    = string
  default = ""
}

# Optional string, empty by default.
# NOTE(review): consumer not visible here; presumably mirrors
# error_slo_filter_override for the latency queries - confirm.
variable "latency_slo_filter_override" {
  type    = string
  default = ""
}

# Optional number, null by default.
# NOTE(review): consumer not visible here; presumably the latency SLO warning
# threshold - confirm.
variable "latency_slo_warning" {
  type    = number
  default = null
}

# Number defaulting to 99.9.
# NOTE(review): consumer not visible here; presumably the latency SLO target
# percentage - confirm.
variable "latency_slo_critical" {
  type    = number
  default = 99.9
}

# Declared without a default, so callers of this module must set it.
variable "latency_slo_latency_bucket" {
  type        = number
  description = "SLO latency bucket in ms for your logs"
}

# Boolean flag, true by default.
# NOTE(review): consumer not visible here - confirm how it is used.
variable "latency_slo_alerting_enabled" {
  type    = bool
  default = true
}

# SLO timeframe string; validation restricts it to 7d, 30d or 90d.
variable "latency_slo_timeframe" {
  type    = string
  default = "30d"

  validation {
    condition     = contains(["7d", "30d", "90d"], var.latency_slo_timeframe)
    error_message = "SLO Timeframe can be 7,30,90 days. Example: 7d."
  }
}

# NOTE(review): consumer not visible here; presumably the priority of the
# latency burn-rate monitor - confirm.
variable "latency_slo_burn_rate_priority" {
  description = "Number from 1 (high) to 5 (low)."
  type        = number
  default     = 3
}

# Optional number, null by default.
# NOTE(review): consumer not visible here; presumably the warning threshold of
# the latency burn-rate monitor - confirm.
variable "latency_slo_burn_rate_warning" {
  type    = number
  default = null
}

# NOTE(review): consumer not visible here; presumably the critical burn-rate
# multiple of the latency burn-rate monitor - confirm.
variable "latency_slo_burn_rate_critical" {
  type    = number
  default = 10 # i.e. a 10x burn rate
}

# Optional string, empty by default.
# NOTE(review): consumer not visible here; presumably the note of the latency
# burn-rate monitor - confirm.
variable "latency_slo_burn_rate_note" {
  type    = string
  default = ""
}

# The default is a general explanation of burn-rate alerts.
# NOTE(review): consumer not visible here; presumably the docs text of the
# latency burn-rate monitor - confirm.
variable "latency_slo_burn_rate_docs" {
  type    = string
  default = "Use burn rates alerts to measure how fast your error budget is being depleted relative to the time window of your SLO. For example, for a 30 day SLO if a burn rate of 1 is sustained, that means the error budget will be fully depleted in exactly 30 days, a burn rate of 2 means in exactly 15 days, etc. Therefore, you could use a burn rate alert to notify you if a burn rate of 10 is measured in the past hour. Burn rate alerts evaluate two time windows: a long window which you specify and a short window that is automatically calculated as 1/12 of your long window. The long window's purpose is to reduce alert flappiness, while the short window's purpose is to improve recovery time. If your threshold is violated in both windows, you will receive an alert."
}

# NOTE(review): consumer not visible here; presumably the evaluation period of
# the latency burn-rate query - confirm.
variable "latency_slo_burn_rate_evaluation_period" {
  type    = string
  default = "30d"
}

# NOTE(review): consumer not visible here; presumably the short window of the
# latency burn-rate query - confirm.
variable "latency_slo_burn_rate_short_window" {
  type    = string
  default = "5m"
}

# NOTE(review): consumer not visible here; presumably the long window of the
# latency burn-rate query - confirm.
variable "latency_slo_burn_rate_long_window" {
  type    = string
  default = "1h"
}

# Optional string, empty by default.
# NOTE(review): consumer not visible here; presumably overrides the general
# notification channel for the latency burn-rate monitor - confirm.
variable "latency_slo_burn_rate_notification_channel_override" {
  type    = string
  default = ""
}

# Boolean flag, true by default.
# NOTE(review): consumer not visible here; presumably toggles the latency
# burn-rate monitor - confirm.
variable "latency_slo_burn_rate_enabled" {
  type    = bool
  default = true
}

# Boolean flag, true by default.
# NOTE(review): consumer not visible here; presumably toggles alerting for the
# latency burn-rate monitor - confirm.
variable "latency_slo_burn_rate_alerting_enabled" {
  type    = bool
  default = true
}

# Optional string, empty by default.
# NOTE(review): consumer not visible here; presumably a replacement numerator
# query for the latency SLO - confirm.
variable "latency_slo_custom_numerator" {
  type    = string
  default = ""
}

# Optional string, empty by default.
# NOTE(review): consumer not visible here; presumably a replacement denominator
# query for the latency SLO - confirm.
variable "latency_slo_custom_denominator" {
  type    = string
  default = ""
}
Loading

0 comments on commit 36e13e9

Please sign in to comment.