Add dashboard UID auto-generation and Gitea CI workflow
Some checks failed
terraform-dev / validate (push) Failing after 1m53s
terraform-dev / plan (push) Has been skipped
terraform-dev / apply (push) Has been skipped

This commit is contained in:
Alexandr
2026-03-25 06:41:19 +03:00
parent 345c5786b3
commit 558a23d916
83 changed files with 53372 additions and 1 deletions

View File

@ -0,0 +1,33 @@
locals {
  # Default template for Telegram messages.
  # Read from the template file bundled with the module when it exists. The
  # same three placeholders that are substituted in per-contact-point templates
  # (__ENV__, __ORG_ID__, __GRAFANA_URL__) are substituted here too, so the
  # fallback never reaches Grafana with raw placeholder tokens in its links or
  # labels. replace() is a no-op for a placeholder the file does not contain.
  # A fixed string is used when the file is missing or cannot be read.
  default_telegram_message_template = try(
    fileexists("${path.module}/template/alerts_message_tg.template") ?
    replace(replace(replace(file("${path.module}/template/alerts_message_tg.template"), "__ENV__", var.env), "__ORG_ID__", var.org_id), "__GRAFANA_URL__", var.grafana_url) :
    "Default message template",
    "Default message template"
  )

  # Preparing settings for each contact point, keyed by contact point name.
  contact_point_templates = {
    for cp in var.contact_points :
    cp.name => {
      name     = cp.name
      type     = cp.type
      settings = cp.settings

      # Message body: loaded from the file named by settings["template"] when
      # that key is set and the file exists. A missing key makes the index
      # expression fail, which try() turns into the default template.
      template = try(
        (cp.settings["template"] != null && fileexists(cp.settings["template"])) ?
        replace(replace(replace(file(cp.settings["template"]), "__ENV__", var.env), "__ORG_ID__", var.org_id), "__GRAFANA_URL__", var.grafana_url) :
        local.default_telegram_message_template,
        local.default_telegram_message_template
      )

      # Title: loaded from settings["title_template"] under the same rules;
      # null when the key is absent or the file does not exist.
      title = try(
        (cp.settings["title_template"] != null && fileexists(cp.settings["title_template"])) ?
        replace(replace(replace(file(cp.settings["title_template"]), "__ENV__", var.env), "__ORG_ID__", var.org_id), "__GRAFANA_URL__", var.grafana_url) :
        null,
        null
      )
    }
  }
}

View File

@ -0,0 +1,26 @@
# One Grafana contact point per entry of var.contact_points. Instances are
# keyed by "<name>_<type>"; exactly one of the dynamic blocks below is emitted,
# selected by the entry's type.
resource "grafana_contact_point" "contact_points" {
  for_each = {
    for point in var.contact_points :
    "${point.name}_${point.type}" => point
  }

  name               = each.value.name
  org_id             = var.org_id
  disable_provenance = var.disable_provenance

  # Telegram receiver: rendered only for type "telegram".
  dynamic "telegram" {
    for_each = each.value.type != "telegram" ? [] : [1]
    content {
      token      = each.value.settings["bot_token"]
      chat_id    = each.value.settings["chat_id"]
      parse_mode = "HTML"
      message    = local.contact_point_templates[each.value.name].template
    }
  }

  # Slack receiver: rendered only for type "slack".
  dynamic "slack" {
    for_each = each.value.type != "slack" ? [] : [1]
    content {
      url   = each.value.settings["webhook_url"]
      text  = local.contact_point_templates[each.value.name].template
      title = try(local.contact_point_templates[each.value.name].title, null)
    }
  }
}

View File

@ -0,0 +1,4 @@
# Exposes the ID of every created contact point, keyed by the resource's
# for_each key.
output "contact_point_ids" {
  value = {
    for key, contact_point in grafana_contact_point.contact_points :
    key => contact_point.id
  }
  description = "Mapping of contact point names to their IDs"
}

View File

@ -0,0 +1,144 @@
{{- define "print_links" -}}
{{- /* Renders the Markdown link row for a single alert: graph, alert rule, silence and dashboard links, each only when available. */ -}}
{{- /* Panel link and GeneratorURL are both primary references */ -}}
{{- if .PanelURL -}}
[📊 Graph]({{ .PanelURL }})
{{- else -}}
{{- /* No PanelURL: build a panel link from the panel_id label and either the dashboard_uid label or, failing that, "<service>-dashboard". */ -}}
{{- if and .Labels (index .Labels "panel_id") -}}
{{- $dashboard_uid := index .Labels "dashboard_uid" -}}
{{- if not $dashboard_uid -}}
{{- $service := index .Labels "service" -}}
{{- if $service -}}
{{- $dashboard_uid = printf "%s-dashboard" $service -}}
{{- end -}}
{{- end -}}
{{- if $dashboard_uid -}}
{{- $url := printf "__GRAFANA_URL__/d/%s?orgId=__ORG_ID__&viewPanel=%s&refresh=2s" $dashboard_uid (index .Labels "panel_id") -}}
[📊 Graph]({{ $url }})
{{- end -}}
{{- end -}}
{{- end -}}
{{- /* The alert-rule and silence links are emitted only while the alert is firing. */ -}}
{{- if and (eq .Status "firing") (.GeneratorURL) -}}
{{- " " -}}[⚡ Check Alert]({{ .GeneratorURL }})
{{- end -}}
{{- if and (eq .Status "firing") (.SilenceURL) -}}
{{- " " -}}[🤐 Mute]({{ .SilenceURL }})
{{- end -}}
{{- if .DashboardURL -}}
{{- " " -}}[📈 Dashboard]({{ .DashboardURL }})
{{- end -}}
{{- end -}}
{{- define "print_alert" -}}
{{- /* Renders one alert: status emoji and instance, alert name (prefixed by the service label when set), summary, optional description, labels, and the link row while firing. */ -}}
{{- $status := "" -}}
{{- $severity := "" -}}
{{- if index .Labels "severity" -}}
{{- $severity = index .Labels "severity" -}}
{{- end -}}
{{- /* Status emoji: "resolved" wins over severity; a missing or unrecognised severity falls through to orange. */ -}}
{{- if eq .Status "resolved" -}}
{{- $status = "🟢" -}}
{{- else if eq $severity "disaster" -}}
{{- $status = "🔴" -}}
{{- else if eq $severity "critical" -}}
{{- $status = "🟣" -}}
{{- else if eq $severity "performance" -}}
{{- $status = "🟡" -}}
{{- else -}}
{{- $status = "🟠" -}}
{{- end -}}
{{- $summary := index .Annotations "summary" -}}
{{- $description := index .Annotations "description" -}}
{{- $alertname := index .Labels "alertname" -}}
{{- /* For DatasourceNoData / DatasourceError alerts the alertname becomes the summary and the rulename label is shown as the alert name. */ -}}
{{- if or (eq $alertname "DatasourceNoData") (eq $alertname "DatasourceError") -}}
{{- $summary = $alertname -}}
{{- $alertname = index .Labels "rulename" -}}
{{- end -}}
{{- $instance := index .Labels "instance" -}}
{{- $service := "" -}}
{{- if index .Labels "service" -}}
{{- $service = index .Labels "service" -}}
{{- end -}}
{{ $status }} **{{ $instance }}**{{ "\n" }}
{{- if $service -}}
{{ $service }}: {{ $alertname }}{{ "\n" }}
{{- else -}}
{{ $alertname }}{{ "\n" }}
{{- end -}}
{{- /* Summary line: the leading icon depends on severity. */ -}}
{{- if eq $severity "disaster" -}}🚨 {{ $summary }}{{ "\n" }}
{{- else if eq $severity "critical" -}}🚨 {{ $summary }}{{ "\n" }}
{{- else if eq $severity "performance" -}}🐌 {{ $summary }}{{ "\n" }}
{{- else -}}⚠️ {{ $summary }}{{ "\n" }}
{{- end -}}
{{- if $description -}}
{{- "\n" -}}
{{ $description }}{{ "\n" }}
{{- end -}}
{{- /* Labels printed for every alert. NOTE(review): the environment value is a placeholder token in this file; presumably it is substituted before the template reaches Grafana - confirm against the caller. */ -}}
{{- $common_labels := coll.Dict "environment" "__ENV__" -}}
{{- range $label, $value := $common_labels -}}
`{{ $label }}: {{ $value }}`{{ "\n" }}
{{- end -}}
{{- /* First pass: detect whether the alert carries any label outside the exclusion list; stops at the first match. */ -}}
{{- $has_labels := false -}}
{{- range $label, $value := .Labels -}}
{{- if and (ne $label "alertname") (ne $label "datasource_uid") (ne $label "grafana_folder") (ne $label "job")
(ne $label "details") (ne $label "ref_id") (ne $label "rulename") (ne $label "instance")
(ne $label "service") (ne $label "severity") (ne $label "dashboard_uid") (ne $label "panel_id")
(ne $label "ip_version") (ne $label "ip") (ne $label "hostname") (ne $label "role") (ne $label "team")
(ne $label "id") (ne $label "endpoint") (ne $label "hostgroup") (ne $label "module")
(ne $label "servername") (ne $label "type") (ne $label "vm_project_id") (ne $label "vm_account_id") -}}
{{- $has_labels = true -}}
{{- break -}}
{{- end -}}
{{- end -}}
{{- /* Second pass: print every label outside the same exclusion list, one per line. The list must be kept in sync with the first pass. */ -}}
{{- if $has_labels -}}
{{- range $label, $value := .Labels -}}
{{- if and (ne $label "alertname") (ne $label "datasource_uid") (ne $label "grafana_folder") (ne $label "job")
(ne $label "details") (ne $label "ref_id") (ne $label "rulename") (ne $label "instance")
(ne $label "service") (ne $label "severity") (ne $label "dashboard_uid") (ne $label "panel_id")
(ne $label "ip_version") (ne $label "ip") (ne $label "hostname") (ne $label "role") (ne $label "team")
(ne $label "id") (ne $label "endpoint") (ne $label "hostgroup") (ne $label "module")
(ne $label "servername") (ne $label "type") (ne $label "vm_project_id") (ne $label "vm_account_id") -}}
`{{ $label }}: {{ $value }}`{{ "\n" }}
{{- end -}}
{{- end -}}
{{- end -}}
{{- /* The link row is appended only for firing alerts. */ -}}
{{- if eq .Status "firing" -}}
{{- "\n" -}}
{{- template "print_links" . -}}
{{- end -}}
{{- end -}}
{{- define "print_alerts" -}}
{{- /* Renders a list of alerts with "print_alert", writing a "---" separator line before every alert except the first. */ -}}
{{- range $index, $alert := . -}}
{{- if ne $index 0 -}}
{{ "---" }}{{ "\n" }}
{{- end -}}
{{- template "print_alert" $alert -}}
{{- end -}}
{{- end -}}
{{- /* Entry point: firing alerts first, then resolved alerts, each under its own header; two newlines separate the sections when both are present. */ -}}
{{- with .Alerts -}}
{{- if .Firing -}}
**🔥 Firing Alerts**{{ "\n" }}
{{ "---" }}{{ "\n" }}
{{- template "print_alerts" .Firing -}}
{{- end -}}
{{- if and .Firing .Resolved -}}
{{ "\n\n" }}
{{- end -}}
{{- if .Resolved -}}
**✅ Resolved Alerts**{{ "\n" }}
{{ "---" }}{{ "\n" }}
{{- template "print_alerts" .Resolved -}}
{{- end -}}
{{- end -}}

View File

@ -0,0 +1,127 @@
{{- define "print_links" -}}
{{- /* Renders the link row for a single alert in <url|text> form: graph, alert rule, silence and dashboard links, each only when available. */ -}}
{{- /* Panel link and GeneratorURL are both primary references */ -}}
{{- if .PanelURL -}}
<{{ .PanelURL }}|📊 Graph>
{{- else -}}
{{- /* Use dashboard_uid and panel_id from labels with fallback to service-based dashboard_uid */ -}}
{{- if and .Labels (index .Labels "panel_id") -}}
{{- $dashboard_uid := index .Labels "dashboard_uid" -}}
{{- if not $dashboard_uid -}}
{{- $service := index .Labels "service" -}}
{{- if $service -}}
{{- $dashboard_uid = printf "%s-dashboard" $service -}}
{{- end -}}
{{- end -}}
{{- if and $dashboard_uid -}}
{{- $url := printf "__GRAFANA_URL__/d/%s?orgId=__ORG_ID__&viewPanel=%s&refresh=2s" $dashboard_uid (index .Labels "panel_id") -}}
<{{ $url }}|📊 Graph>
{{- end -}}
{{- end -}}
{{- end -}}
{{- /* The alert-rule and silence links are emitted only while the alert is firing. */ -}}
{{- if and (eq .Status "firing") (.GeneratorURL) -}}
{{- " " -}}<{{ .GeneratorURL }}|⚡ Check Alert>
{{- end -}}
{{- if and (eq .Status "firing") (.SilenceURL) -}}
{{- " " -}}<{{ .SilenceURL }}|🤐 Mute>
{{- end -}}
{{- if .DashboardURL -}}
{{- " " -}}<{{ .DashboardURL }}|dashboard>
{{- end -}}
{{- end -}}
{{- define "print_alert" -}}
{{- /* Renders one alert: status emoji and instance, alert name (prefixed by the service label when set), summary, optional description, remaining labels, and the link row while firing. */ -}}
{{- $status := "" -}}
{{- $severity := "" -}}
{{- if index .Labels "severity" -}}
{{- $severity = index .Labels "severity" -}}
{{- end -}}
{{- /* Status emoji: "resolved" wins over severity; disaster and critical share red; a missing or unrecognised severity falls through to orange. */ -}}
{{- if eq .Status "resolved" -}}
{{- $status = "🟢" -}}
{{- else if eq $severity "disaster" -}}
{{- $status = "🔴" -}}
{{- else if eq $severity "critical" -}}
{{- $status = "🔴" -}}
{{- else if eq $severity "performance" -}}
{{- $status = "🟡" -}}
{{- else -}}
{{- $status = "🟠" -}}
{{- end -}}
{{- $summary := index .Annotations "summary" -}}
{{- $description := index .Annotations "description" -}}
{{- $alertname := index .Labels "alertname" -}}
{{- /* For DatasourceNoData / DatasourceError alerts the alertname becomes the summary and the rulename label is shown as the alert name. */ -}}
{{- if or (eq $alertname "DatasourceNoData") (eq $alertname "DatasourceError") -}}
{{- $summary = $alertname -}}
{{- $alertname = index .Labels "rulename" -}}
{{- end -}}
{{- $instance := index .Labels "instance" -}}
{{- $service := "" -}}
{{- if index .Labels "service" -}}
{{- $service = index .Labels "service" -}}
{{- end -}}
{{ $status }} **{{ $instance }}** {{ "\n" }}
{{- if $service -}}
{{ $service }}: {{ $alertname }}{{ "\n" }}
{{- else -}}
{{ $alertname }}{{ "\n" }}
{{- end -}}
{{ "\n" }}
{{- /* Summary line: the leading icon depends on severity. */ -}}
{{- if eq $severity "disaster" -}}🚨 {{ $summary }}{{ "\n" }}
{{- else if eq $severity "critical" -}}🚨 {{ $summary }}{{ "\n" }}
{{- else if eq $severity "performance" -}}🐌 {{ $summary }}{{ "\n" }}
{{- else -}}⚠️ {{ $summary }}{{ "\n" }}
{{- end -}}
{{- if $description -}}
{{- "\n" -}}
{{ $description }}{{ "\n" }}
{{- end -}}
{{- /* First pass: detect whether the alert carries any label outside the exclusion list; stops at the first match. */ -}}
{{- $has_labels := false -}}
{{- range $label, $value := .Labels -}}
{{- if and (ne $label "alertname") (ne $label "datasource_uid") (ne $label "grafana_folder") (ne $label "job") (ne $label "details") (ne $label "ref_id") (ne $label "rulename") (ne $label "instance") (ne $label "service") (ne $label "severity") (ne $label "dashboard_uid") (ne $label "panel_id") (ne $label "ip_version") (ne $label "ip") (ne $label "hostname") (ne $label "role") (ne $label "team") (ne $label "id") (ne $label "endpoint") (ne $label "hostgroup") (ne $label "module") (ne $label "servername") (ne $label "type") (ne $label "vm_project_id") (ne $label "vm_account_id") (ne $label "environment") (ne $label "rack") (ne $label "server_type") -}}
{{- $has_labels = true -}}
{{- break -}}
{{- end -}}
{{- end -}}
{{- /* Second pass: print every label outside the same exclusion list, one per line. The list must be kept in sync with the first pass. */ -}}
{{- if $has_labels -}}
{{- range $label, $value := .Labels -}}
{{- if and (ne $label "alertname") (ne $label "datasource_uid") (ne $label "grafana_folder") (ne $label "job") (ne $label "details") (ne $label "ref_id") (ne $label "rulename") (ne $label "instance") (ne $label "service") (ne $label "severity") (ne $label "dashboard_uid") (ne $label "panel_id") (ne $label "ip_version") (ne $label "ip") (ne $label "hostname") (ne $label "role") (ne $label "team") (ne $label "id") (ne $label "endpoint") (ne $label "hostgroup") (ne $label "module") (ne $label "servername") (ne $label "type") (ne $label "vm_project_id") (ne $label "vm_account_id") (ne $label "environment") (ne $label "rack") (ne $label "server_type") -}}
{{ $label }}: {{ $value }}{{ "\n" }}
{{- end -}}
{{- end -}}
{{- end -}}
{{- /* The link row is appended only for firing alerts; a blank line precedes it only when labels were printed. */ -}}
{{- if eq .Status "firing" -}}
{{- if $has_labels -}}{{- "\n" -}}{{- end -}}
{{- template "print_links" . -}}
{{- end -}}
{{- end -}}
{{- define "print_alerts" -}}
{{- /* Renders a list of alerts with "print_alert", writing a "---" separator line before every alert except the first. */ -}}
{{- range $index, $alert := . -}}
{{- if ne $index 0 -}}
{{ "---" }}{{ "\n" }}
{{- end -}}
{{- template "print_alert" $alert -}}
{{- end -}}
{{- end -}}
{{- /* Entry point: firing alerts first, then resolved alerts, each under its own header; two newlines separate the sections when both are present. */ -}}
{{- with .Alerts -}}
{{- if .Firing -}}
**🔥 Firing Alerts**{{ "\n" }}
{{ "---" }}{{ "\n" }}
{{- template "print_alerts" .Firing -}}
{{- end -}}
{{- if and .Firing .Resolved -}}
{{ "\n\n" }}
{{- end -}}
{{- if .Resolved -}}
**✅ Resolved Alerts**{{ "\n" }}
{{ "---" }}{{ "\n" }}
{{- template "print_alerts" .Resolved -}}
{{- end -}}
{{- end -}}

View File

@ -0,0 +1,127 @@
{{- define "print_links" -}}
{{- /* Renders the link row for a single alert in <url|text> form: graph, alert rule, silence and dashboard links, each only when available. */ -}}
{{- /* Panel link and GeneratorURL are both primary references */ -}}
{{- if .PanelURL -}}
<{{ .PanelURL }}|📊 Graph>
{{- else -}}
{{- /* Use dashboard_uid and panel_id from labels with fallback to service-based dashboard_uid */ -}}
{{- if and .Labels (index .Labels "panel_id") -}}
{{- $dashboard_uid := index .Labels "dashboard_uid" -}}
{{- if not $dashboard_uid -}}
{{- $service := index .Labels "service" -}}
{{- if $service -}}
{{- $dashboard_uid = printf "%s-dashboard" $service -}}
{{- end -}}
{{- end -}}
{{- if and $dashboard_uid -}}
{{- $url := printf "__GRAFANA_URL__/d/%s?orgId=__ORG_ID__&viewPanel=%s&refresh=2s" $dashboard_uid (index .Labels "panel_id") -}}
<{{ $url }}|📊 Graph>
{{- end -}}
{{- end -}}
{{- end -}}
{{- /* The alert-rule and silence links are emitted only while the alert is firing. */ -}}
{{- if and (eq .Status "firing") (.GeneratorURL) -}}
{{- " " -}}<{{ .GeneratorURL }}|⚡ Check Alert>
{{- end -}}
{{- if and (eq .Status "firing") (.SilenceURL) -}}
{{- " " -}}<{{ .SilenceURL }}|🤐 Mute>
{{- end -}}
{{- if .DashboardURL -}}
{{- " " -}}<{{ .DashboardURL }}|dashboard>
{{- end -}}
{{- end -}}
{{- define "print_alert" -}}
{{- /* Renders one alert: status emoji and instance, alert name (prefixed by the service label when set), summary, optional description, remaining labels, and the link row while firing. */ -}}
{{- $status := "" -}}
{{- $severity := "" -}}
{{- if index .Labels "severity" -}}
{{- $severity = index .Labels "severity" -}}
{{- end -}}
{{- /* Status emoji: "resolved" wins over severity; disaster and critical share red; a missing or unrecognised severity falls through to orange. */ -}}
{{- if eq .Status "resolved" -}}
{{- $status = "🟢" -}}
{{- else if eq $severity "disaster" -}}
{{- $status = "🔴" -}}
{{- else if eq $severity "critical" -}}
{{- $status = "🔴" -}}
{{- else if eq $severity "performance" -}}
{{- $status = "🟡" -}}
{{- else -}}
{{- $status = "🟠" -}}
{{- end -}}
{{- $summary := index .Annotations "summary" -}}
{{- $description := index .Annotations "description" -}}
{{- $alertname := index .Labels "alertname" -}}
{{- /* For DatasourceNoData / DatasourceError alerts the alertname becomes the summary and the rulename label is shown as the alert name. */ -}}
{{- if or (eq $alertname "DatasourceNoData") (eq $alertname "DatasourceError") -}}
{{- $summary = $alertname -}}
{{- $alertname = index .Labels "rulename" -}}
{{- end -}}
{{- $instance := index .Labels "instance" -}}
{{- $service := "" -}}
{{- if index .Labels "service" -}}
{{- $service = index .Labels "service" -}}
{{- end -}}
{{ $status }} **{{ $instance }}** {{ "\n" }}
{{- if $service -}}
{{ $service }}: {{ $alertname }}{{ "\n" }}
{{- else -}}
{{ $alertname }}{{ "\n" }}
{{- end -}}
{{ "\n" }}
{{- /* Summary line: the leading icon depends on severity. */ -}}
{{- if eq $severity "disaster" -}}🚨 {{ $summary }}{{ "\n" }}
{{- else if eq $severity "critical" -}}🚨 {{ $summary }}{{ "\n" }}
{{- else if eq $severity "performance" -}}🐌 {{ $summary }}{{ "\n" }}
{{- else -}}⚠️ {{ $summary }}{{ "\n" }}
{{- end -}}
{{- if $description -}}
{{- "\n" -}}
{{- $description -}}{{ "\n" }}
{{- end -}}
{{- /* First pass: detect whether the alert carries any label outside the exclusion list; stops at the first match. */ -}}
{{- $has_labels := false -}}
{{- range $label, $value := .Labels -}}
{{- if and (ne $label "alertname") (ne $label "datasource_uid") (ne $label "grafana_folder") (ne $label "job") (ne $label "details") (ne $label "ref_id") (ne $label "rulename") (ne $label "instance") (ne $label "service") (ne $label "severity") (ne $label "dashboard_uid") (ne $label "panel_id") (ne $label "ip_version") (ne $label "ip") (ne $label "hostname") (ne $label "role") (ne $label "team") (ne $label "id") (ne $label "endpoint") (ne $label "hostgroup") (ne $label "module") (ne $label "servername") (ne $label "type") (ne $label "vm_project_id") (ne $label "vm_account_id") (ne $label "environment") (ne $label "rack") (ne $label "server_type") -}}
{{- $has_labels = true -}}
{{- break -}}
{{- end -}}
{{- end -}}
{{- /* Second pass: print every label outside the same exclusion list, one per line. The list must be kept in sync with the first pass. */ -}}
{{- if $has_labels -}}
{{- range $label, $value := .Labels -}}
{{- if and (ne $label "alertname") (ne $label "datasource_uid") (ne $label "grafana_folder") (ne $label "job") (ne $label "details") (ne $label "ref_id") (ne $label "rulename") (ne $label "instance") (ne $label "service") (ne $label "severity") (ne $label "dashboard_uid") (ne $label "panel_id") (ne $label "ip_version") (ne $label "ip") (ne $label "hostname") (ne $label "role") (ne $label "team") (ne $label "id") (ne $label "endpoint") (ne $label "hostgroup") (ne $label "module") (ne $label "servername") (ne $label "type") (ne $label "vm_project_id") (ne $label "vm_account_id") (ne $label "environment") (ne $label "rack") (ne $label "server_type") -}}
{{ $label }}: {{ $value }}{{ "\n" }}
{{- end -}}
{{- end -}}
{{- end -}}
{{- /* The link row is appended only for firing alerts; a blank line precedes it only when labels were printed. */ -}}
{{- if eq .Status "firing" -}}
{{- if $has_labels -}}{{- "\n" -}}{{- end -}}
{{- template "print_links" . -}}
{{- end -}}
{{- end -}}
{{- define "print_alerts" -}}
{{- /* Renders a list of alerts with "print_alert", writing a "---" separator line before every alert except the first. */ -}}
{{- range $index, $alert := . -}}
{{- if ne $index 0 -}}
{{ "---" }}{{ "\n" }}
{{- end -}}
{{- template "print_alert" $alert -}}
{{- end -}}
{{- end -}}
{{- /* Entry point: firing alerts first, then resolved alerts, each under its own header; two newlines separate the sections when both are present. */ -}}
{{- with .Alerts -}}
{{- if .Firing -}}
**🔥 Firing Alerts**{{ "\n" }}
{{ "---" }}{{ "\n" }}
{{- template "print_alerts" .Firing -}}
{{- end -}}
{{- if and .Firing .Resolved -}}
{{ "\n\n" }}
{{- end -}}
{{- if .Resolved -}}
**✅ Resolved Alerts**{{ "\n" }}
{{ "---" }}{{ "\n" }}
{{- template "print_alerts" .Resolved -}}
{{- end -}}
{{- end -}}

View File

@ -0,0 +1,138 @@
{{- define "print_links" -}}
{{- /* Renders the Markdown link row for a single alert: graph, alert rule, silence and dashboard links, each only when available. */ -}}
{{- /* Panel link and GeneratorURL are both primary references */ -}}
{{- if .PanelURL -}}
[📊 Graph]({{ .PanelURL }})
{{- else -}}
{{- /* No PanelURL: build a panel link from the panel_id label and either the dashboard_uid label or, failing that, "<service>-dashboard". */ -}}
{{- if and .Labels (index .Labels "panel_id") -}}
{{- $dashboard_uid := index .Labels "dashboard_uid" -}}
{{- if not $dashboard_uid -}}
{{- $service := index .Labels "service" -}}
{{- if $service -}}
{{- $dashboard_uid = printf "%s-dashboard" $service -}}
{{- end -}}
{{- end -}}
{{- if $dashboard_uid -}}
{{- $url := printf "__GRAFANA_URL__/d/%s?orgId=__ORG_ID__&viewPanel=%s&refresh=2s" $dashboard_uid (index .Labels "panel_id") -}}
[📊 Graph]({{ $url }})
{{- end -}}
{{- end -}}
{{- end -}}
{{- /* The alert-rule and silence links are emitted only while the alert is firing. */ -}}
{{- if and (eq .Status "firing") (.GeneratorURL) -}}
{{- " " -}}[⚡ Check Alert]({{ .GeneratorURL }})
{{- end -}}
{{- if and (eq .Status "firing") (.SilenceURL) -}}
{{- " " -}}[🤐 Mute]({{ .SilenceURL }})
{{- end -}}
{{- if .DashboardURL -}}
{{- " " -}}[📈 Dashboard]({{ .DashboardURL }})
{{- end -}}
{{- end -}}
{{- define "print_alert" -}}
{{- /* Renders one alert: status emoji and instance, alert name (prefixed by the service label when set), summary, labels, and the link row while firing. This variant does not print a description. */ -}}
{{- $status := "" -}}
{{- $severity := "" -}}
{{- if index .Labels "severity" -}}
{{- $severity = index .Labels "severity" -}}
{{- end -}}
{{- /* Status emoji: "resolved" wins over severity; a missing or unrecognised severity falls through to orange. */ -}}
{{- if eq .Status "resolved" -}}
{{- $status = "🟢" -}}
{{- else if eq $severity "disaster" -}}
{{- $status = "🔴" -}}
{{- else if eq $severity "critical" -}}
{{- $status = "🟣" -}}
{{- else if eq $severity "performance" -}}
{{- $status = "🟡" -}}
{{- else -}}
{{- $status = "🟠" -}}
{{- end -}}
{{- $summary := index .Annotations "summary" -}}
{{- $alertname := index .Labels "alertname" -}}
{{- /* For DatasourceNoData / DatasourceError alerts the alertname becomes the summary and the rulename label is shown as the alert name. */ -}}
{{- if or (eq $alertname "DatasourceNoData") (eq $alertname "DatasourceError") -}}
{{- $summary = $alertname -}}
{{- $alertname = index .Labels "rulename" -}}
{{- end -}}
{{- $instance := index .Labels "instance" -}}
{{- $service := "" -}}
{{- if index .Labels "service" -}}
{{- $service = index .Labels "service" -}}
{{- end -}}
{{ $status }} **{{ $instance }}**{{ "\n" }}
{{- if $service -}}
{{ $service }}: {{ $alertname }}{{ "\n" }}
{{- else -}}
{{ $alertname }}{{ "\n" }}
{{- end -}}
{{- /* Summary line: the leading icon depends on severity. */ -}}
{{- if eq $severity "disaster" -}}🚨 {{ $summary }}{{ "\n" }}
{{- else if eq $severity "critical" -}}🚨 {{ $summary }}{{ "\n" }}
{{- else if eq $severity "performance" -}}🐌 {{ $summary }}{{ "\n" }}
{{- else -}}⚠️ {{ $summary }}{{ "\n" }}
{{- end -}}
{{- /* Labels printed for every alert. NOTE(review): the environment value is a placeholder token in this file; presumably it is substituted before the template reaches Grafana - confirm against the caller. */ -}}
{{- $common_labels := coll.Dict "environment" "__ENV__" -}}
{{- range $label, $value := $common_labels -}}
`{{ $label }}: {{ $value }}`{{ "\n" }}
{{- end -}}
{{- /* First pass: detect whether the alert carries any label outside the exclusion list; stops at the first match. */ -}}
{{- $has_labels := false -}}
{{- range $label, $value := .Labels -}}
{{- if and (ne $label "alertname") (ne $label "datasource_uid") (ne $label "grafana_folder") (ne $label "job")
(ne $label "details") (ne $label "ref_id") (ne $label "rulename") (ne $label "instance")
(ne $label "service") (ne $label "severity") (ne $label "dashboard_uid") (ne $label "panel_id")
(ne $label "ip_version") (ne $label "ip") (ne $label "hostname") (ne $label "role") (ne $label "team")
(ne $label "id") (ne $label "endpoint") (ne $label "hostgroup") (ne $label "module")
(ne $label "servername") (ne $label "type") (ne $label "vm_project_id") (ne $label "vm_account_id") -}}
{{- $has_labels = true -}}
{{- break -}}
{{- end -}}
{{- end -}}
{{- /* Second pass: print every label outside the same exclusion list, one per line. The list must be kept in sync with the first pass. */ -}}
{{- if $has_labels -}}
{{- range $label, $value := .Labels -}}
{{- if and (ne $label "alertname") (ne $label "datasource_uid") (ne $label "grafana_folder") (ne $label "job")
(ne $label "details") (ne $label "ref_id") (ne $label "rulename") (ne $label "instance")
(ne $label "service") (ne $label "severity") (ne $label "dashboard_uid") (ne $label "panel_id")
(ne $label "ip_version") (ne $label "ip") (ne $label "hostname") (ne $label "role") (ne $label "team")
(ne $label "id") (ne $label "endpoint") (ne $label "hostgroup") (ne $label "module")
(ne $label "servername") (ne $label "type") (ne $label "vm_project_id") (ne $label "vm_account_id") -}}
`{{ $label }}: {{ $value }}`{{ "\n" }}
{{- end -}}
{{- end -}}
{{- end -}}
{{- /* The link row is appended only for firing alerts. */ -}}
{{- if eq .Status "firing" -}}
{{- "\n" -}}
{{- template "print_links" . -}}
{{- end -}}
{{- end -}}
{{- define "print_alerts" -}}
{{- /* Renders a list of alerts with "print_alert", writing a "---" separator line before every alert except the first. */ -}}
{{- range $index, $alert := . -}}
{{- if ne $index 0 -}}
{{ "---" }}{{ "\n" }}
{{- end -}}
{{- template "print_alert" $alert -}}
{{- end -}}
{{- end -}}
{{- /* Entry point: firing alerts first, then resolved alerts, each under its own header; two newlines separate the sections when both are present. */ -}}
{{- with .Alerts -}}
{{- if .Firing -}}
**🔥 Firing Alerts**{{ "\n" }}
{{ "---" }}{{ "\n" }}
{{- template "print_alerts" .Firing -}}
{{- end -}}
{{- if and .Firing .Resolved -}}
{{ "\n\n" }}
{{- end -}}
{{- if .Resolved -}}
**✅ Resolved Alerts**{{ "\n" }}
{{ "---" }}{{ "\n" }}
{{- template "print_alerts" .Resolved -}}
{{- end -}}
{{- end -}}

View File

@ -0,0 +1 @@
{{/* Title: firing count or RESOLVED, followed by the name and instance of the first alert in the notification. */}}{{ if not .Alerts.Firing }}[RESOLVED]{{ else }}[FIRING:{{ .Alerts.Firing | len }}]{{ end }} {{ $first := index .Alerts 0 }}{{ $first.Labels.alertname }} at {{ $first.Labels.instance }}

View File

@ -0,0 +1,60 @@
{{ define "custom_alert.title" }}{{/* Title: [STATUS], with firing and resolved counts while firing, then the common labels as a comma-separated "name: value" list when the notification has group labels. NOTE(review): the list is gated on GroupLabels but iterates CommonLabels - confirm which set is intended. */}}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}: {{ .Alerts.Firing | len }}{{ if gt (.Alerts.Resolved | len) 0 }}, RESOLVED: {{ .Alerts.Resolved | len }}{{ end }}{{ end }}]{{ if gt (len .GroupLabels) 0 }} Grouped by: {{ range $i, $pair := .CommonLabels.SortedPairs }}{{ if $i }}, {{ end }}{{ $pair.Name }}: {{ $pair.Value }}{{ end }}{{ end }}{{ end }}
{{ define "__text_alert_name" }}{{/* Prints the value of the alertname label; prints nothing when the label is absent. */}}{{ index .Labels "alertname" }}{{ end }}
{{ define "__text_alert_summary" }}{{/* Prints the summary annotation followed by a newline; prints nothing when the alert has no such annotation. */}}{{ range .Annotations.SortedPairs }}{{ if eq .Name "summary" }}{{ .Value }}
{{ end }}{{ end }}{{ end }}
{{ define "__text_alert_description" }}{{/* Prints the value of the description annotation; prints nothing when the annotation is absent. */}}{{ index .Annotations "description" }}{{ end }}
{{ define "__text_alert_runbook_url" }}{{/* Prints a "Playbook" link on its own line when the alert has a runbook_url annotation; prints nothing otherwise. */}}{{ range .Annotations.SortedPairs }}{{ if eq .Name "runbook_url" }}
:bookmark_tabs: <{{ .Value }}|Playbook>{{ end }}{{ end }}{{ end }}
{{ define "__text_alert_firing_item" }}{{/* One firing alert: bell icon and alert name, its labels (alertname, ref_id, datasource_uid and rule_uid are hidden), then action links and the optional playbook link. The name is printed in the header because alertname is filtered out of the label list below. */}}:bell: {{ template "__text_alert_name" . }}
{{ $alertName := (index .Labels "alertname") }}
Labels: {{ range .Labels.SortedPairs }}
{{- if ne .Name "alertname" }}
{{- if ne .Name "ref_id" }}
{{- if ne .Name "datasource_uid" }}
{{- if ne .Name "rule_uid" }}
- {{ if and (eq .Name "cluster") (eq $alertName "Kube-apiserver or blackbox is down") }}request_from_cluster{{ else }}{{ .Name }}{{ end }} = {{ .Value }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
Actions:
{{ if gt (len .DashboardURL) 0 }}➡️ <{{ .DashboardURL }}|Go to dashboard>{{ end }}
{{ if gt (len .PanelURL) 0 }}:chart_with_upwards_trend: <{{ .PanelURL }}|Go to panel>{{ end }}
{{ if gt (len .GeneratorURL) 0 }}:arrow_right: <{{ .GeneratorURL }}|Go to alert>{{ end }}
{{ if gt (len .SilenceURL) 0 }}:mute: <{{ .SilenceURL }}|Silence alert>{{ end }}{{ template "__text_alert_runbook_url" . }}{{ end }}
{{ define "__text_alert_resolved_item" }}{{/* One resolved alert: green circle and alert name, its labels (alertname, ref_id, datasource_uid and rule_uid are hidden; "cluster" is shown as request_from_cluster for one specific alert), then dashboard, panel and alert links. */}}:large_green_circle: {{ template "__text_alert_name" . }}
{{ $ruleName := index .Labels "alertname" }}
Labels: {{ range $pair := .Labels.SortedPairs }}
{{- if and (ne $pair.Name "alertname") (ne $pair.Name "ref_id") (ne $pair.Name "datasource_uid") (ne $pair.Name "rule_uid") }}
- {{ if and (eq $pair.Name "cluster") (eq $ruleName "Kube-apiserver or blackbox is down") }}request_from_cluster{{ else }}{{ $pair.Name }}{{ end }} = {{ $pair.Value }}
{{- end }}
{{- end }}
Actions:
{{ if .DashboardURL }}➡️ <{{ .DashboardURL }}|Go to dashboard>{{ end }}
{{ if .PanelURL }}:chart_with_upwards_trend: <{{ .PanelURL }}|Go to panel>{{ end }}
{{ if .GeneratorURL }}:arrow_right: <{{ .GeneratorURL }}|Go to alert>{{ end }}{{ end }}
{{ define "__text_alert_list_firing" }}{{/* Renders every alert of the given list as a firing item, each starting on a new line. */}}{{ range $alert := . }}
{{ template "__text_alert_firing_item" $alert }}{{ end }}{{ end }}
{{ define "__text_alert_list_resolved" }}{{/* Renders every alert of the given list as a resolved item, each starting on a new line. */}}{{ range $alert := . }}
{{ template "__text_alert_resolved_item" $alert }}{{ end }}{{ end }}
{{ define "custom_alert.message" }}{{/* Message body: a count line plus the item list for firing alerts, then the same for resolved alerts; a section is omitted when it has no alerts. */}}
{{ if .Alerts.Firing }}{{ len .Alerts.Firing }} Firing{{ template "__text_alert_list_firing" .Alerts.Firing }}{{ end }}
{{ if .Alerts.Resolved }}{{ len .Alerts.Resolved }} Resolved{{ template "__text_alert_list_resolved" .Alerts.Resolved }}{{ end }}{{ end }}

View File

@ -0,0 +1,70 @@
{{ define "telegram_message" }}{{/* HTML message body: a bold header with the alert count and one entry per alert, first for firing alerts and then for resolved ones; a section is omitted when it has no alerts. */}}
{{ if .Alerts.Firing }}
<b>🔥 {{ .Alerts.Firing | len }} alert(s) firing:</b>
{{ range $alert := .Alerts.Firing }} {{ template "telegram_alert_firing" $alert }} {{ end }} {{ end }}
{{ if .Alerts.Resolved }}
<b>✅ {{ .Alerts.Resolved | len }} alert(s) resolved:</b>
{{ range $alert := .Alerts.Resolved }} {{ template "telegram_alert_resolved" $alert }} {{ end }} {{ end }}
{{ end }}
{{ define "telegram_alert_firing" }}{{/* One firing alert: the alert name in bold (same header as the resolved entry, so the reader can tell which alert fired), the A and B values when they do not format as 0.0, then a fixed selection of labels, each printed only when set. */}}
<b>{{ .Labels.alertname }}</b>
<b>Value = </b>
{{- $value := .Values }}
{{- if ne (printf "%.1f" $value.A) "0.0" }}
{{- printf "%.1f" $value.A }}{{- end }}
{{- if ne (printf "%.1f" $value.B) "0.0" }} {{- printf "%.1f" $value.B }}{{- end }}
<b>Labels:</b>
{{- if index .Labels "name" }}
- Name = {{ index .Labels "name" }}
{{- end }}
{{- if index .Labels "instance" }}
- IP = {{ index .Labels "instance" }}
{{- end }}
{{- if index .Labels "Severity" }}
- Severity = {{ index .Labels "Severity" }}
{{- end }}
{{- if index .Labels "grafana_folder" }}
- Grafana_folder = {{ index .Labels "grafana_folder" }}
{{- end }}
{{- if index .Labels "volume" }}
- Volume = {{ index .Labels "volume" }}
{{- end }}
{{- if index .Labels "mountpoint" }}
- Mountpoint = {{ index .Labels "mountpoint" }}
{{- end }}
{{- if index .Labels "job" }}
- OS = {{ index .Labels "job" }}
{{- end }}
{{- if index .Labels "loc" }}
- Location = {{ index .Labels "loc" }}
{{- end }}
{{ end }}
{{ define "telegram_alert_resolved" }}{{/* One resolved alert: alert name, the Node and AlertValues annotations, then a fixed selection of labels, each printed only when set. */}}
<b>{{ .Labels.alertname }}</b>
Node: <b>{{ .Annotations.Node }}</b>
{{ .Annotations.AlertValues }}
<b>Labels:</b>
{{- with index .Labels "name" }}
- Name = {{ . }}
{{- end }}
{{- with index .Labels "instance" }}
- IP = {{ . }}
{{- end }}
{{- with index .Labels "Severity" }}
- Severity = {{ . }}
{{- end }}
{{- with index .Labels "grafana_folder" }}
- Grafana_folder = {{ . }}
{{- end }}
{{- with index .Labels "volume" }}
- Volume = {{ . }}
{{- end }}
{{- with index .Labels "mountpoint" }}
- Mountpoint = {{ . }}
{{- end }}
{{- with index .Labels "job" }}
- OS = {{ . }}
{{- end }}
{{- with index .Labels "loc" }}
- Location = {{ . }}
{{- end }}
{{ end }}

View File

@ -0,0 +1,29 @@
# Contact point definitions: a name, a type, and a free-form string map of
# type-specific settings.
variable "contact_points" {
  type = list(object({
    name     = string
    type     = string
    settings = map(string)
  }))
  description = "List of contact points for Grafana alerts"
}
# Boolean switch for provenance handling; enabled (true) unless overridden.
variable "disable_provenance" {
  type        = bool
  default     = true
  description = "Controls whether Grafana provisioning is disabled"
}
# Grafana organization identifier, passed as a string.
variable "org_id" {
  type        = string
  description = "ID of the Grafana organization"
}
# Base URL of the Grafana instance.
variable "grafana_url" {
  type        = string
  description = "Grafana URL"
}
# Name of the environment this Grafana deployment belongs to.
variable "env" {
  type        = string
  description = "Grafana environment"
}

View File

@ -0,0 +1,7 @@
# Provider requirements for this module: the Grafana provider from the
# grafana/grafana registry namespace (no version constraint is set here).
terraform {
  required_providers {
    grafana = { source = "grafana/grafana" }
  }
}

View File

@ -0,0 +1,9 @@
# Merges the IDs of all four dashboard resource variants into a single map
# keyed by each resource's for_each key. On a key collision the later
# argument of merge() wins.
output "dashboard_ids" {
  value = merge(
    { for key, dash in grafana_dashboard.dashboards_ignore_and_protect : key => dash.id },
    { for key, dash in grafana_dashboard.dashboards_ignore_only : key => dash.id },
    { for key, dash in grafana_dashboard.dashboards_protect_only : key => dash.id },
    { for key, dash in grafana_dashboard.dashboards_standard : key => dash.id }
  )
  description = "IDs of the created Grafana dashboards"
}

View File

@ -0,0 +1,7 @@
# Provider requirements for this module: the Grafana provider from the
# grafana/grafana registry namespace (no version constraint is set here).
terraform {
  required_providers {
    grafana = { source = "grafana/grafana" }
  }
}

View File

@ -0,0 +1,22 @@
locals {
  # Every group lands in exactly one of four buckets, chosen by its
  # keep_manual_changes and prevent_destroy_on_recreate flags (both treated as
  # false when absent). Each bucket is keyed by dashboard_alert_group_name.

  # Neither flag set.
  folders_standard = {
    for g in var.groups : g.dashboard_alert_group_name => g
    if !(lookup(g, "keep_manual_changes", false) || lookup(g, "prevent_destroy_on_recreate", false))
  }

  # Both flags set.
  folders_ignore_and_protect = {
    for g in var.groups : g.dashboard_alert_group_name => g
    if lookup(g, "prevent_destroy_on_recreate", false) && lookup(g, "keep_manual_changes", false)
  }

  # Only keep_manual_changes set.
  folders_ignore_only = {
    for g in var.groups : g.dashboard_alert_group_name => g
    if !lookup(g, "prevent_destroy_on_recreate", false) && lookup(g, "keep_manual_changes", false)
  }

  # Only prevent_destroy_on_recreate set.
  folders_protect_only = {
    for g in var.groups : g.dashboard_alert_group_name => g
    if lookup(g, "prevent_destroy_on_recreate", false) && !lookup(g, "keep_manual_changes", false)
  }
}

View File

@ -0,0 +1,44 @@
# Folders whose title may drift from configuration (title changes are ignored)
# and which Terraform refuses to destroy.
resource "grafana_folder" "folders_ignore_and_protect" {
  for_each = local.folders_ignore_and_protect

  org_id = var.org_id
  title  = each.value.dashboard_alert_group_name

  lifecycle {
    prevent_destroy = true
    ignore_changes  = [title]
  }
}
# Folders whose title may drift from configuration (title changes are
# ignored); no destroy protection.
resource "grafana_folder" "folders_ignore_only" {
  for_each = local.folders_ignore_only

  org_id = var.org_id
  title  = each.value.dashboard_alert_group_name

  lifecycle {
    ignore_changes = [title]
  }
}
# Folders that Terraform refuses to destroy; title changes are still applied.
resource "grafana_folder" "folders_protect_only" {
  for_each = local.folders_protect_only

  org_id = var.org_id
  title  = each.value.dashboard_alert_group_name

  lifecycle {
    prevent_destroy = true
  }
}
# Plain folders with default lifecycle behaviour.
resource "grafana_folder" "folders_standard" {
  for_each = local.folders_standard

  org_id = var.org_id
  title  = each.value.dashboard_alert_group_name
}

View File

@ -0,0 +1,21 @@
# Folder IDs of all four folder resource variants, merged into one map keyed
# by alert group name.
output "folder_ids" {
  value = merge(
    { for name, f in grafana_folder.folders_ignore_and_protect : name => f.id },
    { for name, f in grafana_folder.folders_ignore_only : name => f.id },
    { for name, f in grafana_folder.folders_protect_only : name => f.id },
    { for name, f in grafana_folder.folders_standard : name => f.id }
  )
  description = "Mapping of alert group names to their folder IDs in Grafana"
}
# Folder UIDs of all four folder resource variants, merged into one map keyed
# by alert group name.
output "folder_uids" {
  value = merge(
    { for name, f in grafana_folder.folders_ignore_and_protect : name => f.uid },
    { for name, f in grafana_folder.folders_ignore_only : name => f.uid },
    { for name, f in grafana_folder.folders_protect_only : name => f.uid },
    { for name, f in grafana_folder.folders_standard : name => f.uid }
  )
  description = "Mapping of alert group names to their folder UIDs in Grafana"
}

View File

@ -0,0 +1,16 @@
# Alert group definitions. The two boolean flags default to false and the
# dashboard path defaults to null when omitted.
variable "groups" {
  type = list(object({
    dashboard_alert_group_name  = string
    alert_definitions_path      = string
    dashboard_path_if_exist     = optional(string, null)
    keep_manual_changes         = optional(bool, false)
    prevent_destroy_on_recreate = optional(bool, false)
    alerts_on_datasources_uid   = list(string)
  }))
  description = "List of alert groups with their definitions"
}
# Grafana organization identifier, passed as a string.
variable "org_id" {
  type        = string
  description = "ID of the Grafana organization"
}

View File

@ -0,0 +1,7 @@
# Provider requirements for this module: the Grafana provider from the
# grafana/grafana registry namespace (no version constraint is set here).
terraform {
  required_providers {
    grafana = { source = "grafana/grafana" }
  }
}

View File

@ -0,0 +1,49 @@
locals {
  # Groups indexed by dashboard_alert_group_name, used to find a child's parent group.
  groups_by_name = {
    for g in var.groups : g.dashboard_alert_group_name => g
  }

  # "/"-separated segments of every group name, computed once per group.
  group_name_parts = {
    for g in var.groups : g.dashboard_alert_group_name => split("/", g.dashboard_alert_group_name)
  }

  # Per-group folder description. A name containing "/" is split into a parent
  # path (everything before the last segment) and a folder title (the last segment).
  folder_structure = {
    for name, parts in local.group_name_parts : name => {
      group              = local.groups_by_name[name]
      parts              = parts
      has_parent         = length(parts) > 1
      parent_folder_name = length(parts) > 1 ? join("/", slice(parts, 0, length(parts) - 1)) : null
      folder_title       = length(parts) > 1 ? parts[length(parts) - 1] : name
      # folder_uid of the group whose name equals the parent path; null when no such group exists.
      parent_folder_uid = length(parts) > 1 ? try(local.groups_by_name[join("/", slice(parts, 0, length(parts) - 1))].folder_uid, null) : null
    }
  }

  # The four lifecycle categories are mutually exclusive; every group lands in exactly one.
  folders_ignore_only = {
    for name, info in local.folder_structure : name => info
    if info.group.keep_manual_changes && !info.group.prevent_destroy_on_recreate
  }
  folders_protect_only = {
    for name, info in local.folder_structure : name => info
    if !info.group.keep_manual_changes && info.group.prevent_destroy_on_recreate
  }
  folders_ignore_and_protect = {
    for name, info in local.folder_structure : name => info
    if info.group.keep_manual_changes && info.group.prevent_destroy_on_recreate
  }
  folders_standard = {
    for name, info in local.folder_structure : name => info
    if !(info.group.keep_manual_changes || info.group.prevent_destroy_on_recreate)
  }

  # Union of all categories, for parent UID lookup.
  all_folders = merge(
    local.folders_ignore_and_protect,
    local.folders_ignore_only,
    local.folders_protect_only,
    local.folders_standard
  )
}

View File

@ -0,0 +1,51 @@
# Folders whose title may be edited manually in Grafana and which cannot be destroyed.
# Folders are created as a flat structure: the parent_folder_uid computed in locals is
# not applied here, so nesting has to be configured in the Grafana UI or via API.
# NOTE(review): the original note says the provider lacks nested-folder support —
# confirm against the provider version in use before relying on that.
resource "grafana_folder" "folders_ignore_and_protect" {
  for_each = local.folders_ignore_and_protect

  org_id = var.org_id
  uid    = each.value.group.folder_uid
  title  = each.value.folder_title

  lifecycle {
    prevent_destroy = true
    ignore_changes  = [title]
  }
}
# Folders whose title may be edited manually in Grafana (title drift is ignored).
resource "grafana_folder" "folders_ignore_only" {
  for_each = local.folders_ignore_only

  org_id = var.org_id
  uid    = each.value.group.folder_uid
  title  = each.value.folder_title

  lifecycle {
    ignore_changes = [title]
  }
}
# Folders that Terraform refuses to destroy; the title stays fully managed.
resource "grafana_folder" "folders_protect_only" {
  for_each = local.folders_protect_only

  org_id = var.org_id
  uid    = each.value.group.folder_uid
  title  = each.value.folder_title

  lifecycle {
    prevent_destroy = true
  }
}
# Plain folders: no ignore_changes and no destroy protection.
resource "grafana_folder" "folders_standard" {
  for_each = local.folders_standard

  org_id = var.org_id
  uid    = each.value.group.folder_uid
  title  = each.value.folder_title
}

View File

@ -0,0 +1,21 @@
# Folder IDs of all four lifecycle categories, keyed by the full alert group name.
output "folder_ids" {
  description = "Mapping of alert group names to their folder IDs in Grafana"
  value = merge(
    { for key, res in grafana_folder.folders_ignore_and_protect : key => res.id },
    { for key, res in grafana_folder.folders_ignore_only : key => res.id },
    { for key, res in grafana_folder.folders_protect_only : key => res.id },
    { for key, res in grafana_folder.folders_standard : key => res.id }
  )
}
# Folder UIDs of all four lifecycle categories, keyed by the full alert group name.
output "folder_uids" {
  description = "Mapping of alert group names to their folder UIDs in Grafana"
  value = merge(
    { for key, res in grafana_folder.folders_ignore_and_protect : key => res.uid },
    { for key, res in grafana_folder.folders_ignore_only : key => res.uid },
    { for key, res in grafana_folder.folders_protect_only : key => res.uid },
    { for key, res in grafana_folder.folders_standard : key => res.uid }
  )
}

View File

@ -0,0 +1,17 @@
# Alert groups; folder_uid is the explicit UID assigned to the group's Grafana folder.
variable "groups" {
  description = "List of alert groups with their definitions"
  type = list(object({
    dashboard_alert_group_name = string
    folder_uid                 = string
    alert_definitions_path     = string
    alerts_on_datasources_uid  = list(string)
    dashboard_path_if_exist    = optional(string, null)
    # Lifecycle flags, both off unless set.
    keep_manual_changes         = optional(bool, false)
    prevent_destroy_on_recreate = optional(bool, false)
  }))
}
# Organization passed as org_id to every folder resource.
variable "org_id" {
  type        = string
  description = "ID of the Grafana organization"
}

View File

@ -0,0 +1,7 @@
# Provider requirement for this module; no version constraint is declared here.
terraform {
  required_providers {
    grafana = { source = "grafana/grafana" }
  }
}

View File

@ -0,0 +1,53 @@
locals {
  # Every "*.json" file found directly in a group's dashboard_path_if_exist, tagged
  # with the owning group's name, folder ID and lifecycle flags. Groups without a
  # dashboard path contribute nothing.
  all_dashboards = flatten([
    for g in var.groups : [
      for json_name in(g.dashboard_path_if_exist != null ? fileset(g.dashboard_path_if_exist, "*.json") : []) : {
        group_name                  = g.dashboard_alert_group_name
        file_path                   = "${g.dashboard_path_if_exist}/${json_name}"
        folder_id                   = lookup(var.folder_ids, g.dashboard_alert_group_name, null)
        keep_manual_changes         = g.keep_manual_changes
        prevent_destroy_on_recreate = g.prevent_destroy_on_recreate
      }
    ]
  ])

  # Dashboards with both manual changes allowed and destroy protection
  dashboards_ignore_and_protect = [
    for d in local.all_dashboards : d
    if d.keep_manual_changes && d.prevent_destroy_on_recreate
  ]

  # Dashboards with only manual changes allowed
  dashboards_ignore_only = [
    for d in local.all_dashboards : d
    if d.keep_manual_changes && !d.prevent_destroy_on_recreate
  ]

  # Dashboards with only destroy protection
  dashboards_protect_only = [
    for d in local.all_dashboards : d
    if !d.keep_manual_changes && d.prevent_destroy_on_recreate
  ]

  # Standard dashboards without any special lifecycle management
  dashboards_standard = [
    for d in local.all_dashboards : d
    if !(d.keep_manual_changes || d.prevent_destroy_on_recreate)
  ]
}

View File

@ -0,0 +1,52 @@
# Dashboards keyed by file path: config_json drift is ignored and destroy is blocked.
resource "grafana_dashboard" "dashboards_ignore_and_protect" {
  for_each = { for item in local.dashboards_ignore_and_protect : item.file_path => item }

  org_id      = var.org_id
  folder      = each.value.folder_id
  overwrite   = true
  config_json = file(each.value.file_path)

  lifecycle {
    ignore_changes  = [config_json]
    prevent_destroy = true
  }
}
# Dashboards keyed by file path: config_json drift is ignored.
resource "grafana_dashboard" "dashboards_ignore_only" {
  for_each = { for item in local.dashboards_ignore_only : item.file_path => item }

  org_id      = var.org_id
  folder      = each.value.folder_id
  overwrite   = true
  config_json = file(each.value.file_path)

  lifecycle {
    ignore_changes = [config_json]
  }
}
# Dashboards keyed by file path: fully managed, but destroy is blocked.
resource "grafana_dashboard" "dashboards_protect_only" {
  for_each = { for item in local.dashboards_protect_only : item.file_path => item }

  org_id      = var.org_id
  folder      = each.value.folder_id
  overwrite   = true
  config_json = file(each.value.file_path)

  lifecycle {
    prevent_destroy = true
  }
}
# Dashboards keyed by file path, with no lifecycle customisation.
resource "grafana_dashboard" "dashboards_standard" {
  for_each = { for item in local.dashboards_standard : item.file_path => item }

  org_id      = var.org_id
  folder      = each.value.folder_id
  overwrite   = true
  config_json = file(each.value.file_path)
}

View File

@ -0,0 +1,9 @@
# Dashboard IDs of all four lifecycle categories, keyed by the dashboard file path.
output "dashboard_ids" {
  description = "IDs of the created Grafana dashboards"
  value = merge(
    { for path, board in grafana_dashboard.dashboards_ignore_and_protect : path => board.id },
    { for path, board in grafana_dashboard.dashboards_ignore_only : path => board.id },
    { for path, board in grafana_dashboard.dashboards_protect_only : path => board.id },
    { for path, board in grafana_dashboard.dashboards_standard : path => board.id }
  )
}

View File

@ -0,0 +1,21 @@
# Organization passed as org_id to every dashboard resource.
variable "org_id" {
  type        = string
  description = "ID of the organization for dashboards"
}
# Alert groups; this module reads the name, dashboard_path_if_exist and the lifecycle flags.
variable "groups" {
  description = "List of alert groups with their definitions and data sources"
  type = list(object({
    dashboard_alert_group_name = string
    alert_definitions_path     = string
    alerts_on_datasources_uid  = list(string)
    dashboard_path_if_exist    = optional(string, null)
    # Lifecycle flags, both off unless set.
    keep_manual_changes         = optional(bool, false)
    prevent_destroy_on_recreate = optional(bool, false)
  }))
}
# Looked up by dashboard_alert_group_name; a missing entry yields a null folder.
variable "folder_ids" {
  type        = map(string)
  description = "Mapping of folder IDs for each alert group"
}

View File

@ -0,0 +1,7 @@
# Provider requirement for this module; no version constraint is declared here.
terraform {
  required_providers {
    grafana = { source = "grafana/grafana" }
  }
}

View File

@ -0,0 +1,25 @@
# Partition var.datasources by the two lifecycle flags into four disjoint maps keyed by name.
locals {
  # Manual changes ignored AND destroy protection
  datasources_ignore_and_protect = {
    for src in var.datasources : src.name => src
    if src.keep_manual_changes && src.prevent_destroy_on_recreate
  }

  # Manual changes ignored only
  datasources_ignore_only = {
    for src in var.datasources : src.name => src
    if src.keep_manual_changes && !src.prevent_destroy_on_recreate
  }

  # Destroy protection only
  datasources_protect_only = {
    for src in var.datasources : src.name => src
    if !src.keep_manual_changes && src.prevent_destroy_on_recreate
  }

  # Neither flag set
  datasources_standard = {
    for src in var.datasources : src.name => src
    if !(src.keep_manual_changes || src.prevent_destroy_on_recreate)
  }
}

View File

@ -0,0 +1,173 @@
# Data sources whose listed attributes may drift from manual edits and which cannot be destroyed.
resource "grafana_data_source" "datasources_ignore_and_protect" {
  for_each = local.datasources_ignore_and_protect

  name         = each.value.name
  access_mode  = each.value.access_mode
  type         = each.value.type
  uid          = each.value.uid
  url          = lookup(each.value, "url", null)
  username     = lookup(each.value, "username", null)
  is_default   = each.value.is_default
  org_id       = var.org_id
  http_headers = each.value.http_headers

  # Basic auth is switched on through the resource's own arguments; previously the
  # flag and user were only written into jsonData, so basic_auth = true had no effect.
  basic_auth_enabled  = each.value.basic_auth
  basic_auth_username = each.value.basic_auth_user

  # The caller's json_data is merged with normalised values; keys in the second map win.
  # basicAuth / basicAuthUser are kept in jsonData for backward compatibility.
  json_data_encoded = jsonencode(
    merge(
      each.value.json_data,
      {
        basicAuth        = lookup(each.value, "basic_auth", false),
        basicAuthUser    = lookup(each.value, "basic_auth_user", null),
        protocol         = lookup(each.value.json_data, "protocol", "http"),
        database         = lookup(each.value.json_data, "database", null),
        maxOpenConns     = tonumber(lookup(each.value.json_data, "maxOpenConns", null)),
        maxIdleConns     = tonumber(lookup(each.value.json_data, "maxIdleConns", null)),
        maxIdleConnsAuto = lookup(each.value.json_data, "maxIdleConnsAuto", null),
        connMaxLifetime  = tonumber(lookup(each.value.json_data, "connMaxLifetime", null))
      }
    )
  )

  secure_json_data_encoded = jsonencode(
    merge(
      each.value.secure_json_data,
      {
        # An explicit secure_json_data entry wins; otherwise fall back to the
        # basic_auth_password input, which was previously declared but never used.
        basicAuthPassword = lookup(each.value.secure_json_data, "basicAuthPassword", each.value.basic_auth_password),
        password          = lookup(each.value.secure_json_data, "password", null)
      }
    )
  )

  lifecycle {
    ignore_changes  = [name, url, access_mode, is_default, json_data_encoded, secure_json_data_encoded]
    prevent_destroy = true
  }
}
# Data sources whose listed attributes may drift from manual edits in Grafana.
resource "grafana_data_source" "datasources_ignore_only" {
  for_each = local.datasources_ignore_only

  name         = each.value.name
  access_mode  = each.value.access_mode
  type         = each.value.type
  uid          = each.value.uid
  url          = lookup(each.value, "url", null)
  username     = lookup(each.value, "username", null)
  is_default   = each.value.is_default
  org_id       = var.org_id
  http_headers = each.value.http_headers

  # Basic auth is switched on through the resource's own arguments; previously the
  # flag and user were only written into jsonData, so basic_auth = true had no effect.
  basic_auth_enabled  = each.value.basic_auth
  basic_auth_username = each.value.basic_auth_user

  # The caller's json_data is merged with normalised values; keys in the second map win.
  # basicAuth / basicAuthUser are kept in jsonData for backward compatibility.
  json_data_encoded = jsonencode(
    merge(
      each.value.json_data,
      {
        basicAuth        = lookup(each.value, "basic_auth", false),
        basicAuthUser    = lookup(each.value, "basic_auth_user", null),
        protocol         = lookup(each.value.json_data, "protocol", "http"),
        database         = lookup(each.value.json_data, "database", null),
        maxOpenConns     = tonumber(lookup(each.value.json_data, "maxOpenConns", null)),
        maxIdleConns     = tonumber(lookup(each.value.json_data, "maxIdleConns", null)),
        maxIdleConnsAuto = lookup(each.value.json_data, "maxIdleConnsAuto", null),
        connMaxLifetime  = tonumber(lookup(each.value.json_data, "connMaxLifetime", null))
      }
    )
  )

  secure_json_data_encoded = jsonencode(
    merge(
      each.value.secure_json_data,
      {
        # An explicit secure_json_data entry wins; otherwise fall back to the
        # basic_auth_password input, which was previously declared but never used.
        basicAuthPassword = lookup(each.value.secure_json_data, "basicAuthPassword", each.value.basic_auth_password),
        password          = lookup(each.value.secure_json_data, "password", null)
      }
    )
  )

  lifecycle {
    ignore_changes = [name, url, access_mode, is_default, json_data_encoded, secure_json_data_encoded]
  }
}
# Fully managed data sources that Terraform refuses to destroy.
resource "grafana_data_source" "datasources_protect_only" {
  for_each = local.datasources_protect_only

  name         = each.value.name
  access_mode  = each.value.access_mode
  type         = each.value.type
  uid          = each.value.uid
  url          = lookup(each.value, "url", null)
  username     = lookup(each.value, "username", null)
  is_default   = each.value.is_default
  org_id       = var.org_id
  http_headers = each.value.http_headers

  # Basic auth is switched on through the resource's own arguments; previously the
  # flag and user were only written into jsonData, so basic_auth = true had no effect.
  basic_auth_enabled  = each.value.basic_auth
  basic_auth_username = each.value.basic_auth_user

  # The caller's json_data is merged with normalised values; keys in the second map win.
  # basicAuth / basicAuthUser are kept in jsonData for backward compatibility.
  json_data_encoded = jsonencode(
    merge(
      each.value.json_data,
      {
        basicAuth        = lookup(each.value, "basic_auth", false),
        basicAuthUser    = lookup(each.value, "basic_auth_user", null),
        protocol         = lookup(each.value.json_data, "protocol", "http"),
        database         = lookup(each.value.json_data, "database", null),
        maxOpenConns     = tonumber(lookup(each.value.json_data, "maxOpenConns", null)),
        maxIdleConns     = tonumber(lookup(each.value.json_data, "maxIdleConns", null)),
        maxIdleConnsAuto = lookup(each.value.json_data, "maxIdleConnsAuto", null),
        connMaxLifetime  = tonumber(lookup(each.value.json_data, "connMaxLifetime", null))
      }
    )
  )

  secure_json_data_encoded = jsonencode(
    merge(
      each.value.secure_json_data,
      {
        # An explicit secure_json_data entry wins; otherwise fall back to the
        # basic_auth_password input, which was previously declared but never used.
        basicAuthPassword = lookup(each.value.secure_json_data, "basicAuthPassword", each.value.basic_auth_password),
        password          = lookup(each.value.secure_json_data, "password", null)
      }
    )
  )

  lifecycle {
    prevent_destroy = true
  }
}
# Fully managed data sources with no lifecycle customisation.
resource "grafana_data_source" "datasources_standard" {
  for_each = local.datasources_standard

  name         = each.value.name
  access_mode  = each.value.access_mode
  type         = each.value.type
  uid          = each.value.uid
  url          = lookup(each.value, "url", null)
  username     = lookup(each.value, "username", null)
  is_default   = each.value.is_default
  org_id       = var.org_id
  http_headers = each.value.http_headers

  # Basic auth is switched on through the resource's own arguments; previously the
  # flag and user were only written into jsonData, so basic_auth = true had no effect.
  basic_auth_enabled  = each.value.basic_auth
  basic_auth_username = each.value.basic_auth_user

  # The caller's json_data is merged with normalised values; keys in the second map win.
  # basicAuth / basicAuthUser are kept in jsonData for backward compatibility.
  json_data_encoded = jsonencode(
    merge(
      each.value.json_data,
      {
        basicAuth        = lookup(each.value, "basic_auth", false),
        basicAuthUser    = lookup(each.value, "basic_auth_user", null),
        protocol         = lookup(each.value.json_data, "protocol", "http"),
        database         = lookup(each.value.json_data, "database", null),
        maxOpenConns     = tonumber(lookup(each.value.json_data, "maxOpenConns", null)),
        maxIdleConns     = tonumber(lookup(each.value.json_data, "maxIdleConns", null)),
        maxIdleConnsAuto = lookup(each.value.json_data, "maxIdleConnsAuto", null),
        connMaxLifetime  = tonumber(lookup(each.value.json_data, "connMaxLifetime", null))
      }
    )
  )

  secure_json_data_encoded = jsonencode(
    merge(
      each.value.secure_json_data,
      {
        # An explicit secure_json_data entry wins; otherwise fall back to the
        # basic_auth_password input, which was previously declared but never used.
        basicAuthPassword = lookup(each.value.secure_json_data, "basicAuthPassword", each.value.basic_auth_password),
        password          = lookup(each.value.secure_json_data, "password", null)
      }
    )
  )
}

View File

@ -0,0 +1,17 @@
# name => uid for every data source of every lifecycle category that has an id.
output "datasource_mapping" {
  description = "Mapping of data source names to their UIDs across all datasource categories"
  value = merge(
    { for src in grafana_data_source.datasources_ignore_and_protect : src.name => src.uid if src.id != null },
    { for src in grafana_data_source.datasources_ignore_only : src.name => src.uid if src.id != null },
    { for src in grafana_data_source.datasources_protect_only : src.name => src.uid if src.id != null },
    { for src in grafana_data_source.datasources_standard : src.name => src.uid if src.id != null }
  )
}

View File

@ -0,0 +1,32 @@
# Organization passed as org_id to every data source resource.
variable "org_id" {
  description = "Organization ID where the resources should be created"
  type        = string
}
# Data source definitions consumed by the grafana_data_source resources of this module.
variable "datasources" {
  description = "List of Grafana data sources"
  type = list(object({
    # --- Identity and connection ---
    name        = string                 # Name shown in Grafana
    uid         = string                 # Unique identifier of the source
    type        = string                 # Source type (e.g., prometheus, mysql, clickhouse)
    access_mode = string                 # proxy or direct
    is_default  = bool                   # Mark as the default source
    url         = optional(string, null) # Connection URL (for most sources)
    username    = optional(string, null)

    # --- Basic authentication ---
    basic_auth          = optional(bool, false)  # Enable basic authentication
    basic_auth_user     = optional(string, null) # Basic authentication user
    basic_auth_password = optional(string, null) # Basic authentication password

    # --- Extra settings ---
    json_data        = optional(map(any), {})    # Additional parameters in JSON format
    secure_json_data = optional(map(string), {}) # Sensitive data in JSON format
    http_headers     = optional(map(string), {})

    # --- Terraform lifecycle selection ---
    keep_manual_changes         = optional(bool, false) # Ignore manual changes made in Grafana
    prevent_destroy_on_recreate = optional(bool, false) # Block deletion of the resource
  }))
}

View File

@ -0,0 +1,7 @@
# Provider requirement for this module; no version constraint is declared here.
terraform {
  required_providers {
    grafana = { source = "grafana/grafana" }
  }
}

View File

@ -0,0 +1,7 @@
locals {
  # Name of the first contact point flagged is_default, or null when none is flagged.
  default_contact_point_name = try(
    [for point in var.contact_points : point.name if try(point.is_default, false)][0],
    null
  )
}

View File

@ -0,0 +1,62 @@
# Root notification policy. Created only when a default contact point exists.
# Routes from var.notification_policies become first-level policies; each route's
# optional `policies` list becomes a second level beneath it.
resource "grafana_notification_policy" "default_policy" {
  count = local.default_contact_point_name != null ? 1 : 0

  org_id             = var.org_id
  disable_provenance = var.disable_provenance

  contact_point   = local.default_contact_point_name
  group_by        = var.group_by
  group_wait      = var.group_wait
  group_interval  = var.group_interval
  repeat_interval = var.repeat_interval

  # First-level routes
  dynamic "policy" {
    for_each = var.notification_policies
    iterator = route

    content {
      contact_point   = route.value.contact_point
      continue        = route.value.continue
      group_by        = route.value.group_by
      group_wait      = route.value.group_wait
      group_interval  = route.value.group_interval
      repeat_interval = route.value.repeat_interval

      dynamic "matcher" {
        for_each = route.value.matchers
        iterator = rule

        content {
          label = rule.value.label
          match = rule.value.match
          value = rule.value.value
        }
      }

      # Second-level routes nested under the current first-level route
      dynamic "policy" {
        for_each = try(route.value.policies, [])
        iterator = child

        content {
          contact_point   = child.value.contact_point
          continue        = child.value.continue
          group_by        = child.value.group_by
          group_wait      = child.value.group_wait
          group_interval  = child.value.group_interval
          repeat_interval = child.value.repeat_interval

          dynamic "matcher" {
            for_each = child.value.matchers
            iterator = child_rule

            content {
              label = child_rule.value.label
              match = child_rule.value.match
              value = child_rule.value.value
            }
          }
        }
      }
    }
  }

  # lifecycle {
  #   prevent_destroy = false
  #   ignore_changes  = all
  # }
}

View File

@ -0,0 +1,76 @@
# Contact points; this module only uses them to find the one flagged is_default.
variable "contact_points" {
  description = "List of contact points"
  type = list(object({
    name       = string
    type       = string
    settings   = map(string)
    labels     = optional(map(string))
    is_default = optional(bool, false)
  }))
}
# Organization passed as org_id to the notification policy.
variable "org_id" {
  type        = string
  description = "Grafana organization ID"
}
# Forwarded unchanged to the policy's disable_provenance argument.
variable "disable_provenance" {
  type        = bool
  default     = true
  description = "Controls whether Grafana provisioning is disabled"
}
# Root-level grouping labels.
variable "group_by" {
  type        = list(string)
  default     = ["alertname"]
  description = "A list of alert labels to group alerts into notifications"
}
# Root-level group_wait, given as a duration string.
variable "group_wait" {
  type        = string
  default     = "30s"
  description = "Time to wait to buffer alerts of the same group before sending a notification"
}
# Root-level group_interval, given as a duration string.
variable "group_interval" {
  type        = string
  default     = "5m"
  description = "Minimum time interval between two notifications for the same group"
}
# Root-level repeat_interval, given as a duration string.
variable "repeat_interval" {
  type        = string
  default     = "4h"
  description = "Minimum time interval for re-sending a notification if an alert is still firing"
}
# Routing tree below the root policy: a list of routes, each with an optional
# second level of routes of the same shape under `policies`.
variable "notification_policies" {
  description = "Routing rules for specific label sets"
  default     = []
  type = list(object({
    contact_point   = string
    continue        = optional(bool)
    group_by        = optional(list(string))
    group_wait      = optional(string)
    group_interval  = optional(string)
    repeat_interval = optional(string)

    matchers = list(object({
      label = string
      match = string # Allowed operators are = for equality, != for negated equality, =~ for regex equality, and !~ for negated regex equality
      value = string
    }))

    # Second-level routes; empty when omitted.
    policies = optional(list(object({
      contact_point   = string
      continue        = optional(bool)
      group_by        = optional(list(string))
      group_wait      = optional(string)
      group_interval  = optional(string)
      repeat_interval = optional(string)

      matchers = list(object({
        label = string
        match = string
        value = string
      }))
    })), [])
  }))
}

View File

@ -0,0 +1,7 @@
# Provider requirement for this module; no version constraint is declared here.
terraform {
  required_providers {
    grafana = { source = "grafana/grafana" }
  }
}

View File

@ -0,0 +1,65 @@
locals {
  # Seconds per duration unit, used when converting "15m"-style strings.
  duration_units = {
    "s" = 1
    "m" = 60
    "h" = 3600
    "d" = 86400
  }

  # Datasource UID => name / type
  datasource_mapping      = { for ds in var.datasources : ds.uid => ds.name }
  datasource_mapping_type = { for ds in var.datasources : ds.uid => lookup(ds, "type", "prometheus") }

  # Folder UID => time range in seconds, defaulting to 60 (1 minute).
  # Groups without an entry in var.folder_uids resolve to a null UID; those are
  # dropped because null is not a valid map key.
  folder_time_ranges = {
    for uid in distinct([
      for group in var.groups : lookup(var.folder_uids, group.dashboard_alert_group_name, null)
      if group.dashboard_alert_group_name != null
    ]) :
    uid => lookup(var.folder_time_ranges, uid, 60)
    if uid != null
  }

  # One entry per (group, datasource UID) pair, carrying the decoded alert files of the group.
  combined_alerts = flatten([
    for group in var.groups : [
      for datasource_uid in group.alerts_on_datasources_uid :
      {
        alert_group_name = group.dashboard_alert_group_name
        folder_uid       = lookup(var.folder_uids, group.dashboard_alert_group_name, null)
        datasource_name  = lookup(local.datasource_mapping, datasource_uid, "unknown")
        datasource_uid   = datasource_uid
        datasource_type  = lookup(local.datasource_mapping_type, datasource_uid, "prometheus")
        alert_files = [
          # The path check must guard the fileset() call itself: a per-element `if`
          # filter runs only after fileset() has been evaluated, so a null path
          # (allowed by the variable type) would fail before the filter applies.
          for file_path in(
            try(trimspace(group.alert_definitions_path), "") != ""
            ? fileset(group.alert_definitions_path, "**/*.yaml")
            : []
          ) :
          {
            # Store full YAML content
            content = yamldecode(file("${group.alert_definitions_path}/${file_path}"))
            # Extract commonly used fields
            name        = try(yamldecode(file("${group.alert_definitions_path}/${file_path}")).name, null)
            alert_type  = try(yamldecode(file("${group.alert_definitions_path}/${file_path}")).datasource_type, "prometheus")
            editor_type = try(yamldecode(file("${group.alert_definitions_path}/${file_path}")).editor_type, null)
            mode        = try(yamldecode(file("${group.alert_definitions_path}/${file_path}")).mode, "single")
            # File metadata
            alert_file_path = "${group.alert_definitions_path}/${file_path}"
            alert_category  = split("/", file_path)[0]
          }
        ]
      }
    ]
  ])

  # Alerts keyed by "<datasource name> (<group name>)".
  # NOTE(review): two datasource UIDs that resolve to the same name within one group
  # (e.g. both "unknown") would produce a duplicate key here — confirm inputs rule this out.
  grouped_alerts_by_datasource = {
    for alert in local.combined_alerts :
    "${alert.datasource_name} (${alert.alert_group_name})" => merge(alert, {
      alert_files = flatten([
        for a in local.combined_alerts :
        a.alert_files if a.datasource_name == alert.datasource_name && a.alert_group_name == alert.alert_group_name
      ])
    })
  }
}

View File

@ -0,0 +1,224 @@
# One rule group per "<datasource name> (<alert group name>)" entry that has at
# least one alert file. Each alert file becomes one rule made of:
#   - query node(s): one per entry of `queries`, or a single node "A" from `expression`
#   - optional reduce node(s): from `functions[*].reduce`, or "B" when `need_reduce` is true
#   - optional math node(s): from `functions[*].math`, or "M" from `math_expression`
#   - a threshold node "T"
resource "grafana_rule_group" "alert_groups" {
  for_each = {
    for k, v in local.grouped_alerts_by_datasource :
    k => v if length(v.alert_files) > 0
  }

  # Main parameters
  name               = each.key
  org_id             = var.org_id
  interval_seconds   = var.default_evaluation_interval # Group-wide evaluation interval
  folder_uid         = each.value.folder_uid
  disable_provenance = var.disable_provenance

  # Rules configuration
  dynamic "rule" {
    for_each = each.value.alert_files
    content {
      name = "${rule.value.name} (${each.value.datasource_uid})"

      # Alerts with `functions`: the condition is the math ref_id of the LAST function
      # (falls back to "T" when that function has no math ref_id).
      # Simple alerts: the condition is the threshold node "T".
      condition = length(try(rule.value.content.functions, [])) > 0 ? try(rule.value.content.functions[length(rule.value.content.functions) - 1].math.ref_id, "T") : "T"

      for = try(
        coalesce(
          # Try to parse duration string (e.g., "15m", "24h") into seconds
          can(regex("^[0-9]+(s|m|h|d)$", rule.value.content.for)) ? format(
            "%ds",
            tonumber(regex("^([0-9]+)", rule.value.content.for)[0]) *
            lookup(local.duration_units, regex("[smhd]$", rule.value.content.for), 1)
          ) : null,
          # Fallback to frequency or default duration
          format("%ds", try(rule.value.content.frequency, var.default_alert_duration))
        ),
        format("%ds", var.default_alert_duration)
      )

      # Query nodes: one per entry of `queries` when present, otherwise a single
      # node "A" built from `expression`.
      dynamic "data" {
        for_each = can(rule.value.content.queries) ? [
          for ref_id, query in rule.value.content.queries : {
            ref_id = ref_id
            query  = query
          }
        ] : [{ ref_id = "A", query = try(rule.value.content.expression, "") }]
        content {
          ref_id         = data.value.ref_id
          datasource_uid = each.value.datasource_uid
          query_type     = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
          model = jsonencode(
            merge(
              {
                refId         = data.value.ref_id
                intervalMs    = var.default_interval_ms
                maxDataPoints = var.default_max_data_points
                instant       = false
                datasource = {
                  type = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
                  uid  = each.value.datasource_uid
                }
              },
              # Query fields depend on the datasource type: the ClickHouse plugin gets
              # an SQL model, every other type gets an `expr`-based model.
              contains(["grafana-clickhouse-datasource"],
              lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")) ? {
                # Use time_series format for queries with time grouping, table format for direct aggregation
                format     = can(regex("\\$__timeGroupAlias", data.value.query)) ? "time_series" : null
                formatAs   = can(regex("\\$__timeGroupAlias", data.value.query)) ? null : "table"
                queryType  = "sql"
                rawSql     = data.value.query
                editorMode = "code"
                editorType = "sql"
                } : {
                # Use this node's own query. It equals `expression` in the single-query
                # case; previously every node of a `queries` map received `expression`
                # (or "") instead of its own query text.
                expr      = data.value.query
                format    = "time_series"
                queryType = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
              }
            )
          )
          relative_time_range {
            from = try(
              rule.value.content.relative_time_range.from, # First try alert's own config
              lookup(                                      # Then try folder settings
                local.folder_time_ranges,
                each.value.folder_uid,
                var.default_time_range_from # Finally fallback to global default
              )
            )
            to = 0
          }
        }
      }

      # Reduce nodes: one per function that defines `reduce`, or a single node "B"
      # reducing "A" when `need_reduce` is true.
      dynamic "data" {
        for_each = length(try(rule.value.content.functions, [])) > 0 ? [
          for func in rule.value.content.functions : {
            ref_id     = try(func.reduce.ref_id, "B")
            expression = try(func.reduce.input, "A")
            reducer    = try(func.reduce.function, "last")
            mode       = try(func.reduce.mode, "strict")
          } if try(func.reduce, null) != null
          ] : try(rule.value.content.need_reduce, false) ? [{
            ref_id     = "B"
            expression = "A"
            # Map 'avg' reducer to 'mean'; any other reducer_type is passed through,
            # and "last" is used when reducer_type is absent.
            reducer = try(
              rule.value.content.reducer_type == "avg" ? "mean" : rule.value.content.reducer_type,
              "last"
            )
            mode = "strict"
        }] : []
        content {
          # Use exact ref_id and values from the for_each structure
          ref_id         = data.value.ref_id
          datasource_uid = "__expr__"
          model = jsonencode({
            refId      = data.value.ref_id
            type       = "reduce"
            expression = data.value.expression
            reducer    = data.value.reducer
            mode       = data.value.mode
          })
          relative_time_range {
            from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
            to   = try(rule.value.content.relative_time_range.to, 0)
          }
        }
      }

      # Math nodes: one per function that defines `math` (ref_id defaults to "M<index>"),
      # or a single node "M" when `math_expression` is set.
      # Node references used by this rule:
      # - Node A: initial query (default ref_id when `queries` is not used)
      # - Node B: reduction (created when need_reduce is true)
      # - Node M: math expression (optional)
      # - Node T: threshold evaluation
      dynamic "data" {
        for_each = length(try(rule.value.content.functions, [])) > 0 ? [
          for idx, func in rule.value.content.functions : {
            ref_id     = try(func.math.ref_id, "M${idx}")
            expression = func.math.expression
          } if try(func.math, null) != null
          ] : try(rule.value.content.math_expression, null) != null ? [{
            ref_id     = "M"
            expression = rule.value.content.math_expression
        }] : []
        content {
          ref_id         = data.value.ref_id
          datasource_uid = "__expr__"
          model = jsonencode({
            refId      = data.value.ref_id
            type       = "math"
            expression = data.value.expression
            input      = try(rule.value.content.need_reduce ? "B" : "A", "A")
          })
          relative_time_range {
            from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
            to   = try(rule.value.content.relative_time_range.to, 0)
          }
        }
      }

      # Threshold node, always emitted with ref_id "T"
      data {
        ref_id         = "T"
        datasource_uid = "__expr__"
        model = jsonencode({
          refId = "T"
          type  = "threshold"
          # Evaluate B (with reduction) or A (without reduction)
          expression = try(rule.value.content.need_reduce ? "B" : "A", "A")
          conditions = [
            {
              evaluator = merge(
                {
                  type = rule.value.content.condition_type
                },
                # Range conditions take the two-element threshold_range
                contains(["outside_range", "within_range"], rule.value.content.condition_type) ? {
                  params = rule.value.content.threshold_range
                  } : {
                  # All other conditions take the single threshold
                  params = [rule.value.content.threshold]
                }
              )
              operator = { type = "and" }
              # Use 'mean' reducer for consistency (same as mapping 'avg' to 'mean' in reductions)
              reducer = { type = "mean", params = [] }
              query   = { params = [] }
              type    = "query"
            }
          ]
        })
        relative_time_range {
          from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
          to   = try(rule.value.content.relative_time_range.to, 0)
        }
      }

      # Rule metadata
      annotations = {
        summary     = rule.value.content.summary
        description = try(rule.value.content.description, "")
        # Lower bound of the range for range conditions, otherwise the single threshold
        threshold = try(
          contains(["outside_range", "within_range"], try(rule.value.content.condition_type, "gt")) ?
          tostring(try(rule.value.content.threshold_range[0], "")) :
          tostring(try(rule.value.content.threshold, ""))
        )
      }
      labels         = rule.value.content.labels
      no_data_state  = try(rule.value.content.no_data_state, var.default_no_data_state)
      exec_err_state = try(rule.value.content.exec_err_state, var.default_exec_err_state)
    }
  }
}

View File

@ -0,0 +1,122 @@
variable "groups" {
  # Each element describes one alert group and the datasources it is instantiated on.
  type = list(object({
    # Group name.
    dashboard_alert_group_name = string
    # UIDs of the datasources this group's alerts are created for.
    alerts_on_datasources_uid = list(string)
    # Optional path to the alert definitions.
    alert_definitions_path = optional(string, null)
    # Optional dashboard path.
    dashboard_path_if_exist = optional(string, null)
    # Lifecycle-related flags (both default to false).
    keep_manual_changes         = optional(bool, false)
    prevent_destroy_on_recreate = optional(bool, false)
  }))
  description = "List of alert groups with their definitions and data sources"
}
variable "datasources" {
  # One object per Grafana data source.
  type = list(object({
    # --- Identity and connection ---
    uid         = string                 # Unique source identifier
    name        = string                 # Name shown in Grafana
    type        = string                 # Data source type (e.g., prometheus, mysql, clickhouse)
    access_mode = string                 # Access mode: proxy or direct
    is_default  = bool                   # Whether this is the default source
    url         = optional(string, null) # Connection URL (for most sources)
    username    = optional(string, null)

    # --- Basic authentication ---
    basic_auth          = optional(bool, false)  # Enable basic authentication
    basic_auth_user     = optional(string, null) # Basic-auth username
    basic_auth_password = optional(string, null) # Basic-auth password

    # --- Extra settings ---
    json_data        = optional(map(any), {})    # Additional parameters in JSON format
    secure_json_data = optional(map(string), {}) # Sensitive data in JSON format

    # --- Terraform lifecycle management ---
    keep_manual_changes         = optional(bool, false) # Ignore manual changes in Grafana
    prevent_destroy_on_recreate = optional(bool, false) # Prevent resource deletion on update
  }))
  description = "List of Grafana data sources"
}
variable "org_id" {
  # Required; no default is provided.
  type        = string
  description = "ID of the Grafana organization"
}
variable "folder_uids" {
  # Required map: alert group name => folder UID.
  type        = map(string)
  description = "Mapping of alert group names to their folder UIDs"
}
variable "folder_time_ranges" {
  # Folder UID => time range in seconds; empty by default.
  type    = map(number)
  default = {}
  # The heredoc body is indented uniformly; "<<-" strips that common indent.
  description = <<-EOT
    Mapping of folder UIDs to their default time ranges in seconds.
    If not specified for a folder, alerts will use the folder's default of 60 seconds (1 minute).
    Example:
    {
    "folder1_uid" = 300 # 5 minutes
    "folder2_uid" = 3600 # 1 hour
    }
  EOT
}
# Alert duration and timing configuration
variable "interval_seconds" {
  # Evaluation interval, in seconds (default: one minute).
  type        = number
  default     = 60
  description = "Interval in seconds for evaluating alerts"
}
variable "default_interval_ms" {
  # Expression evaluation interval, in milliseconds (60000 ms = 60 s).
  type        = number
  default     = 60000
  description = "Default interval in milliseconds for evaluating alert expressions"
}
variable "default_max_data_points" {
  # Upper bound on data points per query.
  type        = number
  default     = 43200
  description = "Default maximum number of data points"
}
variable "default_no_data_state" {
  description = "Default no data state for alerts"
  type        = string
  default     = "OK"

  # Reject unsupported values at plan time instead of failing later in the provider.
  # The accepted set follows the grafana_rule_group `no_data_state` argument.
  validation {
    condition     = contains(["OK", "NoData", "KeepLast", "Alerting"], var.default_no_data_state)
    error_message = "The default_no_data_state value must be one of OK, NoData, KeepLast, or Alerting."
  }
}
variable "default_exec_err_state" {
  description = "Default execution error state for alerts"
  type        = string
  default     = "Error"

  # Reject unsupported values at plan time instead of failing later in the provider.
  # The accepted set follows the grafana_rule_group `exec_err_state` argument.
  validation {
    condition     = contains(["OK", "Error", "KeepLast", "Alerting"], var.default_exec_err_state)
    error_message = "The default_exec_err_state value must be one of OK, Error, KeepLast, or Alerting."
  }
}
variable "default_alert_duration" {
  # Pending period in seconds; 300 s = 5 minutes.
  type        = number
  default     = 300
  description = "Default duration (in seconds) for how long a condition must be true before alerting"
}
variable "default_evaluation_interval" {
  # Seconds between evaluations; 60 s = 1 minute.
  type        = number
  default     = 60
  description = "Default interval (in seconds) between alert rule evaluations"
}
variable "default_time_range_from" {
  # Lookback window in seconds; 604800 s = 7 days.
  type        = number
  default     = 604800
  description = "Default time range (in seconds) for main query lookback"
}
variable "default_processing_range" {
  # Time range in seconds for processing blocks; 600 s = 10 minutes.
  type        = number
  default     = 600
  description = "Default time range (in seconds) for processing blocks"
}
variable "disable_provenance" {
  # The flag concerns provenance (who is allowed to modify the resource), not provisioning;
  # the previous description conflated the two.
  description = "When true, provenance is disabled so the managed alerting resources remain editable from sources other than Terraform (e.g. the Grafana UI)"
  type        = bool
  default     = true
}

View File

@ -0,0 +1,7 @@
terraform {
  # Declare the source address of the Grafana provider; no version constraint is set here.
  required_providers {
    grafana = { source = "grafana/grafana" }
  }
}

View File

@ -0,0 +1,65 @@
locals {
  # Seconds per duration-unit suffix ("15m" => 15 * 60).
  duration_units = {
    "s" = 1
    "m" = 60
    "h" = 3600
    "d" = 86400
  }

  # Datasource lookups keyed by UID.
  datasource_mapping = { for ds in var.datasources : ds.uid => ds.name }
  # `type` is a required attribute of var.datasources, so it is read directly.
  datasource_mapping_type = { for ds in var.datasources : ds.uid => ds.type }

  # Folder UID => lookback in seconds, defaulting to 60 seconds (1 minute).
  # Groups without a (non-null) entry in var.folder_uids are skipped: a null UID
  # cannot be used as a map key and would otherwise fail the plan.
  folder_time_ranges = {
    for uid in distinct([
      for group in var.groups :
      lookup(var.folder_uids, group.dashboard_alert_group_name, null)
      if try(var.folder_uids[group.dashboard_alert_group_name], null) != null
    ]) :
    uid => lookup(var.folder_time_ranges, uid, 60)
  }

  # One entry per (group, datasource UID) pair, carrying the decoded alert files.
  combined_alerts = flatten([
    for group in var.groups : [
      for datasource_uid in group.alerts_on_datasources_uid : {
        alert_group_name = group.dashboard_alert_group_name
        folder_uid       = lookup(var.folder_uids, group.dashboard_alert_group_name, null)
        datasource_name  = lookup(local.datasource_mapping, datasource_uid, "unknown")
        datasource_uid   = datasource_uid
        datasource_type  = lookup(local.datasource_mapping_type, datasource_uid, "prometheus")
        alert_files = [
          # Outer loop: project the fields used by the rule group from each decoded file.
          for decoded in [
            # Inner loop: render and decode every YAML file exactly once.
            # The path check must wrap fileset() itself: a `for ... if` filter runs only
            # after the collection expression is evaluated, so it cannot protect
            # fileset() from a null or blank alert_definitions_path.
            for file_path in(
              try(trimspace(group.alert_definitions_path), "") != ""
              ? fileset(group.alert_definitions_path, "**/*.yaml")
              : toset([])
              ) : {
              relative_path = file_path
              full_path     = "${group.alert_definitions_path}/${file_path}"
              # Full YAML content after variable substitution
              content = yamldecode(templatefile("${group.alert_definitions_path}/${file_path}", var.alert_variables))
            }
            ] : {
            content = decoded.content
            # Commonly used fields, with defaults when absent from the YAML
            name        = try(decoded.content.name, null)
            alert_type  = try(decoded.content.datasource_type, "prometheus")
            editor_type = try(decoded.content.editor_type, null)
            mode        = try(decoded.content.mode, "single")
            # File metadata
            alert_file_path = decoded.full_path
            alert_category  = split("/", decoded.relative_path)[0]
          }
        ]
      }
    ]
  ])

  # Alerts keyed by "<datasource name> (<alert group name>)".
  # NOTE(review): two entries resolving to the same datasource name within one group
  # (e.g. several unknown UIDs mapping to "unknown") would yield duplicate keys - confirm
  # that callers never pass such input.
  grouped_alerts_by_datasource = {
    for alert in local.combined_alerts :
    "${alert.datasource_name} (${alert.alert_group_name})" => merge(alert, {
      alert_files = flatten([
        for a in local.combined_alerts :
        a.alert_files if a.datasource_name == alert.datasource_name && a.alert_group_name == alert.alert_group_name
      ])
    })
  }
}

View File

@ -0,0 +1,224 @@
# One Grafana rule group per "<datasource name> (<alert group name>)" entry that has
# at least one alert definition file; each file becomes one rule in the group.
resource "grafana_rule_group" "alert_groups" {
  for_each = {
    for k, v in local.grouped_alerts_by_datasource :
    k => v if length(v.alert_files) > 0
  }

  # Main parameters
  name               = each.key
  org_id             = var.org_id
  interval_seconds   = var.default_evaluation_interval # Group-wide evaluation interval
  folder_uid         = each.value.folder_uid
  disable_provenance = var.disable_provenance

  # Rules configuration
  dynamic "rule" {
    for_each = each.value.alert_files
    content {
      name = "${rule.value.name} (${each.value.datasource_uid})"

      # Function-based alerts: the condition is the math ref_id of the LAST entry in `functions`.
      # All other alerts (no functions, or no math.ref_id on the last entry): threshold node T.
      condition = length(try(rule.value.content.functions, [])) > 0 ? try(rule.value.content.functions[length(rule.value.content.functions) - 1].math.ref_id, "T") : "T"

      # Pending period, always rendered as "<n>s":
      #   1. `for` given as "<digits><s|m|h|d>" is converted to seconds;
      #   2. otherwise `frequency` (or the module default) is used;
      #   3. if anything above fails to evaluate, the module default is used.
      for = try(
        coalesce(
          # Try to parse duration string (e.g., "15m", "24h")
          can(regex("^[0-9]+(s|m|h|d)$", rule.value.content.for)) ? format(
            "%ds",
            tonumber(regex("^([0-9]+)", rule.value.content.for)[0]) *
            lookup(local.duration_units, regex("[smhd]$", rule.value.content.for), 1)
          ) : null,
          # Fallback to frequency or default duration
          format("%ds", try(rule.value.content.frequency, var.default_alert_duration))
        ),
        format("%ds", var.default_alert_duration)
      )

      # Query nodes: one per entry of `queries` (keyed by ref_id) when present,
      # otherwise a single node A built from `expression`.
      dynamic "data" {
        for_each = can(rule.value.content.queries) ? [
          for ref_id, query in rule.value.content.queries : {
            ref_id = ref_id
            query  = query
          }
        ] : [{ ref_id = "A", query = try(rule.value.content.expression, "") }]
        content {
          ref_id         = data.value.ref_id
          datasource_uid = each.value.datasource_uid
          query_type     = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
          model = jsonencode(
            merge(
              {
                refId         = data.value.ref_id
                intervalMs    = var.default_interval_ms
                maxDataPoints = var.default_max_data_points
                instant       = false
                datasource = {
                  type = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
                  uid  = each.value.datasource_uid
                }
              },
              # Datasource-specific part of the model.
              # Only the ClickHouse plugin type gets the SQL model; every other type
              # falls through to the Prometheus-style model.
              contains(["grafana-clickhouse-datasource"],
                lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")) ? {
                # Use time_series format for queries with time grouping, table format for direct aggregation
                format     = can(regex("\\$__timeGroupAlias", data.value.query)) ? "time_series" : null
                formatAs   = can(regex("\\$__timeGroupAlias", data.value.query)) ? null : "table"
                queryType  = "sql"
                rawSql     = data.value.query
                editorMode = "code"
                editorType = "sql"
                } : {
                # Prometheus-compatible datasources (prometheus, victoriametrics).
                # Use this node's own query so each entry of `queries` keeps its expression;
                # for single-expression alerts data.value.query already holds `expression`.
                expr      = data.value.query
                format    = "time_series"
                queryType = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
              }
            )
          )
          relative_time_range {
            from = try(
              rule.value.content.relative_time_range.from, # First try alert's own config
              lookup(                                      # Then try folder settings
                local.folder_time_ranges,
                each.value.folder_uid,
                var.default_time_range_from # Finally fallback to global default
              )
            )
            # Honour the alert's own `to`, as the expression nodes below do; 0 otherwise.
            to = try(rule.value.content.relative_time_range.to, 0)
          }
        }
      }

      # Reduce nodes: one per `functions` entry that has a `reduce` section, or a single
      # node B over A when `need_reduce` is true and no functions are defined.
      dynamic "data" {
        for_each = length(try(rule.value.content.functions, [])) > 0 ? [
          for func in rule.value.content.functions : {
            ref_id     = try(func.reduce.ref_id, "B")
            expression = try(func.reduce.input, "A")
            reducer    = try(func.reduce.function, "last")
            mode       = try(func.reduce.mode, "strict")
          } if try(func.reduce, null) != null
          ] : try(rule.value.content.need_reduce, false) ? [{
            ref_id     = "B"
            expression = "A"
            # Map 'avg' reducer to 'mean' which is supported by Grafana
            # Other reducers (last, max, min, sum) are already supported
            reducer = try(
              rule.value.content.reducer_type == "avg" ? "mean" : rule.value.content.reducer_type,
              "last"
            )
            mode = "strict"
        }] : []
        content {
          # Use exact ref_id and values from the for_each structure
          ref_id         = data.value.ref_id
          datasource_uid = "__expr__"
          model = jsonencode({
            refId      = data.value.ref_id
            type       = "reduce"
            expression = data.value.expression
            reducer    = data.value.reducer
            mode       = data.value.mode
          })
          relative_time_range {
            from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
            to   = try(rule.value.content.relative_time_range.to, 0)
          }
        }
      }

      # Math nodes.
      # Node references used by this module:
      # - A (or the keys of `queries`): initial query nodes
      # - B: reduction (created when need_reduce is true)
      # - M / M<idx> / explicit math.ref_id: math expressions (optional)
      # - T: threshold evaluation
      dynamic "data" {
        for_each = length(try(rule.value.content.functions, [])) > 0 ? [
          for idx, func in rule.value.content.functions : {
            ref_id     = try(func.math.ref_id, "M${idx}")
            expression = func.math.expression
          } if try(func.math, null) != null
          ] : try(rule.value.content.math_expression, null) != null ? [{
            ref_id     = "M"
            expression = rule.value.content.math_expression
        }] : []
        content {
          ref_id         = data.value.ref_id
          datasource_uid = "__expr__"
          model = jsonencode({
            refId      = data.value.ref_id
            type       = "math"
            expression = data.value.expression
            input      = try(rule.value.content.need_reduce ? "B" : "A", "A")
          })
          relative_time_range {
            from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
            to   = try(rule.value.content.relative_time_range.to, 0)
          }
        }
      }

      # Threshold node T; always emitted, even when the rule condition is a math node.
      data {
        ref_id         = "T" # Use T consistently for threshold
        datasource_uid = "__expr__"
        model = jsonencode({
          refId = "T"
          type  = "threshold"
          # Evaluate B (with reduction) or A (without reduction)
          expression = try(rule.value.content.need_reduce ? "B" : "A", "A")
          conditions = [
            {
              evaluator = merge(
                {
                  type = rule.value.content.condition_type
                },
                # Range conditions take the two-element threshold_range
                contains(["outside_range", "within_range"], rule.value.content.condition_type) ? {
                  params = rule.value.content.threshold_range
                  } : {
                  # All other condition types take a single threshold
                  params = [rule.value.content.threshold]
                }
              )
              operator = { type = "and" }
              # Use 'mean' reducer for consistency (same as mapping 'avg' to 'mean' in reductions)
              reducer = { type = "mean", params = [] }
              query   = { params = [] }
              type    = "query"
            }
          ]
        })
        relative_time_range {
          from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
          to   = try(rule.value.content.relative_time_range.to, 0)
        }
      }

      # Rule metadata
      annotations = {
        summary     = rule.value.content.summary
        description = try(rule.value.content.description, "")
        # Lower bound of the range for range conditions, the single threshold otherwise
        threshold = try(
          contains(["outside_range", "within_range"], try(rule.value.content.condition_type, "gt")) ?
          tostring(try(rule.value.content.threshold_range[0], "")) :
          tostring(try(rule.value.content.threshold, ""))
        )
      }
      labels         = rule.value.content.labels
      no_data_state  = try(rule.value.content.no_data_state, var.default_no_data_state)
      exec_err_state = try(rule.value.content.exec_err_state, var.default_exec_err_state)
    }
  }
}

View File

@ -0,0 +1,128 @@
variable "groups" {
  # Each element describes one alert group and the datasources it is instantiated on.
  type = list(object({
    # Group name; also the lookup key into var.folder_uids.
    dashboard_alert_group_name = string
    # UIDs of the datasources this group's alerts are created for.
    alerts_on_datasources_uid = list(string)
    # Directory scanned for "**/*.yaml" alert definitions; null means no alerts.
    alert_definitions_path = optional(string, null)
    # Optional dashboard path.
    dashboard_path_if_exist = optional(string, null)
    # Lifecycle-related flags (both default to false).
    keep_manual_changes         = optional(bool, false)
    prevent_destroy_on_recreate = optional(bool, false)
  }))
  description = "List of alert groups with their definitions and data sources"
}
variable "datasources" {
  # One object per Grafana data source.
  type = list(object({
    # --- Identity and connection ---
    uid         = string                 # Unique source identifier
    name        = string                 # Name shown in Grafana
    type        = string                 # Data source type (e.g., prometheus, mysql, clickhouse)
    access_mode = string                 # Access mode: proxy or direct
    is_default  = bool                   # Whether this is the default source
    url         = optional(string, null) # Connection URL (for most sources)
    username    = optional(string, null)

    # --- Basic authentication ---
    basic_auth          = optional(bool, false)  # Enable basic authentication
    basic_auth_user     = optional(string, null) # Basic-auth username
    basic_auth_password = optional(string, null) # Basic-auth password

    # --- Extra settings ---
    json_data        = optional(map(any), {})    # Additional parameters in JSON format
    secure_json_data = optional(map(string), {}) # Sensitive data in JSON format

    # --- Terraform lifecycle management ---
    keep_manual_changes         = optional(bool, false) # Ignore manual changes in Grafana
    prevent_destroy_on_recreate = optional(bool, false) # Prevent resource deletion on update
  }))
  description = "List of Grafana data sources"
}
variable "org_id" {
  # Required; no default is provided.
  type        = string
  description = "ID of the Grafana organization"
}
variable "folder_uids" {
  # Required map: alert group name => folder UID.
  type        = map(string)
  description = "Mapping of alert group names to their folder UIDs"
}
variable "folder_time_ranges" {
  # Folder UID => time range in seconds; empty by default.
  type    = map(number)
  default = {}
  # The heredoc body is indented uniformly; "<<-" strips that common indent.
  description = <<-EOT
    Mapping of folder UIDs to their default time ranges in seconds.
    If not specified for a folder, alerts will use the folder's default of 60 seconds (1 minute).
    Example:
    {
    "folder1_uid" = 300 # 5 minutes
    "folder2_uid" = 3600 # 1 hour
    }
  EOT
}
# Alert duration and timing configuration
variable "interval_seconds" {
  # Evaluation interval, in seconds (default: one minute).
  # NOTE(review): the rule group resource in this module reads
  # var.default_evaluation_interval instead - confirm this variable is still needed.
  type        = number
  default     = 60
  description = "Interval in seconds for evaluating alerts"
}
variable "default_interval_ms" {
  # Passed as `intervalMs` in each query model (60000 ms = 60 s).
  type        = number
  default     = 60000
  description = "Default interval in milliseconds for evaluating alert expressions"
}
variable "default_max_data_points" {
  # Passed as `maxDataPoints` in each query model.
  type        = number
  default     = 43200
  description = "Default maximum number of data points"
}
variable "default_no_data_state" {
  description = "Default no data state for alerts"
  type        = string
  default     = "OK"

  # Used whenever an alert definition does not set `no_data_state` itself.
  # Reject unsupported values at plan time instead of failing later in the provider;
  # the accepted set follows the grafana_rule_group `no_data_state` argument.
  validation {
    condition     = contains(["OK", "NoData", "KeepLast", "Alerting"], var.default_no_data_state)
    error_message = "The default_no_data_state value must be one of OK, NoData, KeepLast, or Alerting."
  }
}
variable "default_exec_err_state" {
  description = "Default execution error state for alerts"
  type        = string
  default     = "Error"

  # Used whenever an alert definition does not set `exec_err_state` itself.
  # Reject unsupported values at plan time instead of failing later in the provider;
  # the accepted set follows the grafana_rule_group `exec_err_state` argument.
  validation {
    condition     = contains(["OK", "Error", "KeepLast", "Alerting"], var.default_exec_err_state)
    error_message = "The default_exec_err_state value must be one of OK, Error, KeepLast, or Alerting."
  }
}
variable "default_alert_duration" {
  # Fallback for a rule's `for` when the alert sets neither `for` nor `frequency`; 300 s = 5 minutes.
  type        = number
  default     = 300
  description = "Default duration (in seconds) for how long a condition must be true before alerting"
}
variable "default_evaluation_interval" {
  # Applied as the rule group's interval_seconds; 60 s = 1 minute.
  type        = number
  default     = 60
  description = "Default interval (in seconds) between alert rule evaluations"
}
variable "default_time_range_from" {
  # Last-resort lookback for query nodes when neither the alert nor its folder sets one; 604800 s = 7 days.
  type        = number
  default     = 604800
  description = "Default time range (in seconds) for main query lookback"
}
variable "default_processing_range" {
  # `from` of the reduce/math/threshold nodes when the alert sets no relative_time_range; 600 s = 10 minutes.
  type        = number
  default     = 600
  description = "Default time range (in seconds) for processing blocks"
}
variable "disable_provenance" {
  # The flag concerns provenance (who is allowed to modify the rule group), not provisioning;
  # the previous description conflated the two.
  description = "When true, provenance is disabled so the managed rule groups remain editable from sources other than Terraform (e.g. the Grafana UI)"
  type        = bool
  default     = true
}
variable "alert_variables" {
  # Handed to templatefile() as the variable map when rendering each alert YAML file.
  type        = map(string)
  default     = {}
  description = "Map of variables to substitute in alert YAML files using templatefile() syntax (e.g., $${variable_name})"
}

View File

@ -0,0 +1,7 @@
terraform {
  # Declare the source address of the Grafana provider; no version constraint is set here.
  required_providers {
    grafana = { source = "grafana/grafana" }
  }
}