Add dashboard UID auto-generation and Gitea CI workflow
Some checks failed
terraform-dev / validate (push) Failing after 1m53s
terraform-dev / plan (push) Has been skipped
terraform-dev / apply (push) Has been skipped

This commit is contained in:
Alexandr
2026-03-25 06:41:19 +03:00
parent 345c5786b3
commit 558a23d916
83 changed files with 53372 additions and 1 deletions

View File

@ -0,0 +1,65 @@
locals {
  # Multipliers for converting duration suffixes ("s"/"m"/"h"/"d") to seconds;
  # used when parsing "for" strings such as "15m" or "24h".
  duration_units = {
    "s" = 1
    "m" = 60
    "h" = 3600
    "d" = 86400
  }

  # Lookup tables: datasource UID -> display name / datasource type.
  datasource_mapping      = { for ds in var.datasources : ds.uid => ds.name }
  datasource_mapping_type = { for ds in var.datasources : ds.uid => lookup(ds, "type", "prometheus") }

  # Per-folder default query lookback (seconds), falling back to 60 seconds
  # (1 minute) for folders not present in var.folder_time_ranges.
  folder_time_ranges = {
    for uid in distinct([for group in var.groups : lookup(var.folder_uids, group.dashboard_alert_group_name, null) if group.dashboard_alert_group_name != null]) :
    uid => lookup(var.folder_time_ranges, uid, 60)
  }

  # One entry per (group, datasource) pair, each carrying the decoded alert
  # definition files found under the group's alert_definitions_path.
  combined_alerts = flatten([
    for group in var.groups : [
      for datasource_uid in group.alerts_on_datasources_uid :
      {
        alert_group_name = group.dashboard_alert_group_name
        folder_uid       = lookup(var.folder_uids, group.dashboard_alert_group_name, null)
        datasource_name  = lookup(local.datasource_mapping, datasource_uid, "unknown")
        datasource_uid   = datasource_uid
        datasource_type  = lookup(local.datasource_mapping_type, datasource_uid, "prometheus")
        # Guard BEFORE calling fileset(): the previous per-element "if" filter
        # only ran after fileset() had already been evaluated, so a null or
        # blank path failed at plan time instead of yielding an empty list.
        # (The old `can(group.alert_definitions_path)` was always true and is
        # dropped.)
        alert_files = (
          group.alert_definitions_path != null &&
          try(trimspace(group.alert_definitions_path), "") != ""
          ) ? flatten([
          for file_path in fileset(group.alert_definitions_path, "**/*.yaml") : [
            # Bind the decoded document once instead of calling
            # yamldecode(file(...)) five times per file.
            for doc in [yamldecode(file("${group.alert_definitions_path}/${file_path}"))] : {
              # Store full YAML content
              content = doc
              # Extract commonly used fields
              name        = try(doc.name, null)
              alert_type  = try(doc.datasource_type, "prometheus")
              editor_type = try(doc.editor_type, null)
              mode        = try(doc.mode, "single")
              # File metadata
              alert_file_path = "${group.alert_definitions_path}/${file_path}"
              alert_category  = split("/", file_path)[0]
            }
          ]
        ]) : []
      }
    ]
  ])

  # Group alerts by "<datasource name> (<group name>)"; the alert_files of all
  # combined_alerts entries sharing that pair are concatenated into one list.
  # NOTE(review): if two datasource UIDs resolve to the same display name
  # within one group, this for-expression produces duplicate keys and fails at
  # plan time — confirm datasource names are unique per group.
  grouped_alerts_by_datasource = {
    for alert in local.combined_alerts :
    "${alert.datasource_name} (${alert.alert_group_name})" => merge(alert, {
      alert_files = flatten([
        for a in local.combined_alerts :
        a.alert_files if a.datasource_name == alert.datasource_name && a.alert_group_name == alert.alert_group_name
      ])
    })
  }
}

View File

@ -0,0 +1,224 @@
# One Grafana alert rule group per "<datasource name> (<alert group name>)"
# key. Entries with no discovered alert definition files are skipped, since a
# rule group must contain at least one rule.
resource "grafana_rule_group" "alert_groups" {
  for_each = {
    for k, v in local.grouped_alerts_by_datasource :
    k => v if length(v.alert_files) > 0
  }

  # Main parameters
  name               = each.key
  org_id             = var.org_id
  interval_seconds   = var.default_evaluation_interval # Group-wide evaluation interval
  folder_uid         = each.value.folder_uid
  disable_provenance = var.disable_provenance

  # One rule per alert definition YAML file in this group.
  dynamic "rule" {
    for_each = each.value.alert_files
    content {
      # Suffix with the datasource UID so the same alert definition can be
      # provisioned once per datasource without rule-name collisions.
      name = "${rule.value.name} (${each.value.datasource_uid})"

      # Business alerts (with "functions"): use the last function's math node
      # ref_id as the alerting condition.
      # Simple alerts: use threshold node T (always emitted below).
      condition = length(try(rule.value.content.functions, [])) > 0 ? try(rule.value.content.functions[length(rule.value.content.functions) - 1].math.ref_id, "T") : "T"

      # How long the condition must hold before firing. Priority:
      # parse a duration string from the file ("15m", "24h", ...) ->
      # numeric "frequency" field -> module-wide default.
      for = try(
        coalesce(
          # Try to parse duration string (e.g., "15m", "24h")
          can(regex("^[0-9]+(s|m|h|d)$", rule.value.content.for)) ? format(
            "%ds",
            tonumber(regex("^([0-9]+)", rule.value.content.for)[0]) *
            lookup(local.duration_units, regex("[smhd]$", rule.value.content.for), 1)
          ) : null,
          # Fallback to frequency or default duration
          format("%ds", try(rule.value.content.frequency, var.default_alert_duration))
        ),
        format("%ds", var.default_alert_duration)
      )

      # Query node(s): either the explicit "queries" map (ref_id -> query)
      # from the alert file, or a single node "A" built from "expression".
      # Both SQL and Prometheus alerts share this block; the model payload
      # differs by datasource type.
      dynamic "data" {
        for_each = can(rule.value.content.queries) ? [
          for ref_id, query in rule.value.content.queries : {
            ref_id = ref_id
            query  = query
          }
        ] : [{ ref_id = "A", query = try(rule.value.content.expression, "") }]
        content {
          ref_id         = data.value.ref_id
          datasource_uid = each.value.datasource_uid
          query_type     = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
          model = jsonencode(
            merge(
              {
                refId         = data.value.ref_id
                intervalMs    = var.default_interval_ms
                maxDataPoints = var.default_max_data_points
                instant       = false
                datasource = {
                  type = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
                  uid  = each.value.datasource_uid
                }
              },
              # SQL-based datasources (clickhouse) get a rawSql payload;
              # prometheus-compatible ones (prometheus, victoriametrics) get
              # an expr payload built from the alert's "expression" field.
              contains(["grafana-clickhouse-datasource"],
                lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")) ? {
                # Use time_series format for queries with time grouping, table
                # format for direct aggregation
                format     = can(regex("\\$__timeGroupAlias", data.value.query)) ? "time_series" : null
                formatAs   = can(regex("\\$__timeGroupAlias", data.value.query)) ? null : "table"
                queryType  = "sql"
                rawSql     = data.value.query
                editorMode = "code"
                editorType = "sql"
              } : {
                # Prometheus-compatible datasources (prometheus, victoriametrics)
                expr      = try(rule.value.content.expression, "")
                format    = "time_series"
                queryType = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
              }
            )
          )
          relative_time_range {
            from = try(
              rule.value.content.relative_time_range.from, # First try alert's own config
              lookup(                                      # Then try folder settings
                local.folder_time_ranges,
                each.value.folder_uid,
                var.default_time_range_from # Finally fallback to global default
              )
            )
            # Honor the alert's configured "to" bound, consistent with the
            # processing blocks below (this was previously hard-coded to 0,
            # silently dropping a configured upper bound for the query node).
            to = try(rule.value.content.relative_time_range.to, 0)
          }
        }
      }

      # Unified reduction processing.
      # Function-based alerts: one reduce node per "reduce" function entry.
      # Simple alerts with need_reduce: a single node B reducing node A.
      dynamic "data" {
        for_each = length(try(rule.value.content.functions, [])) > 0 ? [
          for func in rule.value.content.functions : {
            ref_id     = try(func.reduce.ref_id, "B")
            expression = try(func.reduce.input, "A")
            reducer    = try(func.reduce.function, "last")
            mode       = try(func.reduce.mode, "strict")
          } if try(func.reduce, null) != null
        ] : try(rule.value.content.need_reduce, false) ? [{
          ref_id     = "B"
          expression = "A"
          # Map 'avg' reducer to 'mean' which is supported by Grafana.
          # Other reducers (last, max, min, sum) are already supported.
          reducer = try(
            rule.value.content.reducer_type == "avg" ? "mean" : rule.value.content.reducer_type,
            "last"
          )
          mode = "strict"
        }] : []
        content {
          # Use exact ref_id and values from the for_each structure
          ref_id         = data.value.ref_id
          datasource_uid = "__expr__"
          model = jsonencode({
            refId      = data.value.ref_id
            type       = "reduce"
            expression = data.value.expression
            reducer    = data.value.reducer
            mode       = data.value.mode
          })
          relative_time_range {
            from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
            to   = try(rule.value.content.relative_time_range.to, 0)
          }
        }
      }

      # Math expressions processing.
      # Node references in Grafana alerts:
      # - Node A: Initial query (SQL or Prometheus)
      # - Node B: Reduction operation (created when need_reduce is true)
      # - Node M: Math expression (optional, for complex calculations)
      # - Node T: Final threshold evaluation
      dynamic "data" {
        for_each = length(try(rule.value.content.functions, [])) > 0 ? [
          for idx, func in rule.value.content.functions : {
            ref_id     = try(func.math.ref_id, "M${idx}")
            expression = func.math.expression
          } if try(func.math, null) != null
        ] : try(rule.value.content.math_expression, null) != null ? [{
          ref_id     = "M"
          expression = rule.value.content.math_expression
        }] : []
        content {
          ref_id         = data.value.ref_id
          datasource_uid = "__expr__"
          model = jsonencode({
            refId      = data.value.ref_id
            type       = "math"
            expression = data.value.expression
            input      = try(rule.value.content.need_reduce ? "B" : "A", "A")
          })
          relative_time_range {
            from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
            to   = try(rule.value.content.relative_time_range.to, 0)
          }
        }
      }

      # Unified threshold evaluation. Always emitted; for function-based
      # alerts the "condition" above points at the math node instead, so this
      # node is present but not the alerting condition.
      data {
        ref_id         = "T" # Use T consistently for threshold
        datasource_uid = "__expr__"
        model = jsonencode({
          refId = "T"
          type  = "threshold"
          # Simple alerts: evaluate B (with reduction) or A (without reduction)
          expression = try(rule.value.content.need_reduce ? "B" : "A", "A")
          conditions = [
            {
              evaluator = merge(
                {
                  type = rule.value.content.condition_type
                },
                # Handle range conditions for site monitoring
                contains(["outside_range", "within_range"], rule.value.content.condition_type) ? {
                  params = rule.value.content.threshold_range
                } : {
                  # Handle single threshold for business/system alerts
                  params = [rule.value.content.threshold]
                }
              )
              operator = { type = "and" }
              # Use 'mean' reducer for consistency (same as mapping 'avg' to
              # 'mean' in reductions)
              reducer = { type = "mean", params = [] }
              query   = { params = [] }
              type    = "query"
            }
          ]
        })
        relative_time_range {
          from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
          to   = try(rule.value.content.relative_time_range.to, 0)
        }
      }

      # Rule metadata: summary/description/threshold surface in notifications.
      annotations = {
        summary     = rule.value.content.summary
        description = try(rule.value.content.description, "")
        threshold = try(
          contains(["outside_range", "within_range"], try(rule.value.content.condition_type, "gt")) ?
          tostring(try(rule.value.content.threshold_range[0], "")) :
          tostring(try(rule.value.content.threshold, ""))
        )
      }
      labels         = rule.value.content.labels
      no_data_state  = try(rule.value.content.no_data_state, var.default_no_data_state)
      exec_err_state = try(rule.value.content.exec_err_state, var.default_exec_err_state)
    }
  }
}

View File

@ -0,0 +1,122 @@
# Alert groups to provision. Each group maps one folder of YAML alert
# definitions onto one or more datasource UIDs.
variable "groups" {
  description = "List of alert groups with their definitions and data sources"
  type = list(object({
    dashboard_alert_group_name = string                 # Key into var.folder_uids; also part of the rule-group name
    alert_definitions_path     = optional(string, null) # Directory scanned recursively for **/*.yaml alert files
    dashboard_path_if_exist    = optional(string, null)
    keep_manual_changes        = optional(bool, false)
    prevent_destroy_on_recreate = optional(bool, false)
    alerts_on_datasources_uid  = list(string) # Each listed UID gets its own copy of the group's rules
  }))
}

# Grafana datasources referenced by the alert groups; used to resolve a
# datasource UID to its display name and type when building rule models.
variable "datasources" {
  description = "List of Grafana data sources"
  type = list(object({
    # Main parameters
    name        = string                 # Data source name (displayed in Grafana)
    uid         = string                 # Unique source identifier
    type        = string                 # Data source type (e.g., prometheus, mysql, clickhouse)
    url         = optional(string, null) # Connection URL (for most sources)
    username    = optional(string, null)
    access_mode = string # Access mode: proxy or direct
    is_default  = bool   # Set as default source
    # Authentication settings
    basic_auth          = optional(bool, false)   # Use basic authentication
    basic_auth_user     = optional(string, null)  # Username for basic authentication
    basic_auth_password = optional(string, null)  # Password for basic authentication
    # Additional parameters
    json_data        = optional(map(any), {})    # Additional parameters in JSON format
    secure_json_data = optional(map(string), {}) # Sensitive data in JSON format
    # Terraform lifecycle management fields
    # NOTE(review): these lifecycle flags are not referenced by the visible
    # alert-rule code — presumably consumed by a datasource module elsewhere;
    # confirm before removing.
    keep_manual_changes         = optional(bool, false) # Ignore manual changes in Grafana
    prevent_destroy_on_recreate = optional(bool, false) # Prevent resource deletion on update
  }))
}
# Grafana organization that owns the provisioned rule groups.
variable "org_id" {
  description = "ID of the Grafana organization"
  type        = string
}

# Maps dashboard_alert_group_name -> Grafana folder UID; groups whose name is
# missing from this map get a null folder_uid.
variable "folder_uids" {
  description = "Mapping of alert group names to their folder UIDs"
  type        = map(string)
}

variable "folder_time_ranges" {
  description = <<-EOT
    Mapping of folder UIDs to their default time ranges in seconds.
    If not specified for a folder, alerts will use the folder's default of 60 seconds (1 minute).
    Example:
    {
      "folder1_uid" = 300  # 5 minutes
      "folder2_uid" = 3600 # 1 hour
    }
  EOT
  type        = map(number)
  default     = {}
}
# Alert duration and timing configuration
# NOTE(review): interval_seconds is not referenced by the visible rule-group
# resource (which uses default_evaluation_interval) — confirm it is used
# elsewhere in the module before relying on it.
variable "interval_seconds" {
  description = "Interval in seconds for evaluating alerts"
  type        = number
  default     = 60
}

# Written into each query node's model as intervalMs.
variable "default_interval_ms" {
  description = "Default interval in milliseconds for evaluating alert expressions"
  type        = number
  default     = 60000
}

# Written into each query node's model as maxDataPoints.
variable "default_max_data_points" {
  description = "Default maximum number of data points"
  type        = number
  default     = 43200
}

variable "default_no_data_state" {
  description = "Default no data state for alerts"
  type        = string
  default     = "OK"
}

variable "default_exec_err_state" {
  description = "Default execution error state for alerts"
  type        = string
  default     = "Error"
}

# Used as the rule's "for" when the alert file supplies neither a parsable
# duration string nor a numeric frequency.
variable "default_alert_duration" {
  description = "Default duration (in seconds) for how long a condition must be true before alerting"
  type        = number
  default     = 300 # 5 minutes
}

variable "default_evaluation_interval" {
  description = "Default interval (in seconds) between alert rule evaluations"
  type        = number
  default     = 60 # 1 minute
}

# Lookback for the main query node when neither the alert file nor the folder
# mapping specifies one.
variable "default_time_range_from" {
  description = "Default time range (in seconds) for main query lookback"
  type        = number
  default     = 604800 # 7 days
}

# Lookback for reduce/math/threshold expression nodes.
variable "default_processing_range" {
  description = "Default time range (in seconds) for processing blocks"
  type        = number
  default     = 600 # 10 minutes
}

variable "disable_provenance" {
  description = "Controls whether Grafana provisioning is disabled"
  type        = bool
  default     = true
}

View File

@ -0,0 +1,7 @@
# Provider requirements for this module.
terraform {
  required_providers {
    grafana = {
      source = "grafana/grafana"
      # NOTE(review): no version constraint is pinned, so `terraform init`
      # will take the latest provider release — consider adding a `version`
      # constraint (e.g. "~> X.Y") matching the release this module was
      # tested against.
    }
  }
}