Add dashboard UID auto-generation and Gitea CI workflow
This commit is contained in:
65
environments/modules/grafana_rule_group/locals.tf
Normal file
65
environments/modules/grafana_rule_group/locals.tf
Normal file
@ -0,0 +1,65 @@
|
||||
# Derived values for the grafana_rule_group module: lookup tables for
# datasources and folders, plus the alert definitions loaded from YAML files.
locals {
  # Seconds per duration-unit suffix; used to convert strings such as "15m"
  # or "24h" into a number of seconds.
  duration_units = {
    "s" = 1
    "m" = 60
    "h" = 3600
    "d" = 86400
  }

  # Datasource UID => datasource name.
  datasource_mapping = { for ds in var.datasources : ds.uid => ds.name }

  # Datasource UID => datasource type ("prometheus" when no type is available).
  datasource_mapping_type = { for ds in var.datasources : ds.uid => lookup(ds, "type", "prometheus") }

  # Folder UID => query time range in seconds, defaulting to 60 seconds
  # (1 minute) for folders without an entry in var.folder_time_ranges.
  # Groups whose name has no entry in var.folder_uids resolve to a null UID;
  # those are skipped because a map key cannot be null.
  folder_time_ranges = {
    for uid in distinct([
      for group in var.groups :
      lookup(var.folder_uids, group.dashboard_alert_group_name, null)
      if group.dashboard_alert_group_name != null
    ]) :
    uid => lookup(var.folder_time_ranges, uid, 60)
    if uid != null
  }

  # One entry per (group, datasource UID) pair, carrying every alert
  # definition found under the group's alert_definitions_path.
  combined_alerts = flatten([
    for group in var.groups : [
      for datasource_uid in group.alerts_on_datasources_uid : {
        alert_group_name = group.dashboard_alert_group_name
        folder_uid       = lookup(var.folder_uids, group.dashboard_alert_group_name, null)
        datasource_name  = lookup(local.datasource_mapping, datasource_uid, "unknown")
        datasource_uid   = datasource_uid
        datasource_type  = lookup(local.datasource_mapping_type, datasource_uid, "prometheus")

        alert_files = flatten([
          # Single-element loop so the `if` guard runs BEFORE fileset() is
          # called: a for-expression evaluates its filter ahead of its value,
          # whereas a filter placed after `for x in fileset(...)` is too late
          # to stop fileset() from failing on a null path.
          for dir in [group.alert_definitions_path] : [
            for file_path in fileset(dir, "**/*.yaml") : [
              # Single-element loop again: decode each YAML file once and
              # reuse the result for every derived field.
              for doc in [yamldecode(file("${dir}/${file_path}"))] : {
                # Full decoded YAML content
                content = doc

                # Commonly used fields, with defaults when absent
                name        = try(doc.name, null)
                alert_type  = try(doc.datasource_type, "prometheus")
                editor_type = try(doc.editor_type, null)
                mode        = try(doc.mode, "single")

                # File metadata; the category is the first path segment
                # relative to the definitions directory.
                alert_file_path = "${dir}/${file_path}"
                alert_category  = split("/", file_path)[0]
              }
            ]
          ]
          # Skips null, empty and whitespace-only paths (trimspace(null)
          # fails, so try() yields "").
          if try(trimspace(dir), "") != ""
        ])
      }
    ]
  ])

  # Alerts keyed by "<datasource name> (<alert group name>)". The alert_files
  # of every combined_alerts entry sharing the same datasource name and group
  # name are concatenated into the entry stored under that key.
  grouped_alerts_by_datasource = {
    for alert in local.combined_alerts :
    "${alert.datasource_name} (${alert.alert_group_name})" => merge(alert, {
      alert_files = flatten([
        for a in local.combined_alerts :
        a.alert_files if a.datasource_name == alert.datasource_name && a.alert_group_name == alert.alert_group_name
      ])
    })
  }
}
|
||||
224
environments/modules/grafana_rule_group/main.tf
Normal file
224
environments/modules/grafana_rule_group/main.tf
Normal file
@ -0,0 +1,224 @@
|
||||
# One Grafana rule group per entry of local.grouped_alerts_by_datasource
# (key: "<datasource name> (<alert group name>)") that has at least one alert
# definition file. Each YAML definition becomes one rule made of:
#   - one query node per entry of `queries` (or a single node "A" built from
#     `expression`),
#   - optional reduce nodes (from `functions[*].reduce` or `need_reduce`),
#   - optional math nodes (from `functions[*].math` or `math_expression`),
#   - a threshold node "T".
resource "grafana_rule_group" "alert_groups" {
  for_each = {
    for k, v in local.grouped_alerts_by_datasource :
    k => v if length(v.alert_files) > 0
  }

  # Main parameters
  name               = each.key
  org_id             = var.org_id
  interval_seconds   = var.default_evaluation_interval # Group-wide evaluation interval
  folder_uid         = each.value.folder_uid
  disable_provenance = var.disable_provenance

  # Rules configuration
  dynamic "rule" {
    for_each = each.value.alert_files
    content {
      name = "${rule.value.name} (${each.value.datasource_uid})"

      # Function-based alerts: the condition is the ref_id of the math node of
      # the LAST entry in `functions` (falls back to "T" if that entry has no
      # math.ref_id). Simple alerts: the threshold node "T".
      condition = length(try(rule.value.content.functions, [])) > 0 ? try(rule.value.content.functions[length(rule.value.content.functions) - 1].math.ref_id, "T") : "T"

      # Pending period, always rendered as "<seconds>s".
      for = try(
        coalesce(
          # Parse a duration string (e.g., "15m", "24h") into seconds
          can(regex("^[0-9]+(s|m|h|d)$", rule.value.content.for)) ? format(
            "%ds",
            tonumber(regex("^([0-9]+)", rule.value.content.for)[0]) *
            lookup(local.duration_units, regex("[smhd]$", rule.value.content.for), 1)
          ) : null,
          # Fallback to `frequency` or the default duration
          format("%ds", try(rule.value.content.frequency, var.default_alert_duration))
        ),
        format("%ds", var.default_alert_duration)
      )

      # Query nodes: one per entry of the `queries` map (ref_id => query), or
      # a single node "A" built from `expression` when `queries` is absent.
      dynamic "data" {
        for_each = can(rule.value.content.queries) ? [
          for ref_id, query in rule.value.content.queries : {
            ref_id = ref_id
            query  = query
          }
        ] : [{ ref_id = "A", query = try(rule.value.content.expression, "") }]
        content {
          ref_id         = data.value.ref_id
          datasource_uid = each.value.datasource_uid
          query_type     = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
          model = jsonencode(
            merge(
              {
                refId         = data.value.ref_id
                intervalMs    = var.default_interval_ms
                maxDataPoints = var.default_max_data_points
                instant       = false
                datasource = {
                  type = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
                  uid  = each.value.datasource_uid
                }
              },

              # Query fields depend on the datasource type: ClickHouse gets a
              # raw SQL model, everything else a Prometheus-style model.
              contains(["grafana-clickhouse-datasource"],
              lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")) ? {
                # time_series format for queries using $__timeGroupAlias,
                # table format otherwise
                format     = can(regex("\\$__timeGroupAlias", data.value.query)) ? "time_series" : null
                formatAs   = can(regex("\\$__timeGroupAlias", data.value.query)) ? null : "table"
                queryType  = "sql"
                rawSql     = data.value.query
                editorMode = "code"
                editorType = "sql"
                } : {
                # Use this node's own query. With a `queries` map each node
                # must carry its own expression; without one, data.value.query
                # already equals `expression` (or "").
                expr      = data.value.query
                format    = "time_series"
                queryType = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
              }
            )
          )
          relative_time_range {
            from = try(
              rule.value.content.relative_time_range.from, # First try alert's own config
              lookup(                                      # Then try folder settings
                local.folder_time_ranges,
                each.value.folder_uid,
                var.default_time_range_from # Finally fallback to global default
              )
            )
            # NOTE(review): unlike the expression nodes below, the query node
            # ignores relative_time_range.to from the alert definition.
            to = 0
          }
        }
      }

      # Reduce nodes: one per `functions` entry that has a `reduce` object, or
      # a single node "B" reducing "A" when `need_reduce` is true.
      dynamic "data" {
        for_each = length(try(rule.value.content.functions, [])) > 0 ? [
          for func in rule.value.content.functions : {
            ref_id     = try(func.reduce.ref_id, "B")
            expression = try(func.reduce.input, "A")
            reducer    = try(func.reduce.function, "last")
            mode       = try(func.reduce.mode, "strict")
          } if try(func.reduce, null) != null
          ] : try(rule.value.content.need_reduce, false) ? [{
            ref_id     = "B"
            expression = "A"
            # 'avg' is mapped to 'mean'; any other reducer_type is passed
            # through unchanged, and "last" is used when none is given.
            reducer = try(
              rule.value.content.reducer_type == "avg" ? "mean" : rule.value.content.reducer_type,
              "last"
            )
            mode = "strict"
        }] : []
        content {
          ref_id         = data.value.ref_id
          datasource_uid = "__expr__"
          model = jsonencode({
            refId      = data.value.ref_id
            type       = "reduce"
            expression = data.value.expression
            reducer    = data.value.reducer
            mode       = data.value.mode
          })
          relative_time_range {
            from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
            to   = try(rule.value.content.relative_time_range.to, 0)
          }
        }
      }

      # Math nodes: one per `functions` entry that has a `math` object
      # (ref_id defaults to "M<index>"), or a single node "M" built from
      # `math_expression`.
      # Node references used by this module:
      # - Node A: initial query (SQL or Prometheus)
      # - Node B: reduction (created when need_reduce is true)
      # - Node M: math expression (optional)
      # - Node T: threshold evaluation
      dynamic "data" {
        for_each = length(try(rule.value.content.functions, [])) > 0 ? [
          for idx, func in rule.value.content.functions : {
            ref_id     = try(func.math.ref_id, "M${idx}")
            expression = func.math.expression
          } if try(func.math, null) != null
          ] : try(rule.value.content.math_expression, null) != null ? [{
            ref_id     = "M"
            expression = rule.value.content.math_expression
        }] : []
        content {
          ref_id         = data.value.ref_id
          datasource_uid = "__expr__"
          model = jsonencode({
            refId      = data.value.ref_id
            type       = "math"
            expression = data.value.expression
            input      = try(rule.value.content.need_reduce ? "B" : "A", "A")
          })
          relative_time_range {
            from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
            to   = try(rule.value.content.relative_time_range.to, 0)
          }
        }
      }

      # Threshold node, always emitted with ref_id "T".
      data {
        ref_id         = "T"
        datasource_uid = "__expr__"
        model = jsonencode({
          refId = "T"
          type  = "threshold"
          # Evaluate B when need_reduce is true, otherwise A
          expression = try(rule.value.content.need_reduce ? "B" : "A", "A")
          conditions = [
            {
              evaluator = merge(
                {
                  type = rule.value.content.condition_type
                },
                # Range conditions take `threshold_range`; every other
                # condition type takes the single `threshold` value.
                contains(["outside_range", "within_range"], rule.value.content.condition_type) ? {
                  params = rule.value.content.threshold_range
                  } : {
                  params = [rule.value.content.threshold]
                }
              )
              operator = { type = "and" }
              reducer  = { type = "mean", params = [] }
              query    = { params = [] }
              type     = "query"
            }
          ]
        })
        relative_time_range {
          from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
          to   = try(rule.value.content.relative_time_range.to, 0)
        }
      }

      # Rule metadata. The `threshold` annotation holds the first bound of
      # threshold_range for range conditions, otherwise the single threshold.
      annotations = {
        summary     = rule.value.content.summary
        description = try(rule.value.content.description, "")
        threshold = try(
          contains(["outside_range", "within_range"], try(rule.value.content.condition_type, "gt")) ?
          tostring(try(rule.value.content.threshold_range[0], "")) :
          tostring(try(rule.value.content.threshold, ""))
        )
      }

      labels = rule.value.content.labels

      no_data_state  = try(rule.value.content.no_data_state, var.default_no_data_state)
      exec_err_state = try(rule.value.content.exec_err_state, var.default_exec_err_state)
    }
  }
}
|
||||
122
environments/modules/grafana_rule_group/variables.tf
Normal file
122
environments/modules/grafana_rule_group/variables.tf
Normal file
@ -0,0 +1,122 @@
|
||||
# Alert groups to provision. For every group, one rule group is built per UID
# listed in alerts_on_datasources_uid.
variable "groups" {
  type = list(object({
    # Group name; also the key used to look up the folder in var.folder_uids.
    dashboard_alert_group_name = string
    # UIDs of the datasources the group's alerts are evaluated against.
    alerts_on_datasources_uid = list(string)
    # Directory scanned recursively for "*.yaml" alert definitions.
    alert_definitions_path = optional(string, null)
    # NOTE(review): the three attributes below are not read by this module's
    # locals.tf or main.tf — presumably consumed elsewhere; confirm.
    dashboard_path_if_exist     = optional(string, null)
    keep_manual_changes         = optional(bool, false)
    prevent_destroy_on_recreate = optional(bool, false)
  }))
  description = "List of alert groups with their definitions and data sources"
}
|
||||
|
||||
# Grafana datasources known to the module. Only `uid`, `name` and `type` are
# read by locals.tf (to build the UID => name and UID => type lookup tables).
variable "datasources" {
  type = list(object({
    # Identity
    uid  = string # Unique source identifier
    name = string # Data source name (displayed in Grafana)
    type = string # Data source type (e.g., prometheus, mysql, clickhouse)

    # Connection
    url         = optional(string, null) # Connection URL (for most sources)
    username    = optional(string, null)
    access_mode = string # Access mode: proxy or direct
    is_default  = bool   # Set as default source

    # Authentication
    basic_auth          = optional(bool, false)  # Use basic authentication
    basic_auth_user     = optional(string, null) # Username for basic authentication
    basic_auth_password = optional(string, null) # Password for basic authentication

    # Extra settings
    json_data        = optional(map(any), {})    # Additional parameters in JSON format
    secure_json_data = optional(map(string), {}) # Sensitive data in JSON format

    # Terraform lifecycle management
    keep_manual_changes         = optional(bool, false) # Ignore manual changes in Grafana
    prevent_destroy_on_recreate = optional(bool, false) # Prevent resource deletion on update
  }))
  description = "List of Grafana data sources"
}
|
||||
|
||||
# Passed unchanged to the `org_id` argument of every rule group.
variable "org_id" {
  type        = string
  description = "ID of the Grafana organization"
}
|
||||
|
||||
# Keyed by groups[*].dashboard_alert_group_name; the value becomes the rule
# group's folder_uid.
variable "folder_uids" {
  type        = map(string)
  description = "Mapping of alert group names to their folder UIDs"
}
|
||||
|
||||
# Per-folder override of the query lookback window; folders without an entry
# fall back to 60 seconds in locals.tf.
variable "folder_time_ranges" {
  type    = map(number)
  default = {}

  description = <<-EOT
    Mapping of folder UIDs to their default time ranges in seconds.
    If not specified for a folder, alerts will use the folder's default of 60 seconds (1 minute).
    Example:
    {
    "folder1_uid" = 300 # 5 minutes
    "folder2_uid" = 3600 # 1 hour
    }
  EOT
}
|
||||
|
||||
# Alert duration and timing configuration
|
||||
# NOTE(review): main.tf sets the rule group's interval_seconds from
# var.default_evaluation_interval, not from this variable — confirm whether
# this input is still needed.
variable "interval_seconds" {
  type        = number
  default     = 60
  description = "Interval in seconds for evaluating alerts"
}
|
||||
|
||||
# Written to the `intervalMs` field of every query model.
variable "default_interval_ms" {
  type        = number
  default     = 60000
  description = "Default interval in milliseconds for evaluating alert expressions"
}
|
||||
|
||||
# Written to the `maxDataPoints` field of every query model.
variable "default_max_data_points" {
  type        = number
  default     = 43200
  description = "Default maximum number of data points"
}
|
||||
|
||||
# Used when an alert definition does not set `no_data_state`.
variable "default_no_data_state" {
  type        = string
  default     = "OK"
  description = "Default no data state for alerts"
}
|
||||
|
||||
# Used when an alert definition does not set `exec_err_state`.
variable "default_exec_err_state" {
  type        = string
  default     = "Error"
  description = "Default execution error state for alerts"
}
|
||||
|
||||
# Fallback for a rule's `for` when the alert definition has neither a parsable
# `for` duration nor a `frequency`.
variable "default_alert_duration" {
  type        = number
  default     = 300 # 5 minutes
  description = "Default duration (in seconds) for how long a condition must be true before alerting"
}
|
||||
|
||||
# Becomes the `interval_seconds` of every rule group.
variable "default_evaluation_interval" {
  type        = number
  default     = 60 # 1 minute
  description = "Default interval (in seconds) between alert rule evaluations"
}
|
||||
|
||||
# Last-resort lookback for query nodes, used when neither the alert definition
# nor local.folder_time_ranges provides a value.
variable "default_time_range_from" {
  type        = number
  default     = 604800 # 7 days
  description = "Default time range (in seconds) for main query lookback"
}
|
||||
|
||||
# Lookback for the reduce, math and threshold nodes when the alert definition
# has no relative_time_range.from.
variable "default_processing_range" {
  type        = number
  default     = 600 # 10 minutes
  description = "Default time range (in seconds) for processing blocks"
}
|
||||
|
||||
# Passed to the `disable_provenance` argument of grafana_rule_group. The
# previous description said "provisioning"; the argument is about provenance,
# i.e. whether the rules may be edited from sources other than Terraform.
variable "disable_provenance" {
  description = "When true, rule groups are created without provenance so they can still be modified outside Terraform (e.g. in the Grafana UI)"
  type        = bool
  default     = true
}
|
||||
7
environments/modules/grafana_rule_group/versions.tf
Normal file
7
environments/modules/grafana_rule_group/versions.tf
Normal file
@ -0,0 +1,7 @@
|
||||
# Provider and Terraform version requirements for this module.
terraform {
  # variables.tf uses optional(type, default) object attributes, which
  # require Terraform 1.3 or newer.
  required_version = ">= 1.3.0"

  required_providers {
    grafana = {
      source = "grafana/grafana"
    }
  }
}
|
||||
Reference in New Issue
Block a user