# Grafana alert rule groups.
#
# One rule group is created per entry of local.grouped_alerts_by_datasource
# that has at least one alert file; each alert file becomes one rule.
#
# Expression nodes emitted per rule (ref_ids):
#   - query nodes : one per entry of content.queries, or a single node "A"
#                   built from content.expression
#   - reduce nodes: from content.functions[*].reduce, or a single node "B"
#                   when content.need_reduce is true
#   - math nodes  : from content.functions[*].math, or a single node "M"
#                   when content.math_expression is set
#   - threshold   : always a node "T"
resource "grafana_rule_group" "alert_groups" {
  # Skip groups that contain no alert files.
  for_each = {
    for k, v in local.grouped_alerts_by_datasource : k => v
    if length(v.alert_files) > 0
  }

  # Main parameters
  name               = each.key
  org_id             = var.org_id
  interval_seconds   = var.default_evaluation_interval # Group-wide evaluation interval
  folder_uid         = each.value.folder_uid
  disable_provenance = var.disable_provenance

  # Rules configuration
  dynamic "rule" {
    for_each = each.value.alert_files
    content {
      name = "${rule.value.name} (${each.value.datasource_uid})"

      # Alerts with `functions`: the condition is the math ref_id of the LAST
      # function (falls back to "T" when that function has no math.ref_id).
      # Simple alerts: the condition is the threshold node "T".
      condition = (
        length(try(rule.value.content.functions, [])) > 0
        ? try(rule.value.content.functions[length(rule.value.content.functions) - 1].math.ref_id, "T")
        : "T"
      )

      # Pending duration, always rendered as "<n>s". Resolution order:
      #   1. content.for, when it is a single "<number><s|m|h|d>" token
      #      (multiplier comes from local.duration_units, defaulting to 1)
      #   2. content.frequency, formatted with %d
      #   3. var.default_alert_duration (also used if anything above errors)
      for = try(
        coalesce(
          # Try to parse duration string (e.g., "15m", "24h")
          can(regex("^[0-9]+(s|m|h|d)$", rule.value.content.for)) ? format(
            "%ds",
            tonumber(regex("^([0-9]+)", rule.value.content.for)[0]) * lookup(local.duration_units, regex("[smhd]$", rule.value.content.for), 1)
          ) : null,
          # Fallback to frequency or default duration
          format("%ds", try(rule.value.content.frequency, var.default_alert_duration))
        ),
        format("%ds", var.default_alert_duration)
      )

      # Data configuration
      # One query node per entry of content.queries (ref_id = map key), or a
      # single node "A" carrying content.expression when `queries` is absent.
      dynamic "data" {
        for_each = can(rule.value.content.queries) ? [
          for ref_id, query in rule.value.content.queries : {
            ref_id = ref_id
            query  = query
          }
        ] : [{ ref_id = "A", query = try(rule.value.content.expression, "") }]
        content {
          ref_id         = data.value.ref_id
          datasource_uid = each.value.datasource_uid
          query_type     = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
          model = jsonencode(
            merge(
              {
                refId         = data.value.ref_id
                intervalMs    = var.default_interval_ms
                maxDataPoints = var.default_max_data_points
                instant       = false
                datasource = {
                  type = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
                  uid  = each.value.datasource_uid
                }
              },
              # Query fields depend on the datasource type resolved from
              # local.datasource_mapping_type (default "prometheus").
              # Only "grafana-clickhouse-datasource" takes the SQL branch;
              # every other type takes the Prometheus-style branch.
              contains(["grafana-clickhouse-datasource"], lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")) ? {
                # Use time_series format for queries with time grouping, table format for direct aggregation
                format     = can(regex("\\$__timeGroupAlias", data.value.query)) ? "time_series" : null
                formatAs   = can(regex("\\$__timeGroupAlias", data.value.query)) ? null : "table"
                queryType  = "sql"
                rawSql     = data.value.query
                editorMode = "code"
                editorType = "sql"
              } : {
                # Prometheus-compatible datasources (prometheus, victoriametrics).
                # Use this node's own query so that each entry of
                # content.queries gets its own expression; when `queries` is
                # absent this is still content.expression (or "").
                expr      = data.value.query
                format    = "time_series"
                queryType = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
              }
            )
          )
          relative_time_range {
            from = try(
              rule.value.content.relative_time_range.from, # First try alert's own config
              lookup(                                      # Then try folder settings
                local.folder_time_ranges,
                each.value.folder_uid,
                var.default_time_range_from # Finally fallback to global default
              )
            )
            to = 0
          }
        }
      }

      # Unified reduction processing
      # Handle both function-based and simple reductions
      dynamic "data" {
        for_each = length(try(rule.value.content.functions, [])) > 0 ? [
          for func in rule.value.content.functions : {
            ref_id     = try(func.reduce.ref_id, "B")
            expression = try(func.reduce.input, "A")
            # Map 'avg' to 'mean' here as well, consistent with the simple
            # reduction path below.
            reducer = try(func.reduce.function == "avg" ? "mean" : func.reduce.function, "last")
            mode    = try(func.reduce.mode, "strict")
          } if try(func.reduce, null) != null
        ] : try(rule.value.content.need_reduce, false) ? [{
          ref_id     = "B"
          expression = "A"
          # Map 'avg' reducer to 'mean' which is supported by Grafana
          # Other reducers (last, max, min, sum) are already supported
          reducer = try(
            rule.value.content.reducer_type == "avg" ? "mean" : rule.value.content.reducer_type,
            "last"
          )
          mode = "strict"
        }] : []
        content {
          # Use exact ref_id and values from the for_each structure
          ref_id         = data.value.ref_id
          datasource_uid = "__expr__"
          # NOTE(review): `mode` is emitted at the top level of the model;
          # confirm Grafana reads it there rather than under `settings.mode`.
          model = jsonencode({
            refId      = data.value.ref_id
            type       = "reduce"
            expression = data.value.expression
            reducer    = data.value.reducer
            mode       = data.value.mode
          })
          relative_time_range {
            from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
            to   = try(rule.value.content.relative_time_range.to, 0)
          }
        }
      }

      # Math expressions processing
      # Node references in Grafana alerts:
      # - Node A: Initial query (SQL or Prometheus)
      # - Node B: Reduction operation (created when need_reduce is true)
      # - Node M: Math expression (optional, for complex calculations);
      #           function-based math nodes default to "M<index>"
      # - Node T: Final threshold evaluation
      dynamic "data" {
        for_each = length(try(rule.value.content.functions, [])) > 0 ? [
          for idx, func in rule.value.content.functions : {
            ref_id     = try(func.math.ref_id, "M${idx}")
            expression = func.math.expression
          } if try(func.math, null) != null
        ] : try(rule.value.content.math_expression, null) != null ? [{
          ref_id     = "M"
          expression = rule.value.content.math_expression
        }] : []
        content {
          ref_id         = data.value.ref_id
          datasource_uid = "__expr__"
          model = jsonencode({
            refId      = data.value.ref_id
            type       = "math"
            expression = data.value.expression
            input      = try(rule.value.content.need_reduce ? "B" : "A", "A")
          })
          relative_time_range {
            from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
            to   = try(rule.value.content.relative_time_range.to, 0)
          }
        }
      }

      # Unified threshold evaluation
      # NOTE(review): the threshold always evaluates "B" or "A"; a simple-path
      # math node "M" is emitted above but not referenced by "T" or by
      # `condition` - confirm this is intended.
      data {
        ref_id         = "T" # Use T consistently for threshold
        datasource_uid = "__expr__"
        model = jsonencode({
          refId = "T"
          type  = "threshold"
          # Simple alerts: evaluate B (with reduction) or A (without reduction)
          expression = try(rule.value.content.need_reduce ? "B" : "A", "A")
          conditions = [
            {
              evaluator = merge(
                # condition_type defaults to "gt", matching the default used
                # for the `threshold` annotation below.
                { type = try(rule.value.content.condition_type, "gt") },
                # Handle range conditions for site monitoring
                contains(["outside_range", "within_range"], try(rule.value.content.condition_type, "gt")) ? {
                  params = rule.value.content.threshold_range
                } : {
                  # Handle single threshold for business/system alerts
                  params = [rule.value.content.threshold]
                }
              )
              operator = { type = "and" }
              # Use 'mean' reducer for consistency (same as mapping 'avg' to 'mean' in reductions)
              reducer = { type = "mean", params = [] }
              query   = { params = [] }
              type    = "query"
            }
          ]
        })
        relative_time_range {
          from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
          to   = try(rule.value.content.relative_time_range.to, 0)
        }
      }

      # Rule metadata
      annotations = {
        summary     = rule.value.content.summary
        description = try(rule.value.content.description, "")
        # Range conditions expose the first bound of threshold_range; other
        # conditions expose the single threshold. Falls back to "" if the
        # value cannot be rendered as a string.
        threshold = try(
          contains(["outside_range", "within_range"], try(rule.value.content.condition_type, "gt")) ? tostring(try(rule.value.content.threshold_range[0], "")) : tostring(try(rule.value.content.threshold, "")),
          ""
        )
      }
      labels         = rule.value.content.labels
      no_data_state  = try(rule.value.content.no_data_state, var.default_no_data_state)
      exec_err_state = try(rule.value.content.exec_err_state, var.default_exec_err_state)
    }
  }
}