225 lines
9.3 KiB
Terraform
225 lines
9.3 KiB
Terraform
|
|
# One Grafana alert rule group per entry in local.grouped_alerts_by_datasource,
# skipping groups that contain no alert files. Each group renders one rule per
# alert file, wiring up to four expression nodes:
#   A  - initial datasource query (SQL or Prometheus)
#   B  - reduce node (when need_reduce / a reduce function is configured)
#   M* - optional math node(s) (ref_id from the function, or "M" / "M${idx}")
#   T  - final threshold node
resource "grafana_rule_group" "alert_groups" {
  for_each = {
    for k, v in local.grouped_alerts_by_datasource :
    k => v if length(v.alert_files) > 0
  }

  # Main parameters
  name               = each.key
  org_id             = var.org_id
  interval_seconds   = var.default_evaluation_interval # Group-wide evaluation interval
  folder_uid         = each.value.folder_uid
  disable_provenance = var.disable_provenance

  # Rules configuration: one rule per alert definition file in this group.
  dynamic "rule" {
    for_each = each.value.alert_files

    content {
      # Datasource UID is appended so the same alert name stays unique across groups.
      name = "${rule.value.name} (${each.value.datasource_uid})"

      # Condition node selection:
      #   function-based alerts -> ref_id of the LAST function's math node
      #     (falls back to "T" if that function has no math.ref_id);
      #   simple alerts -> threshold node "T".
      # NOTE(review): an earlier comment here referred to "math node D"; the code
      # actually uses the last function's math.ref_id — confirm docs elsewhere agree.
      condition = length(try(rule.value.content.functions, [])) > 0 ? try(rule.value.content.functions[length(rule.value.content.functions) - 1].math.ref_id, "T") : "T"

      # Pending period ("for"), normalized to whole seconds:
      #   1. parse duration strings such as "15m" / "24h" via local.duration_units;
      #   2. else fall back to the alert's numeric frequency;
      #   3. else fall back to the global default.
      for = try(
        coalesce(
          # Try to parse duration string (e.g., "15m", "24h")
          can(regex("^[0-9]+(s|m|h|d)$", rule.value.content.for)) ? format(
            "%ds",
            tonumber(regex("^([0-9]+)", rule.value.content.for)[0]) *
            lookup(local.duration_units, regex("[smhd]$", rule.value.content.for), 1)
          ) : null,
          # Fallback to frequency or default duration
          format("%ds", try(rule.value.content.frequency, var.default_alert_duration))
        ),
        format("%ds", var.default_alert_duration)
      )

      # Query data block(s) — node A (or one node per entry in content.queries).
      # Multi-query alerts provide a "queries" map keyed by ref_id; single-query
      # alerts use ref_id "A" with the alert's "expression" field.
      dynamic "data" {
        for_each = can(rule.value.content.queries) ? [
          for ref_id, query in rule.value.content.queries : {
            ref_id = ref_id
            query  = query
          }
        ] : [{ ref_id = "A", query = try(rule.value.content.expression, "") }]

        content {
          ref_id         = data.value.ref_id
          datasource_uid = each.value.datasource_uid
          query_type     = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")

          model = jsonencode(
            merge(
              {
                refId         = data.value.ref_id
                intervalMs    = var.default_interval_ms
                maxDataPoints = var.default_max_data_points
                instant       = false
                datasource = {
                  type = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
                  uid  = each.value.datasource_uid
                }
              },

              # Datasource-specific part of the model: SQL-style settings for
              # ClickHouse, otherwise Prometheus-compatible settings.
              contains(["grafana-clickhouse-datasource"],
              lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")) ? {
                # Use time_series format for queries with time grouping, table format for direct aggregation
                format     = can(regex("\\$__timeGroupAlias", data.value.query)) ? "time_series" : null
                formatAs   = can(regex("\\$__timeGroupAlias", data.value.query)) ? null : "table"
                queryType  = "sql"
                rawSql     = data.value.query
                editorMode = "code"
                editorType = "sql"
              } : {
                # Prometheus-compatible datasources (prometheus, victoriametrics)
                expr      = try(rule.value.content.expression, "")
                format    = "time_series"
                queryType = lookup(local.datasource_mapping_type, each.value.datasource_uid, "prometheus")
              }
            )
          )

          relative_time_range {
            from = try(
              rule.value.content.relative_time_range.from, # First try alert's own config
              lookup(                                      # Then try folder settings
                local.folder_time_ranges,
                each.value.folder_uid,
                var.default_time_range_from # Finally fallback to global default
              )
            )
            # NOTE(review): hardcoded to "now", unlike the expression blocks below
            # which honor rule.value.content.relative_time_range.to — confirm this
            # asymmetry is intentional.
            to = 0
          }
        }
      }

      # Reduce data block(s) — node B.
      # Function-based alerts: one reduce node per function that defines "reduce".
      # Simple alerts: a single B node when need_reduce is true, reducing A.
      dynamic "data" {
        for_each = length(try(rule.value.content.functions, [])) > 0 ? [
          for func in rule.value.content.functions : {
            ref_id     = try(func.reduce.ref_id, "B")
            expression = try(func.reduce.input, "A")
            reducer    = try(func.reduce.function, "last")
            mode       = try(func.reduce.mode, "strict")
          } if try(func.reduce, null) != null
        ] : try(rule.value.content.need_reduce, false) ? [{
          ref_id     = "B"
          expression = "A"
          # Map 'avg' reducer to 'mean' which is supported by Grafana
          # Other reducers (last, max, min, sum) are already supported
          reducer = try(
            rule.value.content.reducer_type == "avg" ? "mean" : rule.value.content.reducer_type,
            "last"
          )
          mode = "strict"
        }] : []

        content {
          # Use exact ref_id and values from the for_each structure
          ref_id         = data.value.ref_id
          datasource_uid = "__expr__"

          model = jsonencode({
            refId      = data.value.ref_id
            type       = "reduce"
            expression = data.value.expression
            reducer    = data.value.reducer
            mode       = data.value.mode
          })

          relative_time_range {
            from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
            to   = try(rule.value.content.relative_time_range.to, 0)
          }
        }
      }

      # Math data block(s) — node M.
      # Function-based alerts: one math node per function that defines "math"
      # (ref_id defaults to "M${idx}"). Simple alerts: a single "M" node when
      # math_expression is set.
      dynamic "data" {
        for_each = length(try(rule.value.content.functions, [])) > 0 ? [
          for idx, func in rule.value.content.functions : {
            ref_id     = try(func.math.ref_id, "M${idx}")
            expression = func.math.expression
          } if try(func.math, null) != null
        ] : try(rule.value.content.math_expression, null) != null ? [{
          ref_id     = "M"
          expression = rule.value.content.math_expression
        }] : []

        content {
          ref_id         = data.value.ref_id
          datasource_uid = "__expr__"

          model = jsonencode({
            refId      = data.value.ref_id
            type       = "math"
            expression = data.value.expression
            # Feed the math node from B when a reduction exists, otherwise from A.
            input = try(rule.value.content.need_reduce ? "B" : "A", "A")
          })

          relative_time_range {
            from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
            to   = try(rule.value.content.relative_time_range.to, 0)
          }
        }
      }

      # Unified threshold evaluation — node T.
      data {
        ref_id         = "T" # Use T consistently for threshold
        datasource_uid = "__expr__"

        model = jsonencode({
          refId = "T"
          type  = "threshold"
          # Simple alerts: evaluate B (with reduction) or A (without reduction)
          expression = try(rule.value.content.need_reduce ? "B" : "A", "A")
          conditions = [
            {
              evaluator = merge(
                {
                  type = rule.value.content.condition_type
                },
                # Range conditions (outside_range / within_range) take a two-value
                # params list; everything else takes a single threshold value.
                contains(["outside_range", "within_range"], rule.value.content.condition_type) ? {
                  params = rule.value.content.threshold_range
                } : {
                  # Handle single threshold for business/system alerts
                  params = [rule.value.content.threshold]
                }
              )
              operator = { type = "and" }
              # Use 'mean' reducer for consistency (same as mapping 'avg' to 'mean' in reductions)
              reducer = { type = "mean", params = [] }
              query   = { params = [] }
              type    = "query"
            }
          ]
        })

        relative_time_range {
          from = try(rule.value.content.relative_time_range.from, var.default_processing_range)
          to   = try(rule.value.content.relative_time_range.to, 0)
        }
      }

      # Rule metadata. The "threshold" annotation surfaces the lower range bound
      # for range conditions, otherwise the single threshold value (empty string
      # when unset).
      annotations = {
        summary     = rule.value.content.summary
        description = try(rule.value.content.description, "")
        threshold = try(
          contains(["outside_range", "within_range"], try(rule.value.content.condition_type, "gt")) ?
          tostring(try(rule.value.content.threshold_range[0], "")) :
          tostring(try(rule.value.content.threshold, ""))
        )
      }

      labels = rule.value.content.labels

      no_data_state  = try(rule.value.content.no_data_state, var.default_no_data_state)
      exec_err_state = try(rule.value.content.exec_err_state, var.default_exec_err_state)
    }
  }
}
|