From 5d05640e805658cd7250e2da3e189a7f07c6aba4 Mon Sep 17 00:00:00 2001
From: Alexandr
Date: Wed, 1 Apr 2026 08:21:03 +0300
Subject: [PATCH] feat: grafana IRM escalation module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

новый модуль modules/grafana_irm_escalation, dev/adibrov подключён.
секреты (oncall_access_token, user id) хранятся в Vault, в git не попадают
---
 environments/dev/adibrov/locals.tf            | 26 +++++++
 environments/dev/adibrov/main.tf              | 16 ++++
 environments/dev/adibrov/providers.tf         |  4 +
 environments/dev/adibrov/terraform.tfvars     |  6 ++
 environments/dev/adibrov/variables_irm.tf     |  5 ++
 .../modules/grafana_irm_escalation/main.tf    | 26 +++++++
 .../modules/grafana_irm_escalation/outputs.tf |  9 +++
 .../grafana_irm_escalation/variables.tf       | 74 +++++++++++++++++++
 .../grafana_irm_escalation/versions.tf        | 10 +++
 9 files changed, 176 insertions(+)
 create mode 100644 environments/dev/adibrov/variables_irm.tf
 create mode 100644 environments/modules/grafana_irm_escalation/main.tf
 create mode 100644 environments/modules/grafana_irm_escalation/outputs.tf
 create mode 100644 environments/modules/grafana_irm_escalation/variables.tf
 create mode 100644 environments/modules/grafana_irm_escalation/versions.tf

diff --git a/environments/dev/adibrov/locals.tf b/environments/dev/adibrov/locals.tf
index 7cd677e..1a50be2 100644
--- a/environments/dev/adibrov/locals.tf
+++ b/environments/dev/adibrov/locals.tf
@@ -5,6 +5,32 @@ locals {
     "X-Disable-Provenance" = var.disable_provenance ? "true" : "false"
   }
 
+  # IRM escalation chains
+  # User/schedule IDs are stored in Vault: app/groups/monitoring/grafana/dev/ext
+  irm_escalation_chains = [
+    {
+      name = "infra-default"
+      steps = [
+        {
+          type              = "notify_persons"
+          persons_to_notify = [data.vault_kv_secret_v2.secret_ext.data["irm_user_adibrov"]]
+        },
+        {
+          type             = "wait"
+          duration_seconds = 300
+        },
+        {
+          type              = "notify_persons"
+          persons_to_notify = [data.vault_kv_secret_v2.secret_ext.data["irm_user_adibrov"]]
+          important         = true
+        },
+        {
+          type = "repeat_escalation"
+        }
+      ]
+    }
+  ]
+
   # Contact points configuration
   contact_points = [
     {
diff --git a/environments/dev/adibrov/main.tf b/environments/dev/adibrov/main.tf
index ce0afd7..b138e27 100644
--- a/environments/dev/adibrov/main.tf
+++ b/environments/dev/adibrov/main.tf
@@ -85,3 +85,19 @@ module "grafana_rule_group01" {
     module.grafana_contact_points01
   ]
 }
+
+# Module that manages Grafana IRM escalation chains
+# Enabled with enable_irm = true in terraform.tfvars
+# Before enabling: add oncall_access_token to Vault (app/groups/monitoring/grafana/dev/int)
+module "grafana_irm_escalation" {
+  for_each = var.enable_irm ? { for chain in local.irm_escalation_chains : chain.name => chain } : {}
+
+  source  = "../../modules/grafana_irm_escalation"
+  name    = each.value.name
+  team_id = try(each.value.team_id, null)
+  steps   = each.value.steps
+
+  providers = {
+    grafana = grafana.grafana01
+  }
+}
diff --git a/environments/dev/adibrov/providers.tf b/environments/dev/adibrov/providers.tf
index 46d0f2f..6319d16 100644
--- a/environments/dev/adibrov/providers.tf
+++ b/environments/dev/adibrov/providers.tf
@@ -18,4 +18,8 @@ provider "grafana" {
   auth                 = "admin:${data.vault_kv_secret_v2.secret_int.data["grafana_local_admin_password"]}"
   insecure_skip_verify = true
   http_headers         = local.grafana_headers
+
+  # Grafana IRM / OnCall (Grafana Cloud)
+  oncall_access_token = try(data.vault_kv_secret_v2.secret_int.data["oncall_access_token"], null)
+  oncall_url          = "https://oncall-prod-us-central-0.grafana.net/oncall"
 }
\ No newline at end of file
diff --git a/environments/dev/adibrov/terraform.tfvars b/environments/dev/adibrov/terraform.tfvars
index cbc70ba..b1107a2 100644
--- a/environments/dev/adibrov/terraform.tfvars
+++ b/environments/dev/adibrov/terraform.tfvars
@@ -153,3 +153,9 @@ notification_policies = [
     ]
   }
 ]
+
+# ── Grafana IRM ──────────────────────────────────────────────────────────────
+# Escalation chains are defined in locals.tf, user IDs live in Vault
+# (app/groups/monitoring/grafana/dev/ext → irm_user_adibrov)
+
+enable_irm = true
diff --git a/environments/dev/adibrov/variables_irm.tf b/environments/dev/adibrov/variables_irm.tf
new file mode 100644
index 0000000..726bfd8
--- /dev/null
+++ b/environments/dev/adibrov/variables_irm.tf
@@ -0,0 +1,5 @@
+variable "enable_irm" {
+  description = "Включить управление цепочками эскалации Grafana IRM"
+  type        = bool
+  default     = false
+}
diff --git a/environments/modules/grafana_irm_escalation/main.tf b/environments/modules/grafana_irm_escalation/main.tf
new file mode 100644
index 0000000..709ddad
--- /dev/null
+++ b/environments/modules/grafana_irm_escalation/main.tf
@@ -0,0 +1,26 @@
+# Escalation chain that the steps below are attached to.
+resource "grafana_oncall_escalation_chain" "this" {
+  name    = var.name
+  team_id = var.team_id
+}
+
+# One escalation step per entry of var.steps. Optional attributes the caller
+# leaves unset arrive as null (important as false) and are passed through as-is.
+resource "grafana_oncall_escalation" "steps" {
+  # Key = stringified list index; step order is pinned through position.
+  for_each = { for idx, step in var.steps : tostring(idx) => step }
+
+  escalation_chain_id              = grafana_oncall_escalation_chain.this.id
+  type                             = each.value.type
+  position                         = tonumber(each.key)
+  duration                         = each.value.duration_seconds
+  persons_to_notify                = each.value.persons_to_notify
+  persons_to_notify_next_each_time = each.value.persons_to_notify_next_each_time
+  notify_on_call_from_schedule     = each.value.notify_on_call_from_schedule
+  group_to_notify                  = each.value.group_to_notify
+  action_to_trigger                = each.value.action_to_trigger
+  notify_to_team_members           = each.value.notify_to_team_members
+  notify_if_time_from              = each.value.notify_if_time_from
+  notify_if_time_to                = each.value.notify_if_time_to
+  important                        = each.value.important
+}
diff --git a/environments/modules/grafana_irm_escalation/outputs.tf b/environments/modules/grafana_irm_escalation/outputs.tf
new file mode 100644
index 0000000..d8a4343
--- /dev/null
+++ b/environments/modules/grafana_irm_escalation/outputs.tf
@@ -0,0 +1,9 @@
+output "escalation_chain_id" {
+  description = "ID цепочки эскалации — используется в grafana_oncall_route для привязки алертов"
+  value       = grafana_oncall_escalation_chain.this.id
+}
+
+output "escalation_chain_name" {
+  description = "Название цепочки эскалации"
+  value       = grafana_oncall_escalation_chain.this.name
+}
diff --git a/environments/modules/grafana_irm_escalation/variables.tf b/environments/modules/grafana_irm_escalation/variables.tf
new file mode 100644
index 0000000..05bad7e
--- /dev/null
+++ b/environments/modules/grafana_irm_escalation/variables.tf
@@ -0,0 +1,74 @@
+variable "name" {
+  description = "Название цепочки эскалации"
+  type        = string
+}
+
+variable "team_id" {
+  description = "ID команды в Grafana IRM (опционально)"
+  type        = string
+  default     = null
+}
+
+variable "steps" {
+  description = <<-EOT
+    Шаги эскалации по порядку. position назначается автоматически (0, 1, 2...).
+
+    Доступные типы (type):
+      notify_persons                 — уведомить конкретных пользователей
+      notify_person_next_each_time   — round-robin по списку
+      notify_on_call_from_schedule   — кто сейчас на дежурстве по расписанию
+      notify_user_group              — уведомить группу пользователей
+      notify_team_members            — уведомить команду
+      wait                           — пауза (duration_seconds секунд)
+      trigger_webhook                — вызвать webhook
+      resolve                        — авторезолв инцидента
+      repeat_escalation              — повторить цепочку с начала
+      declare_incident               — объявить инцидент
+      notify_whole_channel           — уведомить весь канал
+      notify_if_time_from_to         — продолжить только в интервале notify_if_time_from..notify_if_time_to (UTC, например 08:00:00Z)
+      notify_if_num_alerts_in_window — продолжить, если за окно набралось заданное число алертов (параметры окна модуль не передаёт)
+
+    Пример:
+      steps = [
+        { type = "notify_on_call_from_schedule", notify_on_call_from_schedule = "" },
+        { type = "wait", duration_seconds = 300 },
+        { type = "notify_persons", persons_to_notify = [""] },
+        { type = "repeat_escalation" },
+      ]
+  EOT
+  type = list(object({
+    type                             = string
+    duration_seconds                 = optional(number, null)
+    persons_to_notify                = optional(list(string), null)
+    persons_to_notify_next_each_time = optional(list(string), null)
+    notify_on_call_from_schedule     = optional(string, null)
+    group_to_notify                  = optional(string, null)
+    action_to_trigger                = optional(string, null)
+    notify_to_team_members           = optional(string, null)
+    notify_if_time_from              = optional(string, null)
+    notify_if_time_to                = optional(string, null)
+    important                        = optional(bool, false)
+  }))
+
+  validation {
+    # Every step's type must be one of the values listed below.
+    condition = alltrue([
+      for s in var.steps : contains([
+        "notify_persons",
+        "notify_person_next_each_time",
+        "notify_on_call_from_schedule",
+        "notify_user_group",
+        "notify_team_members",
+        "wait",
+        "trigger_webhook",
+        "resolve",
+        "repeat_escalation",
+        "declare_incident",
+        "notify_whole_channel",
+        "notify_if_time_from_to",
+        "notify_if_num_alerts_in_window",
+      ], s.type)
+    ])
+    error_message = "Недопустимый тип шага эскалации. Проверь список допустимых значений в описании переменной."
+  }
+}
diff --git a/environments/modules/grafana_irm_escalation/versions.tf b/environments/modules/grafana_irm_escalation/versions.tf
new file mode 100644
index 0000000..f13f855
--- /dev/null
+++ b/environments/modules/grafana_irm_escalation/versions.tf
@@ -0,0 +1,10 @@
+terraform {
+  # optional(<type>, <default>) in variables.tf needs Terraform 1.3 or newer.
+  required_version = ">= 1.3.0"
+
+  required_providers {
+    grafana = {
+      source = "grafana/grafana"
+    }
+  }
+}