feat: add test alert group with low thresholds for notification testing
This commit is contained in:
21
environments/dev/adibrov/alerts/test/test_cpu_firing.yaml
Normal file
21
environments/dev/adibrov/alerts/test/test_cpu_firing.yaml
Normal file
@ -0,0 +1,21 @@
|
||||
# Test alert: CPU usage with a deliberately low threshold.
# Per its own description, this rule fires at CPU > 2% and exists only to
# verify that notifications are delivered.
name: "DEV ADIBROV - TEST CPU > 2%"
|
||||
expression: |
|
||||
(
|
||||
1 - avg by(instance) (rate(node_cpu_seconds_total{job="node_exporter", mode="idle"}[1m]))
|
||||
) * 100
|
||||
# The expression above is (1 - average idle-mode rate per instance over 1m) * 100,
# so the threshold is compared against a non-idle CPU percentage.
threshold: 2
|
||||
for: "1m"
|
||||
condition_type: "gt"
|
||||
need_reduce: true
|
||||
|
||||
reducer_type: "max"
|
||||
# NOTE(review): both states are "OK", presumably so that missing data or a
# query error never fires this test alert — confirm against the alert module.
no_data_state: "OK"
|
||||
exec_err_state: "OK"
|
||||
labels:
|
||||
service: "test"
|
||||
severity: "info"
|
||||
status: "test"
|
||||
team: "infra"
|
||||
# The summary template reads $values.B.Value and $labels.instance.
# NOTE(review): assumes the reduced value is exposed under refId "B" by the
# module that renders these definitions — TODO confirm.
summary: |
|
||||
[TEST] CPU {{ printf "%.1f" $values.B.Value }}% на {{ $labels.instance }}
|
||||
description: |
|
||||
Тестовый алерт — срабатывает при CPU > 2%. Используется для проверки доставки уведомлений.
|
||||
@ -0,0 +1,19 @@
|
||||
# Test alert: goroutine count of the "prometheus" job.
# Per its own description, the threshold of 10 is far below the normal
# ~100+ goroutines, so the rule is expected to fire continuously; it is used
# to check resolve handling.
# NOTE(review): the path of the file holding this definition is not visible
# in this view — confirm it lives under alerts/test with the other test rules.
name: "DEV ADIBROV - TEST Go Goroutines"
|
||||
expression: |
|
||||
go_goroutines{job="prometheus"}
|
||||
# Compared with condition_type "gt": value must be greater than 10.
threshold: 10
|
||||
for: "1m"
|
||||
condition_type: "gt"
|
||||
need_reduce: true
|
||||
reducer_type: "max"
|
||||
# NOTE(review): both states are "OK", presumably so that missing data or a
# query error never fires this test alert — confirm against the alert module.
no_data_state: "OK"
|
||||
exec_err_state: "OK"
|
||||
labels:
|
||||
service: "test"
|
||||
severity: "info"
|
||||
status: "test"
|
||||
team: "infra"
|
||||
# The summary template reads $values.B.Value, printed with no decimals.
# NOTE(review): assumes the reduced value is exposed under refId "B" — TODO confirm.
summary: |
|
||||
[TEST] Goroutines prometheus: {{ printf "%.0f" $values.B.Value }}
|
||||
description: |
|
||||
Тестовый алерт — горутин в prometheus больше 10 (норма ~100+). Всегда файрится, используется для проверки resolve.
|
||||
21
environments/dev/adibrov/alerts/test/test_memory_firing.yaml
Normal file
21
environments/dev/adibrov/alerts/test/test_memory_firing.yaml
Normal file
@ -0,0 +1,21 @@
|
||||
# Test alert: memory usage with a deliberately low threshold.
# Per its own description, this rule fires when memory usage exceeds 20% and
# exists only to verify that notifications are delivered.
name: "DEV ADIBROV - TEST Memory > 20%"
|
||||
expression: |
|
||||
(
|
||||
1 - (node_memory_MemAvailable_bytes{job="node_exporter"} / node_memory_MemTotal_bytes{job="node_exporter"})
|
||||
) * 100
|
||||
# The expression above is (1 - MemAvailable / MemTotal) * 100, so the
# threshold is compared against a used-memory percentage.
threshold: 20
|
||||
for: "1m"
|
||||
condition_type: "gt"
|
||||
need_reduce: true
|
||||
reducer_type: "max"
|
||||
# NOTE(review): both states are "OK", presumably so that missing data or a
# query error never fires this test alert — confirm against the alert module.
no_data_state: "OK"
|
||||
exec_err_state: "OK"
|
||||
labels:
|
||||
service: "test"
|
||||
severity: "info"
|
||||
status: "test"
|
||||
team: "infra"
|
||||
# The summary template reads $values.B.Value and $labels.instance.
# NOTE(review): assumes the reduced value is exposed under refId "B" — TODO confirm.
summary: |
|
||||
[TEST] Память {{ printf "%.1f" $values.B.Value }}% на {{ $labels.instance }}
|
||||
description: |
|
||||
Тестовый алерт — срабатывает при использовании памяти > 20%. Используется для проверки доставки уведомлений.
|
||||
@ -109,6 +109,15 @@ groups = [
|
||||
keep_manual_changes = false
|
||||
prevent_destroy_on_recreate = false
|
||||
alerts_on_datasources_uid = ["prometheus"]
|
||||
},
|
||||
# "Test Alerts" group: points at the alerts/test definitions directory and
# the "prometheus" datasource UID; no dashboard path is set (null).
# NOTE(review): alert_definitions_path is presumably resolved relative to the
# environment directory (environments/dev/adibrov) — confirm against the module.
{
|
||||
dashboard_alert_group_name = "Test Alerts"
|
||||
folder_uid = "test-alerts"
|
||||
alert_definitions_path = "alerts/test"
|
||||
dashboard_path_if_exist = null
|
||||
keep_manual_changes = false
|
||||
prevent_destroy_on_recreate = false
|
||||
alerts_on_datasources_uid = ["prometheus"]
|
||||
}
|
||||
]
|
||||
# Data sources configuration
|
||||
|
||||
Reference in New Issue
Block a user