feat: add test alert group with low thresholds for notification testing
This commit is contained in:
21
environments/dev/adibrov/alerts/test/test_cpu_firing.yaml
Normal file
21
environments/dev/adibrov/alerts/test/test_cpu_firing.yaml
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
name: "DEV ADIBROV - TEST CPU > 2%"
|
||||||
|
expression: |
|
||||||
|
(
|
||||||
|
1 - avg by(instance) (rate(node_cpu_seconds_total{job="node_exporter", mode="idle"}[1m]))
|
||||||
|
) * 100
|
||||||
|
threshold: 2
|
||||||
|
for: "1m"
|
||||||
|
condition_type: "gt"
|
||||||
|
need_reduce: true
|
||||||
|
reducer_type: "max"
|
||||||
|
no_data_state: "OK"
|
||||||
|
exec_err_state: "OK"
|
||||||
|
labels:
|
||||||
|
service: "test"
|
||||||
|
severity: "info"
|
||||||
|
status: "test"
|
||||||
|
team: "infra"
|
||||||
|
summary: |
|
||||||
|
[TEST] CPU {{ printf "%.1f" $values.B.Value }}% на {{ $labels.instance }}
|
||||||
|
description: |
|
||||||
|
Тестовый алерт — срабатывает при CPU > 2%. Используется для проверки доставки уведомлений.
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
name: "DEV ADIBROV - TEST Go Goroutines"
|
||||||
|
expression: |
|
||||||
|
go_goroutines{job="prometheus"}
|
||||||
|
threshold: 10
|
||||||
|
for: "1m"
|
||||||
|
condition_type: "gt"
|
||||||
|
need_reduce: true
|
||||||
|
reducer_type: "max"
|
||||||
|
no_data_state: "OK"
|
||||||
|
exec_err_state: "OK"
|
||||||
|
labels:
|
||||||
|
service: "test"
|
||||||
|
severity: "info"
|
||||||
|
status: "test"
|
||||||
|
team: "infra"
|
||||||
|
summary: |
|
||||||
|
[TEST] Goroutines prometheus: {{ printf "%.0f" $values.B.Value }}
|
||||||
|
description: |
|
||||||
|
Тестовый алерт — горутин в prometheus больше 10 (норма ~100+). Всегда файрится, используется для проверки resolve.
|
||||||
21
environments/dev/adibrov/alerts/test/test_memory_firing.yaml
Normal file
21
environments/dev/adibrov/alerts/test/test_memory_firing.yaml
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
name: "DEV ADIBROV - TEST Memory > 20%"
|
||||||
|
expression: |
|
||||||
|
(
|
||||||
|
1 - (node_memory_MemAvailable_bytes{job="node_exporter"} / node_memory_MemTotal_bytes{job="node_exporter"})
|
||||||
|
) * 100
|
||||||
|
threshold: 20
|
||||||
|
for: "1m"
|
||||||
|
condition_type: "gt"
|
||||||
|
need_reduce: true
|
||||||
|
reducer_type: "max"
|
||||||
|
no_data_state: "OK"
|
||||||
|
exec_err_state: "OK"
|
||||||
|
labels:
|
||||||
|
service: "test"
|
||||||
|
severity: "info"
|
||||||
|
status: "test"
|
||||||
|
team: "infra"
|
||||||
|
summary: |
|
||||||
|
[TEST] Память {{ printf "%.1f" $values.B.Value }}% на {{ $labels.instance }}
|
||||||
|
description: |
|
||||||
|
Тестовый алерт — срабатывает при использовании памяти > 20%. Используется для проверки доставки уведомлений.
|
||||||
@@ -109,6 +109,15 @@ groups = [
|
|||||||
keep_manual_changes = false
|
keep_manual_changes = false
|
||||||
prevent_destroy_on_recreate = false
|
prevent_destroy_on_recreate = false
|
||||||
alerts_on_datasources_uid = ["prometheus"]
|
alerts_on_datasources_uid = ["prometheus"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
dashboard_alert_group_name = "Test Alerts"
|
||||||
|
folder_uid = "test-alerts"
|
||||||
|
alert_definitions_path = "alerts/test"
|
||||||
|
dashboard_path_if_exist = null
|
||||||
|
keep_manual_changes = false
|
||||||
|
prevent_destroy_on_recreate = false
|
||||||
|
alerts_on_datasources_uid = ["prometheus"]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
# Data sources configuration
|
# Data sources configuration
|
||||||
|
|||||||
Reference in New Issue
Block a user