release: v1.9.0 — IRM-алерты отдельно от инцидентов
- Alembic 005: таблицы irm_alerts и incident_alert_links
- Модуль alerts: API/UI, Ack/Resolve, привязка к инциденту через alert_ids
- Вебхук Grafana: одна транзакция ingress + irm_alerts; разбор payload в grafana_payload
- По умолчанию инцидент из вебхука не создаётся (AUTO_INCIDENT_FROM_ALERT)
- Документация IRM_GRAFANA_PARITY.md, обновления IRM.md и CHANGELOG

Made-with: Cursor
This commit is contained in:
346
onguard24/modules/alerts.py
Normal file
346
onguard24/modules/alerts.py
Normal file
@ -0,0 +1,346 @@
|
||||
"""Учёт входящих алертов (отдельно от инцидентов): firing → acknowledged → resolved."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import json
|
||||
import logging
|
||||
from uuid import UUID
|
||||
|
||||
import asyncpg
|
||||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||
from fastapi.responses import HTMLResponse
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from onguard24.deps import get_pool
|
||||
from onguard24.domain.events import EventBus
|
||||
from onguard24.modules.ui_support import wrap_module_html_page
|
||||
|
||||
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Two routers: one for the JSON API, one for the HTML pages.
# The HTML router is excluded from the generated OpenAPI schema.
router = APIRouter(tags=["module-alerts"])
ui_router = APIRouter(tags=["web-alerts"], include_in_schema=False)

# Status values accepted by the ``status`` filter of the list endpoint.
_VALID_STATUS = frozenset(("firing", "acknowledged", "resolved", "silenced"))
|
||||
|
||||
|
||||
def register_events(_bus: EventBus, _pool: asyncpg.Pool | None = None) -> None:
    """Accept the event bus and optional pool, but register nothing (no-op)."""
    return None
|
||||
|
||||
|
||||
class AckBody(BaseModel):
    """Request body of the acknowledge endpoint."""

    # Optional name of whoever acknowledged the alert (at most 200 characters).
    by_user: str | None = Field(
        default=None,
        max_length=200,
        description="Кто подтвердил",
    )
|
||||
|
||||
|
||||
class ResolveBody(BaseModel):
    """Request body of the resolve endpoint."""

    # Optional name of whoever resolved the alert (at most 200 characters).
    by_user: str | None = Field(
        default=None,
        max_length=200,
    )
|
||||
|
||||
|
||||
def _row_to_item(r: asyncpg.Record) -> dict:
|
||||
return {
|
||||
"id": str(r["id"]),
|
||||
"ingress_event_id": str(r["ingress_event_id"]),
|
||||
"status": r["status"],
|
||||
"title": r["title"],
|
||||
"severity": r["severity"],
|
||||
"source": r["source"],
|
||||
"grafana_org_slug": r["grafana_org_slug"],
|
||||
"service_name": r["service_name"],
|
||||
"labels": r["labels"] if isinstance(r["labels"], dict) else {},
|
||||
"fingerprint": r["fingerprint"],
|
||||
"acknowledged_at": r["acknowledged_at"].isoformat() if r["acknowledged_at"] else None,
|
||||
"acknowledged_by": r["acknowledged_by"],
|
||||
"resolved_at": r["resolved_at"].isoformat() if r["resolved_at"] else None,
|
||||
"resolved_by": r["resolved_by"],
|
||||
"created_at": r["created_at"].isoformat() if r["created_at"] else None,
|
||||
"updated_at": r["updated_at"].isoformat() if r["updated_at"] else None,
|
||||
}
|
||||
|
||||
|
||||
@router.get("/")
async def list_alerts_api(
    pool: asyncpg.Pool | None = Depends(get_pool),
    status: str | None = None,
    limit: int = 100,
):
    """List alerts, newest first, optionally filtered by status.

    ``limit`` is clamped to the range 1..200. Without a pool an empty list is
    returned together with ``"database": "disabled"``.

    Raises:
        HTTPException: 400 when ``status`` is given but is not a known value.
    """
    if pool is None:
        return {"items": [], "database": "disabled"}

    page_size = max(1, min(limit, 200))

    # Normalise the filter; an absent or blank filter means "all statuses".
    wanted = status.strip().lower() if status else ""
    if wanted and wanted not in _VALID_STATUS:
        raise HTTPException(status_code=400, detail="invalid status filter")

    if wanted:
        query = """
                SELECT * FROM irm_alerts WHERE status = $1
                ORDER BY created_at DESC LIMIT $2
                """
        params = (wanted, page_size)
    else:
        query = """
                SELECT * FROM irm_alerts
                ORDER BY created_at DESC LIMIT $1
                """
        params = (page_size,)

    async with pool.acquire() as conn:
        records = await conn.fetch(query, *params)
    return {"items": [_row_to_item(rec) for rec in records]}
|
||||
|
||||
|
||||
@router.get("/{alert_id}")
async def get_alert_api(alert_id: UUID, pool: asyncpg.Pool | None = Depends(get_pool)):
    """Return one alert plus the raw webhook event it was created from.

    The result is the alert item extended with ``raw_received_at`` and
    ``raw_body``; both are ``None`` when no ingress event row is found.

    Raises:
        HTTPException: 503 when no pool is configured, 404 when the alert
            does not exist.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")

    async with pool.acquire() as conn:
        alert = await conn.fetchrow("SELECT * FROM irm_alerts WHERE id = $1::uuid", alert_id)
        if not alert:
            raise HTTPException(status_code=404, detail="not found")
        event = None
        if alert.get("ingress_event_id"):
            event = await conn.fetchrow(
                "SELECT id, body, received_at FROM ingress_events WHERE id = $1::uuid",
                alert["ingress_event_id"],
            )

    item = _row_to_item(alert)
    item["raw_received_at"] = None
    item["raw_body"] = None
    if event:
        received = event["received_at"]
        payload = event["body"]
        item["raw_received_at"] = received.isoformat() if received else None
        # Mapping-like bodies are copied into a plain dict; anything else is passed through.
        item["raw_body"] = dict(payload) if hasattr(payload, "keys") else payload
    return item
|
||||
|
||||
|
||||
@router.patch("/{alert_id}/acknowledge", status_code=200)
async def acknowledge_alert_api(
    alert_id: UUID,
    body: AckBody,
    pool: asyncpg.Pool | None = Depends(get_pool),
):
    """Move an alert from ``firing`` to ``acknowledged``.

    The update only matches rows whose status is ``firing``. A blank or
    missing ``by_user`` leaves the stored ``acknowledged_by`` untouched.

    Raises:
        HTTPException: 503 when no pool is configured, 409 when no row was
            updated (alert missing or not in ``firing`` state).
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")

    actor = body.by_user.strip() if body.by_user else ""
    sql = """
            UPDATE irm_alerts SET
              status = 'acknowledged',
              acknowledged_at = now(),
              acknowledged_by = COALESCE($2, acknowledged_by),
              updated_at = now()
            WHERE id = $1::uuid AND status = 'firing'
            RETURNING *
            """
    async with pool.acquire() as conn:
        updated = await conn.fetchrow(sql, alert_id, actor or None)

    if updated:
        return _row_to_item(updated)
    raise HTTPException(status_code=409, detail="alert not in firing state or not found")
|
||||
|
||||
|
||||
@router.patch("/{alert_id}/resolve", status_code=200)
async def resolve_alert_api(
    alert_id: UUID,
    body: ResolveBody,
    pool: asyncpg.Pool | None = Depends(get_pool),
):
    """Mark an alert as ``resolved``.

    The update only matches rows whose status is ``firing`` or
    ``acknowledged``. A blank or missing ``by_user`` leaves the stored
    ``resolved_by`` untouched.

    Raises:
        HTTPException: 503 when no pool is configured, 409 when no row was
            updated (alert missing or in a state that cannot be resolved).
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")

    actor = body.by_user.strip() if body.by_user else ""
    sql = """
            UPDATE irm_alerts SET
              status = 'resolved',
              resolved_at = now(),
              resolved_by = COALESCE($2, resolved_by),
              updated_at = now()
            WHERE id = $1::uuid AND status IN ('firing', 'acknowledged')
            RETURNING *
            """
    async with pool.acquire() as conn:
        updated = await conn.fetchrow(sql, alert_id, actor or None)

    if updated:
        return _row_to_item(updated)
    raise HTTPException(
        status_code=409,
        detail="alert cannot be resolved from current state or not found",
    )
|
||||
|
||||
|
||||
_SYNC_BTN_STYLE = """
|
||||
<script>
|
||||
function ogAck(aid){fetch('/api/v1/modules/alerts/'+aid+'/acknowledge',{method:'PATCH',headers:{'Content-Type':'application/json'},body:JSON.stringify({})}).then(r=>{if(r.ok)location.reload();else r.text().then(t=>alert('Ошибка '+r.status+': '+t.slice(0,200)));});}
|
||||
function ogRes(aid){fetch('/api/v1/modules/alerts/'+aid+'/resolve',{method:'PATCH',headers:{'Content-Type':'application/json'},body:JSON.stringify({})}).then(r=>{if(r.ok)location.reload();else r.text().then(t=>alert('Ошибка '+r.status+': '+t.slice(0,200)));});}
|
||||
function ogInc(aid,title){var t=prompt('Заголовок инцидента',title||'');if(t===null)return;fetch('/api/v1/modules/incidents/',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({title:t,alert_ids:[aid]})}).then(r=>{if(r.ok)r.json().then(j=>location.href='/ui/modules/incidents/'+j.id);else r.text().then(x=>alert('Ошибка '+r.status+': '+x.slice(0,200)));});}
|
||||
</script>
|
||||
"""
|
||||
|
||||
|
||||
@ui_router.get("/", response_class=HTMLResponse)
async def alerts_ui_list(request: Request):
    """Render the HTML page listing the 150 most recently created alerts.

    A placeholder paragraph is shown when no pool is configured or when the
    table is empty. Errors raised while querying or rendering are logged and
    shown inline on the page instead of propagating to the client.
    """

    def _cell(value: object) -> str:
        # NULL/empty columns render as a dash; html.escape() would raise
        # TypeError on None and turn the whole page into an error message.
        return f"<td>{html.escape(str(value or '—'))}</td>"

    pool = get_pool(request)
    if pool is None:
        body = "<p>База не настроена.</p>"
    else:
        try:
            async with pool.acquire() as conn:
                rows = await conn.fetch(
                    """
                    SELECT id, status, title, severity, grafana_org_slug, service_name, created_at, fingerprint
                    FROM irm_alerts
                    ORDER BY created_at DESC
                    LIMIT 150
                    """
                )
            if not rows:
                body = "<p>Пока нет алертов. События появляются после вебхука Grafana.</p>"
            else:
                trs = []
                for r in rows:
                    aid = str(r["id"])
                    created = r["created_at"].isoformat() if r["created_at"] else None
                    trs.append(
                        "<tr>"
                        + _cell(r["status"])
                        + f"<td><a href=\"/ui/modules/alerts/{html.escape(aid, quote=True)}\">"
                        + f"{html.escape(aid[:8])}…</a></td>"
                        # Long titles are cut to 200 characters in the table.
                        + _cell((r["title"] or "")[:200])
                        + _cell(r["severity"])
                        + _cell(r["grafana_org_slug"])
                        + _cell(r["service_name"])
                        + _cell(created)
                        + "</tr>"
                    )
                body = (
                    "<p class='gc-muted'>Алерт — запись о входящем уведомлении. "
                    "<strong>Инцидент</strong> создаётся вручную (из карточки алерта или раздела «Инциденты») "
                    "и может ссылаться на один или несколько алертов.</p>"
                    "<table class='irm-table'><thead><tr><th>Статус</th><th>ID</th><th>Заголовок</th>"
                    "<th>Важность</th><th>Grafana slug</th><th>Сервис</th><th>Создан</th></tr></thead><tbody>"
                    + "".join(trs)
                    + "</tbody></table>"
                )
        except Exception as e:
            # UI boundary: keep the page usable, but do not lose the traceback.
            log.exception("alerts UI: failed to render alert list")
            body = f"<p class='module-err'>{html.escape(str(e))}</p>"

    page = f"<h1>Алерты</h1>{body}{_SYNC_BTN_STYLE}"
    return HTMLResponse(
        wrap_module_html_page(
            document_title="Алерты — onGuard24",
            current_slug="alerts",
            main_inner_html=page,
        )
    )
|
||||
|
||||
|
||||
@ui_router.get("/{alert_id:uuid}", response_class=HTMLResponse)
async def alerts_ui_detail(request: Request, alert_id: UUID):
    """Render the HTML detail page for a single alert.

    The page shows the alert fields, its labels, the body of the ingress
    event it came from (cut to 14000 characters) and action buttons that
    depend on the current status. Database errors are logged and shown
    inline; a missing alert renders a "not found" paragraph.
    """

    def _page(inner_html: str) -> HTMLResponse:
        # Every outcome of this view is wrapped in the same module page shell.
        return HTMLResponse(
            wrap_module_html_page(
                document_title="Алерт — onGuard24",
                current_slug="alerts",
                main_inner_html=inner_html,
            )
        )

    def _text(value: object) -> str:
        # NULL/empty values render as a dash; html.escape() raises on None.
        return html.escape(str(value or "—"))

    pool = get_pool(request)
    if pool is None:
        return _page("<h1>Алерт</h1><p>База не настроена.</p>")

    try:
        async with pool.acquire() as conn:
            row = await conn.fetchrow("SELECT * FROM irm_alerts WHERE id = $1::uuid", alert_id)
            raw = None
            if row and row.get("ingress_event_id"):
                raw = await conn.fetchrow(
                    "SELECT body, received_at FROM ingress_events WHERE id = $1::uuid",
                    row["ingress_event_id"],
                )
    except Exception as e:
        # UI boundary: keep the page usable, but do not lose the traceback.
        log.exception("alerts UI: failed to load alert %s", alert_id)
        return _page(f"<h1>Алерт</h1><p class='module-err'>{html.escape(str(e))}</p>")

    if not row:
        inner = "<p>Не найдено.</p>"
    else:
        aid = str(row["id"])
        st = row["status"]
        aid_attr = html.escape(aid, quote=True)
        # json.dumps() yields a double-quoted JS string literal. It is placed
        # inside a double-quoted onclick attribute, so it must be HTML-escaped:
        # otherwise its first quote ends the attribute, which breaks the button
        # and lets a webhook-supplied title inject markup.
        title_attr = html.escape(json.dumps(row["title"] or ""), quote=True)

        btns = []
        if st == "firing":
            btns.append(
                f"<button type='button' class='og-btn og-btn-primary' "
                f"onclick=\"ogAck('{aid_attr}')\">Подтвердить (Ack)</button>"
            )
        if st in ("firing", "acknowledged"):
            btns.append(
                f"<button type='button' class='og-btn' "
                f"onclick=\"ogRes('{aid_attr}')\">Resolve</button>"
            )
        btns.append(
            f"<button type='button' class='og-btn' "
            f"onclick=\"ogInc('{aid_attr}',{title_attr})\">"
            "Создать инцидент</button>"
        )

        # asyncpg hands json/jsonb back as text unless a type codec is
        # registered, so accept both a decoded dict and its JSON text form.
        lab = row["labels"]
        if isinstance(lab, str):
            try:
                lab = json.loads(lab)
            except ValueError:
                lab = None
        lab_s = json.dumps(dict(lab), ensure_ascii=False, indent=2) if isinstance(lab, dict) else "{}"

        raw_pre = ""
        if raw:
            b = raw["body"]
            pretty = json.dumps(dict(b), ensure_ascii=False, indent=2) if hasattr(b, "keys") else str(b)
            # Cap very large payloads so the page stays responsive.
            if len(pretty) > 14000:
                pretty = pretty[:14000] + "\n…"
            raw_pre = (
                "<h2 style='font-size:1.05rem;margin-top:1rem'>Полное тело вебхука</h2>"
                f"<pre style='overflow:auto;max-height:26rem;font-size:0.78rem;"
                f"background:#18181b;color:#e4e4e7;padding:0.75rem;border-radius:8px'>"
                f"{html.escape(pretty)}</pre>"
            )

        inner = (
            f"<p><a href=\"/ui/modules/alerts/\">← К списку алертов</a></p>"
            f"<h1>Алерт</h1><div class='og-sync-bar'>{''.join(btns)}</div>"
            f"<dl style='display:grid;grid-template-columns:11rem 1fr;gap:0.35rem 1rem;font-size:0.9rem'>"
            f"<dt>ID</dt><dd><code>{html.escape(aid)}</code></dd>"
            f"<dt>Статус</dt><dd>{_text(st)}</dd>"
            f"<dt>Заголовок</dt><dd>{_text(row['title'])}</dd>"
            f"<dt>Важность</dt><dd>{_text(row['severity'])}</dd>"
            f"<dt>Grafana slug</dt><dd>{_text(row['grafana_org_slug'])}</dd>"
            f"<dt>Сервис</dt><dd>{_text(row['service_name'])}</dd>"
            f"<dt>Fingerprint</dt><dd><code>{_text(row['fingerprint'])}</code></dd>"
            f"<dt>Labels</dt><dd><pre style='margin:0;font-size:0.8rem'>{html.escape(lab_s)}</pre></dd>"
            f"</dl>{raw_pre}"
        )

    return _page(f"{inner}{_SYNC_BTN_STYLE}")
|
||||
|
||||
|
||||
async def render_home_fragment(request: Request) -> str:
    """Return the HTML snippet with alert counters for the home page.

    Shows the total number of alerts and how many are ``firing``. When no
    pool is configured, or the counters cannot be read, a short note is
    returned instead; the failure is logged so that its real cause is not
    lost behind the generic note.
    """
    pool = get_pool(request)
    if pool is None:
        return '<p class="module-note">Нужна БД для учёта алертов.</p>'
    try:
        async with pool.acquire() as conn:
            n = await conn.fetchval("SELECT count(*)::int FROM irm_alerts")
            nf = await conn.fetchval(
                "SELECT count(*)::int FROM irm_alerts WHERE status = 'firing'"
            )
    except Exception:
        # Best effort: the home page must still render, but record why the
        # counters were unavailable instead of swallowing the error silently.
        log.warning("alerts: home fragment counters unavailable", exc_info=True)
        return '<p class="module-note">Таблица алертов недоступна (миграция 005?).</p>'
    return (
        f'<div class="module-fragment"><p>Алертов в учёте: <strong>{int(n)}</strong> '
        f'(<strong>{int(nf)}</strong> firing). '
        f'<a href="/ui/modules/alerts/">Открыть</a></p></div>'
    )
|
||||
Reference in New Issue
Block a user