- Alembic 005: таблицы irm_alerts и incident_alert_links - Модуль alerts: API/UI, Ack/Resolve, привязка к инциденту через alert_ids - Вебхук Grafana: одна транзакция ingress + irm_alerts; разбор payload в grafana_payload - По умолчанию инцидент из вебхука не создаётся (AUTO_INCIDENT_FROM_ALERT) - Документация IRM_GRAFANA_PARITY.md, обновления IRM.md и CHANGELOG Made-with: Cursor
347 lines
14 KiB
Python
347 lines
14 KiB
Python
"""Учёт входящих алертов (отдельно от инцидентов): firing → acknowledged → resolved."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import html
|
||
import json
|
||
import logging
|
||
from uuid import UUID
|
||
|
||
import asyncpg
|
||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||
from fastapi.responses import HTMLResponse
|
||
from pydantic import BaseModel, Field
|
||
|
||
from onguard24.deps import get_pool
|
||
from onguard24.domain.events import EventBus
|
||
from onguard24.modules.ui_support import wrap_module_html_page
|
||
|
||
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Router carrying the JSON API handlers of the alerts module.
router = APIRouter(tags=["module-alerts"])
# Router carrying the HTML pages; excluded from the generated OpenAPI schema.
ui_router = APIRouter(tags=["web-alerts"], include_in_schema=False)

# Status values accepted by the list endpoint's ``status`` filter.
# NOTE(review): the handlers visible in this file only ever set
# 'acknowledged' and 'resolved'; 'silenced' is presumably written elsewhere.
_VALID_STATUS = frozenset({"firing", "acknowledged", "resolved", "silenced"})
|
||
|
||
|
||
def register_events(_bus: EventBus, _pool: asyncpg.Pool | None = None) -> None:
    """Event-bus registration hook of the alerts module.

    Nothing is subscribed: both arguments are accepted and ignored.
    """
    return None
|
||
|
||
|
||
class AckBody(BaseModel):
    # Request body of the acknowledge endpoint.
    # by_user: optional name of whoever acknowledged the alert,
    # limited to 200 characters.
    by_user: str | None = Field(
        None,
        description="Кто подтвердил",
        max_length=200,
    )
|
||
|
||
|
||
class ResolveBody(BaseModel):
    # Request body of the resolve endpoint.
    # by_user: optional name of whoever resolved the alert,
    # limited to 200 characters.
    by_user: str | None = Field(
        None,
        max_length=200,
    )
|
||
|
||
|
||
def _row_to_item(r: asyncpg.Record) -> dict:
|
||
return {
|
||
"id": str(r["id"]),
|
||
"ingress_event_id": str(r["ingress_event_id"]),
|
||
"status": r["status"],
|
||
"title": r["title"],
|
||
"severity": r["severity"],
|
||
"source": r["source"],
|
||
"grafana_org_slug": r["grafana_org_slug"],
|
||
"service_name": r["service_name"],
|
||
"labels": r["labels"] if isinstance(r["labels"], dict) else {},
|
||
"fingerprint": r["fingerprint"],
|
||
"acknowledged_at": r["acknowledged_at"].isoformat() if r["acknowledged_at"] else None,
|
||
"acknowledged_by": r["acknowledged_by"],
|
||
"resolved_at": r["resolved_at"].isoformat() if r["resolved_at"] else None,
|
||
"resolved_by": r["resolved_by"],
|
||
"created_at": r["created_at"].isoformat() if r["created_at"] else None,
|
||
"updated_at": r["updated_at"].isoformat() if r["updated_at"] else None,
|
||
}
|
||
|
||
|
||
@router.get("/")
async def list_alerts_api(
    pool: asyncpg.Pool | None = Depends(get_pool),
    status: str | None = None,
    limit: int = 100,
):
    """List alerts, newest first, optionally filtered by status.

    ``limit`` is clamped to the range 1..200. ``status`` is trimmed and
    lower-cased; when non-empty it must be a member of ``_VALID_STATUS``,
    otherwise HTTP 400 is raised. Without a database pool an empty list is
    returned together with ``"database": "disabled"``.
    """
    if pool is None:
        return {"items": [], "database": "disabled"}

    page_size = max(1, min(limit, 200))
    wanted = (status or "").strip().lower()
    if wanted and wanted not in _VALID_STATUS:
        raise HTTPException(status_code=400, detail="invalid status filter")

    # Choose the statement and its positional arguments up front so the
    # connection is used for a single fetch.
    if wanted:
        query = """
            SELECT * FROM irm_alerts WHERE status = $1
            ORDER BY created_at DESC LIMIT $2
            """
        params = (wanted, page_size)
    else:
        query = """
            SELECT * FROM irm_alerts
            ORDER BY created_at DESC LIMIT $1
            """
        params = (page_size,)

    async with pool.acquire() as conn:
        records = await conn.fetch(query, *params)
    return {"items": list(map(_row_to_item, records))}
|
||
|
||
|
||
@router.get("/{alert_id}")
async def get_alert_api(alert_id: UUID, pool: asyncpg.Pool | None = Depends(get_pool)):
    """Return one alert together with the webhook event it came from.

    The result is the ``_row_to_item`` dict extended with ``raw_received_at``
    and ``raw_body`` taken from the linked ``ingress_events`` row; both are
    ``None`` when no such row is found.

    Raises:
        HTTPException: 503 when no database pool is configured, 404 when
            the alert does not exist.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")

    webhook = None
    async with pool.acquire() as conn:
        alert = await conn.fetchrow("SELECT * FROM irm_alerts WHERE id = $1::uuid", alert_id)
        if alert and alert.get("ingress_event_id"):
            webhook = await conn.fetchrow(
                "SELECT id, body, received_at FROM ingress_events WHERE id = $1::uuid",
                alert["ingress_event_id"],
            )
    if not alert:
        raise HTTPException(status_code=404, detail="not found")

    received_at = None
    payload = None
    if webhook:
        if webhook["received_at"]:
            received_at = webhook["received_at"].isoformat()
        payload = webhook["body"]
        # Mapping-like bodies are copied into a plain dict; anything else
        # is passed through unchanged.
        if hasattr(payload, "keys"):
            payload = dict(payload)

    item = _row_to_item(alert)
    item["raw_received_at"] = received_at
    item["raw_body"] = payload
    return item
|
||
|
||
|
||
@router.patch("/{alert_id}/acknowledge", status_code=200)
async def acknowledge_alert_api(
    alert_id: UUID,
    body: AckBody,
    pool: asyncpg.Pool | None = Depends(get_pool),
):
    """Move a firing alert to ``acknowledged`` and return the updated row.

    ``body.by_user`` is trimmed; a blank value leaves the stored
    ``acknowledged_by`` untouched (``COALESCE`` in the statement).

    Raises:
        HTTPException: 503 when no database pool is configured, 409 when
            no row matched, i.e. the alert is missing or not firing.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")

    actor = (body.by_user or "").strip()
    if not actor:
        actor = None

    # The status guard in WHERE makes the transition a single atomic
    # statement: only a row that is still 'firing' is updated.
    statement = """
        UPDATE irm_alerts SET
            status = 'acknowledged',
            acknowledged_at = now(),
            acknowledged_by = COALESCE($2, acknowledged_by),
            updated_at = now()
        WHERE id = $1::uuid AND status = 'firing'
        RETURNING *
        """
    async with pool.acquire() as conn:
        updated = await conn.fetchrow(statement, alert_id, actor)
    if not updated:
        raise HTTPException(status_code=409, detail="alert not in firing state or not found")
    return _row_to_item(updated)
|
||
|
||
|
||
@router.patch("/{alert_id}/resolve", status_code=200)
async def resolve_alert_api(
    alert_id: UUID,
    body: ResolveBody,
    pool: asyncpg.Pool | None = Depends(get_pool),
):
    """Move a firing or acknowledged alert to ``resolved`` and return it.

    ``body.by_user`` is trimmed; a blank value leaves the stored
    ``resolved_by`` untouched (``COALESCE`` in the statement).

    Raises:
        HTTPException: 503 when no database pool is configured, 409 when
            no row matched, i.e. the alert is missing or in another state.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")

    actor = (body.by_user or "").strip()
    if not actor:
        actor = None

    # The status guard in WHERE makes the transition a single atomic
    # statement: only 'firing' or 'acknowledged' rows are updated.
    statement = """
        UPDATE irm_alerts SET
            status = 'resolved',
            resolved_at = now(),
            resolved_by = COALESCE($2, resolved_by),
            updated_at = now()
        WHERE id = $1::uuid AND status IN ('firing', 'acknowledged')
        RETURNING *
        """
    async with pool.acquire() as conn:
        updated = await conn.fetchrow(statement, alert_id, actor)
    if updated:
        return _row_to_item(updated)
    raise HTTPException(
        status_code=409,
        detail="alert cannot be resolved from current state or not found",
    )
|
||
|
||
|
||
_SYNC_BTN_STYLE = """
|
||
<script>
|
||
function ogAck(aid){fetch('/api/v1/modules/alerts/'+aid+'/acknowledge',{method:'PATCH',headers:{'Content-Type':'application/json'},body:JSON.stringify({})}).then(r=>{if(r.ok)location.reload();else r.text().then(t=>alert('Ошибка '+r.status+': '+t.slice(0,200)));});}
|
||
function ogRes(aid){fetch('/api/v1/modules/alerts/'+aid+'/resolve',{method:'PATCH',headers:{'Content-Type':'application/json'},body:JSON.stringify({})}).then(r=>{if(r.ok)location.reload();else r.text().then(t=>alert('Ошибка '+r.status+': '+t.slice(0,200)));});}
|
||
function ogInc(aid,title){var t=prompt('Заголовок инцидента',title||'');if(t===null)return;fetch('/api/v1/modules/incidents/',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({title:t,alert_ids:[aid]})}).then(r=>{if(r.ok)r.json().then(j=>location.href='/ui/modules/incidents/'+j.id);else r.text().then(x=>alert('Ошибка '+r.status+': '+x.slice(0,200)));});}
|
||
</script>
|
||
"""
|
||
|
||
|
||
@ui_router.get("/", response_class=HTMLResponse)
async def alerts_ui_list(request: Request):
    """Render the HTML table of the 150 most recent alerts.

    The page degrades instead of failing: without a database pool a short
    notice is shown, and any error while querying or rendering is logged
    and displayed (HTML-escaped) in place of the table.
    """
    pool = get_pool(request)
    body = ""
    if pool is None:
        body = "<p>База не настроена.</p>"
    else:
        try:
            async with pool.acquire() as conn:
                rows = await conn.fetch(
                    """
                    SELECT id, status, title, severity, grafana_org_slug, service_name, created_at, fingerprint
                    FROM irm_alerts
                    ORDER BY created_at DESC
                    LIMIT 150
                    """
                )
            if not rows:
                body = "<p>Пока нет алертов. События появляются после вебхука Grafana.</p>"
            else:
                trs = []
                for r in rows:
                    aid = str(r["id"])
                    trs.append(
                        "<tr>"
                        # status / severity go through the same None-safe
                        # path as the other cells: html.escape(None) would
                        # raise and blank the whole list because of one row.
                        f"<td>{html.escape(str(r['status'] or '—'))}</td>"
                        f"<td><a href=\"/ui/modules/alerts/{html.escape(aid, quote=True)}\">"
                        f"{html.escape(aid[:8])}…</a></td>"
                        f"<td>{html.escape((r['title'] or '—')[:200])}</td>"
                        f"<td>{html.escape(str(r['severity'] or '—'))}</td>"
                        f"<td>{html.escape(str(r['grafana_org_slug'] or '—'))}</td>"
                        f"<td>{html.escape(str(r['service_name'] or '—'))}</td>"
                        f"<td>{html.escape(r['created_at'].isoformat() if r['created_at'] else '—')}</td>"
                        "</tr>"
                    )
                body = (
                    "<p class='gc-muted'>Алерт — запись о входящем уведомлении. "
                    "<strong>Инцидент</strong> создаётся вручную (из карточки алерта или раздела «Инциденты») "
                    "и может ссылаться на один или несколько алертов.</p>"
                    "<table class='irm-table'><thead><tr><th>Статус</th><th>ID</th><th>Заголовок</th>"
                    "<th>Важность</th><th>Grafana slug</th><th>Сервис</th><th>Создан</th></tr></thead><tbody>"
                    + "".join(trs)
                    + "</tbody></table>"
                )
        except Exception as e:
            # Best-effort page: keep showing the message in the UI, but also
            # record the traceback so the failure is not silently lost.
            log.exception("alerts UI: failed to render the alert list")
            body = f"<p class='module-err'>{html.escape(str(e))}</p>"
    page = f"<h1>Алерты</h1>{body}{_SYNC_BTN_STYLE}"
    return HTMLResponse(
        wrap_module_html_page(
            document_title="Алерты — onGuard24",
            current_slug="alerts",
            main_inner_html=page,
        )
    )
|
||
|
||
|
||
def _alert_detail_response(inner_html: str) -> HTMLResponse:
    """Wrap ``inner_html`` in the shared module layout of the alert detail page."""
    return HTMLResponse(
        wrap_module_html_page(
            document_title="Алерт — onGuard24",
            current_slug="alerts",
            main_inner_html=inner_html,
        )
    )


def _alert_action_buttons(aid: str, status: str | None, title: str | None) -> str:
    """Build the Ack / Resolve / create-incident buttons for one alert.

    Ack is offered only while the alert is firing, Resolve while it is
    firing or acknowledged; creating an incident is always offered.
    """
    aid_attr = html.escape(aid, quote=True)
    # The title ends up as a JavaScript string literal inside a double-quoted
    # onclick attribute. JSON-encode it for JavaScript first, then HTML-escape
    # the result for the attribute: unescaped, the JSON quotes terminate the
    # attribute and a title containing '>' or quotes injects markup.
    title_attr = html.escape(json.dumps(title or ""), quote=True)
    btns = []
    if status == "firing":
        btns.append(
            f"<button type='button' class='og-btn og-btn-primary' "
            f"onclick=\"ogAck('{aid_attr}')\">Подтвердить (Ack)</button>"
        )
    if status in ("firing", "acknowledged"):
        btns.append(
            f"<button type='button' class='og-btn' "
            f"onclick=\"ogRes('{aid_attr}')\">Resolve</button>"
        )
    btns.append(
        f"<button type='button' class='og-btn' "
        f"onclick=\"ogInc('{aid_attr}',{title_attr})\">"
        "Создать инцидент</button>"
    )
    return "".join(btns)


@ui_router.get("/{alert_id:uuid}", response_class=HTMLResponse)
async def alerts_ui_detail(request: Request, alert_id: UUID):
    """Render the HTML card of one alert.

    The card shows the alert's fields, its labels, the action buttons and,
    when the linked ``ingress_events`` row exists, the webhook body
    (truncated to 14000 characters). A missing database pool, a query error
    (logged) and an unknown id each produce a short notice page instead.
    """
    pool = get_pool(request)
    if pool is None:
        return _alert_detail_response("<h1>Алерт</h1><p>База не настроена.</p>")
    try:
        async with pool.acquire() as conn:
            row = await conn.fetchrow("SELECT * FROM irm_alerts WHERE id = $1::uuid", alert_id)
            raw = None
            if row and row.get("ingress_event_id"):
                raw = await conn.fetchrow(
                    "SELECT body, received_at FROM ingress_events WHERE id = $1::uuid",
                    row["ingress_event_id"],
                )
    except Exception as e:
        # Best-effort page: show the message, but keep the traceback in logs.
        log.exception("alerts UI: failed to load alert %s", alert_id)
        return _alert_detail_response(
            f"<h1>Алерт</h1><p class='module-err'>{html.escape(str(e))}</p>"
        )
    if not row:
        inner = "<p>Не найдено.</p>"
    else:
        aid = str(row["id"])
        st = row["status"]
        buttons = _alert_action_buttons(aid, st, row["title"])
        lab = row["labels"]
        if isinstance(lab, dict):
            lab_s = json.dumps(dict(lab), ensure_ascii=False, indent=2)
        elif isinstance(lab, str) and lab.strip():
            # Labels delivered as JSON text (no codec on the connection):
            # show the stored text rather than an empty object.
            lab_s = lab
        else:
            lab_s = "{}"
        raw_pre = ""
        if raw:
            b = raw["body"]
            pretty = json.dumps(dict(b), ensure_ascii=False, indent=2) if hasattr(b, "keys") else str(b)
            if len(pretty) > 14000:
                pretty = pretty[:14000] + "\n…"
            raw_pre = (
                "<h2 style='font-size:1.05rem;margin-top:1rem'>Полное тело вебхука</h2>"
                f"<pre style='overflow:auto;max-height:26rem;font-size:0.78rem;"
                f"background:#18181b;color:#e4e4e7;padding:0.75rem;border-radius:8px'>"
                f"{html.escape(pretty)}</pre>"
            )
        inner = (
            f"<p><a href=\"/ui/modules/alerts/\">← К списку алертов</a></p>"
            f"<h1>Алерт</h1><div class='og-sync-bar'>{buttons}</div>"
            f"<dl style='display:grid;grid-template-columns:11rem 1fr;gap:0.35rem 1rem;font-size:0.9rem'>"
            f"<dt>ID</dt><dd><code>{html.escape(aid)}</code></dd>"
            # str(... or '—') keeps a NULL status / severity from raising
            # inside html.escape and turning the page into a 500.
            f"<dt>Статус</dt><dd>{html.escape(str(st or '—'))}</dd>"
            f"<dt>Заголовок</dt><dd>{html.escape(row['title'] or '—')}</dd>"
            f"<dt>Важность</dt><dd>{html.escape(str(row['severity'] or '—'))}</dd>"
            f"<dt>Grafana slug</dt><dd>{html.escape(str(row['grafana_org_slug'] or '—'))}</dd>"
            f"<dt>Сервис</dt><dd>{html.escape(str(row['service_name'] or '—'))}</dd>"
            f"<dt>Fingerprint</dt><dd><code>{html.escape(str(row['fingerprint'] or '—'))}</code></dd>"
            f"<dt>Labels</dt><dd><pre style='margin:0;font-size:0.8rem'>{html.escape(lab_s)}</pre></dd>"
            f"</dl>{raw_pre}"
        )
    return _alert_detail_response(f"{inner}{_SYNC_BTN_STYLE}")
|
||
|
||
|
||
async def render_home_fragment(request: Request) -> str:
    """Return the alerts summary snippet for the home page.

    The snippet shows the total number of alerts and how many of them are
    firing, with a link to the alerts list. A note is returned instead when
    no database pool is configured or when either count query fails.
    """
    pool = get_pool(request)
    if pool is None:
        return '<p class="module-note">Нужна БД для учёта алертов.</p>'

    try:
        async with pool.acquire() as conn:
            total = await conn.fetchval("SELECT count(*)::int FROM irm_alerts")
            firing = await conn.fetchval(
                "SELECT count(*)::int FROM irm_alerts WHERE status = 'firing'"
            )
    except Exception:
        return '<p class="module-note">Таблица алертов недоступна (миграция 005?).</p>'

    total_count = int(total)
    firing_count = int(firing)
    parts = [
        f'<div class="module-fragment"><p>Алертов в учёте: <strong>{total_count}</strong> ',
        f'(<strong>{firing_count}</strong> firing). ',
        '<a href="/ui/modules/alerts/">Открыть</a></p></div>',
    ]
    return "".join(parts)
|