- Alembic 005: таблицы irm_alerts и incident_alert_links - Модуль alerts: API/UI, Ack/Resolve, привязка к инциденту через alert_ids - Вебхук Grafana: одна транзакция ingress + irm_alerts; разбор payload в grafana_payload - По умолчанию инцидент из вебхука не создаётся (AUTO_INCIDENT_FROM_ALERT) - Документация IRM_GRAFANA_PARITY.md, обновления IRM.md и CHANGELOG Made-with: Cursor
426 lines
17 KiB
Python
426 lines
17 KiB
Python
"""IRM: инциденты — учёт сбоев, связь с сырым ingress и событием alert.received."""
|
||
|
||
from __future__ import annotations
|
||
|
||
import html
|
||
import json
|
||
import logging
|
||
from uuid import UUID
|
||
|
||
import asyncpg
|
||
from fastapi import APIRouter, Depends, HTTPException, Request
|
||
from fastapi.responses import HTMLResponse
|
||
from pydantic import BaseModel, Field
|
||
|
||
from onguard24.config import get_settings
|
||
from onguard24.deps import get_pool
|
||
from onguard24.domain.events import AlertReceived, DomainEvent, EventBus
|
||
from onguard24.modules.ui_support import wrap_module_html_page
|
||
|
||
log = logging.getLogger(__name__)
|
||
|
||
router = APIRouter(tags=["module-incidents"])
|
||
ui_router = APIRouter(tags=["web-incidents"], include_in_schema=False)
|
||
|
||
|
||
class IncidentCreate(BaseModel):
    """Request body accepted by the incident creation endpoint."""

    # Required title, between 1 and 500 characters.
    title: str = Field(
        ...,
        min_length=1,
        max_length=500,
    )
    # Status label; "open" unless the client supplies another value.
    status: str = Field(
        default="open",
        max_length=64,
    )
    # Severity label; "warning" unless the client supplies another value.
    severity: str = Field(
        default="warning",
        max_length=32,
    )
    # Optional alert ids to link to the incident; empty list by default.
    alert_ids: list[UUID] = Field(
        default_factory=list,
        description="Привязка к irm_alerts",
    )
|
||
|
||
|
||
class IncidentPatch(BaseModel):
    """Request body for a partial incident update; every field is optional."""

    # New title (1-500 characters) or None to leave it unchanged.
    title: str | None = Field(
        default=None,
        min_length=1,
        max_length=500,
    )
    # New status label (up to 64 characters) or None to leave it unchanged.
    status: str | None = Field(
        default=None,
        max_length=64,
    )
    # New severity label (up to 32 characters) or None to leave it unchanged.
    severity: str | None = Field(
        default=None,
        max_length=32,
    )
|
||
|
||
|
||
def register_events(bus: EventBus, pool: asyncpg.Pool | None = None) -> None:
    """Subscribe a handler for ``alert.received`` that may create an incident.

    Nothing is subscribed when ``pool`` is ``None``. The handler only inserts
    a row when the ``auto_incident_from_alert`` setting is truthy and the event
    is an ``AlertReceived`` carrying a ``raw_payload_ref``.
    """
    if pool is None:
        return

    insert_sql = """
        INSERT INTO incidents (
            title, status, severity, source, ingress_event_id,
            grafana_org_slug, service_name
        )
        VALUES ($1, 'open', $2, 'grafana', $3::uuid, $4, $5)
        """

    async def on_alert(ev: DomainEvent) -> None:
        # Settings are consulted on every event, before any other check.
        if not get_settings().auto_incident_from_alert:
            return
        if not isinstance(ev, AlertReceived):
            return
        if ev.raw_payload_ref is None:
            return

        alert = ev.alert
        has_alert = bool(alert)
        # Fall back to placeholder values when the event carries no alert.
        title = alert.title if has_alert else "Алерт без названия"
        title = title[:500]
        severity = alert.severity.value if has_alert else "warning"

        try:
            async with pool.acquire() as conn:
                await conn.execute(
                    insert_sql,
                    title,
                    severity,
                    ev.raw_payload_ref,
                    ev.grafana_org_slug,
                    ev.service_name,
                )
        except Exception:
            # Best effort: a failed insert is logged and not re-raised.
            log.exception("incidents: не удалось создать инцидент из alert.received")

    bus.subscribe("alert.received", on_alert)
|
||
|
||
|
||
async def render_home_fragment(request: Request) -> str:
    """Return an HTML fragment showing how many incidents are stored.

    A note paragraph is returned instead when no pool is available or when
    the count query raises.
    """
    pool = get_pool(request)
    if pool is None:
        return '<p class="module-note">Нужна БД для списка инцидентов.</p>'

    try:
        async with pool.acquire() as conn:
            total = await conn.fetchval("SELECT count(*)::int FROM incidents")
    except Exception:
        return '<p class="module-note">Таблица инцидентов недоступна (миграции?).</p>'
    else:
        return f'<div class="module-fragment"><p>Инцидентов в учёте: <strong>{int(total)}</strong></p></div>'
|
||
|
||
|
||
@router.get("/")
async def list_incidents_api(
    pool: asyncpg.Pool | None = Depends(get_pool),
    limit: int = 50,
    grafana_org_slug: str | None = None,
    service_name: str | None = None,
):
    """List incidents, newest first, optionally filtered by org slug and service.

    Args:
        pool: Database pool, or ``None`` when the database is disabled.
        limit: Maximum number of rows; clamped to the range 1..200.
        grafana_org_slug: Exact-match filter; ignored when empty or blank.
        service_name: Exact-match filter; ignored when empty or blank.

    Returns:
        ``{"items": [...]}``; when ``pool`` is ``None`` the list is empty and
        a ``"database": "disabled"`` marker is added.
    """
    if pool is None:
        return {"items": [], "database": "disabled"}
    limit = min(max(limit, 1), 200)

    conditions: list[str] = []
    args: list = []
    # Column names below are fixed literals; only the values are user input
    # and those are always passed as positional query parameters.
    for column, value in (
        ("grafana_org_slug", grafana_org_slug),
        ("service_name", service_name),
    ):
        cleaned = value.strip() if value else ""
        if cleaned:
            args.append(cleaned)
            conditions.append(f"{column} = ${len(args)}")

    where_sql = ("WHERE " + " AND ".join(conditions)) if conditions else ""
    args.append(limit)
    lim_ph = f"${len(args)}"
    q = f"""
        SELECT id, title, status, severity, source, ingress_event_id, created_at, updated_at,
               grafana_org_slug, service_name
        FROM incidents
        {where_sql}
        ORDER BY created_at DESC
        LIMIT {lim_ph}
        """
    async with pool.acquire() as conn:
        rows = await conn.fetch(q, *args)

    # Reuse the shared serializer so list and detail responses cannot drift.
    return {"items": [_incident_row_dict(r) for r in rows]}
|
||
|
||
|
||
@router.post("/", status_code=201)
async def create_incident_api(
    body: IncidentCreate,
    pool: asyncpg.Pool | None = Depends(get_pool),
):
    """Create a manual incident and link it to the given alerts.

    The incident row and its alert links are written in one transaction.
    At most the first 50 entries of ``body.alert_ids`` are linked.

    Raises:
        HTTPException: 503 when the database is disabled; 422 when the title
            contains only whitespace.

    Returns:
        The created incident as a JSON-serialisable dict.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")

    # The model's min_length check runs before stripping, so a title such as
    # "   " would otherwise be stored as an empty string.
    title = body.title.strip()
    if not title:
        raise HTTPException(status_code=422, detail="title must not be blank")

    async with pool.acquire() as conn:
        async with conn.transaction():
            row = await conn.fetchrow(
                """
                INSERT INTO incidents (title, status, severity, source, grafana_org_slug, service_name)
                VALUES ($1, $2, $3, 'manual', NULL, NULL)
                RETURNING id, title, status, severity, source, ingress_event_id, created_at, updated_at,
                          grafana_org_slug, service_name
                """,
                title,
                body.status,
                body.severity,
            )
            iid = row["id"]
            for aid in body.alert_ids[:50]:
                await conn.execute(
                    """
                    INSERT INTO incident_alert_links (incident_id, alert_id)
                    VALUES ($1::uuid, $2::uuid)
                    ON CONFLICT DO NOTHING
                    """,
                    iid,
                    aid,
                )

    # Same serializer as the GET/PATCH endpoints, keeping the shape identical.
    return _incident_row_dict(row)
|
||
|
||
|
||
def _incident_row_dict(row) -> dict:
    """Convert an incident row into a JSON-serialisable dict.

    ``id`` is stringified, ``ingress_event_id`` is stringified when truthy,
    and the two timestamps are rendered with ``isoformat()`` when truthy;
    falsy values become ``None``. ``updated_at``, ``grafana_org_slug`` and
    ``service_name`` are read with ``row.get`` and may therefore be absent.
    """
    result = {"id": str(row["id"])}
    # These columns are copied through unchanged.
    for key in ("title", "status", "severity", "source"):
        result[key] = row[key]

    ingress_id = row["ingress_event_id"]
    result["ingress_event_id"] = str(ingress_id) if ingress_id else None

    created = row["created_at"]
    result["created_at"] = created.isoformat() if created else None

    updated = row.get("updated_at")
    result["updated_at"] = updated.isoformat() if updated else None

    result["grafana_org_slug"] = row.get("grafana_org_slug")
    result["service_name"] = row.get("service_name")
    return result
|
||
|
||
|
||
@router.get("/{incident_id}/tasks")
async def list_incident_tasks_api(
    incident_id: UUID,
    pool: asyncpg.Pool | None = Depends(get_pool),
    limit: int = 100,
):
    """Return the tasks attached to one incident, newest first.

    ``limit`` is clamped to 1..200. Responds with 503 when the database is
    disabled and 404 when the incident does not exist.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")

    capped = max(1, min(limit, 200))
    tasks_sql = """
        SELECT id, incident_id, title, status, created_at
        FROM tasks WHERE incident_id = $1::uuid
        ORDER BY created_at DESC LIMIT $2
        """

    async with pool.acquire() as conn:
        # Check the incident first so an unknown id yields 404, not an empty list.
        found = await conn.fetchval("SELECT 1 FROM incidents WHERE id = $1::uuid", incident_id)
        if not found:
            raise HTTPException(status_code=404, detail="incident not found")
        task_rows = await conn.fetch(tasks_sql, incident_id, capped)

    task_items = [
        {
            "id": str(rec["id"]),
            "incident_id": str(rec["incident_id"]) if rec["incident_id"] else None,
            "title": rec["title"],
            "status": rec["status"],
            "created_at": rec["created_at"].isoformat() if rec["created_at"] else None,
        }
        for rec in task_rows
    ]
    return {"incident_id": str(incident_id), "items": task_items}
|
||
|
||
|
||
@router.get("/{incident_id}")
async def get_incident_api(incident_id: UUID, pool: asyncpg.Pool | None = Depends(get_pool)):
    """Fetch a single incident by id.

    Responds with 503 when the database is disabled and 404 when no row
    matches ``incident_id``.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")

    select_sql = """
        SELECT id, title, status, severity, source, ingress_event_id, created_at, updated_at,
               grafana_org_slug, service_name
        FROM incidents WHERE id = $1::uuid
        """
    async with pool.acquire() as conn:
        record = await conn.fetchrow(select_sql, incident_id)

    if record:
        return _incident_row_dict(record)
    raise HTTPException(status_code=404, detail="not found")
|
||
|
||
|
||
@router.patch("/{incident_id}")
async def patch_incident_api(
    incident_id: UUID,
    body: IncidentPatch,
    pool: asyncpg.Pool | None = Depends(get_pool),
):
    """Partially update an incident's title, status and/or severity.

    Fields left as ``None`` keep their stored value (via ``COALESCE``);
    ``updated_at`` is always set to ``now()``.

    Raises:
        HTTPException: 503 when the database is disabled; 400 when no field
            is supplied; 422 when the supplied title is only whitespace;
            404 when the incident does not exist.

    Returns:
        The updated incident as a JSON-serialisable dict.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")
    if body.title is None and body.status is None and body.severity is None:
        raise HTTPException(status_code=400, detail="no fields to update")

    # A whitespace-only title strips to "", which COALESCE treats as a real
    # value and would overwrite the stored title with an empty string.
    new_title = body.title.strip() if body.title is not None else None
    if new_title == "":
        raise HTTPException(status_code=422, detail="title must not be blank")

    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            """
            UPDATE incidents SET
                title = COALESCE($2, title),
                status = COALESCE($3, status),
                severity = COALESCE($4, severity),
                updated_at = now()
            WHERE id = $1::uuid
            RETURNING id, title, status, severity, source, ingress_event_id, created_at, updated_at,
                      grafana_org_slug, service_name
            """,
            incident_id,
            new_title,
            body.status,
            body.severity,
        )
    if not row:
        raise HTTPException(status_code=404, detail="not found")
    return _incident_row_dict(row)
|
||
|
||
|
||
def _title_cell(raw: object) -> str:
    """Return HTML-escaped title text, or an em dash when it is missing or blank."""
    text = "" if raw is None else str(raw).strip()
    if not text:
        text = "—"
    return html.escape(text)
|
||
|
||
|
||
@ui_router.get("/", response_class=HTMLResponse)
async def incidents_ui_home(request: Request):
    """Render the HTML page listing the 100 most recently created incidents.

    When the pool is missing or the query/rendering fails, the page is still
    returned with an error paragraph above the (possibly partial) table.
    """
    pool = get_pool(request)
    row_chunks: list[str] = []
    err = ""
    if pool is None:
        err = "<p>База данных не настроена.</p>"
    else:
        try:
            async with pool.acquire() as conn:
                rows = await conn.fetch(
                    """
                    SELECT id, title, status, severity, source, created_at, grafana_org_slug, service_name
                    FROM incidents
                    ORDER BY created_at DESC
                    LIMIT 100
                    """
                )
            for r in rows:
                iid_s = str(r["id"])
                org = html.escape(str(r["grafana_org_slug"] or "—"))
                svc = html.escape(str(r["service_name"] or "—"))
                ca = r["created_at"].isoformat() if r["created_at"] else "—"
                # Collect fragments and join once instead of repeated `+=`.
                row_chunks.append(
                    "<tr>"
                    f"<td><a href=\"/ui/modules/incidents/{html.escape(iid_s, quote=True)}\">"
                    f"{html.escape(iid_s[:8])}…</a></td>"
                    f"<td>{_title_cell(r['title'])}</td>"
                    f"<td>{html.escape(r['status'])}</td>"
                    f"<td>{html.escape(r['severity'])}</td>"
                    f"<td>{html.escape(r['source'])}</td>"
                    f"<td>{org}</td>"
                    f"<td>{svc}</td>"
                    f"<td>{html.escape(ca)}</td>"
                    "</tr>"
                )
        except Exception as e:
            # Previously the failure was only visible in the rendered page;
            # log it so it is also visible in the server logs.
            log.exception("incidents: failed to build the incident list page")
            err = f"<p class=\"module-err\">{html.escape(str(e))}</p>"
    rows_html = "".join(row_chunks)
    inner = f"""<h1>Инциденты</h1>
{err}
<table class="irm-table">
<thead><tr><th>ID</th><th>Заголовок</th><th>Статус</th><th>Важность</th><th>Источник</th><th>Grafana slug</th><th>Сервис</th><th>Создан</th></tr></thead>
<tbody>{rows_html or '<tr><td colspan="8">Пока нет записей</td></tr>'}</tbody>
</table>
<p><small>Сначала вебхук создаёт <a href="/ui/modules/alerts/">алерт</a> (учёт, Ack/Resolve). Инцидент — отдельная сущность: создаётся вручную или из карточки алерта, к нему можно привязать один или несколько алертов. Пустой заголовок в списке — часто тестовый JSON без полей правила.</small></p>"""
    return HTMLResponse(
        wrap_module_html_page(
            document_title="Инциденты — onGuard24",
            current_slug="incidents",
            main_inner_html=inner,
        )
    )
|
||
|
||
|
||
def _format_ingress_body(body_obj: object) -> str:
    """Render a stored webhook body as indented JSON text where possible.

    Strings are parsed as JSON first, because the driver may hand json/jsonb
    columns back as text. Anything that is not a JSON object/array, or that
    cannot be serialised, is returned as plain text.
    """
    if isinstance(body_obj, str):
        try:
            parsed = json.loads(body_obj)
        except ValueError:
            return body_obj
        if not isinstance(parsed, (dict, list)):
            return body_obj
        candidate: object = parsed
    elif hasattr(body_obj, "keys"):
        candidate = dict(body_obj)
    elif isinstance(body_obj, (list, tuple)):
        candidate = list(body_obj)
    else:
        return str(body_obj)
    try:
        return json.dumps(candidate, ensure_ascii=False, indent=2)
    except (TypeError, ValueError):
        return str(body_obj)


@ui_router.get("/{incident_id:uuid}", response_class=HTMLResponse)
async def incident_detail_ui(request: Request, incident_id: UUID):
    """Render the HTML detail page for one incident.

    Shows the incident's fields and, when it references an ingress event,
    the stored webhook body (truncated to 12000 characters). Database and
    rendering errors are logged and shown in place of the content.
    """

    def _page(inner_html: str) -> HTMLResponse:
        # Every outcome of this view is wrapped in the same page shell.
        return HTMLResponse(
            wrap_module_html_page(
                document_title="Инцидент — onGuard24",
                current_slug="incidents",
                main_inner_html=inner_html,
            )
        )

    pool = get_pool(request)
    if pool is None:
        body = "<p>База данных не настроена.</p>"
        return _page(f"<h1>Инцидент</h1>{body}")

    try:
        async with pool.acquire() as conn:
            row = await conn.fetchrow(
                """
                SELECT id, title, status, severity, source, ingress_event_id, created_at, updated_at,
                       grafana_org_slug, service_name
                FROM incidents WHERE id = $1::uuid
                """,
                incident_id,
            )
            raw_row = None
            if row and row.get("ingress_event_id"):
                raw_row = await conn.fetchrow(
                    """
                    SELECT id, source, received_at, body, org_slug, service_name
                    FROM ingress_events WHERE id = $1::uuid
                    """,
                    row["ingress_event_id"],
                )
    except Exception as e:
        log.exception("incidents: failed to load incident %s for the UI", incident_id)
        return _page(f"<h1>Инцидент</h1><p class='module-err'>{html.escape(str(e))}</p>")

    if not row:
        body = "<p>Запись не найдена.</p>"
    else:
        title = _title_cell(row["title"])
        ing = row["ingress_event_id"]
        ing_l = html.escape(str(ing)) if ing else "—"
        meta = f"""<dl style="display:grid;grid-template-columns:10rem 1fr;gap:0.35rem 1rem;font-size:0.9rem">
<dt>ID</dt><dd><code>{html.escape(str(row['id']))}</code></dd>
<dt>Заголовок</dt><dd>{title}</dd>
<dt>Статус</dt><dd>{html.escape(row['status'])}</dd>
<dt>Важность</dt><dd>{html.escape(row['severity'])}</dd>
<dt>Источник</dt><dd>{html.escape(row['source'])}</dd>
<dt>Grafana slug</dt><dd>{html.escape(str(row['grafana_org_slug'] or '—'))}</dd>
<dt>Сервис</dt><dd>{html.escape(str(row['service_name'] or '—'))}</dd>
<dt>Создан</dt><dd>{html.escape(row['created_at'].isoformat() if row['created_at'] else '—')}</dd>
<dt>Обновлён</dt><dd>{html.escape(row['updated_at'].isoformat() if row.get('updated_at') else '—')}</dd>
<dt>ingress_event_id</dt><dd><code>{ing_l}</code></dd>
</dl>"""
        raw_block = ""
        if raw_row:
            try:
                pretty = _format_ingress_body(raw_row["body"])
                # Cap the rendered payload so a huge webhook cannot bloat the page.
                if len(pretty) > 12000:
                    pretty = pretty[:12000] + "\n…"
                raw_block = (
                    "<h2 style='font-size:1.05rem;margin-top:1.25rem'>Сырой JSON вебхука</h2>"
                    f"<p class='gc-muted'>ingress_events · {html.escape(str(raw_row['received_at']))}</p>"
                    f"<pre style='overflow:auto;max-height:28rem;font-size:0.78rem;background:#18181b;color:#e4e4e7;"
                    f"padding:0.75rem;border-radius:8px'>{html.escape(pretty)}</pre>"
                )
            except Exception as ex:
                log.exception("incidents: failed to render the raw webhook body for %s", incident_id)
                raw_block = f"<p class='module-err'>Не удалось показать JSON: {html.escape(str(ex))}</p>"
        body = (
            f"<p><a href=\"/ui/modules/incidents/\">← К списку</a></p><h1>Инцидент</h1>{meta}{raw_block}"
        )
    return _page(body)
|