Files
onGuard24/onguard24/modules/incidents.py

426 lines
17 KiB
Python
Raw Permalink Normal View History

"""IRM: инциденты — учёт сбоев, связь с сырым ingress и событием alert.received."""
from __future__ import annotations
import html
import json
import logging
from uuid import UUID
import asyncpg
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import HTMLResponse
from pydantic import BaseModel, Field
from onguard24.config import get_settings
from onguard24.deps import get_pool
from onguard24.domain.events import AlertReceived, DomainEvent, EventBus
from onguard24.modules.ui_support import wrap_module_html_page
log = logging.getLogger(__name__)
router = APIRouter(tags=["module-incidents"])
ui_router = APIRouter(tags=["web-incidents"], include_in_schema=False)
class IncidentCreate(BaseModel):
title: str = Field(..., min_length=1, max_length=500)
status: str = Field(default="open", max_length=64)
severity: str = Field(default="warning", max_length=32)
alert_ids: list[UUID] = Field(default_factory=list, description="Привязка к irm_alerts")
class IncidentPatch(BaseModel):
title: str | None = Field(default=None, min_length=1, max_length=500)
status: str | None = Field(default=None, max_length=64)
severity: str | None = Field(default=None, max_length=32)
def register_events(bus: EventBus, pool: asyncpg.Pool | None = None) -> None:
if pool is None:
return
async def on_alert(ev: DomainEvent) -> None:
if not get_settings().auto_incident_from_alert:
return
if not isinstance(ev, AlertReceived) or ev.raw_payload_ref is None:
return
a = ev.alert
title = (a.title if a else "Алерт без названия")[:500]
sev = (a.severity.value if a else "warning")
try:
async with pool.acquire() as conn:
await conn.execute(
"""
INSERT INTO incidents (
title, status, severity, source, ingress_event_id,
grafana_org_slug, service_name
)
VALUES ($1, 'open', $2, 'grafana', $3::uuid, $4, $5)
""",
title,
sev,
ev.raw_payload_ref,
ev.grafana_org_slug,
ev.service_name,
)
except Exception:
log.exception("incidents: не удалось создать инцидент из alert.received")
bus.subscribe("alert.received", on_alert)
async def render_home_fragment(request: Request) -> str:
pool = get_pool(request)
if pool is None:
return '<p class="module-note">Нужна БД для списка инцидентов.</p>'
try:
async with pool.acquire() as conn:
n = await conn.fetchval("SELECT count(*)::int FROM incidents")
except Exception:
return '<p class="module-note">Таблица инцидентов недоступна (миграции?).</p>'
return f'<div class="module-fragment"><p>Инцидентов в учёте: <strong>{int(n)}</strong></p></div>'
@router.get("/")
async def list_incidents_api(
pool: asyncpg.Pool | None = Depends(get_pool),
limit: int = 50,
grafana_org_slug: str | None = None,
service_name: str | None = None,
):
if pool is None:
return {"items": [], "database": "disabled"}
limit = min(max(limit, 1), 200)
conditions: list[str] = []
args: list = []
if grafana_org_slug and grafana_org_slug.strip():
args.append(grafana_org_slug.strip())
conditions.append(f"grafana_org_slug = ${len(args)}")
if service_name and service_name.strip():
args.append(service_name.strip())
conditions.append(f"service_name = ${len(args)}")
where_sql = ("WHERE " + " AND ".join(conditions)) if conditions else ""
args.append(limit)
lim_ph = f"${len(args)}"
q = f"""
SELECT id, title, status, severity, source, ingress_event_id, created_at, updated_at,
grafana_org_slug, service_name
FROM incidents
{where_sql}
ORDER BY created_at DESC
LIMIT {lim_ph}
"""
async with pool.acquire() as conn:
rows = await conn.fetch(q, *args)
items = []
for r in rows:
items.append(
{
"id": str(r["id"]),
"title": r["title"],
"status": r["status"],
"severity": r["severity"],
"source": r["source"],
"ingress_event_id": str(r["ingress_event_id"]) if r["ingress_event_id"] else None,
"created_at": r["created_at"].isoformat() if r["created_at"] else None,
"updated_at": r["updated_at"].isoformat() if r.get("updated_at") else None,
"grafana_org_slug": r.get("grafana_org_slug"),
"service_name": r.get("service_name"),
}
)
return {"items": items}
@router.post("/", status_code=201)
async def create_incident_api(
body: IncidentCreate,
pool: asyncpg.Pool | None = Depends(get_pool),
):
if pool is None:
raise HTTPException(status_code=503, detail="database disabled")
async with pool.acquire() as conn:
async with conn.transaction():
row = await conn.fetchrow(
"""
INSERT INTO incidents (title, status, severity, source, grafana_org_slug, service_name)
VALUES ($1, $2, $3, 'manual', NULL, NULL)
RETURNING id, title, status, severity, source, ingress_event_id, created_at, updated_at,
grafana_org_slug, service_name
""",
body.title.strip(),
body.status,
body.severity,
)
iid = row["id"]
for aid in body.alert_ids[:50]:
await conn.execute(
"""
INSERT INTO incident_alert_links (incident_id, alert_id)
VALUES ($1::uuid, $2::uuid)
ON CONFLICT DO NOTHING
""",
iid,
aid,
)
return {
"id": str(row["id"]),
"title": row["title"],
"status": row["status"],
"severity": row["severity"],
"source": row["source"],
"ingress_event_id": None,
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
"updated_at": row["updated_at"].isoformat() if row.get("updated_at") else None,
"grafana_org_slug": row.get("grafana_org_slug"),
"service_name": row.get("service_name"),
}
def _incident_row_dict(row) -> dict:
return {
"id": str(row["id"]),
"title": row["title"],
"status": row["status"],
"severity": row["severity"],
"source": row["source"],
"ingress_event_id": str(row["ingress_event_id"]) if row["ingress_event_id"] else None,
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
"updated_at": row["updated_at"].isoformat() if row.get("updated_at") else None,
"grafana_org_slug": row.get("grafana_org_slug"),
"service_name": row.get("service_name"),
}
@router.get("/{incident_id}/tasks")
async def list_incident_tasks_api(
incident_id: UUID,
pool: asyncpg.Pool | None = Depends(get_pool),
limit: int = 100,
):
if pool is None:
raise HTTPException(status_code=503, detail="database disabled")
limit = min(max(limit, 1), 200)
async with pool.acquire() as conn:
exists = await conn.fetchval("SELECT 1 FROM incidents WHERE id = $1::uuid", incident_id)
if not exists:
raise HTTPException(status_code=404, detail="incident not found")
rows = await conn.fetch(
"""
SELECT id, incident_id, title, status, created_at
FROM tasks WHERE incident_id = $1::uuid
ORDER BY created_at DESC LIMIT $2
""",
incident_id,
limit,
)
items = []
for r in rows:
items.append(
{
"id": str(r["id"]),
"incident_id": str(r["incident_id"]) if r["incident_id"] else None,
"title": r["title"],
"status": r["status"],
"created_at": r["created_at"].isoformat() if r["created_at"] else None,
}
)
return {"incident_id": str(incident_id), "items": items}
@router.get("/{incident_id}")
async def get_incident_api(incident_id: UUID, pool: asyncpg.Pool | None = Depends(get_pool)):
if pool is None:
raise HTTPException(status_code=503, detail="database disabled")
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
SELECT id, title, status, severity, source, ingress_event_id, created_at, updated_at,
grafana_org_slug, service_name
FROM incidents WHERE id = $1::uuid
""",
incident_id,
)
if not row:
raise HTTPException(status_code=404, detail="not found")
return _incident_row_dict(row)
@router.patch("/{incident_id}")
async def patch_incident_api(
incident_id: UUID,
body: IncidentPatch,
pool: asyncpg.Pool | None = Depends(get_pool),
):
if pool is None:
raise HTTPException(status_code=503, detail="database disabled")
if body.title is None and body.status is None and body.severity is None:
raise HTTPException(status_code=400, detail="no fields to update")
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
UPDATE incidents SET
title = COALESCE($2, title),
status = COALESCE($3, status),
severity = COALESCE($4, severity),
updated_at = now()
WHERE id = $1::uuid
RETURNING id, title, status, severity, source, ingress_event_id, created_at, updated_at,
grafana_org_slug, service_name
""",
incident_id,
body.title.strip() if body.title is not None else None,
body.status,
body.severity,
)
if not row:
raise HTTPException(status_code=404, detail="not found")
return _incident_row_dict(row)
def _title_cell(raw: object) -> str:
t = (str(raw).strip() if raw is not None else "") or ""
return html.escape(t)
@ui_router.get("/", response_class=HTMLResponse)
async def incidents_ui_home(request: Request):
pool = get_pool(request)
rows_html = ""
err = ""
if pool is None:
err = "<p>База данных не настроена.</p>"
else:
try:
async with pool.acquire() as conn:
rows = await conn.fetch(
"""
SELECT id, title, status, severity, source, created_at, grafana_org_slug, service_name
FROM incidents
ORDER BY created_at DESC
LIMIT 100
"""
)
for r in rows:
iid = r["id"]
iid_s = str(iid)
org = html.escape(str(r["grafana_org_slug"] or ""))
svc = html.escape(str(r["service_name"] or ""))
ca = r["created_at"].isoformat() if r["created_at"] else ""
rows_html += (
"<tr>"
f"<td><a href=\"/ui/modules/incidents/{html.escape(iid_s, quote=True)}\">"
f"{html.escape(iid_s[:8])}…</a></td>"
f"<td>{_title_cell(r['title'])}</td>"
f"<td>{html.escape(r['status'])}</td>"
f"<td>{html.escape(r['severity'])}</td>"
f"<td>{html.escape(r['source'])}</td>"
f"<td>{org}</td>"
f"<td>{svc}</td>"
f"<td>{html.escape(ca)}</td>"
"</tr>"
)
except Exception as e:
err = f"<p class=\"module-err\">{html.escape(str(e))}</p>"
inner = f"""<h1>Инциденты</h1>
{err}
<table class="irm-table">
<thead><tr><th>ID</th><th>Заголовок</th><th>Статус</th><th>Важность</th><th>Источник</th><th>Grafana slug</th><th>Сервис</th><th>Создан</th></tr></thead>
<tbody>{rows_html or '<tr><td colspan="8">Пока нет записей</td></tr>'}</tbody>
</table>
<p><small>Сначала вебхук создаёт <a href="/ui/modules/alerts/">алерт</a> (учёт, Ack/Resolve). Инцидент отдельная сущность: создаётся вручную или из карточки алерта, к нему можно привязать один или несколько алертов. Пустой заголовок в списке часто тестовый JSON без полей правила.</small></p>"""
return HTMLResponse(
wrap_module_html_page(
document_title="Инциденты — onGuard24",
current_slug="incidents",
main_inner_html=inner,
)
)
@ui_router.get("/{incident_id:uuid}", response_class=HTMLResponse)
async def incident_detail_ui(request: Request, incident_id: UUID):
pool = get_pool(request)
if pool is None:
body = "<p>База данных не настроена.</p>"
return HTMLResponse(
wrap_module_html_page(
document_title="Инцидент — onGuard24",
current_slug="incidents",
main_inner_html=f"<h1>Инцидент</h1>{body}",
)
)
try:
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
SELECT id, title, status, severity, source, ingress_event_id, created_at, updated_at,
grafana_org_slug, service_name
FROM incidents WHERE id = $1::uuid
""",
incident_id,
)
raw_row = None
if row and row.get("ingress_event_id"):
raw_row = await conn.fetchrow(
"""
SELECT id, source, received_at, body, org_slug, service_name
FROM ingress_events WHERE id = $1::uuid
""",
row["ingress_event_id"],
)
except Exception as e:
return HTMLResponse(
wrap_module_html_page(
document_title="Инцидент — onGuard24",
current_slug="incidents",
main_inner_html=f"<h1>Инцидент</h1><p class='module-err'>{html.escape(str(e))}</p>",
)
)
if not row:
body = "<p>Запись не найдена.</p>"
else:
title = _title_cell(row["title"])
ing = row["ingress_event_id"]
ing_l = html.escape(str(ing)) if ing else ""
meta = f"""<dl style="display:grid;grid-template-columns:10rem 1fr;gap:0.35rem 1rem;font-size:0.9rem">
<dt>ID</dt><dd><code>{html.escape(str(row['id']))}</code></dd>
<dt>Заголовок</dt><dd>{title}</dd>
<dt>Статус</dt><dd>{html.escape(row['status'])}</dd>
<dt>Важность</dt><dd>{html.escape(row['severity'])}</dd>
<dt>Источник</dt><dd>{html.escape(row['source'])}</dd>
<dt>Grafana slug</dt><dd>{html.escape(str(row['grafana_org_slug'] or ''))}</dd>
<dt>Сервис</dt><dd>{html.escape(str(row['service_name'] or ''))}</dd>
<dt>Создан</dt><dd>{html.escape(row['created_at'].isoformat() if row['created_at'] else '')}</dd>
<dt>Обновлён</dt><dd>{html.escape(row['updated_at'].isoformat() if row.get('updated_at') else '')}</dd>
<dt>ingress_event_id</dt><dd><code>{ing_l}</code></dd>
</dl>"""
raw_block = ""
if raw_row:
try:
body_obj = raw_row["body"]
if hasattr(body_obj, "keys"):
pretty = json.dumps(dict(body_obj), ensure_ascii=False, indent=2)
else:
pretty = str(body_obj)
if len(pretty) > 12000:
pretty = pretty[:12000] + "\n"
raw_block = (
"<h2 style='font-size:1.05rem;margin-top:1.25rem'>Сырой JSON вебхука</h2>"
f"<p class='gc-muted'>ingress_events · {html.escape(str(raw_row['received_at']))}</p>"
f"<pre style='overflow:auto;max-height:28rem;font-size:0.78rem;background:#18181b;color:#e4e4e7;"
f"padding:0.75rem;border-radius:8px'>{html.escape(pretty)}</pre>"
)
except Exception as ex:
raw_block = f"<p class='module-err'>Не удалось показать JSON: {html.escape(str(ex))}</p>"
body = (
f"<p><a href=\"/ui/modules/incidents/\">← К списку</a></p><h1>Инцидент</h1>{meta}{raw_block}"
)
return HTMLResponse(
wrap_module_html_page(
document_title="Инцидент — onGuard24",
current_slug="incidents",
main_inner_html=body,
)
)