v1.5.0: IRM — инциденты, задачи, эскалации

- docs/IRM.md; Alembic 002: incidents, tasks, escalation_policies
- Модули incidents/tasks/escalations: API, UI, register_events(bus, pool)
- Авто-инцидент из alert.received; тесты test_irm_modules.py

Made-with: Cursor
This commit is contained in:
Alexandr
2026-04-03 09:03:16 +03:00
parent 0787745098
commit 89b5983526
20 changed files with 772 additions and 16 deletions

View File

@ -0,0 +1,200 @@
"""IRM: инциденты — учёт сбоев, связь с сырым ingress и событием alert.received."""
from __future__ import annotations
import html
import logging
from uuid import UUID
import asyncpg
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import HTMLResponse
from pydantic import BaseModel, Field
from onguard24.deps import get_pool
from onguard24.domain.events import AlertReceived, DomainEvent, EventBus
from onguard24.modules.ui_support import wrap_module_html_page
log = logging.getLogger(__name__)
router = APIRouter(tags=["module-incidents"])
ui_router = APIRouter(tags=["web-incidents"], include_in_schema=False)
class IncidentCreate(BaseModel):
title: str = Field(..., min_length=1, max_length=500)
status: str = Field(default="open", max_length=64)
severity: str = Field(default="warning", max_length=32)
def register_events(bus: EventBus, pool: asyncpg.Pool | None = None) -> None:
if pool is None:
return
async def on_alert(ev: DomainEvent) -> None:
if not isinstance(ev, AlertReceived) or ev.raw_payload_ref is None:
return
a = ev.alert
title = (a.title if a else "Алерт без названия")[:500]
sev = (a.severity.value if a else "warning")
try:
async with pool.acquire() as conn:
await conn.execute(
"""
INSERT INTO incidents (title, status, severity, source, ingress_event_id)
VALUES ($1, 'open', $2, 'grafana', $3::uuid)
""",
title,
sev,
ev.raw_payload_ref,
)
except Exception:
log.exception("incidents: не удалось создать инцидент из alert.received")
bus.subscribe("alert.received", on_alert)
async def render_home_fragment(request: Request) -> str:
pool = get_pool(request)
if pool is None:
return '<p class="module-note">Нужна БД для списка инцидентов.</p>'
try:
async with pool.acquire() as conn:
n = await conn.fetchval("SELECT count(*)::int FROM incidents")
except Exception:
return '<p class="module-note">Таблица инцидентов недоступна (миграции?).</p>'
return f'<div class="module-fragment"><p>Инцидентов в учёте: <strong>{int(n)}</strong></p></div>'
@router.get("/")
async def list_incidents_api(
pool: asyncpg.Pool | None = Depends(get_pool),
limit: int = 50,
):
if pool is None:
return {"items": [], "database": "disabled"}
limit = min(max(limit, 1), 200)
async with pool.acquire() as conn:
rows = await conn.fetch(
"""
SELECT id, title, status, severity, source, ingress_event_id, created_at
FROM incidents
ORDER BY created_at DESC
LIMIT $1
""",
limit,
)
items = []
for r in rows:
items.append(
{
"id": str(r["id"]),
"title": r["title"],
"status": r["status"],
"severity": r["severity"],
"source": r["source"],
"ingress_event_id": str(r["ingress_event_id"]) if r["ingress_event_id"] else None,
"created_at": r["created_at"].isoformat() if r["created_at"] else None,
}
)
return {"items": items}
@router.post("/", status_code=201)
async def create_incident_api(
body: IncidentCreate,
pool: asyncpg.Pool | None = Depends(get_pool),
):
if pool is None:
raise HTTPException(status_code=503, detail="database disabled")
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO incidents (title, status, severity, source)
VALUES ($1, $2, $3, 'manual')
RETURNING id, title, status, severity, source, ingress_event_id, created_at
""",
body.title.strip(),
body.status,
body.severity,
)
return {
"id": str(row["id"]),
"title": row["title"],
"status": row["status"],
"severity": row["severity"],
"source": row["source"],
"ingress_event_id": None,
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
}
@router.get("/{incident_id}")
async def get_incident_api(incident_id: UUID, pool: asyncpg.Pool | None = Depends(get_pool)):
if pool is None:
raise HTTPException(status_code=503, detail="database disabled")
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
SELECT id, title, status, severity, source, ingress_event_id, created_at
FROM incidents WHERE id = $1::uuid
""",
incident_id,
)
if not row:
raise HTTPException(status_code=404, detail="not found")
return {
"id": str(row["id"]),
"title": row["title"],
"status": row["status"],
"severity": row["severity"],
"source": row["source"],
"ingress_event_id": str(row["ingress_event_id"]) if row["ingress_event_id"] else None,
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
}
@ui_router.get("/", response_class=HTMLResponse)
async def incidents_ui_home(request: Request):
pool = get_pool(request)
rows_html = ""
err = ""
if pool is None:
err = "<p>База данных не настроена.</p>"
else:
try:
async with pool.acquire() as conn:
rows = await conn.fetch(
"""
SELECT id, title, status, severity, source, created_at
FROM incidents
ORDER BY created_at DESC
LIMIT 100
"""
)
for r in rows:
rows_html += (
"<tr>"
f"<td>{html.escape(str(r['id']))[:8]}…</td>"
f"<td>{html.escape(r['title'])}</td>"
f"<td>{html.escape(r['status'])}</td>"
f"<td>{html.escape(r['severity'])}</td>"
f"<td>{html.escape(r['source'])}</td>"
"</tr>"
)
except Exception as e:
err = f"<p class=\"module-err\">{html.escape(str(e))}</p>"
inner = f"""<h1>Инциденты</h1>
{err}
<table class="irm-table">
<thead><tr><th>ID</th><th>Заголовок</th><th>Статус</th><th>Важность</th><th>Источник</th></tr></thead>
<tbody>{rows_html or '<tr><td colspan="5">Пока нет записей</td></tr>'}</tbody>
</table>
<p><small>Создание из Grafana: webhook → запись в <code>ingress_events</code> → событие → строка здесь.</small></p>"""
return HTMLResponse(
wrap_module_html_page(
document_title="Инциденты — onGuard24",
current_slug="incidents",
main_inner_html=inner,
)
)