Files
onGuard24/onguard24/modules/alerts.py
Alexandr 18ba48e8d0
Some checks failed
CI / test (push) Successful in 43s
Deploy / deploy (push) Failing after 17s
release: v1.10.0 — модуль команд (teams), team_id на алертах
- Alembic 006: teams, team_label_rules, irm_alerts.team_id
- Вебхук: сопоставление команды по правилам лейблов (priority)
- API/UI Команды; алерты: JOIN team, фильтр team_id
- Тесты test_team_match, test_teams_api; обновлён test_root_ui

Made-with: Cursor
2026-04-03 15:34:46 +03:00

458 lines
19 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Учёт входящих алертов (отдельно от инцидентов): firing → acknowledged → resolved."""
from __future__ import annotations
import html
import json
import logging
from uuid import UUID
import asyncpg
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import HTMLResponse
from pydantic import BaseModel, Field
from onguard24.deps import get_pool
from onguard24.domain.events import EventBus
from onguard24.modules.ui_support import wrap_module_html_page
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Router that carries the JSON API endpoints declared in this file.
router = APIRouter(tags=["module-alerts"])
# Router that carries the HTML pages; it is excluded from the OpenAPI schema.
ui_router = APIRouter(tags=["web-alerts"], include_in_schema=False)
# Values accepted by the `status` filter of the list endpoint.
_VALID_STATUS = frozenset({"firing", "acknowledged", "resolved", "silenced"})
def register_events(_bus: EventBus, _pool: asyncpg.Pool | None = None) -> None:
    """Event-registration hook of the alerts module.

    Both arguments are accepted and ignored: nothing is subscribed here.
    """
    return None
class AckBody(BaseModel):
    # Request body of the acknowledge endpoint.
    # `by_user` is optional and limited to 200 characters.
    by_user: str | None = Field(
        default=None,
        max_length=200,
        description="Кто подтвердил",
    )
class ResolveBody(BaseModel):
    # Request body of the resolve endpoint.
    # `by_user` is optional and limited to 200 characters.
    by_user: str | None = Field(
        default=None,
        max_length=200,
    )
def _row_to_item(r: asyncpg.Record) -> dict:
tid = r.get("team_id")
out = {
"id": str(r["id"]),
"ingress_event_id": str(r["ingress_event_id"]),
"status": r["status"],
"title": r["title"],
"severity": r["severity"],
"source": r["source"],
"grafana_org_slug": r["grafana_org_slug"],
"service_name": r["service_name"],
"labels": r["labels"] if isinstance(r["labels"], dict) else {},
"fingerprint": r["fingerprint"],
"team_id": str(tid) if tid is not None else None,
"team_slug": r.get("team_slug"),
"team_name": r.get("team_name"),
"acknowledged_at": r["acknowledged_at"].isoformat() if r["acknowledged_at"] else None,
"acknowledged_by": r["acknowledged_by"],
"resolved_at": r["resolved_at"].isoformat() if r["resolved_at"] else None,
"resolved_by": r["resolved_by"],
"created_at": r["created_at"].isoformat() if r["created_at"] else None,
"updated_at": r["updated_at"].isoformat() if r["updated_at"] else None,
}
return out
# Shared SELECT prefix: every irm_alerts column plus the slug and name of the
# joined team. The LEFT JOIN keeps alerts that have no team. Callers append
# their own WHERE / ORDER BY / LIMIT clauses.
_ALERT_SELECT = """
SELECT a.*, t.slug AS team_slug, t.name AS team_name
FROM irm_alerts a
LEFT JOIN teams t ON t.id = a.team_id
"""
async def _fetch_alert_with_team(conn: asyncpg.Connection, alert_id: UUID) -> asyncpg.Record | None:
    """Load a single alert by id together with its team slug and name.

    Args:
        conn: Connection the query is executed on.
        alert_id: Primary key of the alert.

    Returns:
        Whatever ``conn.fetchrow`` yields for the shared alert SELECT
        restricted to ``alert_id``.
    """
    query = f"{_ALERT_SELECT} WHERE a.id = $1::uuid"
    record = await conn.fetchrow(query, alert_id)
    return record
@router.get("/")
async def list_alerts_api(
    pool: asyncpg.Pool | None = Depends(get_pool),
    status: str | None = None,
    team_id: UUID | None = None,
    limit: int = 100,
):
    """List alerts, newest first, optionally filtered by status and/or team.

    Args:
        pool: Database pool, or ``None`` when the database is disabled.
        status: Optional status filter; trimmed and lower-cased before it is
            checked against ``_VALID_STATUS``.
        team_id: Optional team filter.
        limit: Requested page size; clamped to the range 1..200.

    Returns:
        ``{"items": [...]}`` with one dict per alert, or an empty list plus
        ``"database": "disabled"`` when there is no pool.

    Raises:
        HTTPException: 400 when ``status`` is not a known status.
    """
    if pool is None:
        return {"items": [], "database": "disabled"}
    limit = min(max(limit, 1), 200)
    st = (status or "").strip().lower()
    if st and st not in _VALID_STATUS:
        raise HTTPException(status_code=400, detail="invalid status filter")

    # Build the WHERE clause from whichever filters are present. Only fixed
    # SQL fragments and positional placeholders are interpolated; the values
    # themselves always travel as query parameters.
    conditions: list[str] = []
    params: list[object] = []
    if st:
        params.append(st)
        conditions.append(f"a.status = ${len(params)}")
    if team_id is not None:
        params.append(team_id)
        conditions.append(f"a.team_id = ${len(params)}::uuid")
    params.append(limit)
    where_sql = f"WHERE {' AND '.join(conditions)}" if conditions else ""
    query = f"""
        {_ALERT_SELECT}
        {where_sql}
        ORDER BY a.created_at DESC LIMIT ${len(params)}
    """

    async with pool.acquire() as conn:
        rows = await conn.fetch(query, *params)
    return {"items": [_row_to_item(r) for r in rows]}
@router.get("/{alert_id}")
async def get_alert_api(alert_id: UUID, pool: asyncpg.Pool | None = Depends(get_pool)):
    """Return one alert plus the ingress event it was created from.

    The response is the alert dict extended with ``raw_received_at`` and
    ``raw_body``; both are ``None`` when no ingress event row was found.

    Raises:
        HTTPException: 503 when there is no pool, 404 when the alert does
            not exist.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")

    async with pool.acquire() as conn:
        row = await _fetch_alert_with_team(conn, alert_id)
        if row and row.get("ingress_event_id"):
            raw = await conn.fetchrow(
                "SELECT id, body, received_at FROM ingress_events WHERE id = $1::uuid",
                row["ingress_event_id"],
            )
        else:
            raw = None

    if not row:
        raise HTTPException(status_code=404, detail="not found")

    out = _row_to_item(row)
    received_at = None
    payload = None
    if raw:
        if raw["received_at"]:
            received_at = raw["received_at"].isoformat()
        payload = raw["body"]
        # Mapping-like bodies are copied into a plain dict; anything else is
        # passed through unchanged.
        if hasattr(payload, "keys"):
            payload = dict(payload)
    out["raw_received_at"] = received_at
    out["raw_body"] = payload
    return out
@router.patch("/{alert_id}/acknowledge", status_code=200)
async def acknowledge_alert_api(
    alert_id: UUID,
    body: AckBody,
    pool: asyncpg.Pool | None = Depends(get_pool),
):
    """Move a firing alert to ``acknowledged`` and return the updated alert.

    Args:
        alert_id: Alert to acknowledge.
        body: Optional ``by_user``; a blank value leaves ``acknowledged_by``
            untouched.
        pool: Database pool, or ``None`` when the database is disabled.

    Raises:
        HTTPException: 503 when there is no pool; 409 when the alert is
            missing or not in the ``firing`` state; 404 when the alert can
            no longer be read back after the update.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")
    who = (body.by_user or "").strip() or None
    async with pool.acquire() as conn:
        # The status guard in WHERE makes the transition conditional: no row
        # comes back unless the alert exists and is currently firing.
        uid = await conn.fetchval(
            """
            UPDATE irm_alerts SET
            status = 'acknowledged',
            acknowledged_at = now(),
            acknowledged_by = COALESCE($2, acknowledged_by),
            updated_at = now()
            WHERE id = $1::uuid AND status = 'firing'
            RETURNING id
            """,
            alert_id,
            who,
        )
        if not uid:
            raise HTTPException(status_code=409, detail="alert not in firing state or not found")
        row = await _fetch_alert_with_team(conn, alert_id)
    if row is None:
        # Explicit check instead of `assert`, which is stripped under -O; the
        # row can disappear between the UPDATE and the re-read.
        raise HTTPException(status_code=404, detail="not found")
    return _row_to_item(row)
@router.patch("/{alert_id}/resolve", status_code=200)
async def resolve_alert_api(
    alert_id: UUID,
    body: ResolveBody,
    pool: asyncpg.Pool | None = Depends(get_pool),
):
    """Move a firing or acknowledged alert to ``resolved`` and return it.

    Args:
        alert_id: Alert to resolve.
        body: Optional ``by_user``; a blank value leaves ``resolved_by``
            untouched.
        pool: Database pool, or ``None`` when the database is disabled.

    Raises:
        HTTPException: 503 when there is no pool; 409 when the alert is
            missing or is neither ``firing`` nor ``acknowledged``; 404 when
            the alert can no longer be read back after the update.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")
    who = (body.by_user or "").strip() or None
    async with pool.acquire() as conn:
        # The status guard in WHERE makes the transition conditional: no row
        # comes back unless the alert exists and is in an allowed state.
        uid = await conn.fetchval(
            """
            UPDATE irm_alerts SET
            status = 'resolved',
            resolved_at = now(),
            resolved_by = COALESCE($2, resolved_by),
            updated_at = now()
            WHERE id = $1::uuid AND status IN ('firing', 'acknowledged')
            RETURNING id
            """,
            alert_id,
            who,
        )
        if not uid:
            raise HTTPException(
                status_code=409,
                detail="alert cannot be resolved from current state or not found",
            )
        row = await _fetch_alert_with_team(conn, alert_id)
    if row is None:
        # Explicit check instead of `assert`, which is stripped under -O; the
        # row can disappear between the UPDATE and the re-read.
        raise HTTPException(status_code=404, detail="not found")
    return _row_to_item(row)
# Inline <script> appended to the alert pages (despite the name it holds no
# CSS). It defines three browser-side helpers:
#   ogAck(aid)        - PATCH .../alerts/<aid>/acknowledge, reload on success
#   ogRes(aid)        - PATCH .../alerts/<aid>/resolve, reload on success
#   ogInc(aid, title) - prompt for a title, POST a new incident that references
#                       the alert, then navigate to the incident page
# On a failed request each helper shows the status code and the first 200
# characters of the response body in an alert() dialog.
_SYNC_BTN_STYLE = """
<script>
function ogAck(aid){fetch('/api/v1/modules/alerts/'+aid+'/acknowledge',{method:'PATCH',headers:{'Content-Type':'application/json'},body:JSON.stringify({})}).then(r=>{if(r.ok)location.reload();else r.text().then(t=>alert('Ошибка '+r.status+': '+t.slice(0,200)));});}
function ogRes(aid){fetch('/api/v1/modules/alerts/'+aid+'/resolve',{method:'PATCH',headers:{'Content-Type':'application/json'},body:JSON.stringify({})}).then(r=>{if(r.ok)location.reload();else r.text().then(t=>alert('Ошибка '+r.status+': '+t.slice(0,200)));});}
function ogInc(aid,title){var t=prompt('Заголовок инцидента',title||'');if(t===null)return;fetch('/api/v1/modules/incidents/',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({title:t,alert_ids:[aid]})}).then(r=>{if(r.ok)r.json().then(j=>location.href='/ui/modules/incidents/'+j.id);else r.text().then(x=>alert('Ошибка '+r.status+': '+x.slice(0,200)));});}
</script>
"""
def _alert_list_row_html(r: asyncpg.Record) -> str:
    """Render one alert record as a ``<tr>`` of the alerts list table."""
    aid = str(r["id"])
    team_slug = r.get("team_slug")
    team_name = r.get("team_name")
    team_id = r.get("team_id")
    team_cell = ""
    if team_id and team_slug:
        team_cell = (
            f"<a href=\"/ui/modules/teams/{html.escape(str(team_id), quote=True)}\">"
            f"{html.escape(team_slug)}</a>"
        )
    elif team_name:
        team_cell = html.escape(str(team_name))
    created = r["created_at"].isoformat() if r["created_at"] else ""
    return (
        "<tr>"
        # str(... or '') keeps a NULL column from breaking the whole page.
        f"<td>{html.escape(str(r['status'] or ''))}</td>"
        f"<td><a href=\"/ui/modules/alerts/{html.escape(aid, quote=True)}\">"
        f"{html.escape(aid[:8])}…</a></td>"
        f"<td>{html.escape((r['title'] or '')[:200])}</td>"
        f"<td>{html.escape(str(r['severity'] or ''))}</td>"
        f"<td>{team_cell}</td>"
        f"<td>{html.escape(str(r['grafana_org_slug'] or ''))}</td>"
        f"<td>{html.escape(str(r['service_name'] or ''))}</td>"
        f"<td>{html.escape(created)}</td>"
        "</tr>"
    )


def _team_filter_form_html(teams, selected: UUID | None) -> str:
    """Render the team ``<select>`` filter form, marking ``selected`` if given."""
    opts = ["<option value=''>Все команды</option>"]
    for t in teams:
        tid = str(t["id"])
        sel = " selected" if selected is not None and str(selected) == tid else ""
        opts.append(
            f"<option value='{html.escape(tid, quote=True)}'{sel}>"
            # Separator between slug and name; without it the two run together.
            f"{html.escape(t['slug'])} — {html.escape(t['name'])}</option>"
        )
    return (
        "<form method='get' class='og-filter-bar' style='margin-bottom:1rem'>"
        "<label>Команда <select name='team_id' onchange='this.form.submit()'>"
        + "".join(opts)
        + "</select></label></form>"
    )


@ui_router.get("/", response_class=HTMLResponse)
async def alerts_ui_list(request: Request):
    """Render the HTML list of the 150 most recent alerts.

    An optional ``team_id`` query parameter restricts the list to one team; a
    value that is not a valid UUID is ignored. The team filter form is shown
    whenever a filter is active, even if it matches no alerts, so the filter
    can always be cleared from the page. Database errors are logged and
    rendered as an error paragraph instead of propagating.
    """
    pool = get_pool(request)
    body = ""
    filter_tid: UUID | None = None
    raw_team = (request.query_params.get("team_id") or "").strip()
    if raw_team:
        try:
            filter_tid = UUID(raw_team)
        except ValueError:
            # Malformed id in the URL: fall back to the unfiltered list.
            filter_tid = None
    if pool is None:
        body = "<p>База не настроена.</p>"
    else:
        try:
            # One query serves both cases; only a fixed SQL fragment is
            # interpolated, the team id itself is passed as a parameter.
            where_sql = "WHERE a.team_id = $1::uuid" if filter_tid is not None else ""
            args = (filter_tid,) if filter_tid is not None else ()
            async with pool.acquire() as conn:
                teams_opts = await conn.fetch(
                    "SELECT id, slug, name FROM teams ORDER BY name"
                )
                rows = await conn.fetch(
                    f"""
                    SELECT a.id, a.status, a.title, a.severity, a.grafana_org_slug,
                    a.service_name, a.created_at, a.fingerprint,
                    t.slug AS team_slug, t.name AS team_name, a.team_id
                    FROM irm_alerts a
                    LEFT JOIN teams t ON t.id = a.team_id
                    {where_sql}
                    ORDER BY a.created_at DESC
                    LIMIT 150
                    """,
                    *args,
                )
            if not rows and filter_tid is None:
                body = "<p>Пока нет алертов. События появляются после вебхука Grafana.</p>"
            else:
                if rows:
                    listing = (
                        "<table class='irm-table'><thead><tr><th>Статус</th><th>ID</th><th>Заголовок</th>"
                        "<th>Важность</th><th>Команда</th><th>Grafana slug</th><th>Сервис</th><th>Создан</th></tr></thead><tbody>"
                        + "".join(_alert_list_row_html(r) for r in rows)
                        + "</tbody></table>"
                    )
                else:
                    # Filter active but nothing matched: keep the form visible.
                    listing = "<p>Для выбранной команды алертов нет.</p>"
                body = (
                    "<p class='gc-muted'>Алерт — запись о входящем уведомлении. "
                    "<strong>Инцидент</strong> создаётся вручную (из карточки алерта или раздела «Инциденты») "
                    "и может ссылаться на один или несколько алертов. Команда назначается по "
                    "<a href=\"/ui/modules/teams/\">правилам лейблов</a>.</p>"
                    + _team_filter_form_html(teams_opts, filter_tid)
                    + listing
                )
        except Exception as e:
            # UI boundary: show the error on the page, but record it as well.
            log.exception("alerts UI: failed to render the alert list")
            body = f"<p class='module-err'>{html.escape(str(e))}</p>"
    page = f"<h1>Алерты</h1>{body}{_SYNC_BTN_STYLE}"
    return HTMLResponse(
        wrap_module_html_page(
            document_title="Алерты — onGuard24",
            current_slug="alerts",
            main_inner_html=page,
        )
    )
@ui_router.get("/{alert_id:uuid}", response_class=HTMLResponse)
async def alerts_ui_detail(request: Request, alert_id: UUID):
    """Render the HTML card of a single alert.

    The card shows the alert fields, its labels, the stored webhook body (cut
    at 14000 characters) and action buttons that depend on the status:
    acknowledge while firing, resolve while firing or acknowledged, and
    "create incident" always. A missing alert renders a "not found"
    paragraph. Database errors are logged and rendered as an error paragraph.
    """
    pool = get_pool(request)
    if pool is None:
        return HTMLResponse(
            wrap_module_html_page(
                document_title="Алерт — onGuard24",
                current_slug="alerts",
                main_inner_html="<h1>Алерт</h1><p>База не настроена.</p>",
            )
        )
    try:
        async with pool.acquire() as conn:
            row = await _fetch_alert_with_team(conn, alert_id)
            raw = None
            if row and row.get("ingress_event_id"):
                raw = await conn.fetchrow(
                    "SELECT body, received_at FROM ingress_events WHERE id = $1::uuid",
                    row["ingress_event_id"],
                )
    except Exception as e:
        # UI boundary: show the error on the page, but record it as well.
        log.exception("alerts UI: failed to load alert %s", alert_id)
        return HTMLResponse(
            wrap_module_html_page(
                document_title="Алерт — onGuard24",
                current_slug="alerts",
                main_inner_html=f"<h1>Алерт</h1><p class='module-err'>{html.escape(str(e))}</p>",
            )
        )
    if not row:
        inner = "<p>Не найдено.</p>"
    else:
        aid = str(row["id"])
        aid_attr = html.escape(aid, quote=True)
        st = row["status"]
        # The JSON literal sits inside a double-quoted onclick attribute, so
        # it must be HTML-escaped: otherwise its first `"` ends the attribute,
        # which breaks the handler and lets a webhook-supplied title inject
        # markup. The browser decodes the entities before running the script.
        title_js = html.escape(json.dumps(row["title"] or ""), quote=True)
        btns = []
        if st == "firing":
            btns.append(
                f"<button type='button' class='og-btn og-btn-primary' "
                f"onclick=\"ogAck('{aid_attr}')\">Подтвердить (Ack)</button>"
            )
        if st in ("firing", "acknowledged"):
            btns.append(
                f"<button type='button' class='og-btn' "
                f"onclick=\"ogRes('{aid_attr}')\">Resolve</button>"
            )
        btns.append(
            f"<button type='button' class='og-btn' "
            f"onclick=\"ogInc('{aid_attr}',{title_js})\">"
            "Создать инцидент</button>"
        )
        lab = row["labels"]
        lab_s = json.dumps(dict(lab), ensure_ascii=False, indent=2) if isinstance(lab, dict) else "{}"
        raw_pre = ""
        if raw:
            b = raw["body"]
            pretty = json.dumps(dict(b), ensure_ascii=False, indent=2) if hasattr(b, "keys") else str(b)
            if len(pretty) > 14000:
                # Mark the cut so a truncated payload is not read as complete.
                pretty = pretty[:14000] + "\n…"
            raw_pre = (
                "<h2 style='font-size:1.05rem;margin-top:1rem'>Полное тело вебхука</h2>"
                f"<pre style='overflow:auto;max-height:26rem;font-size:0.78rem;"
                f"background:#18181b;color:#e4e4e7;padding:0.75rem;border-radius:8px'>"
                f"{html.escape(pretty)}</pre>"
            )
        team_dd = ""
        if row.get("team_id") and row.get("team_slug"):
            team_dd = (
                f"<dt>Команда</dt><dd><a href=\"/ui/modules/teams/{html.escape(str(row['team_id']), quote=True)}\">"
                f"{html.escape(row['team_slug'])}</a> ({html.escape(row.get('team_name') or '')})</dd>"
            )
        elif row.get("team_id"):
            team_dd = f"<dt>Команда</dt><dd><code>{html.escape(str(row['team_id']))}</code></dd>"
        inner = (
            f"<p><a href=\"/ui/modules/alerts/\">← К списку алертов</a></p>"
            f"<h1>Алерт</h1><div class='og-sync-bar'>{''.join(btns)}</div>"
            f"<dl style='display:grid;grid-template-columns:11rem 1fr;gap:0.35rem 1rem;font-size:0.9rem'>"
            f"<dt>ID</dt><dd><code>{html.escape(aid)}</code></dd>"
            # str(... or '') keeps a NULL column from raising outside the try.
            f"<dt>Статус</dt><dd>{html.escape(str(st or ''))}</dd>"
            f"<dt>Заголовок</dt><dd>{html.escape(row['title'] or '')}</dd>"
            f"<dt>Важность</dt><dd>{html.escape(str(row['severity'] or ''))}</dd>"
            f"<dt>Grafana slug</dt><dd>{html.escape(str(row['grafana_org_slug'] or ''))}</dd>"
            f"<dt>Сервис</dt><dd>{html.escape(str(row['service_name'] or ''))}</dd>"
            + team_dd
            + f"<dt>Fingerprint</dt><dd><code>{html.escape(str(row['fingerprint'] or ''))}</code></dd>"
            f"<dt>Labels</dt><dd><pre style='margin:0;font-size:0.8rem'>{html.escape(lab_s)}</pre></dd>"
            f"</dl>{raw_pre}"
        )
    page = f"{inner}{_SYNC_BTN_STYLE}"
    return HTMLResponse(
        wrap_module_html_page(
            document_title="Алерт — onGuard24",
            current_slug="alerts",
            main_inner_html=page,
        )
    )
async def render_home_fragment(request: Request) -> str:
    """Return the HTML snippet with alert counters for the home page.

    The snippet shows the total number of alerts and how many are firing. A
    note is returned instead when no pool is configured or when the counting
    queries fail; a failure is also logged with its traceback.
    """
    pool = get_pool(request)
    if pool is None:
        return '<p class="module-note">Нужна БД для учёта алертов.</p>'
    try:
        async with pool.acquire() as conn:
            n = await conn.fetchval("SELECT count(*)::int FROM irm_alerts")
            nf = await conn.fetchval(
                "SELECT count(*)::int FROM irm_alerts WHERE status = 'firing'"
            )
    except Exception:
        # Best-effort widget: keep the home page rendering, but log the real
        # cause, since the note below only guesses at a missing migration.
        log.warning("alerts home fragment: counting irm_alerts failed", exc_info=True)
        return '<p class="module-note">Таблица алертов недоступна (миграция 005?).</p>'
    return (
        f'<div class="module-fragment"><p>Алертов в учёте: <strong>{int(n)}</strong> '
        f'(<strong>{int(nf)}</strong> firing). '
        f'<a href="/ui/modules/alerts/">Открыть</a></p></div>'
    )