Release 1.7.0: Grafana catalog, ingress/IRM, tests
Some checks failed
CI / test (push) Successful in 57s
Deploy / deploy (push) Failing after 13s

This commit is contained in:
Alexandr
2026-04-03 13:53:19 +03:00
parent f275260b0d
commit 5788f995b9
29 changed files with 1956 additions and 67 deletions

View File

@ -1,3 +1,3 @@
"""onGuard24 — модульный монолит (ядро + модули)."""
__version__ = "1.6.0"
__version__ = "1.7.0"

View File

@ -19,6 +19,9 @@ class Settings(BaseSettings):
http_addr: str = Field(default="0.0.0.0:8080", validation_alias="HTTP_ADDR")
database_url: str = Field(default="", validation_alias="DATABASE_URL")
grafana_webhook_secret: str = Field(default="", validation_alias="GRAFANA_WEBHOOK_SECRET")
# JSON-массив: [{"slug":"adibrov","api_url":"https://...","api_token":"glsa_...","webhook_secret":"..."}]
# Пусто + задан GRAFANA_URL → один источник slug "default"
grafana_sources_json: str = Field(default="", validation_alias="GRAFANA_SOURCES_JSON")
# HTTP API (service account): Grafana → Administration → Service accounts → токен
grafana_url: str = Field(default="", validation_alias="GRAFANA_URL")
grafana_service_account_token: str = Field(

View File

@ -26,6 +26,8 @@ class AlertReceived(DomainEvent):
name: str = "alert.received"
alert: Alert | None = None
raw_payload_ref: UUID | None = None
grafana_org_slug: str | None = None
service_name: str | None = None
Handler = Callable[[DomainEvent], Awaitable[None]]
@ -59,6 +61,18 @@ class InMemoryEventBus:
for h in self._subs.get(event.name, []):
await h(event)
async def publish_alert_received(self, alert: Alert, raw_payload_ref: UUID | None = None) -> None:
ev = AlertReceived(alert=alert, raw_payload_ref=raw_payload_ref)
async def publish_alert_received(
self,
alert: Alert,
raw_payload_ref: UUID | None = None,
*,
grafana_org_slug: str | None = None,
service_name: str | None = None,
) -> None:
ev = AlertReceived(
alert=alert,
raw_payload_ref=raw_payload_ref,
grafana_org_slug=grafana_org_slug,
service_name=service_name,
)
await self.publish(ev)

View File

@ -0,0 +1,84 @@
"""Несколько инстансов Grafana: URL API, токен, секрет вебхука по slug (организация / стек)."""
from __future__ import annotations
import json
import re
from pydantic import BaseModel, Field, field_validator
_SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,62}$")
class GrafanaSourceEntry(BaseModel):
    """One Grafana instance used for the HTTP API and/or webhook ingestion.

    Entries are configured via GRAFANA_SOURCES_JSON; see the parse helpers
    in this module.
    """

    # Identifier used in the ingress URL: /ingress/grafana/{slug}
    slug: str = Field(..., description="Идентификатор в URL: /ingress/grafana/{slug}")
    # Base URL of the instance, stored without a trailing slash.
    api_url: str = Field(..., description="Базовый URL без завершающего слэша")
    # Service-account token for the HTTP API; empty = API access disabled.
    api_token: str = Field(default="", description="Service account token для HTTP API")
    webhook_secret: str = Field(
        default="",
        description="Если задан — только этот секрет для вебхука этого slug; иначе см. GRAFANA_WEBHOOK_SECRET",
    )

    @field_validator("slug")
    @classmethod
    def slug_ok(cls, v: str) -> str:
        """Normalize (trim + lowercase) and validate the slug against _SLUG_RE."""
        s = v.strip().lower()
        if not _SLUG_RE.match(s):
            # _SLUG_RE permits 1–63 chars total: first [a-z0-9], rest [a-z0-9_-].
            # Fix: the message previously read "длина 163" — the "1–63" range
            # marker had been garbled.
            raise ValueError(
                "slug: только a-z, цифры, - и _, длина 1–63, с буквы/цифры"
            )
        return s

    @field_validator("api_url")
    @classmethod
    def strip_url(cls, v: str) -> str:
        """Drop a trailing slash so callers can join paths with f-strings."""
        return v.rstrip("/")
def parse_grafana_sources_json(raw: str) -> list[GrafanaSourceEntry]:
    """Parse GRAFANA_SOURCES_JSON into validated source entries.

    Blank/whitespace input yields an empty list; a JSON document that is not
    an array is rejected with ValueError.
    """
    if not raw.strip():
        return []
    decoded = json.loads(raw)
    if not isinstance(decoded, list):
        raise ValueError("GRAFANA_SOURCES_JSON должен быть JSON-массивом объектов")
    entries: list[GrafanaSourceEntry] = []
    for item in decoded:
        entries.append(GrafanaSourceEntry.model_validate(item))
    return entries
def iter_grafana_sources(settings: "Settings") -> list[GrafanaSourceEntry]:
    """List configured sources.

    GRAFANA_SOURCES_JSON wins when it parses to a non-empty list; otherwise a
    single synthetic "default" entry is built from GRAFANA_URL; otherwise [].
    """
    try:
        configured = parse_grafana_sources_json(settings.grafana_sources_json)
    except (json.JSONDecodeError, ValueError):
        # Malformed config is treated as "not configured" rather than fatal.
        configured = []
    if configured:
        return configured
    base_url = settings.grafana_url.strip()
    if not base_url:
        return []
    default_entry = GrafanaSourceEntry(
        slug="default",
        api_url=base_url.rstrip("/"),
        api_token=settings.grafana_service_account_token.strip(),
        webhook_secret="",
    )
    return [default_entry]
def sources_by_slug(settings: "Settings") -> dict[str, GrafanaSourceEntry]:
    """Index the configured Grafana sources by their slug."""
    mapping: dict[str, GrafanaSourceEntry] = {}
    for src in iter_grafana_sources(settings):
        mapping[src.slug] = src
    return mapping
def effective_webhook_secret(settings: "Settings", source: GrafanaSourceEntry | None) -> str:
    """Secret a webhook must present: per-source override first, then the
    global GRAFANA_WEBHOOK_SECRET. Empty string = verification off (dev only)."""
    if source:
        per_source = source.webhook_secret.strip()
        if per_source:
            return per_source
    return settings.grafana_webhook_secret.strip()
def webhook_authorized(settings: "Settings", source: GrafanaSourceEntry | None, header: str | None) -> bool:
    """Check the X-OnGuard-Secret header against the effective secret.

    Returns True when no secret is configured (dev mode). The comparison is
    constant-time (hmac.compare_digest) so the secret cannot be recovered
    character-by-character via response-timing differences — this function
    handles untrusted input.
    """
    import hmac  # local import keeps the security fix self-contained

    need = effective_webhook_secret(settings, source)
    if not need:
        return True
    return hmac.compare_digest(header or "", need)

View File

@ -1,29 +1,92 @@
import json
import logging
import re
from datetime import datetime, timezone
from urllib.parse import urlparse
from fastapi import APIRouter, Depends, Header, HTTPException, Request
from starlette.responses import Response
from onguard24.domain.entities import Alert, Severity
from onguard24.grafana_sources import sources_by_slug, webhook_authorized
logger = logging.getLogger(__name__)
router = APIRouter(tags=["ingress"])
_PATH_SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9_-]{0,62}$")
def sanitize_source_key(raw: str) -> str:
    """Collapse arbitrary text into a lowercase key of [a-z0-9._-], max 200 chars.

    Runs of disallowed characters become a single dash; leading/trailing
    dashes are stripped. Returns "" when nothing usable remains.
    """
    collapsed = re.sub(r"[^a-zA-Z0-9._-]+", "-", raw.strip())
    key = collapsed.strip("-").lower()
    if not key:
        return ""
    return key[:200]
def extract_grafana_source_key(body: dict) -> str | None:
    """Derive an instance/org identifier from a Grafana webhook payload.

    Joins, when present: the externalURL hostname, the org id rendered as
    "o<ID>", and the first matching label from the first alert. Returns None
    when the payload carries none of these hints.
    """
    parts: list[str] = []

    external = body.get("externalURL") or body.get("external_url")
    if isinstance(external, str) and external.strip():
        try:
            hostname = urlparse(external.strip()).hostname
        except Exception:
            hostname = None
        if hostname:
            parts.append(hostname)

    for org_key in ("orgId", "org_id"):
        raw_org = body.get(org_key)
        if raw_org is not None and str(raw_org).strip():
            parts.append(f"o{str(raw_org).strip()[:24]}")
            break

    alerts = body.get("alerts")
    if isinstance(alerts, list) and alerts:
        first = alerts[0] if isinstance(alerts[0], dict) else {}
        labels = first.get("labels") if isinstance(first.get("labels"), dict) else {}
        for label_key in ("__org_id__", "grafana_org", "tenant", "cluster", "namespace"):
            value = labels.get(label_key)
            if value is not None and str(value).strip():
                parts.append(str(value).strip()[:64])
                break

    if not parts:
        return None
    return sanitize_source_key("-".join(parts)) or None
async def get_pool(request: Request):
    """FastAPI dependency: the asyncpg pool from app state, or None when absent."""
    app_state = request.app.state
    return getattr(app_state, "pool", None)
@router.post("/ingress/grafana", status_code=202)
async def grafana_webhook(
def service_hint_from_grafana_body(body: dict, header_service: str | None) -> str | None:
    """Best-effort service name for an alert.

    Precedence: the X-OnGuard-Service header, then well-known labels on the
    first alert, then commonLabels. Values are trimmed and capped at 200
    characters; returns None when nothing matches.
    """
    if header_service and header_service.strip():
        return header_service.strip()[:200]

    alerts = body.get("alerts")
    if isinstance(alerts, list) and alerts:
        first = alerts[0]
        labels = first.get("labels") if isinstance(first, dict) else None
        if isinstance(labels, dict):
            for label in ("service", "job", "namespace", "cluster", "instance"):
                candidate = labels.get(label)
                if candidate is not None and str(candidate).strip():
                    return str(candidate).strip()[:200]

    common = body.get("commonLabels")
    if isinstance(common, dict):
        for label in ("service", "job", "namespace"):
            candidate = common.get(label)
            if candidate is not None and str(candidate).strip():
                return str(candidate).strip()[:200]
    return None
async def _grafana_webhook_impl(
request: Request,
pool=Depends(get_pool),
x_onguard_secret: str | None = Header(default=None, alias="X-OnGuard-Secret"),
):
pool,
x_onguard_secret: str | None,
x_onguard_service: str | None,
path_slug: str | None,
) -> Response:
settings = request.app.state.settings
if settings.grafana_webhook_secret and x_onguard_secret != settings.grafana_webhook_secret:
raise HTTPException(status_code=401, detail="unauthorized")
raw = await request.body()
if len(raw) > 1_000_000:
@ -32,6 +95,25 @@ async def grafana_webhook(
body = json.loads(raw.decode() or "{}")
except json.JSONDecodeError:
body = {}
if not isinstance(body, dict):
body = {}
derived = extract_grafana_source_key(body)
path_key: str | None = None
if path_slug is not None:
path_key = path_slug.strip().lower()
if not _PATH_SLUG_RE.match(path_key):
raise HTTPException(status_code=400, detail="invalid path slug")
stored_org_slug = path_key
else:
stored_org_slug = derived
by = sources_by_slug(settings)
source = by.get(path_key) if path_key else None
if not webhook_authorized(settings, source, x_onguard_secret):
raise HTTPException(status_code=401, detail="unauthorized")
service_name = service_hint_from_grafana_body(body, x_onguard_service)
if pool is None:
logger.warning("ingress: database not configured, event not persisted")
@ -39,9 +121,15 @@ async def grafana_webhook(
async with pool.acquire() as conn:
row = await conn.fetchrow(
"INSERT INTO ingress_events (source, body) VALUES ($1, $2::jsonb) RETURNING id",
"""
INSERT INTO ingress_events (source, body, org_slug, service_name)
VALUES ($1, $2::jsonb, $3, $4)
RETURNING id
""",
"grafana",
json.dumps(body),
stored_org_slug,
service_name,
)
raw_id = row["id"] if row else None
bus = getattr(request.app.state, "event_bus", None)
@ -54,5 +142,43 @@ async def grafana_webhook(
payload=body,
received_at=datetime.now(timezone.utc),
)
await bus.publish_alert_received(alert, raw_payload_ref=raw_id)
await bus.publish_alert_received(
alert,
raw_payload_ref=raw_id,
grafana_org_slug=stored_org_slug,
service_name=service_name,
)
return Response(status_code=202)
@router.post("/ingress/grafana", status_code=202)
async def grafana_webhook_legacy(
    request: Request,
    pool=Depends(get_pool),
    x_onguard_secret: str | None = Header(default=None, alias="X-OnGuard-Secret"),
    x_onguard_service: str | None = Header(default=None, alias="X-OnGuard-Service"),
):
    """
    Universal URL for any Grafana instance: the org_slug stored in the DB is
    derived from the payload (externalURL, orgId, labels), with no per-instance
    setup in .env.
    """
    # path_slug=None → the shared impl auto-derives the source key from the body.
    return await _grafana_webhook_impl(
        request, pool, x_onguard_secret, x_onguard_service, path_slug=None
    )
@router.post("/ingress/grafana/{org_slug}", status_code=202)
async def grafana_webhook_org(
    org_slug: str,
    request: Request,
    pool=Depends(get_pool),
    x_onguard_secret: str | None = Header(default=None, alias="X-OnGuard-Secret"),
    x_onguard_service: str | None = Header(default=None, alias="X-OnGuard-Service"),
):
    """
    Optional: an explicit slug in the URL (overrides auto-extraction from the
    JSON body). Per-path secret: webhook_secret from GRAFANA_SOURCES_JSON for
    this slug, otherwise the shared secret.
    """
    return await _grafana_webhook_impl(
        request, pool, x_onguard_secret, x_onguard_service, path_slug=org_slug
    )

View File

@ -0,0 +1,169 @@
"""Чтение иерархии Grafana: org, папки, managed alert rules (Ruler API)."""
from __future__ import annotations
import json
import logging
from dataclasses import dataclass
from typing import Any
import httpx
log = logging.getLogger(__name__)
_HEADERS_JSON = {"Accept": "application/json"}
def _auth_headers(token: str) -> dict[str, str]:
    """JSON Accept headers plus a Bearer authorization for the trimmed token."""
    bearer = f"Bearer {token.strip()}"
    return {**_HEADERS_JSON, "Authorization": bearer}
@dataclass
class ParsedRuleRow:
    """One Grafana-managed alert rule flattened out of the Ruler API response."""

    # UID of the namespace (folder) the rule lives under in the ruler payload.
    namespace_uid: str
    # Group name; the parser substitutes "group" when the payload omits it.
    rule_group_name: str
    # Group evaluation interval as a string, when the API reported one.
    rule_group_interval: str | None
    rule_uid: str
    # Rule title, truncated to 500 chars by the parser.
    title: str
    labels: dict[str, Any]
async def fetch_org(base_url: str, token: str) -> tuple[dict[str, Any] | None, str | None]:
    """GET /api/org. Returns (payload, None) on success, else (None, error text)."""
    root = base_url.rstrip("/")
    try:
        async with httpx.AsyncClient(timeout=30.0, verify=True, follow_redirects=True) as client:
            resp = await client.get(f"{root}/api/org", headers=_auth_headers(token))
    except Exception as exc:
        return None, str(exc)
    if resp.status_code != 200:
        return None, f"http {resp.status_code}: {(resp.text or '')[:300]}"
    try:
        return resp.json(), None
    except Exception:
        return None, "invalid json from /api/org"
async def fetch_all_folders(base_url: str, token: str) -> tuple[list[dict[str, Any]], str | None]:
    """Breadth-first walk of the folder tree via /api/folders?parentUid=…

    Returns (folders, error). On error the folders collected so far are still
    returned, so a partial listing is possible.
    """
    base = base_url.rstrip("/")
    out: list[dict[str, Any]] = []
    seen: set[str] = set()  # uids already enqueued; guards against revisits/cycles
    queue: list[str | None] = [None]  # None = root level (request without parentUid)
    try:
        async with httpx.AsyncClient(timeout=60.0, verify=True, follow_redirects=True) as client:
            while queue:
                parent_uid = queue.pop(0)
                params: dict[str, str] = {}
                if parent_uid is not None:
                    params["parentUid"] = parent_uid
                r = await client.get(f"{base}/api/folders", headers=_auth_headers(token), params=params)
                if r.status_code != 200:
                    return out, f"folders http {r.status_code}: {(r.text or '')[:200]}"
                try:
                    chunk = r.json()
                except Exception:
                    return out, "invalid json from /api/folders"
                if not isinstance(chunk, list):
                    return out, "folders response is not a list"
                for f in chunk:
                    if not isinstance(f, dict):
                        continue
                    uid = f.get("uid")
                    if not uid or uid in seen:
                        continue
                    seen.add(str(uid))
                    out.append(f)
                    queue.append(str(uid))  # descend into this folder on a later pass
    except Exception as e:
        return out, str(e)
    return out, None
async def fetch_ruler_rules_raw(base_url: str, token: str) -> tuple[dict[str, Any] | None, str | None]:
    """GET the Ruler API for Grafana-managed rules (namespace → groups → rules).

    Both path casings below are tried in order; the first 200 response that
    decodes to a JSON object wins. Otherwise (None, last error seen).
    """
    base = base_url.rstrip("/")
    paths = (
        "/api/ruler/grafana/api/v1/rules",
        "/api/ruler/Grafana/api/v1/rules",
    )
    last_err: str | None = None
    try:
        async with httpx.AsyncClient(timeout=90.0, verify=True, follow_redirects=True) as client:
            for path in paths:
                r = await client.get(f"{base}{path}", headers=_auth_headers(token))
                if r.status_code == 200:
                    try:
                        data = r.json()
                    except Exception:
                        return None, "invalid json from ruler"
                    if isinstance(data, dict):
                        return data, None
                    return None, "ruler response is not an object"
                # Remember the failure but keep trying the remaining paths.
                last_err = f"ruler {path} http {r.status_code}: {(r.text or '')[:200]}"
    except Exception as e:
        return None, str(e)
    return None, last_err or "ruler: no path matched"
def parse_ruler_rules(data: dict[str, Any]) -> list[ParsedRuleRow]:
    """Flatten a Ruler API payload (namespace → groups → rules) into rows.

    Entries that do not look like Grafana-managed rules (no grafana_alert
    object, no uid) are skipped; labels are coerced to str→str with None
    values dropped; titles are capped at 500 chars.
    """
    out: list[ParsedRuleRow] = []
    for raw_ns, group_list in data.items():
        if not isinstance(raw_ns, str) or not raw_ns.strip():
            continue
        namespace = raw_ns.strip()
        if not isinstance(group_list, list):
            continue
        for group in group_list:
            if not isinstance(group, dict):
                continue
            group_name = str(group.get("name") or "group")
            raw_interval = group.get("interval")
            interval = None if raw_interval is None else str(raw_interval)
            rule_list = group.get("rules")
            if not isinstance(rule_list, list):
                continue
            for entry in rule_list:
                if not isinstance(entry, dict):
                    continue
                grafana_alert = entry.get("grafana_alert")
                if not isinstance(grafana_alert, dict):
                    continue
                rule_uid = grafana_alert.get("uid") or entry.get("uid")
                if not rule_uid:
                    continue
                title = str(grafana_alert.get("title") or group_name or rule_uid)
                raw_labels = entry.get("labels")
                if not isinstance(raw_labels, dict):
                    raw_labels = {}
                clean_labels = {
                    str(k): str(v) for k, v in raw_labels.items() if v is not None
                }
                out.append(
                    ParsedRuleRow(
                        namespace_uid=namespace,
                        rule_group_name=group_name,
                        rule_group_interval=interval,
                        rule_uid=str(rule_uid),
                        title=title[:500],
                        labels=clean_labels,
                    )
                )
    return out
def merge_folder_rows(
    api_folders: list[dict[str, Any]],
    rule_namespaces: set[str],
) -> list[tuple[str, str, str | None]]:
    """Union of folders from /api/folders with namespaces seen only in the ruler.

    Returns (uid, title, parent_uid) triples sorted by uid. A ruler-only
    namespace gets its uid as the title and no parent.
    """
    merged: dict[str, tuple[str, str | None]] = {}
    for folder in api_folders:
        folder_uid = folder.get("uid")
        if not folder_uid:
            continue
        raw_parent = folder.get("parentUid")
        parent_uid = str(raw_parent) if raw_parent else None
        merged[str(folder_uid)] = (str(folder.get("title") or folder_uid), parent_uid)
    for namespace in rule_namespaces:
        if namespace not in merged:
            merged[namespace] = (namespace, None)
    ordered = sorted(merged.items(), key=lambda item: item[0])
    return [(uid, pair[0], pair[1]) for uid, pair in ordered]

View File

@ -25,6 +25,12 @@ class PolicyCreate(BaseModel):
steps: list[dict] = Field(default_factory=list)
class PolicyPatch(BaseModel):
    """Partial update for an escalation policy; None fields are left unchanged."""

    name: str | None = Field(default=None, min_length=1, max_length=200)
    enabled: bool | None = None
    steps: list[dict] | None = None
def register_events(_bus: EventBus, _pool: asyncpg.Pool | None = None) -> None:
    """Module hook: the policies module subscribes to no domain events."""
    pass
@ -95,6 +101,67 @@ async def create_policy_api(body: PolicyCreate, pool: asyncpg.Pool | None = Depe
}
def _policy_dict(row) -> dict:
steps = row["steps"]
if isinstance(steps, str):
steps = json.loads(steps)
return {
"id": str(row["id"]),
"name": row["name"],
"enabled": row["enabled"],
"steps": steps if isinstance(steps, list) else [],
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
}
@router.get("/{policy_id}")
async def get_policy_api(policy_id: UUID, pool: asyncpg.Pool | None = Depends(get_pool)):
    """Fetch one escalation policy by id; 404 when absent, 503 without a DB."""
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            """
            SELECT id, name, enabled, steps, created_at
            FROM escalation_policies WHERE id = $1::uuid
            """,
            policy_id,
        )
    if not row:
        raise HTTPException(status_code=404, detail="not found")
    return _policy_dict(row)
@router.patch("/{policy_id}")
async def patch_policy_api(
    policy_id: UUID,
    body: PolicyPatch,
    pool: asyncpg.Pool | None = Depends(get_pool),
):
    """Partially update a policy; COALESCE keeps columns whose field is None."""
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")
    # Reject a no-op patch early instead of issuing a pointless UPDATE.
    if body.name is None and body.enabled is None and body.steps is None:
        raise HTTPException(status_code=400, detail="no fields to update")
    # NULL (not "[]") when steps is absent so COALESCE preserves the old value.
    steps_json = json.dumps(body.steps) if body.steps is not None else None
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            """
            UPDATE escalation_policies SET
                name = COALESCE($2, name),
                enabled = COALESCE($3, enabled),
                steps = COALESCE($4::jsonb, steps)
            WHERE id = $1::uuid
            RETURNING id, name, enabled, steps, created_at
            """,
            policy_id,
            body.name.strip() if body.name is not None else None,
            body.enabled,
            steps_json,
        )
    if not row:
        raise HTTPException(status_code=404, detail="not found")
    return _policy_dict(row)
@router.delete("/{policy_id}", status_code=204)
async def delete_policy_api(policy_id: UUID, pool: asyncpg.Pool | None = Depends(get_pool)):
if pool is None:

View File

@ -0,0 +1,380 @@
"""Кэш иерархии Grafana (инстанс → org → папки → правила) через API + БД."""
from __future__ import annotations
import html
import json
import logging
from dataclasses import dataclass
import asyncpg
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import HTMLResponse
from pydantic import BaseModel, Field
from onguard24.config import get_settings
from onguard24.deps import get_pool
from onguard24.domain.events import EventBus
from onguard24.grafana_sources import iter_grafana_sources
from onguard24.integrations import grafana_topology as gt
from onguard24.modules.ui_support import wrap_module_html_page
log = logging.getLogger(__name__)
router = APIRouter(tags=["module-grafana-catalog"])
ui_router = APIRouter(tags=["web-grafana-catalog"], include_in_schema=False)
def register_events(_bus: EventBus, _pool: asyncpg.Pool | None = None) -> None:
    """Module hook: the Grafana catalog module subscribes to no domain events."""
    pass
class SyncBody(BaseModel):
    """Request body for POST /sync: optionally restrict the sync to one slug."""

    instance_slug: str | None = Field(
        default=None,
        description="Slug из GRAFANA_SOURCES_JSON; пусто — все источники с api_token",
    )
@dataclass
class PullOutcome:
    """Everything pulled from one Grafana instance during a single sync pass."""

    org_id: int
    org_name: str
    # (uid, title, parent_uid) triples: API folders merged with ruler namespaces.
    folder_rows: list[tuple[str, str, str | None]]
    rules: list[gt.ParsedRuleRow]
    # Non-fatal fetch problems (folders/ruler); the sync still persists.
    warnings: list[str]
async def pull_topology(api_url: str, token: str) -> tuple[PullOutcome | None, str | None]:
    """Fetch org, folders, and managed alert rules from one Grafana instance.

    Returns (outcome, None) on success. Failing to read /api/org (or getting
    an org without an id) is fatal and returns (None, error); folder/ruler
    fetch errors are demoted to warnings on the outcome.
    """
    org, oerr = await gt.fetch_org(api_url, token)
    if oerr or not org:
        return None, oerr or "no org"
    oid = org.get("id")
    if oid is None:
        return None, "org response without id"
    oname = str(org.get("name") or "")
    warnings: list[str] = []
    folders_raw, ferr = await gt.fetch_all_folders(api_url, token)
    if ferr:
        warnings.append(ferr)
    ruler_raw, rerr = await gt.fetch_ruler_rules_raw(api_url, token)
    if rerr:
        # Without ruler data the sync still proceeds; the rule list is empty.
        warnings.append(rerr)
        rules: list[gt.ParsedRuleRow] = []
        namespaces: set[str] = set()
    else:
        rules = gt.parse_ruler_rules(ruler_raw or {})
        namespaces = {r.namespace_uid for r in rules}
    merged = gt.merge_folder_rows(folders_raw, namespaces)
    return (
        PullOutcome(
            org_id=int(oid),
            org_name=oname,
            folder_rows=merged,
            rules=rules,
            warnings=warnings,
        ),
        None,
    )
async def persist_topology(
    conn: asyncpg.Connection,
    instance_slug: str,
    outcome: PullOutcome,
) -> None:
    """Replace the cached catalog of one (instance_slug, org) with *outcome*.

    Full refresh: delete rules and folders for the pair, re-insert both, then
    upsert the meta row with counters and the joined warning text. The caller
    should wrap this in a transaction so readers never observe a half-written
    catalog.
    """
    oid = outcome.org_id
    # Rules first, then folders, to refresh children before their namespaces.
    await conn.execute(
        """
        DELETE FROM grafana_catalog_rules
        WHERE instance_slug = $1 AND grafana_org_id = $2
        """,
        instance_slug,
        oid,
    )
    await conn.execute(
        """
        DELETE FROM grafana_catalog_folders
        WHERE instance_slug = $1 AND grafana_org_id = $2
        """,
        instance_slug,
        oid,
    )
    for uid, title, parent in outcome.folder_rows:
        await conn.execute(
            """
            INSERT INTO grafana_catalog_folders
                (instance_slug, grafana_org_id, folder_uid, title, parent_uid)
            VALUES ($1, $2, $3, $4, $5)
            """,
            instance_slug,
            oid,
            uid,
            title,
            parent,
        )
    for r in outcome.rules:
        await conn.execute(
            """
            INSERT INTO grafana_catalog_rules (
                instance_slug, grafana_org_id, namespace_uid, rule_group_name,
                rule_uid, title, rule_group_interval, labels
            )
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8::jsonb)
            """,
            instance_slug,
            oid,
            r.namespace_uid,
            r.rule_group_name,
            r.rule_uid,
            r.title,
            r.rule_group_interval,
            json.dumps(r.labels),
        )
    fc = len(outcome.folder_rows)
    rc = len(outcome.rules)
    warn_txt = "; ".join(outcome.warnings) if outcome.warnings else None
    if warn_txt and len(warn_txt) > 1900:
        # Fix: the truncation marker was an empty string, which silently hid
        # the fact that the stored warning text had been cut off.
        warn_txt = warn_txt[:1900] + "…"
    await conn.execute(
        """
        INSERT INTO grafana_catalog_meta (
            instance_slug, grafana_org_id, org_name, synced_at,
            folder_count, rule_count, error_text
        )
        VALUES ($1, $2, $3, now(), $4, $5, $6)
        ON CONFLICT (instance_slug, grafana_org_id) DO UPDATE SET
            org_name = EXCLUDED.org_name,
            synced_at = EXCLUDED.synced_at,
            folder_count = EXCLUDED.folder_count,
            rule_count = EXCLUDED.rule_count,
            error_text = EXCLUDED.error_text
        """,
        instance_slug,
        oid,
        outcome.org_name,
        fc,
        rc,
        warn_txt,
    )
@router.post("/sync", status_code=200)
async def sync_catalog_api(
    body: SyncBody,
    pool: asyncpg.Pool | None = Depends(get_pool),
):
    """Pull and persist the catalog for one slug, or for every tokened source.

    Per-source failures are reported in the result list rather than raised,
    so one broken instance does not abort the rest of the sync.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")
    sources = iter_grafana_sources(get_settings())
    if body.instance_slug:
        sl = body.instance_slug.strip().lower()
        sources = [s for s in sources if s.slug == sl]
        if not sources:
            raise HTTPException(status_code=404, detail="unknown instance slug")
    # Only sources that can talk to the HTTP API are usable here.
    to_run = [s for s in sources if s.api_token.strip()]
    if not to_run:
        raise HTTPException(
            status_code=400,
            detail="нет источников с api_token; задайте GRAFANA_SOURCES_JSON или GRAFANA_SERVICE_ACCOUNT_TOKEN",
        )
    results: list[dict] = []
    for src in to_run:
        outcome, err = await pull_topology(src.api_url, src.api_token)
        if err or outcome is None:
            results.append({"slug": src.slug, "ok": False, "error": err})
            log.warning("grafana_catalog sync failed %s: %s", src.slug, err)
            continue
        # Delete + insert in one transaction so readers never see a half sync.
        async with pool.acquire() as conn:
            async with conn.transaction():
                await persist_topology(conn, src.slug, outcome)
        results.append(
            {
                "slug": src.slug,
                "ok": True,
                "org_id": outcome.org_id,
                "org_name": outcome.org_name,
                "folders": len(outcome.folder_rows),
                "rules": len(outcome.rules),
                "warnings": outcome.warnings,
            }
        )
    return {"results": results}
@router.get("/meta")
async def list_meta_api(pool: asyncpg.Pool | None = Depends(get_pool)):
    """Sync status per (instance, org): counters, last sync time, last error."""
    if pool is None:
        return {"items": [], "database": "disabled"}
    async with pool.acquire() as conn:
        rows = await conn.fetch(
            """
            SELECT instance_slug, grafana_org_id, org_name, synced_at,
                   folder_count, rule_count, error_text
            FROM grafana_catalog_meta
            ORDER BY instance_slug, grafana_org_id
            """
        )
    items = []
    for r in rows:
        items.append(
            {
                "instance_slug": r["instance_slug"],
                "grafana_org_id": r["grafana_org_id"],
                "org_name": r["org_name"],
                "synced_at": r["synced_at"].isoformat() if r["synced_at"] else None,
                "folder_count": r["folder_count"],
                "rule_count": r["rule_count"],
                "error_text": r["error_text"],
            }
        )
    return {"items": items}
@router.get("/tree")
async def tree_api(
    instance_slug: str,
    pool: asyncpg.Pool | None = Depends(get_pool),
):
    """Cached folder/rule tree for one instance (most recently synced org).

    Rules are grouped under their namespace folder; namespaces that carry
    rules but have no folder row are listed in orphan_rule_namespaces.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")
    slug = instance_slug.strip().lower()
    async with pool.acquire() as conn:
        # Latest synced org for this slug; non-positive org ids are excluded.
        meta = await conn.fetchrow(
            """
            SELECT * FROM grafana_catalog_meta
            WHERE instance_slug = $1 AND grafana_org_id > 0
            ORDER BY synced_at DESC LIMIT 1
            """,
            slug,
        )
        if not meta:
            raise HTTPException(status_code=404, detail="no catalog for this slug; run POST /sync first")
        oid = meta["grafana_org_id"]
        folders = await conn.fetch(
            """
            SELECT folder_uid, title, parent_uid
            FROM grafana_catalog_folders
            WHERE instance_slug = $1 AND grafana_org_id = $2
            ORDER BY title
            """,
            slug,
            oid,
        )
        rules = await conn.fetch(
            """
            SELECT namespace_uid, rule_group_name, rule_uid, title,
                   rule_group_interval, labels
            FROM grafana_catalog_rules
            WHERE instance_slug = $1 AND grafana_org_id = $2
            ORDER BY namespace_uid, rule_group_name, title
            """,
            slug,
            oid,
        )
    # Bucket rules by namespace so each folder can pick up its own rules.
    by_ns: dict[str, list[dict]] = {}
    for r in rules:
        ns = r["namespace_uid"]
        by_ns.setdefault(ns, []).append(
            {
                "rule_uid": r["rule_uid"],
                "title": r["title"],
                "rule_group": r["rule_group_name"],
                "interval": r["rule_group_interval"],
                "labels": r["labels"] if isinstance(r["labels"], dict) else {},
            }
        )
    folder_nodes = []
    for f in folders:
        uid = f["folder_uid"]
        folder_nodes.append(
            {
                "folder_uid": uid,
                "title": f["title"],
                "parent_uid": f["parent_uid"],
                "rules": by_ns.get(uid, []),
            }
        )
    return {
        "instance_slug": slug,
        "grafana_org_id": oid,
        "org_name": meta["org_name"],
        "synced_at": meta["synced_at"].isoformat() if meta["synced_at"] else None,
        "folders": folder_nodes,
        "orphan_rule_namespaces": sorted(set(by_ns.keys()) - {f["folder_uid"] for f in folders}),
    }
@ui_router.get("/", response_class=HTMLResponse)
async def grafana_catalog_ui(request: Request):
    """HTML page: one table row per (instance, org) from grafana_catalog_meta."""
    pool = get_pool(request)
    inner = ""
    if pool is None:
        inner = "<p>База не настроена.</p>"
    else:
        try:
            async with pool.acquire() as conn:
                rows = await conn.fetch(
                    """
                    SELECT instance_slug, org_name, synced_at, folder_count, rule_count, error_text
                    FROM grafana_catalog_meta
                    ORDER BY instance_slug
                    """
                )
            if not rows:
                inner = "<p>Каталог пуст. Вызовите <code>POST /api/v1/modules/grafana-catalog/sync</code>.</p>"
            else:
                inner = "<table class='irm-table'><thead><tr><th>Slug</th><th>Org</th><th>Синхр.</th><th>Папок</th><th>Правил</th><th>Ошибка</th></tr></thead><tbody>"
                for r in rows:
                    # Escape DB-sourced text before interpolating it into HTML.
                    err = html.escape(str(r["error_text"] or ""))[:120]
                    st = r["synced_at"].isoformat() if r["synced_at"] else ""
                    inner += (
                        f"<tr><td>{html.escape(r['instance_slug'])}</td>"
                        f"<td>{html.escape(str(r['org_name']))}</td>"
                        f"<td>{html.escape(st)}</td>"
                        f"<td>{r['folder_count']}</td><td>{r['rule_count']}</td>"
                        f"<td>{err}</td></tr>"
                    )
                inner += "</tbody></table>"
        except Exception as e:
            # Render the failure inline rather than returning a 500 page.
            inner = f"<p class='module-err'>{html.escape(str(e))}</p>"
    page = f"""<h1>Каталог Grafana</h1>
<p>Иерархия: инстанс (slug) → организация → папки → правила. Синхронизация по HTTP API.</p>
{inner}
<p><small>API: <code>POST …/grafana-catalog/sync</code>, <code>GET …/grafana-catalog/tree?instance_slug=…</code></small></p>"""
    return HTMLResponse(
        wrap_module_html_page(
            document_title="Каталог Grafana — onGuard24",
            current_slug="grafana-catalog",
            main_inner_html=page,
        )
    )
async def render_home_fragment(request: Request) -> str:
    """Dashboard snippet: number of synced sources and the latest sync time."""
    pool = get_pool(request)
    if pool is None:
        return '<p class="module-note">Нужна БД для каталога Grafana.</p>'
    try:
        async with pool.acquire() as conn:
            n = await conn.fetchval("SELECT count(*)::int FROM grafana_catalog_meta WHERE grafana_org_id >= 0")
            last = await conn.fetchrow(
                "SELECT max(synced_at) AS m FROM grafana_catalog_meta WHERE grafana_org_id >= 0"
            )
    except Exception:
        # Missing tables (migrations not applied) must not break the home page.
        return '<p class="module-note">Таблицы каталога недоступны (миграции?).</p>'
    ts = last["m"].isoformat() if last and last["m"] else "никогда"
    return (
        f'<div class="module-fragment"><p>Источников с синхронизацией: <strong>{int(n)}</strong>. '
        f"Последняя синхр.: <strong>{html.escape(ts)}</strong></p></div>"
    )

View File

@ -27,6 +27,12 @@ class IncidentCreate(BaseModel):
severity: str = Field(default="warning", max_length=32)
class IncidentPatch(BaseModel):
    """Partial update for an incident; None fields are left unchanged."""

    title: str | None = Field(default=None, min_length=1, max_length=500)
    status: str | None = Field(default=None, max_length=64)
    severity: str | None = Field(default=None, max_length=32)
def register_events(bus: EventBus, pool: asyncpg.Pool | None = None) -> None:
if pool is None:
return
@ -41,12 +47,17 @@ def register_events(bus: EventBus, pool: asyncpg.Pool | None = None) -> None:
async with pool.acquire() as conn:
await conn.execute(
"""
INSERT INTO incidents (title, status, severity, source, ingress_event_id)
VALUES ($1, 'open', $2, 'grafana', $3::uuid)
INSERT INTO incidents (
title, status, severity, source, ingress_event_id,
grafana_org_slug, service_name
)
VALUES ($1, 'open', $2, 'grafana', $3::uuid, $4, $5)
""",
title,
sev,
ev.raw_payload_ref,
ev.grafana_org_slug,
ev.service_name,
)
except Exception:
log.exception("incidents: не удалось создать инцидент из alert.received")
@ -70,20 +81,33 @@ async def render_home_fragment(request: Request) -> str:
async def list_incidents_api(
pool: asyncpg.Pool | None = Depends(get_pool),
limit: int = 50,
grafana_org_slug: str | None = None,
service_name: str | None = None,
):
if pool is None:
return {"items": [], "database": "disabled"}
limit = min(max(limit, 1), 200)
async with pool.acquire() as conn:
rows = await conn.fetch(
"""
SELECT id, title, status, severity, source, ingress_event_id, created_at
conditions: list[str] = []
args: list = []
if grafana_org_slug and grafana_org_slug.strip():
args.append(grafana_org_slug.strip())
conditions.append(f"grafana_org_slug = ${len(args)}")
if service_name and service_name.strip():
args.append(service_name.strip())
conditions.append(f"service_name = ${len(args)}")
where_sql = ("WHERE " + " AND ".join(conditions)) if conditions else ""
args.append(limit)
lim_ph = f"${len(args)}"
q = f"""
SELECT id, title, status, severity, source, ingress_event_id, created_at, updated_at,
grafana_org_slug, service_name
FROM incidents
{where_sql}
ORDER BY created_at DESC
LIMIT $1
""",
limit,
)
LIMIT {lim_ph}
"""
async with pool.acquire() as conn:
rows = await conn.fetch(q, *args)
items = []
for r in rows:
items.append(
@ -95,6 +119,9 @@ async def list_incidents_api(
"source": r["source"],
"ingress_event_id": str(r["ingress_event_id"]) if r["ingress_event_id"] else None,
"created_at": r["created_at"].isoformat() if r["created_at"] else None,
"updated_at": r["updated_at"].isoformat() if r.get("updated_at") else None,
"grafana_org_slug": r.get("grafana_org_slug"),
"service_name": r.get("service_name"),
}
)
return {"items": items}
@ -110,9 +137,10 @@ async def create_incident_api(
async with pool.acquire() as conn:
row = await conn.fetchrow(
"""
INSERT INTO incidents (title, status, severity, source)
VALUES ($1, $2, $3, 'manual')
RETURNING id, title, status, severity, source, ingress_event_id, created_at
INSERT INTO incidents (title, status, severity, source, grafana_org_slug, service_name)
VALUES ($1, $2, $3, 'manual', NULL, NULL)
RETURNING id, title, status, severity, source, ingress_event_id, created_at, updated_at,
grafana_org_slug, service_name
""",
body.title.strip(),
body.status,
@ -126,9 +154,63 @@ async def create_incident_api(
"source": row["source"],
"ingress_event_id": None,
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
"updated_at": row["updated_at"].isoformat() if row.get("updated_at") else None,
"grafana_org_slug": row.get("grafana_org_slug"),
"service_name": row.get("service_name"),
}
def _incident_row_dict(row) -> dict:
return {
"id": str(row["id"]),
"title": row["title"],
"status": row["status"],
"severity": row["severity"],
"source": row["source"],
"ingress_event_id": str(row["ingress_event_id"]) if row["ingress_event_id"] else None,
"created_at": row["created_at"].isoformat() if row["created_at"] else None,
"updated_at": row["updated_at"].isoformat() if row.get("updated_at") else None,
"grafana_org_slug": row.get("grafana_org_slug"),
"service_name": row.get("service_name"),
}
@router.get("/{incident_id}/tasks")
async def list_incident_tasks_api(
    incident_id: UUID,
    pool: asyncpg.Pool | None = Depends(get_pool),
    limit: int = 100,
):
    """Tasks attached to one incident, newest first; 404 for an unknown incident."""
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")
    limit = min(max(limit, 1), 200)  # clamp to a sane page size
    async with pool.acquire() as conn:
        exists = await conn.fetchval("SELECT 1 FROM incidents WHERE id = $1::uuid", incident_id)
        if not exists:
            raise HTTPException(status_code=404, detail="incident not found")
        rows = await conn.fetch(
            """
            SELECT id, incident_id, title, status, created_at
            FROM tasks WHERE incident_id = $1::uuid
            ORDER BY created_at DESC LIMIT $2
            """,
            incident_id,
            limit,
        )
    items = []
    for r in rows:
        items.append(
            {
                "id": str(r["id"]),
                "incident_id": str(r["incident_id"]) if r["incident_id"] else None,
                "title": r["title"],
                "status": r["status"],
                "created_at": r["created_at"].isoformat() if r["created_at"] else None,
            }
        )
    return {"incident_id": str(incident_id), "items": items}
@router.get("/{incident_id}")
async def get_incident_api(incident_id: UUID, pool: asyncpg.Pool | None = Depends(get_pool)):
    """Return a single incident by id.

    Raises 503 when the DB pool is disabled and 404 when no row matches.
    The response shape is produced by _incident_row_dict, matching the
    list/create/patch endpoints (includes updated_at and the Grafana columns).
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            """
            SELECT id, title, status, severity, source, ingress_event_id, created_at, updated_at,
                   grafana_org_slug, service_name
            FROM incidents WHERE id = $1::uuid
            """,
            incident_id,
        )
        if not row:
            raise HTTPException(status_code=404, detail="not found")
        # Leftover pre-1.7.0 SELECT/return duplicates removed; the shared
        # serializer keeps the payload consistent across incident endpoints.
        return _incident_row_dict(row)
@router.patch("/{incident_id}")
async def patch_incident_api(
    incident_id: UUID,
    body: IncidentPatch,
    pool: asyncpg.Pool | None = Depends(get_pool),
):
    """Partially update an incident (title/status/severity).

    At least one field must be supplied (400 otherwise); COALESCE in the
    UPDATE keeps columns for which the payload field is NULL. 404 when the
    incident does not exist, 503 when the DB pool is disabled.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")
    if (body.title, body.status, body.severity) == (None, None, None):
        raise HTTPException(status_code=400, detail="no fields to update")
    new_title = body.title.strip() if body.title is not None else None
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            """
            UPDATE incidents SET
                title = COALESCE($2, title),
                status = COALESCE($3, status),
                severity = COALESCE($4, severity),
                updated_at = now()
            WHERE id = $1::uuid
            RETURNING id, title, status, severity, source, ingress_event_id, created_at, updated_at,
                      grafana_org_slug, service_name
            """,
            incident_id,
            new_title,
            body.status,
            body.severity,
        )
    if not row:
        raise HTTPException(status_code=404, detail="not found")
    return _incident_row_dict(row)
@ui_router.get("/", response_class=HTMLResponse)
@ -166,13 +273,15 @@ async def incidents_ui_home(request: Request):
async with pool.acquire() as conn:
rows = await conn.fetch(
"""
SELECT id, title, status, severity, source, created_at
SELECT id, title, status, severity, source, created_at, grafana_org_slug, service_name
FROM incidents
ORDER BY created_at DESC
LIMIT 100
"""
)
for r in rows:
org = html.escape(str(r["grafana_org_slug"] or ""))
svc = html.escape(str(r["service_name"] or ""))
rows_html += (
"<tr>"
f"<td>{html.escape(str(r['id']))[:8]}…</td>"
@ -180,6 +289,8 @@ async def incidents_ui_home(request: Request):
f"<td>{html.escape(r['status'])}</td>"
f"<td>{html.escape(r['severity'])}</td>"
f"<td>{html.escape(r['source'])}</td>"
f"<td>{org}</td>"
f"<td>{svc}</td>"
"</tr>"
)
except Exception as e:
@ -187,8 +298,8 @@ async def incidents_ui_home(request: Request):
inner = f"""<h1>Инциденты</h1>
{err}
<table class="irm-table">
<thead><tr><th>ID</th><th>Заголовок</th><th>Статус</th><th>Важность</th><th>Источник</th></tr></thead>
<tbody>{rows_html or '<tr><td colspan="5">Пока нет записей</td></tr>'}</tbody>
<thead><tr><th>ID</th><th>Заголовок</th><th>Статус</th><th>Важность</th><th>Источник</th><th>Grafana slug</th><th>Сервис</th></tr></thead>
<tbody>{rows_html or '<tr><td colspan="7">Пока нет записей</td></tr>'}</tbody>
</table>
<p><small>Создание из Grafana: webhook → запись в <code>ingress_events</code> → событие → строка здесь.</small></p>"""
return HTMLResponse(

View File

@ -16,6 +16,7 @@ from onguard24.domain.events import EventBus
from onguard24.modules import (
contacts,
escalations,
grafana_catalog,
incidents,
schedules,
statusboard,
@ -42,6 +43,15 @@ class ModuleMount:
def _mounts() -> list[ModuleMount]:
return [
ModuleMount(
router=grafana_catalog.router,
url_prefix="/api/v1/modules/grafana-catalog",
register_events=grafana_catalog.register_events,
slug="grafana-catalog",
title="Каталог Grafana",
ui_router=grafana_catalog.ui_router,
render_home_fragment=grafana_catalog.render_home_fragment,
),
ModuleMount(
router=incidents.router,
url_prefix="/api/v1/modules/incidents",

View File

@ -23,6 +23,11 @@ class TaskCreate(BaseModel):
incident_id: UUID | None = None
class TaskPatch(BaseModel):
    """Partial-update payload for PATCH /tasks/{task_id}.

    Both fields are optional; a None field means "leave the current value
    unchanged" (the endpoint applies them via SQL COALESCE).
    """

    title: str | None = Field(default=None, min_length=1, max_length=500)
    status: str | None = Field(default=None, max_length=64)
def register_events(_bus: EventBus, _pool: asyncpg.Pool | None = None) -> None:
    """No-op event hook: the tasks module subscribes to no domain events.

    Kept so the module exposes the same registration interface as the
    other mounted modules.
    """
    return None
@ -114,6 +119,63 @@ async def create_task_api(body: TaskCreate, pool: asyncpg.Pool | None = Depends(
}
@router.get("/{task_id}")
async def get_task_api(task_id: UUID, pool: asyncpg.Pool | None = Depends(get_pool)):
    """Fetch a single task by id (404 if absent, 503 if the DB is disabled)."""
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            """
            SELECT id, incident_id, title, status, created_at
            FROM tasks WHERE id = $1::uuid
            """,
            task_id,
        )
    if not row:
        raise HTTPException(status_code=404, detail="not found")
    parent = row["incident_id"]
    created = row["created_at"]
    return {
        "id": str(row["id"]),
        "incident_id": str(parent) if parent else None,
        "title": row["title"],
        "status": row["status"],
        "created_at": created.isoformat() if created else None,
    }
@router.patch("/{task_id}")
async def patch_task_api(
    task_id: UUID,
    body: TaskPatch,
    pool: asyncpg.Pool | None = Depends(get_pool),
):
    """Partially update a task's title and/or status.

    Requires at least one field (400 otherwise); NULL payload fields keep
    the stored value via COALESCE. 404 when the task is missing, 503 when
    the DB pool is disabled.
    """
    if pool is None:
        raise HTTPException(status_code=503, detail="database disabled")
    if (body.title, body.status) == (None, None):
        raise HTTPException(status_code=400, detail="no fields to update")
    new_title = body.title.strip() if body.title is not None else None
    async with pool.acquire() as conn:
        row = await conn.fetchrow(
            """
            UPDATE tasks SET
                title = COALESCE($2, title),
                status = COALESCE($3, status)
            WHERE id = $1::uuid
            RETURNING id, incident_id, title, status, created_at
            """,
            task_id,
            new_title,
            body.status,
        )
    if not row:
        raise HTTPException(status_code=404, detail="not found")
    parent = row["incident_id"]
    created = row["created_at"]
    return {
        "id": str(row["id"]),
        "incident_id": str(parent) if parent else None,
        "title": row["title"],
        "status": row["status"],
        "created_at": created.isoformat() if created else None,
    }
@ui_router.get("/", response_class=HTMLResponse)
async def tasks_ui_home(request: Request):
pool = get_pool(request)

View File

@ -3,6 +3,7 @@
from fastapi import Request
from onguard24.config import Settings
from onguard24.grafana_sources import iter_grafana_sources
from onguard24.integrations import forgejo_api, grafana_api
from onguard24.vaultcheck import ping as vault_ping
@ -31,31 +32,46 @@ async def build(request: Request) -> dict:
else:
out["vault"] = "disabled"
gu = settings.grafana_url.strip()
if not gu:
sources = iter_grafana_sources(settings)
if not sources:
out["grafana"] = "disabled"
elif settings.grafana_service_account_token.strip():
ok, err = await grafana_api.ping(gu, settings.grafana_service_account_token)
if ok:
user, _ = await grafana_api.get_signed_in_user(gu, settings.grafana_service_account_token)
entry: dict = {"status": "ok", "url": gu, "api": "authenticated"}
if user:
login = user.get("login") or user.get("email")
if login:
entry["service_account_login"] = login
out["grafana"] = entry
else:
out["grafana"] = {"status": "error", "detail": err, "url": gu}
else:
live_ok, live_err = await grafana_api.health_live(gu)
if live_ok:
out["grafana"] = {
"status": "reachable",
"url": gu,
"detail": "задай GRAFANA_SERVICE_ACCOUNT_TOKEN для вызовов API",
}
instances: list[dict] = []
for src in sources:
entry: dict = {"slug": src.slug, "url": src.api_url}
if src.api_token.strip():
ok, err = await grafana_api.ping(src.api_url, src.api_token)
if ok:
user, _ = await grafana_api.get_signed_in_user(src.api_url, src.api_token)
entry["status"] = "ok"
entry["api"] = "authenticated"
if user:
login = user.get("login") or user.get("email")
if login:
entry["service_account_login"] = login
else:
entry["status"] = "error"
entry["detail"] = err
else:
live_ok, live_err = await grafana_api.health_live(src.api_url)
if live_ok:
entry["status"] = "reachable"
entry["detail"] = "задай api_token в GRAFANA_SOURCES_JSON для проверки API"
else:
entry["status"] = "error"
entry["detail"] = live_err
instances.append(entry)
if all(i.get("status") == "ok" for i in instances):
agg = "ok"
elif any(i.get("status") == "error" for i in instances):
agg = "error"
else:
out["grafana"] = {"status": "error", "detail": live_err, "url": gu}
agg = "reachable"
out["grafana"] = {
"status": agg,
"instances": instances,
}
fj = settings.forgejo_url.strip()
if not fj: