2452 lines
55 KiB
JSON
2452 lines
55 KiB
JSON
|
|
{
|
||
|
|
"__inputs": [
|
||
|
|
{
|
||
|
|
"name": "DS_PROMETHEUS",
|
||
|
|
"label": "Prometheus",
|
||
|
|
"description": "",
|
||
|
|
"type": "datasource",
|
||
|
|
"pluginId": "prometheus",
|
||
|
|
"pluginName": "Prometheus"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "DS_LOKI",
|
||
|
|
"label": "Loki",
|
||
|
|
"description": "",
|
||
|
|
"type": "datasource",
|
||
|
|
"pluginId": "loki",
|
||
|
|
"pluginName": "Loki"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"__requires": [
|
||
|
|
{
|
||
|
|
"type": "panel",
|
||
|
|
"id": "alertlist",
|
||
|
|
"name": "Alert list",
|
||
|
|
"version": ""
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"type": "grafana",
|
||
|
|
"id": "grafana",
|
||
|
|
"name": "Grafana",
|
||
|
|
"version": "7.3.5"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"type": "panel",
|
||
|
|
"id": "graph",
|
||
|
|
"name": "Graph",
|
||
|
|
"version": ""
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"type": "panel",
|
||
|
|
"id": "logs",
|
||
|
|
"name": "Logs",
|
||
|
|
"version": ""
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"type": "datasource",
|
||
|
|
"id": "loki",
|
||
|
|
"name": "Loki",
|
||
|
|
"version": "1.0.0"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"type": "datasource",
|
||
|
|
"id": "prometheus",
|
||
|
|
"name": "Prometheus",
|
||
|
|
"version": "1.0.0"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"annotations": {
|
||
|
|
"list": [
|
||
|
|
{
|
||
|
|
"builtIn": 1,
|
||
|
|
"datasource": "-- Grafana --",
|
||
|
|
"enable": true,
|
||
|
|
"hide": false,
|
||
|
|
"iconColor": "rgba(0, 211, 255, 1)",
|
||
|
|
"limit": 100,
|
||
|
|
"name": "Annotations & Alerts",
|
||
|
|
"showIn": 0,
|
||
|
|
"type": "dashboard"
|
||
|
|
}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"description": "This dashboard can be used to detect issues on the Loki stack, when deployed in Kubernetes. Shows: some error metrics published by Promtail/Loki. Error and warning logs emitted by Promtail/Loki. Memory and CPU usage of Promtail/Loki compared against the Kubernetes memory/cpu limits and requests.",
|
||
|
|
"editable": true,
|
||
|
|
"gnetId": 14055,
|
||
|
|
"graphTooltip": 0,
|
||
|
|
"id": null,
|
||
|
|
"links": [],
|
||
|
|
"panels": [
|
||
|
|
{
|
||
|
|
"dashboardFilter": "",
|
||
|
|
"dashboardTags": [],
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {}
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"folderId": null,
|
||
|
|
"gridPos": {
|
||
|
|
"h": 6,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 0
|
||
|
|
},
|
||
|
|
"id": 987,
|
||
|
|
"limit": "100",
|
||
|
|
"nameFilter": "",
|
||
|
|
"onlyAlertsOnDashboard": true,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"show": "current",
|
||
|
|
"sortOrder": 3,
|
||
|
|
"stateFilter": [],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Summary of alerts status",
|
||
|
|
"type": "alertlist"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"collapsed": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"gridPos": {
|
||
|
|
"h": 1,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 6
|
||
|
|
},
|
||
|
|
"id": 435,
|
||
|
|
"panels": [],
|
||
|
|
"title": "Problems in the Loki Stack",
|
||
|
|
"type": "row"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alert": {
|
||
|
|
"alertRuleTags": {},
|
||
|
|
"conditions": [
|
||
|
|
{
|
||
|
|
"evaluator": {
|
||
|
|
"params": [
|
||
|
|
0
|
||
|
|
],
|
||
|
|
"type": "gt"
|
||
|
|
},
|
||
|
|
"operator": {
|
||
|
|
"type": "and"
|
||
|
|
},
|
||
|
|
"query": {
|
||
|
|
"params": [
|
||
|
|
"A",
|
||
|
|
"5m",
|
||
|
|
"now"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"reducer": {
|
||
|
|
"params": [],
|
||
|
|
"type": "avg"
|
||
|
|
},
|
||
|
|
"type": "query"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"executionErrorState": "keep_state",
|
||
|
|
"for": "5m",
|
||
|
|
"frequency": "1m",
|
||
|
|
"handler": 1,
|
||
|
|
"name": "Loki has emitted error/warning messages in the last 5m",
|
||
|
|
"noDataState": "no_data",
|
||
|
|
"notifications": []
|
||
|
|
},
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": true,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"description": "Total number of messages logged by Loki itself",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {},
|
||
|
|
"links": []
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 1,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"gridPos": {
|
||
|
|
"h": 5,
|
||
|
|
"w": 9,
|
||
|
|
"x": 0,
|
||
|
|
"y": 7
|
||
|
|
},
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 10,
|
||
|
|
"interval": "1m",
|
||
|
|
"legend": {
|
||
|
|
"avg": false,
|
||
|
|
"current": false,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"show": true,
|
||
|
|
"total": false,
|
||
|
|
"values": false
|
||
|
|
},
|
||
|
|
"lines": false,
|
||
|
|
"linewidth": 1,
|
||
|
|
"nullPointMode": "null",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 2,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"seriesOverrides": [
|
||
|
|
{
|
||
|
|
"alias": "debug",
|
||
|
|
"color": "#C0D8FF"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alias": "info",
|
||
|
|
"color": "#5794F2"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alias": "warn",
|
||
|
|
"color": "#FF9830"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alias": "error",
|
||
|
|
"color": "#C4162A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": true,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "sum(rate(log_messages_total{app=\"loki\",level=~\"error|warn\"}[1m])) by (level)\n",
|
||
|
|
"format": "time_series",
|
||
|
|
"hide": false,
|
||
|
|
"interval": "",
|
||
|
|
"intervalFactor": 1,
|
||
|
|
"legendFormat": "{{level}}",
|
||
|
|
"refId": "A"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"expr": "log_messages_total",
|
||
|
|
"format": "time_series",
|
||
|
|
"hide": true,
|
||
|
|
"interval": "",
|
||
|
|
"intervalFactor": 1,
|
||
|
|
"legendFormat": "",
|
||
|
|
"refId": "B"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [
|
||
|
|
{
|
||
|
|
"colorMode": "critical",
|
||
|
|
"fill": true,
|
||
|
|
"line": true,
|
||
|
|
"op": "gt",
|
||
|
|
"value": 0
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Number of error/warning messages logged by Loki itself",
|
||
|
|
"tooltip": {
|
||
|
|
"shared": true,
|
||
|
|
"sort": 0,
|
||
|
|
"value_type": "individual"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"format": "none",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": true
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"datasource": "Loki",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {}
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"gridPos": {
|
||
|
|
"h": 5,
|
||
|
|
"w": 15,
|
||
|
|
"x": 9,
|
||
|
|
"y": 7
|
||
|
|
},
|
||
|
|
"id": 511,
|
||
|
|
"options": {
|
||
|
|
"showLabels": false,
|
||
|
|
"showTime": false,
|
||
|
|
"sortOrder": "Descending",
|
||
|
|
"wrapLogMessage": false
|
||
|
|
},
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "{app=\"loki\"} | logfmt | level=\"warn\" or level=\"error\"",
|
||
|
|
"legendFormat": "",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Error/warning messages logged by Loki itself",
|
||
|
|
"type": "logs"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alert": {
|
||
|
|
"alertRuleTags": {},
|
||
|
|
"conditions": [
|
||
|
|
{
|
||
|
|
"evaluator": {
|
||
|
|
"params": [
|
||
|
|
0
|
||
|
|
],
|
||
|
|
"type": "gt"
|
||
|
|
},
|
||
|
|
"operator": {
|
||
|
|
"type": "and"
|
||
|
|
},
|
||
|
|
"query": {
|
||
|
|
"params": [
|
||
|
|
"A",
|
||
|
|
"5m",
|
||
|
|
"now"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"reducer": {
|
||
|
|
"params": [],
|
||
|
|
"type": "avg"
|
||
|
|
},
|
||
|
|
"type": "query"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"executionErrorState": "keep_state",
|
||
|
|
"for": "5m",
|
||
|
|
"frequency": "1m",
|
||
|
|
"handler": 1,
|
||
|
|
"name": "Promtail has emitted error/warning messages in the last 5m",
|
||
|
|
"noDataState": "no_data",
|
||
|
|
"notifications": []
|
||
|
|
},
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": true,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"description": "Total number of messages logged by Promtail",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {},
|
||
|
|
"links": []
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 1,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"gridPos": {
|
||
|
|
"h": 5,
|
||
|
|
"w": 9,
|
||
|
|
"x": 0,
|
||
|
|
"y": 12
|
||
|
|
},
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 106,
|
||
|
|
"interval": "1m",
|
||
|
|
"legend": {
|
||
|
|
"avg": false,
|
||
|
|
"current": false,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"show": true,
|
||
|
|
"total": false,
|
||
|
|
"values": false
|
||
|
|
},
|
||
|
|
"lines": false,
|
||
|
|
"linewidth": 1,
|
||
|
|
"nullPointMode": "null",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 2,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"seriesOverrides": [
|
||
|
|
{
|
||
|
|
"alias": "debug",
|
||
|
|
"color": "#C0D8FF"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alias": "info",
|
||
|
|
"color": "#5794F2"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alias": "warn",
|
||
|
|
"color": "#FF9830"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alias": "error",
|
||
|
|
"color": "#C4162A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": true,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "sum(rate(log_messages_total{app_kubernetes_io_name=\"promtail\",level=~\"error|warn\"}[1m])) by (level)\n",
|
||
|
|
"format": "time_series",
|
||
|
|
"hide": false,
|
||
|
|
"interval": "",
|
||
|
|
"intervalFactor": 1,
|
||
|
|
"legendFormat": "{{level}}",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [
|
||
|
|
{
|
||
|
|
"colorMode": "critical",
|
||
|
|
"fill": true,
|
||
|
|
"line": true,
|
||
|
|
"op": "gt",
|
||
|
|
"value": 0
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Error/warning messages logged by Promtail",
|
||
|
|
"tooltip": {
|
||
|
|
"shared": true,
|
||
|
|
"sort": 0,
|
||
|
|
"value_type": "individual"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"format": "none",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": true
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"datasource": "Loki",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {}
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"gridPos": {
|
||
|
|
"h": 5,
|
||
|
|
"w": 15,
|
||
|
|
"x": 9,
|
||
|
|
"y": 12
|
||
|
|
},
|
||
|
|
"id": 586,
|
||
|
|
"options": {
|
||
|
|
"showLabels": false,
|
||
|
|
"showTime": false,
|
||
|
|
"sortOrder": "Descending",
|
||
|
|
"wrapLogMessage": false
|
||
|
|
},
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "{app=\"promtail\"} | logfmt | level=\"warn\" or level=\"error\"",
|
||
|
|
"legendFormat": "",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Error/warning messages logged by Promtail",
|
||
|
|
"type": "logs"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alert": {
|
||
|
|
"alertRuleTags": {},
|
||
|
|
"conditions": [
|
||
|
|
{
|
||
|
|
"evaluator": {
|
||
|
|
"params": [
|
||
|
|
0.01
|
||
|
|
],
|
||
|
|
"type": "gt"
|
||
|
|
},
|
||
|
|
"operator": {
|
||
|
|
"type": "and"
|
||
|
|
},
|
||
|
|
"query": {
|
||
|
|
"params": [
|
||
|
|
"A",
|
||
|
|
"5m",
|
||
|
|
"now"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"reducer": {
|
||
|
|
"params": [],
|
||
|
|
"type": "max"
|
||
|
|
},
|
||
|
|
"type": "query"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"executionErrorState": "keep_state",
|
||
|
|
"for": "5m",
|
||
|
|
"frequency": "1m",
|
||
|
|
"handler": 1,
|
||
|
|
"name": "Loki distributor has failed to send batches to ingesters",
|
||
|
|
"noDataState": "no_data",
|
||
|
|
"notifications": []
|
||
|
|
},
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": false,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {}
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 1,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"gridPos": {
|
||
|
|
"h": 5,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 17
|
||
|
|
},
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 676,
|
||
|
|
"interval": "1m",
|
||
|
|
"legend": {
|
||
|
|
"avg": false,
|
||
|
|
"current": false,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"show": true,
|
||
|
|
"total": false,
|
||
|
|
"values": false
|
||
|
|
},
|
||
|
|
"lines": true,
|
||
|
|
"linewidth": 1,
|
||
|
|
"nullPointMode": "null",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 2,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"seriesOverrides": [],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": false,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "rate(loki_distributor_ingester_append_failures_total[1m])*60",
|
||
|
|
"instant": false,
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [
|
||
|
|
{
|
||
|
|
"colorMode": "critical",
|
||
|
|
"fill": true,
|
||
|
|
"line": true,
|
||
|
|
"op": "gt",
|
||
|
|
"value": 0.01
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Loki (distributor) - failed batch appends sent to ingesters",
|
||
|
|
"tooltip": {
|
||
|
|
"shared": true,
|
||
|
|
"sort": 0,
|
||
|
|
"value_type": "individual"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": true
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alert": {
|
||
|
|
"alertRuleTags": {},
|
||
|
|
"conditions": [
|
||
|
|
{
|
||
|
|
"evaluator": {
|
||
|
|
"params": [
|
||
|
|
0.01
|
||
|
|
],
|
||
|
|
"type": "gt"
|
||
|
|
},
|
||
|
|
"operator": {
|
||
|
|
"type": "and"
|
||
|
|
},
|
||
|
|
"query": {
|
||
|
|
"params": [
|
||
|
|
"A",
|
||
|
|
"5m",
|
||
|
|
"now"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"reducer": {
|
||
|
|
"params": [],
|
||
|
|
"type": "max"
|
||
|
|
},
|
||
|
|
"type": "query"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"executionErrorState": "keep_state",
|
||
|
|
"for": "5m",
|
||
|
|
"frequency": "1m",
|
||
|
|
"handler": 1,
|
||
|
|
"name": "Promtail has dropped logs in the last 5m",
|
||
|
|
"noDataState": "no_data",
|
||
|
|
"notifications": []
|
||
|
|
},
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": false,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"description": "Number of log entries dropped because failed to be sent to the Loki ingester after all retries.",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {}
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 1,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"gridPos": {
|
||
|
|
"h": 5,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 22
|
||
|
|
},
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 677,
|
||
|
|
"interval": "1m",
|
||
|
|
"legend": {
|
||
|
|
"avg": false,
|
||
|
|
"current": false,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"show": true,
|
||
|
|
"total": false,
|
||
|
|
"values": false
|
||
|
|
},
|
||
|
|
"lines": true,
|
||
|
|
"linewidth": 1,
|
||
|
|
"nullPointMode": "null",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 2,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"seriesOverrides": [],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": false,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "rate(promtail_dropped_entries_total[1m])*60",
|
||
|
|
"instant": false,
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "pod={{kubernetes_pod_name}}, instance={{instance}}",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [
|
||
|
|
{
|
||
|
|
"colorMode": "critical",
|
||
|
|
"fill": true,
|
||
|
|
"line": true,
|
||
|
|
"op": "gt",
|
||
|
|
"value": 0.01
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Promtail - dropped log entries",
|
||
|
|
"tooltip": {
|
||
|
|
"shared": true,
|
||
|
|
"sort": 0,
|
||
|
|
"value_type": "individual"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": true
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"collapsed": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"gridPos": {
|
||
|
|
"h": 1,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 27
|
||
|
|
},
|
||
|
|
"id": 47,
|
||
|
|
"panels": [],
|
||
|
|
"title": "Logging activity",
|
||
|
|
"type": "row"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alert": {
|
||
|
|
"alertRuleTags": {},
|
||
|
|
"conditions": [
|
||
|
|
{
|
||
|
|
"evaluator": {
|
||
|
|
"params": [
|
||
|
|
5000
|
||
|
|
],
|
||
|
|
"type": "gt"
|
||
|
|
},
|
||
|
|
"operator": {
|
||
|
|
"type": "and"
|
||
|
|
},
|
||
|
|
"query": {
|
||
|
|
"params": [
|
||
|
|
"A",
|
||
|
|
"1m",
|
||
|
|
"now"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"reducer": {
|
||
|
|
"params": [],
|
||
|
|
"type": "avg"
|
||
|
|
},
|
||
|
|
"type": "query"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"executionErrorState": "keep_state",
|
||
|
|
"for": "5m",
|
||
|
|
"frequency": "1m",
|
||
|
|
"handler": 1,
|
||
|
|
"name": "The number of streams in Loki is above 5000",
|
||
|
|
"noDataState": "no_data",
|
||
|
|
"notifications": []
|
||
|
|
},
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": false,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"description": "The total number of streams created per tenant.\nThis should not increase after startup.",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {},
|
||
|
|
"links": []
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 1,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"gridPos": {
|
||
|
|
"h": 6,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 28
|
||
|
|
},
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 78,
|
||
|
|
"legend": {
|
||
|
|
"avg": false,
|
||
|
|
"current": true,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"show": true,
|
||
|
|
"total": false,
|
||
|
|
"values": true
|
||
|
|
},
|
||
|
|
"lines": true,
|
||
|
|
"linewidth": 1,
|
||
|
|
"nullPointMode": "null",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 2,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"seriesOverrides": [],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": false,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "loki_ingester_memory_streams",
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "Streams",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [
|
||
|
|
{
|
||
|
|
"colorMode": "critical",
|
||
|
|
"fill": true,
|
||
|
|
"line": true,
|
||
|
|
"op": "gt",
|
||
|
|
"value": 5000
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Number of Streams in Loki",
|
||
|
|
"tooltip": {
|
||
|
|
"shared": true,
|
||
|
|
"sort": 0,
|
||
|
|
"value_type": "individual"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": true
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": false,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"description": "The total number of log entries received per tenant (not necessarily of lines, as an entry can have more than one line of text).",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {},
|
||
|
|
"links": []
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 1,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"gridPos": {
|
||
|
|
"h": 6,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 34
|
||
|
|
},
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 56,
|
||
|
|
"legend": {
|
||
|
|
"avg": false,
|
||
|
|
"current": false,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"show": true,
|
||
|
|
"total": false,
|
||
|
|
"values": false
|
||
|
|
},
|
||
|
|
"lines": true,
|
||
|
|
"linewidth": 1,
|
||
|
|
"nullPointMode": "null",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 2,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"seriesOverrides": [],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": false,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "rate(loki_distributor_lines_received_total[1m])",
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Loki (distributor) - log entries received per second",
|
||
|
|
"tooltip": {
|
||
|
|
"shared": true,
|
||
|
|
"sort": 0,
|
||
|
|
"value_type": "individual"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": true
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alert": {
|
||
|
|
"alertRuleTags": {},
|
||
|
|
"conditions": [
|
||
|
|
{
|
||
|
|
"evaluator": {
|
||
|
|
"params": [
|
||
|
|
1000000
|
||
|
|
],
|
||
|
|
"type": "gt"
|
||
|
|
},
|
||
|
|
"operator": {
|
||
|
|
"type": "and"
|
||
|
|
},
|
||
|
|
"query": {
|
||
|
|
"params": [
|
||
|
|
"A",
|
||
|
|
"5m",
|
||
|
|
"now"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"reducer": {
|
||
|
|
"params": [],
|
||
|
|
"type": "avg"
|
||
|
|
},
|
||
|
|
"type": "query"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"executionErrorState": "keep_state",
|
||
|
|
"for": "1m",
|
||
|
|
"frequency": "1m",
|
||
|
|
"handler": 1,
|
||
|
|
"name": "Quantity of data received by the Loki ingester is suspiciously high",
|
||
|
|
"noDataState": "no_data",
|
||
|
|
"notifications": []
|
||
|
|
},
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": false,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"description": "The total number of uncompressed bytes received per tenant.\n",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {},
|
||
|
|
"links": []
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 1,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"gridPos": {
|
||
|
|
"h": 6,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 40
|
||
|
|
},
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 137,
|
||
|
|
"legend": {
|
||
|
|
"avg": false,
|
||
|
|
"current": false,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"show": true,
|
||
|
|
"total": false,
|
||
|
|
"values": false
|
||
|
|
},
|
||
|
|
"lines": true,
|
||
|
|
"linewidth": 1,
|
||
|
|
"nullPointMode": "null",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 2,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"seriesOverrides": [],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": false,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "sum(rate(loki_distributor_bytes_received_total[1m]))",
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [
|
||
|
|
{
|
||
|
|
"colorMode": "critical",
|
||
|
|
"fill": true,
|
||
|
|
"line": true,
|
||
|
|
"op": "gt",
|
||
|
|
"value": 1000000
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Bytes received in ingester per second",
|
||
|
|
"tooltip": {
|
||
|
|
"shared": true,
|
||
|
|
"sort": 0,
|
||
|
|
"value_type": "individual"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"format": "decbytes",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": true
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"collapsed": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"gridPos": {
|
||
|
|
"h": 1,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 46
|
||
|
|
},
|
||
|
|
"id": 18,
|
||
|
|
"panels": [],
|
||
|
|
"title": "Memory usage",
|
||
|
|
"type": "row"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alert": {
|
||
|
|
"alertRuleTags": {},
|
||
|
|
"conditions": [
|
||
|
|
{
|
||
|
|
"evaluator": {
|
||
|
|
"params": [
|
||
|
|
80
|
||
|
|
],
|
||
|
|
"type": "gt"
|
||
|
|
},
|
||
|
|
"operator": {
|
||
|
|
"type": "and"
|
||
|
|
},
|
||
|
|
"query": {
|
||
|
|
"params": [
|
||
|
|
"A",
|
||
|
|
"5m",
|
||
|
|
"now"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"reducer": {
|
||
|
|
"params": [],
|
||
|
|
"type": "avg"
|
||
|
|
},
|
||
|
|
"type": "query"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"executionErrorState": "keep_state",
|
||
|
|
"for": "1m",
|
||
|
|
"frequency": "1m",
|
||
|
|
"handler": 1,
|
||
|
|
"name": "Loki memory usage is above 80% of the defined limit",
|
||
|
|
"noDataState": "no_data",
|
||
|
|
"notifications": []
|
||
|
|
},
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": false,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"decimals": 2,
|
||
|
|
"description": "Percentage of actual usage over configured limit",
|
||
|
|
"editable": true,
|
||
|
|
"error": false,
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {},
|
||
|
|
"links": []
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 0,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"grid": {},
|
||
|
|
"gridPos": {
|
||
|
|
"h": 6,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 47
|
||
|
|
},
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 247,
|
||
|
|
"isNew": true,
|
||
|
|
"legend": {
|
||
|
|
"alignAsTable": false,
|
||
|
|
"avg": false,
|
||
|
|
"current": false,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"rightSide": false,
|
||
|
|
"show": true,
|
||
|
|
"sideWidth": 200,
|
||
|
|
"sort": "current",
|
||
|
|
"sortDesc": true,
|
||
|
|
"total": false,
|
||
|
|
"values": false
|
||
|
|
},
|
||
|
|
"lines": true,
|
||
|
|
"linewidth": 2,
|
||
|
|
"links": [],
|
||
|
|
"nullPointMode": "connected",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 5,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"seriesOverrides": [],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": false,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "100 * max by(pod,container) (container_memory_working_set_bytes{container!=\"\",container!=\"POD\",pod=~\"loki.*\"})\n/\non (pod,container) group_left kube_pod_container_resource_limits_memory_bytes{pod=~\"loki.*\"}",
|
||
|
|
"hide": false,
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "Usage (pod={{pod}}, container={{container}}, name={{name}})",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [
|
||
|
|
{
|
||
|
|
"colorMode": "critical",
|
||
|
|
"fill": true,
|
||
|
|
"line": true,
|
||
|
|
"op": "gt",
|
||
|
|
"value": 80
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Loki memory usage (% of Kubernetes memory limit)",
|
||
|
|
"tooltip": {
|
||
|
|
"msResolution": false,
|
||
|
|
"shared": true,
|
||
|
|
"sort": 2,
|
||
|
|
"value_type": "cumulative"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:241",
|
||
|
|
"format": "percent",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": "100",
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:242",
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": false
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": false,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"decimals": 2,
|
||
|
|
"editable": true,
|
||
|
|
"error": false,
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {},
|
||
|
|
"links": []
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 0,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"grid": {},
|
||
|
|
"gridPos": {
|
||
|
|
"h": 6,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 53
|
||
|
|
},
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 25,
|
||
|
|
"isNew": true,
|
||
|
|
"legend": {
|
||
|
|
"alignAsTable": false,
|
||
|
|
"avg": false,
|
||
|
|
"current": false,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"rightSide": false,
|
||
|
|
"show": true,
|
||
|
|
"sideWidth": 200,
|
||
|
|
"sort": "current",
|
||
|
|
"sortDesc": true,
|
||
|
|
"total": false,
|
||
|
|
"values": false
|
||
|
|
},
|
||
|
|
"lines": true,
|
||
|
|
"linewidth": 2,
|
||
|
|
"links": [],
|
||
|
|
"nullPointMode": "connected",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 5,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"repeat": null,
|
||
|
|
"seriesOverrides": [
|
||
|
|
{
|
||
|
|
"alias": "Limit",
|
||
|
|
"color": "rgba(255, 255, 255, 0.56)",
|
||
|
|
"dashes": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alias": "Request",
|
||
|
|
"color": "rgba(255, 255, 255, 0.78)"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alias": "Usage",
|
||
|
|
"fill": 1
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": false,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "container_memory_working_set_bytes{container!=\"\",container!=\"POD\",pod=~\"loki.*\"}",
|
||
|
|
"interval": "10s",
|
||
|
|
"intervalFactor": 1,
|
||
|
|
"legendFormat": "Usage (pod={{pod}}, container={{container}})",
|
||
|
|
"metric": "container_memory_usage:sort_desc",
|
||
|
|
"refId": "A",
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"expr": "min(kube_pod_container_resource_limits_memory_bytes{pod=~\"loki.*\"})",
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "Limit",
|
||
|
|
"refId": "B"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"expr": "min(kube_pod_container_resource_requests_memory_bytes{pod=~\"loki.*\"})",
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "Request",
|
||
|
|
"refId": "C"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Loki memory usage vs Kubernetes limit and request",
|
||
|
|
"tooltip": {
|
||
|
|
"msResolution": false,
|
||
|
|
"shared": true,
|
||
|
|
"sort": 2,
|
||
|
|
"value_type": "cumulative"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:581",
|
||
|
|
"format": "bytes",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:582",
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": false
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alert": {
|
||
|
|
"alertRuleTags": {},
|
||
|
|
"conditions": [
|
||
|
|
{
|
||
|
|
"evaluator": {
|
||
|
|
"params": [
|
||
|
|
80
|
||
|
|
],
|
||
|
|
"type": "gt"
|
||
|
|
},
|
||
|
|
"operator": {
|
||
|
|
"type": "and"
|
||
|
|
},
|
||
|
|
"query": {
|
||
|
|
"params": [
|
||
|
|
"A",
|
||
|
|
"5m",
|
||
|
|
"now"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"reducer": {
|
||
|
|
"params": [],
|
||
|
|
"type": "avg"
|
||
|
|
},
|
||
|
|
"type": "query"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"executionErrorState": "keep_state",
|
||
|
|
"for": "5m",
|
||
|
|
"frequency": "1m",
|
||
|
|
"handler": 1,
|
||
|
|
"name": "Promtail memory usage is above 80% of the defined limit",
|
||
|
|
"noDataState": "no_data",
|
||
|
|
"notifications": []
|
||
|
|
},
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": false,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"decimals": 2,
|
||
|
|
"description": "Percentage of actual usage over configured limit",
|
||
|
|
"editable": true,
|
||
|
|
"error": false,
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {},
|
||
|
|
"links": []
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 0,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"grid": {},
|
||
|
|
"gridPos": {
|
||
|
|
"h": 6,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 59
|
||
|
|
},
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 772,
|
||
|
|
"isNew": true,
|
||
|
|
"legend": {
|
||
|
|
"alignAsTable": false,
|
||
|
|
"avg": false,
|
||
|
|
"current": false,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"rightSide": false,
|
||
|
|
"show": true,
|
||
|
|
"sideWidth": 200,
|
||
|
|
"sort": "current",
|
||
|
|
"sortDesc": true,
|
||
|
|
"total": false,
|
||
|
|
"values": false
|
||
|
|
},
|
||
|
|
"lines": true,
|
||
|
|
"linewidth": 2,
|
||
|
|
"links": [],
|
||
|
|
"nullPointMode": "connected",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 5,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"seriesOverrides": [],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": false,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "100* max by(pod,container) (container_memory_working_set_bytes{container!=\"\",container!=\"POD\",pod=~\"promtail.*\"})\n/\non (pod,container) (kube_pod_container_resource_limits_memory_bytes{pod=~\"promtail.*\"})",
|
||
|
|
"hide": false,
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [
|
||
|
|
{
|
||
|
|
"colorMode": "critical",
|
||
|
|
"fill": true,
|
||
|
|
"line": true,
|
||
|
|
"op": "gt",
|
||
|
|
"value": 80
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Promtail memory usage (% of Kubernetes memory limit)",
|
||
|
|
"tooltip": {
|
||
|
|
"msResolution": false,
|
||
|
|
"shared": true,
|
||
|
|
"sort": 2,
|
||
|
|
"value_type": "cumulative"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:335",
|
||
|
|
"format": "percent",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": "100",
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:336",
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": false
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": false,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"decimals": 2,
|
||
|
|
"editable": true,
|
||
|
|
"error": false,
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {},
|
||
|
|
"links": []
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 0,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"grid": {},
|
||
|
|
"gridPos": {
|
||
|
|
"h": 6,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 65
|
||
|
|
},
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 1103,
|
||
|
|
"isNew": true,
|
||
|
|
"legend": {
|
||
|
|
"alignAsTable": false,
|
||
|
|
"avg": false,
|
||
|
|
"current": false,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"rightSide": false,
|
||
|
|
"show": true,
|
||
|
|
"sideWidth": 200,
|
||
|
|
"sort": "current",
|
||
|
|
"sortDesc": true,
|
||
|
|
"total": false,
|
||
|
|
"values": false
|
||
|
|
},
|
||
|
|
"lines": true,
|
||
|
|
"linewidth": 2,
|
||
|
|
"links": [],
|
||
|
|
"nullPointMode": "connected",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 5,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"seriesOverrides": [
|
||
|
|
{
|
||
|
|
"alias": "Limit",
|
||
|
|
"color": "rgba(255, 255, 255, 0.56)",
|
||
|
|
"dashes": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alias": "Request",
|
||
|
|
"color": "rgba(255, 255, 255, 0.78)"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alias": "Usage",
|
||
|
|
"fill": 1
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": false,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "container_memory_working_set_bytes{container!=\"\",container!=\"POD\",pod=~\"promtail.*\"}",
|
||
|
|
"interval": "10s",
|
||
|
|
"intervalFactor": 1,
|
||
|
|
"legendFormat": "Usage (pod={{pod}}, container={{container}})",
|
||
|
|
"metric": "container_memory_usage:sort_desc",
|
||
|
|
"refId": "A",
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"expr": "min(kube_pod_container_resource_limits_memory_bytes{pod=~\"promtail.*\"})",
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "Limit",
|
||
|
|
"refId": "B"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"expr": "min(kube_pod_container_resource_requests_memory_bytes{pod=~\"promtail.*\"})",
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "Request",
|
||
|
|
"refId": "C"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Promtail memory usage vs Kubernetes limit and request",
|
||
|
|
"tooltip": {
|
||
|
|
"msResolution": false,
|
||
|
|
"shared": true,
|
||
|
|
"sort": 2,
|
||
|
|
"value_type": "cumulative"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:581",
|
||
|
|
"format": "bytes",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:582",
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": false
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"collapsed": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"gridPos": {
|
||
|
|
"h": 1,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 71
|
||
|
|
},
|
||
|
|
"id": 8,
|
||
|
|
"panels": [],
|
||
|
|
"title": "CPU Usage",
|
||
|
|
"type": "row"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alert": {
|
||
|
|
"alertRuleTags": {},
|
||
|
|
"conditions": [
|
||
|
|
{
|
||
|
|
"evaluator": {
|
||
|
|
"params": [
|
||
|
|
80
|
||
|
|
],
|
||
|
|
"type": "gt"
|
||
|
|
},
|
||
|
|
"operator": {
|
||
|
|
"type": "and"
|
||
|
|
},
|
||
|
|
"query": {
|
||
|
|
"params": [
|
||
|
|
"A",
|
||
|
|
"5m",
|
||
|
|
"now"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"reducer": {
|
||
|
|
"params": [],
|
||
|
|
"type": "avg"
|
||
|
|
},
|
||
|
|
"type": "query"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"executionErrorState": "keep_state",
|
||
|
|
"for": "1m",
|
||
|
|
"frequency": "1m",
|
||
|
|
"handler": 1,
|
||
|
|
"name": "Loki CPU usage is above 80% of the defined limit",
|
||
|
|
"noDataState": "no_data",
|
||
|
|
"notifications": []
|
||
|
|
},
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": false,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"decimals": 3,
|
||
|
|
"description": "Percentage of actual CPU usage over configured limit",
|
||
|
|
"editable": true,
|
||
|
|
"error": false,
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {},
|
||
|
|
"links": []
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 0,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"grid": {},
|
||
|
|
"gridPos": {
|
||
|
|
"h": 6,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 72
|
||
|
|
},
|
||
|
|
"height": "",
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 347,
|
||
|
|
"isNew": true,
|
||
|
|
"legend": {
|
||
|
|
"alignAsTable": false,
|
||
|
|
"avg": false,
|
||
|
|
"current": false,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"rightSide": false,
|
||
|
|
"show": true,
|
||
|
|
"sort": null,
|
||
|
|
"sortDesc": null,
|
||
|
|
"total": false,
|
||
|
|
"values": false
|
||
|
|
},
|
||
|
|
"lines": true,
|
||
|
|
"linewidth": 2,
|
||
|
|
"links": [],
|
||
|
|
"nullPointMode": "connected",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 5,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"seriesOverrides": [],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": false,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "100* rate(container_cpu_usage_seconds_total{container!=\"POD\",container!=\"\",pod=~\"loki.*\"}[1m])\n/\non (pod,container) kube_pod_container_resource_limits_cpu_cores{container=\"loki\"}",
|
||
|
|
"interval": "10s",
|
||
|
|
"intervalFactor": 1,
|
||
|
|
"legendFormat": "",
|
||
|
|
"metric": "container_cpu",
|
||
|
|
"refId": "A",
|
||
|
|
"step": 10
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [
|
||
|
|
{
|
||
|
|
"colorMode": "critical",
|
||
|
|
"fill": true,
|
||
|
|
"line": true,
|
||
|
|
"op": "gt",
|
||
|
|
"value": 80
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Loki CPU usage (% of Kubernetes CPU limit)",
|
||
|
|
"tooltip": {
|
||
|
|
"msResolution": true,
|
||
|
|
"shared": true,
|
||
|
|
"sort": 2,
|
||
|
|
"value_type": "cumulative"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:413",
|
||
|
|
"format": "percent",
|
||
|
|
"label": "% of CPU limit",
|
||
|
|
"logBase": 1,
|
||
|
|
"max": "100",
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:414",
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": false
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": false,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"decimals": 3,
|
||
|
|
"description": "",
|
||
|
|
"editable": true,
|
||
|
|
"error": false,
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {},
|
||
|
|
"links": []
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 0,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"grid": {},
|
||
|
|
"gridPos": {
|
||
|
|
"h": 6,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 78
|
||
|
|
},
|
||
|
|
"height": "",
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 6,
|
||
|
|
"isNew": true,
|
||
|
|
"legend": {
|
||
|
|
"alignAsTable": false,
|
||
|
|
"avg": false,
|
||
|
|
"current": false,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"rightSide": false,
|
||
|
|
"show": true,
|
||
|
|
"sort": null,
|
||
|
|
"sortDesc": null,
|
||
|
|
"total": false,
|
||
|
|
"values": false
|
||
|
|
},
|
||
|
|
"lines": true,
|
||
|
|
"linewidth": 2,
|
||
|
|
"links": [],
|
||
|
|
"nullPointMode": "connected",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 5,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"repeat": null,
|
||
|
|
"seriesOverrides": [
|
||
|
|
{
|
||
|
|
"alias": "CPU Limit",
|
||
|
|
"color": "rgba(255, 255, 255, 0.46)",
|
||
|
|
"dashes": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alias": "CPU Request",
|
||
|
|
"color": "rgba(255, 255, 255, 0.54)"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alias": "Usage",
|
||
|
|
"fill": 1
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": false,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",pod=~\"loki.*\"}[1m])",
|
||
|
|
"interval": "10s",
|
||
|
|
"intervalFactor": 1,
|
||
|
|
"legendFormat": "CPU Usage (pod={{pod}}, container={{container}})",
|
||
|
|
"metric": "container_cpu",
|
||
|
|
"refId": "A",
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"expr": "min(kube_pod_container_resource_limits_cpu_cores{pod=~\"loki.*\"})",
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "CPU Limit",
|
||
|
|
"refId": "B"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"expr": "min(kube_pod_container_resource_requests_cpu_cores{pod=~\"loki.*\"})",
|
||
|
|
"hide": false,
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "CPU Request",
|
||
|
|
"refId": "C"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Loki CPU usage vs Kubernetes limit and request",
|
||
|
|
"tooltip": {
|
||
|
|
"msResolution": true,
|
||
|
|
"shared": true,
|
||
|
|
"sort": 2,
|
||
|
|
"value_type": "cumulative"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:666",
|
||
|
|
"format": "none",
|
||
|
|
"label": "cores",
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:667",
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": false
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alert": {
|
||
|
|
"alertRuleTags": {},
|
||
|
|
"conditions": [
|
||
|
|
{
|
||
|
|
"evaluator": {
|
||
|
|
"params": [
|
||
|
|
80
|
||
|
|
],
|
||
|
|
"type": "gt"
|
||
|
|
},
|
||
|
|
"operator": {
|
||
|
|
"type": "and"
|
||
|
|
},
|
||
|
|
"query": {
|
||
|
|
"params": [
|
||
|
|
"A",
|
||
|
|
"5m",
|
||
|
|
"now"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"reducer": {
|
||
|
|
"params": [],
|
||
|
|
"type": "avg"
|
||
|
|
},
|
||
|
|
"type": "query"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"executionErrorState": "keep_state",
|
||
|
|
"for": "5m",
|
||
|
|
"frequency": "1m",
|
||
|
|
"handler": 1,
|
||
|
|
"name": "Promtail CPU usage is above 80% of the defined limit",
|
||
|
|
"noDataState": "no_data",
|
||
|
|
"notifications": []
|
||
|
|
},
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": false,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"decimals": 3,
|
||
|
|
"description": "Percentage of actual CPU usage over configured limit",
|
||
|
|
"editable": true,
|
||
|
|
"error": false,
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {},
|
||
|
|
"links": []
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 0,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"grid": {},
|
||
|
|
"gridPos": {
|
||
|
|
"h": 6,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 84
|
||
|
|
},
|
||
|
|
"height": "",
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 910,
|
||
|
|
"isNew": true,
|
||
|
|
"legend": {
|
||
|
|
"alignAsTable": false,
|
||
|
|
"avg": false,
|
||
|
|
"current": false,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"rightSide": false,
|
||
|
|
"show": true,
|
||
|
|
"sort": null,
|
||
|
|
"sortDesc": null,
|
||
|
|
"total": false,
|
||
|
|
"values": false
|
||
|
|
},
|
||
|
|
"lines": true,
|
||
|
|
"linewidth": 2,
|
||
|
|
"links": [],
|
||
|
|
"nullPointMode": "connected",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 5,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"seriesOverrides": [],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": false,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "100 * rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",pod=~\"promtail.*\"}[1m])\n/\non (pod,container) kube_pod_container_resource_limits_cpu_cores{pod=~\"promtail.*\"}",
|
||
|
|
"interval": "10s",
|
||
|
|
"intervalFactor": 1,
|
||
|
|
"legendFormat": "",
|
||
|
|
"metric": "container_cpu",
|
||
|
|
"refId": "A",
|
||
|
|
"step": 10
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [
|
||
|
|
{
|
||
|
|
"colorMode": "critical",
|
||
|
|
"fill": true,
|
||
|
|
"line": true,
|
||
|
|
"op": "gt",
|
||
|
|
"value": 80
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Promtail CPU usage (% of Kubernetes CPU limit)",
|
||
|
|
"tooltip": {
|
||
|
|
"msResolution": true,
|
||
|
|
"shared": true,
|
||
|
|
"sort": 2,
|
||
|
|
"value_type": "cumulative"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:231",
|
||
|
|
"format": "percent",
|
||
|
|
"label": "% of CPU limit",
|
||
|
|
"logBase": 1,
|
||
|
|
"max": "100",
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:232",
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": false
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"aliasColors": {},
|
||
|
|
"bars": false,
|
||
|
|
"dashLength": 10,
|
||
|
|
"dashes": false,
|
||
|
|
"datasource": "Prometheus",
|
||
|
|
"decimals": 3,
|
||
|
|
"description": "",
|
||
|
|
"editable": true,
|
||
|
|
"error": false,
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"custom": {},
|
||
|
|
"links": []
|
||
|
|
},
|
||
|
|
"overrides": []
|
||
|
|
},
|
||
|
|
"fill": 0,
|
||
|
|
"fillGradient": 0,
|
||
|
|
"grid": {},
|
||
|
|
"gridPos": {
|
||
|
|
"h": 6,
|
||
|
|
"w": 24,
|
||
|
|
"x": 0,
|
||
|
|
"y": 90
|
||
|
|
},
|
||
|
|
"height": "",
|
||
|
|
"hiddenSeries": false,
|
||
|
|
"id": 1104,
|
||
|
|
"isNew": true,
|
||
|
|
"legend": {
|
||
|
|
"alignAsTable": false,
|
||
|
|
"avg": false,
|
||
|
|
"current": false,
|
||
|
|
"max": false,
|
||
|
|
"min": false,
|
||
|
|
"rightSide": false,
|
||
|
|
"show": true,
|
||
|
|
"sort": null,
|
||
|
|
"sortDesc": null,
|
||
|
|
"total": false,
|
||
|
|
"values": false
|
||
|
|
},
|
||
|
|
"lines": true,
|
||
|
|
"linewidth": 2,
|
||
|
|
"links": [],
|
||
|
|
"nullPointMode": "connected",
|
||
|
|
"options": {
|
||
|
|
"alertThreshold": true
|
||
|
|
},
|
||
|
|
"percentage": false,
|
||
|
|
"pluginVersion": "7.3.5",
|
||
|
|
"pointradius": 5,
|
||
|
|
"points": false,
|
||
|
|
"renderer": "flot",
|
||
|
|
"seriesOverrides": [
|
||
|
|
{
|
||
|
|
"alias": "CPU Limit",
|
||
|
|
"color": "rgba(255, 255, 255, 0.46)",
|
||
|
|
"dashes": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alias": "CPU Request",
|
||
|
|
"color": "rgba(255, 255, 255, 0.54)"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"alias": "Usage",
|
||
|
|
"fill": 1
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"spaceLength": 10,
|
||
|
|
"stack": false,
|
||
|
|
"steppedLine": false,
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"expr": "rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",pod=~\"promtail.*\"}[1m])",
|
||
|
|
"interval": "10s",
|
||
|
|
"intervalFactor": 1,
|
||
|
|
"legendFormat": "CPU Usage (pod={{pod}}, container={{container}})",
|
||
|
|
"metric": "container_cpu",
|
||
|
|
"refId": "A",
|
||
|
|
"step": 10
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"expr": "min(kube_pod_container_resource_limits_cpu_cores{pod=~\"promtail.*\"})",
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "CPU Limit",
|
||
|
|
"refId": "B"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"expr": "min(kube_pod_container_resource_requests_cpu_cores{pod=~\"promtail.*\"})",
|
||
|
|
"hide": false,
|
||
|
|
"interval": "",
|
||
|
|
"legendFormat": "CPU Request",
|
||
|
|
"refId": "C"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"thresholds": [],
|
||
|
|
"timeFrom": null,
|
||
|
|
"timeRegions": [],
|
||
|
|
"timeShift": null,
|
||
|
|
"title": "Promtail CPU usage vs Kubernetes limit and request",
|
||
|
|
"tooltip": {
|
||
|
|
"msResolution": true,
|
||
|
|
"shared": true,
|
||
|
|
"sort": 2,
|
||
|
|
"value_type": "cumulative"
|
||
|
|
},
|
||
|
|
"type": "graph",
|
||
|
|
"xaxis": {
|
||
|
|
"buckets": null,
|
||
|
|
"mode": "time",
|
||
|
|
"name": null,
|
||
|
|
"show": true,
|
||
|
|
"values": []
|
||
|
|
},
|
||
|
|
"yaxes": [
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:666",
|
||
|
|
"format": "none",
|
||
|
|
"label": "cores",
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": "0",
|
||
|
|
"show": true
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"$$hashKey": "object:667",
|
||
|
|
"format": "short",
|
||
|
|
"label": null,
|
||
|
|
"logBase": 1,
|
||
|
|
"max": null,
|
||
|
|
"min": null,
|
||
|
|
"show": false
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"yaxis": {
|
||
|
|
"align": false,
|
||
|
|
"alignLevel": null
|
||
|
|
}
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"refresh": "1m",
|
||
|
|
"schemaVersion": 26,
|
||
|
|
"style": "dark",
|
||
|
|
"tags": [
|
||
|
|
"tools",
|
||
|
|
"loki"
|
||
|
|
],
|
||
|
|
"templating": {
|
||
|
|
"list": []
|
||
|
|
},
|
||
|
|
"time": {
|
||
|
|
"from": "now-24h",
|
||
|
|
"to": "now"
|
||
|
|
},
|
||
|
|
"timepicker": {
|
||
|
|
"refresh_intervals": [
|
||
|
|
"10s",
|
||
|
|
"30s",
|
||
|
|
"1m",
|
||
|
|
"5m",
|
||
|
|
"15m",
|
||
|
|
"30m",
|
||
|
|
"1h",
|
||
|
|
"2h",
|
||
|
|
"1d"
|
||
|
|
],
|
||
|
|
"time_options": [
|
||
|
|
"5m",
|
||
|
|
"15m",
|
||
|
|
"1h",
|
||
|
|
"6h",
|
||
|
|
"12h",
|
||
|
|
"24h",
|
||
|
|
"2d",
|
||
|
|
"7d",
|
||
|
|
"30d"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"timezone": "",
|
||
|
|
"title": "Loki stack monitoring (Promtail, Loki)",
|
||
|
|
"uid": "loki_stack_monitoring_quortex",
|
||
|
|
"version": 5
|
||
|
|
}
|