{ "__inputs": [ { "name": "DS_PROMETHEUS", "label": "Prometheus", "description": "", "type": "datasource", "pluginId": "prometheus", "pluginName": "Prometheus" }, { "name": "DS_LOKI", "label": "Loki", "description": "", "type": "datasource", "pluginId": "loki", "pluginName": "Loki" } ], "__requires": [ { "type": "panel", "id": "alertlist", "name": "Alert list", "version": "" }, { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "7.3.5" }, { "type": "panel", "id": "graph", "name": "Graph", "version": "" }, { "type": "panel", "id": "logs", "name": "Logs", "version": "" }, { "type": "datasource", "id": "loki", "name": "Loki", "version": "1.0.0" }, { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" } ], "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": false, "iconColor": "rgba(0, 211, 255, 1)", "limit": 100, "name": "Annotations & Alerts", "showIn": 0, "type": "dashboard" } ] }, "description": "This dashboard can be used to detect issues on the Loki stack, when deployed in Kubernetes. Shows: some error metrics published by Promtail/Loki. Error and warning logs emitted by Promtail/Loki. 
Memory and CPU usage of Promtail/Loki compared against the Kubernetes memory/cpu limits and requests.", "editable": true, "gnetId": 14055, "graphTooltip": 0, "id": null, "links": [], "panels": [ { "dashboardFilter": "", "dashboardTags": [], "datasource": "Prometheus", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "folderId": null, "gridPos": { "h": 6, "w": 24, "x": 0, "y": 0 }, "id": 987, "limit": "100", "nameFilter": "", "onlyAlertsOnDashboard": true, "pluginVersion": "7.3.5", "show": "current", "sortOrder": 3, "stateFilter": [], "timeFrom": null, "timeShift": null, "title": "Summary of alerts status", "type": "alertlist" }, { "collapsed": false, "datasource": "Prometheus", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 6 }, "id": 435, "panels": [], "title": "Problems in the Loki Stack", "type": "row" }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 0 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "keep_state", "for": "5m", "frequency": "1m", "handler": 1, "name": "Loki has emitted error/warning messages in the last 5m", "noDataState": "no_data", "notifications": [] }, "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "description": "Per-second rate of error and warning messages logged by Loki itself", "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 5, "w": 9, "x": 0, "y": 7 }, "hiddenSeries": false, "id": 10, "interval": "1m", "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": false, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.3.5", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "debug", 
"color": "#C0D8FF" }, { "alias": "info", "color": "#5794F2" }, { "alias": "warn", "color": "#FF9830" }, { "alias": "error", "color": "#C4162A" } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(rate(log_messages_total{app=\"loki\",level=~\"error|warn\"}[1m])) by (level)\n", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{level}}", "refId": "A" }, { "expr": "log_messages_total", "format": "time_series", "hide": true, "interval": "", "intervalFactor": 1, "legendFormat": "", "refId": "B" } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Number of error/warning messages logged by Loki itself", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "none", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "datasource": "Loki", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "gridPos": { "h": 5, "w": 15, "x": 9, "y": 7 }, "id": 511, "options": { "showLabels": false, "showTime": false, "sortOrder": "Descending", "wrapLogMessage": false }, "pluginVersion": "7.3.5", "targets": [ { "expr": "{app=\"loki\"} | logfmt | level=\"warn\" or level=\"error\"", "legendFormat": "", "refId": "A" } ], "timeFrom": null, "timeShift": null, "title": "Error/warning messages logged by Loki itself", "type": "logs" }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 0 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], 
"executionErrorState": "keep_state", "for": "5m", "frequency": "1m", "handler": 1, "name": "Promtail has emitted error/warning messages in the last 5m", "noDataState": "no_data", "notifications": [] }, "aliasColors": {}, "bars": true, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "description": "Total number of messages logged by Promtail", "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 5, "w": 9, "x": 0, "y": 12 }, "hiddenSeries": false, "id": 106, "interval": "1m", "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": false, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.3.5", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "debug", "color": "#C0D8FF" }, { "alias": "info", "color": "#5794F2" }, { "alias": "warn", "color": "#FF9830" }, { "alias": "error", "color": "#C4162A" } ], "spaceLength": 10, "stack": true, "steppedLine": false, "targets": [ { "expr": "sum(rate(log_messages_total{app_kubernetes_io_name=\"promtail\",level=~\"error|warn\"}[1m])) by (level)\n", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, "legendFormat": "{{level}}", "refId": "A" } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Error/warning messages logged by Promtail", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "none", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, 
"alignLevel": null } }, { "datasource": "Loki", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "gridPos": { "h": 5, "w": 15, "x": 9, "y": 12 }, "id": 586, "options": { "showLabels": false, "showTime": false, "sortOrder": "Descending", "wrapLogMessage": false }, "pluginVersion": "7.3.5", "targets": [ { "expr": "{app=\"promtail\"} | logfmt | level=\"warn\" or level=\"error\"", "legendFormat": "", "refId": "A" } ], "timeFrom": null, "timeShift": null, "title": "Error/warning messages logged by Promtail", "type": "logs" }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 0.01 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "max" }, "type": "query" } ], "executionErrorState": "keep_state", "for": "5m", "frequency": "1m", "handler": 1, "name": "Loki distributor has failed to send batches to ingesters", "noDataState": "no_data", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 5, "w": 24, "x": 0, "y": 17 }, "hiddenSeries": false, "id": 676, "interval": "1m", "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.3.5", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(loki_distributor_ingester_append_failures_total[1m])*60", "instant": false, "interval": "", "legendFormat": "", "refId": "A" } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 0.01 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, 
"title": "Loki (distributor) - failed batch appends sent to ingesters", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 0.01 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "max" }, "type": "query" } ], "executionErrorState": "keep_state", "for": "5m", "frequency": "1m", "handler": 1, "name": "Promtail has dropped logs in the last 5m", "noDataState": "no_data", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "description": "Number of log entries dropped because failed to be sent to the Loki ingester after all retries.", "fieldConfig": { "defaults": { "custom": {} }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 5, "w": 24, "x": 0, "y": 22 }, "hiddenSeries": false, "id": 677, "interval": "1m", "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.3.5", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(promtail_dropped_entries_total[1m])*60", "instant": false, "interval": "", "legendFormat": "pod={{kubernetes_pod_name}}, instance={{instance}}", "refId": "A" } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": 
"gt", "value": 0.01 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Promtail - dropped log entries", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "collapsed": false, "datasource": "Prometheus", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 27 }, "id": 47, "panels": [], "title": "Logging activity", "type": "row" }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 5000 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "1m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "keep_state", "for": "5m", "frequency": "1m", "handler": 1, "name": "The number of streams in Loki is above 5000", "noDataState": "no_data", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "description": "The total number of streams created per tenant.\nThis should not increase after startup.", "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 6, "w": 24, "x": 0, "y": 28 }, "hiddenSeries": false, "id": 78, "legend": { "avg": false, "current": true, "max": false, "min": false, "show": true, "total": false, "values": true }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.3.5", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "loki_ingester_memory_streams", 
"interval": "", "legendFormat": "Streams", "refId": "A" } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 5000 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Number of Streams in Loki", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "description": "The total number of log entries received per tenant (not necessarily of lines, as an entry can have more than one line of text).", "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 6, "w": 24, "x": 0, "y": 34 }, "hiddenSeries": false, "id": 56, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.3.5", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(loki_distributor_lines_received_total[1m])", "interval": "", "legendFormat": "", "refId": "A" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Loki (distributor) - log entries received per second", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", "label": 
null, "logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 1000000 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "keep_state", "for": "1m", "frequency": "1m", "handler": 1, "name": "Quantity of data received by the Loki ingester is suspiciously high", "noDataState": "no_data", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "description": "Rate of uncompressed bytes received by the Loki distributor, summed over all tenants.\n", "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 1, "fillGradient": 0, "gridPos": { "h": 6, "w": 24, "x": 0, "y": 40 }, "hiddenSeries": false, "id": 137, "legend": { "avg": false, "current": false, "max": false, "min": false, "show": true, "total": false, "values": false }, "lines": true, "linewidth": 1, "nullPointMode": "null", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.3.5", "pointradius": 2, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "sum(rate(loki_distributor_bytes_received_total[1m]))", "interval": "", "legendFormat": "", "refId": "A" } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 1000000 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Loki (distributor) - bytes received per second", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "decbytes", "label": null, 
"logBase": 1, "max": null, "min": "0", "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": true } ], "yaxis": { "align": false, "alignLevel": null } }, { "collapsed": false, "datasource": "Prometheus", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 46 }, "id": 18, "panels": [], "title": "Memory usage", "type": "row" }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 80 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "keep_state", "for": "1m", "frequency": "1m", "handler": 1, "name": "Loki memory usage is above 80% of the defined limit", "noDataState": "no_data", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 2, "description": "Percentage of actual usage over configured limit", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 0, "fillGradient": 0, "grid": {}, "gridPos": { "h": 6, "w": 24, "x": 0, "y": 47 }, "hiddenSeries": false, "id": 247, "isNew": true, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.3.5", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "100 * max by(pod,container) (container_memory_working_set_bytes{container!=\"\",container!=\"POD\",pod=~\"loki.*\"})\n/\non (pod,container) group_left kube_pod_container_resource_limits_memory_bytes{pod=~\"loki.*\"}", "hide": 
false, "interval": "", "legendFormat": "Usage (pod={{pod}}, container={{container}})", "refId": "A" } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 80 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Loki memory usage (% of Kubernetes memory limit)", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:241", "format": "percent", "label": null, "logBase": 1, "max": "100", "min": "0", "show": true }, { "$$hashKey": "object:242", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 2, "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 0, "fillGradient": 0, "grid": {}, "gridPos": { "h": 6, "w": 24, "x": 0, "y": 53 }, "hiddenSeries": false, "id": 25, "isNew": true, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.3.5", "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ { "alias": "Limit", "color": "rgba(255, 255, 255, 0.56)", "dashes": true }, { "alias": "Request", "color": "rgba(255, 255, 255, 0.78)" }, { "alias": "Usage", "fill": 1 } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": 
"container_memory_working_set_bytes{container!=\"\",container!=\"POD\",pod=~\"loki.*\"}", "interval": "10s", "intervalFactor": 1, "legendFormat": "Usage (pod={{pod}}, container={{container}})", "metric": "container_memory_usage:sort_desc", "refId": "A", "step": 10 }, { "expr": "min(kube_pod_container_resource_limits_memory_bytes{pod=~\"loki.*\"})", "interval": "", "legendFormat": "Limit", "refId": "B" }, { "expr": "min(kube_pod_container_resource_requests_memory_bytes{pod=~\"loki.*\"})", "interval": "", "legendFormat": "Request", "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Loki memory usage vs Kubernetes limit and request", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:581", "format": "bytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:582", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 80 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "keep_state", "for": "5m", "frequency": "1m", "handler": 1, "name": "Promtail memory usage is above 80% of the defined limit", "noDataState": "no_data", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 2, "description": "Percentage of actual usage over configured limit", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 0, "fillGradient": 0, "grid": {}, "gridPos": { "h": 6, "w": 24, "x": 
0, "y": 59 }, "hiddenSeries": false, "id": 772, "isNew": true, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.3.5", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "100* max by(pod,container) (container_memory_working_set_bytes{container!=\"\",container!=\"POD\",pod=~\"promtail.*\"})\n/\non (pod,container) (kube_pod_container_resource_limits_memory_bytes{pod=~\"promtail.*\"})", "hide": false, "interval": "", "legendFormat": "", "refId": "A" } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 80 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Promtail memory usage (% of Kubernetes memory limit)", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:335", "format": "percent", "label": null, "logBase": 1, "max": "100", "min": "0", "show": true }, { "$$hashKey": "object:336", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 2, "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 0, "fillGradient": 0, "grid": {}, "gridPos": { "h": 6, "w": 24, "x": 0, "y": 65 }, "hiddenSeries": false, "id": 1103, "isNew": true, "legend": { 
"alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": 200, "sort": "current", "sortDesc": true, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.3.5", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [ { "alias": "Limit", "color": "rgba(255, 255, 255, 0.56)", "dashes": true }, { "alias": "Request", "color": "rgba(255, 255, 255, 0.78)" }, { "alias": "Usage", "fill": 1 } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "container_memory_working_set_bytes{container!=\"\",container!=\"POD\",pod=~\"promtail.*\"}", "interval": "10s", "intervalFactor": 1, "legendFormat": "Usage (pod={{pod}}, container={{container}})", "metric": "container_memory_usage:sort_desc", "refId": "A", "step": 10 }, { "expr": "min(kube_pod_container_resource_limits_memory_bytes{pod=~\"promtail.*\"})", "interval": "", "legendFormat": "Limit", "refId": "B" }, { "expr": "min(kube_pod_container_resource_requests_memory_bytes{pod=~\"promtail.*\"})", "interval": "", "legendFormat": "Request", "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Promtail memory usage vs Kubernetes limit and request", "tooltip": { "msResolution": false, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:581", "format": "bytes", "label": null, "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:582", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "collapsed": false, "datasource": "Prometheus", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 71 
}, "id": 8, "panels": [], "title": "CPU Usage", "type": "row" }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 80 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "keep_state", "for": "1m", "frequency": "1m", "handler": 1, "name": "Loki CPU usage is above 80% of the defined limit", "noDataState": "no_data", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 3, "description": "percentage of configured limit", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 0, "fillGradient": 0, "grid": {}, "gridPos": { "h": 6, "w": 24, "x": 0, "y": 72 }, "height": "", "hiddenSeries": false, "id": 347, "isNew": true, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sort": null, "sortDesc": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.3.5", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "100* rate(container_cpu_usage_seconds_total{container!=\"POD\",container!=\"\",pod=~\"loki.*\"}[1m])\n/\non (pod,container) kube_pod_container_resource_limits_cpu_cores{container=\"loki\"}", "interval": "10s", "intervalFactor": 1, "legendFormat": "", "metric": "container_cpu", "refId": "A", "step": 10 } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 80 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Loki CPU usage (% of Kubernetes CPU limit)", "tooltip": { "msResolution": true, 
"shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:413", "format": "percent", "label": "cores", "logBase": 1, "max": "100", "min": "0", "show": true }, { "$$hashKey": "object:414", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 3, "description": "", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 0, "fillGradient": 0, "grid": {}, "gridPos": { "h": 6, "w": 24, "x": 0, "y": 78 }, "height": "", "hiddenSeries": false, "id": 6, "isNew": true, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sort": null, "sortDesc": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.3.5", "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ { "alias": "CPU Limit", "color": "rgba(255, 255, 255, 0.46)", "dashes": true }, { "alias": "CPU Request", "color": "rgba(255, 255, 255, 0.54)" }, { "alias": "Usage", "fill": 1 } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",pod=~\"loki.*\"}[1m])", "interval": "10s", "intervalFactor": 1, "legendFormat": "CPU Usage (pod={{pod}}, container={{container}})", "metric": "container_cpu", "refId": "A", "step": 10 }, { "expr": "min(kube_pod_container_resource_limits_cpu_cores{pod=~\"loki.*\"})", "interval": "", "legendFormat": "CPU Limit", "refId": "B" }, { "expr": 
"min(kube_pod_container_resource_requests_cpu_cores{pod=~\"loki.*\"})", "hide": false, "interval": "", "legendFormat": "CPU Request", "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Loki CPU usage vs Kubernetes limit and request", "tooltip": { "msResolution": true, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:666", "format": "none", "label": "cores", "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:667", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "alert": { "alertRuleTags": {}, "conditions": [ { "evaluator": { "params": [ 80 ], "type": "gt" }, "operator": { "type": "and" }, "query": { "params": [ "A", "5m", "now" ] }, "reducer": { "params": [], "type": "avg" }, "type": "query" } ], "executionErrorState": "keep_state", "for": "5m", "frequency": "1m", "handler": 1, "name": "Promtail CPU usage is above 80% of the defined limit", "noDataState": "no_data", "notifications": [] }, "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 3, "description": "percentage of configured limit", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 0, "fillGradient": 0, "grid": {}, "gridPos": { "h": 6, "w": 24, "x": 0, "y": 84 }, "height": "", "hiddenSeries": false, "id": 910, "isNew": true, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sort": null, "sortDesc": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": 
"7.3.5", "pointradius": 5, "points": false, "renderer": "flot", "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "100 * rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",pod=~\"promtail.*\"}[1m])\n/\non (pod,container) kube_pod_container_resource_limits_cpu_cores{pod=~\"promtail.*\"}", "interval": "10s", "intervalFactor": 1, "legendFormat": "", "metric": "container_cpu", "refId": "A", "step": 10 } ], "thresholds": [ { "colorMode": "critical", "fill": true, "line": true, "op": "gt", "value": 80 } ], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Promtail CPU usage (% of Kubernetes limit)", "tooltip": { "msResolution": true, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:231", "format": "percent", "label": "cores", "logBase": 1, "max": "100", "min": "0", "show": true }, { "$$hashKey": "object:232", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, "datasource": "Prometheus", "decimals": 3, "description": "", "editable": true, "error": false, "fieldConfig": { "defaults": { "custom": {}, "links": [] }, "overrides": [] }, "fill": 0, "fillGradient": 0, "grid": {}, "gridPos": { "h": 6, "w": 24, "x": 0, "y": 90 }, "height": "", "hiddenSeries": false, "id": 1104, "isNew": true, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sort": null, "sortDesc": null, "total": false, "values": false }, "lines": true, "linewidth": 2, "links": [], "nullPointMode": "connected", "options": { "alertThreshold": true }, "percentage": false, "pluginVersion": "7.3.5", "pointradius": 5, "points": false, 
"renderer": "flot", "seriesOverrides": [ { "alias": "CPU Limit", "color": "rgba(255, 255, 255, 0.46)", "dashes": true }, { "alias": "CPU Request", "color": "rgba(255, 255, 255, 0.54)" }, { "alias": "Usage", "fill": 1 } ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "rate(container_cpu_usage_seconds_total{container!=\"\",container!=\"POD\",pod=~\"promtail.*\"}[1m])", "interval": "10s", "intervalFactor": 1, "legendFormat": "CPU Usage (pod={{pod}}, container={{container}})", "metric": "container_cpu", "refId": "A", "step": 10 }, { "expr": "min(kube_pod_container_resource_limits_cpu_cores{pod=~\"promtail.*\"})", "interval": "", "legendFormat": "CPU Limit", "refId": "B" }, { "expr": "min(kube_pod_container_resource_requests_cpu_cores{pod=~\"promtail.*\"})", "hide": false, "interval": "", "legendFormat": "CPU Request", "refId": "C" } ], "thresholds": [], "timeFrom": null, "timeRegions": [], "timeShift": null, "title": "Promtail CPU usage vs Kubernetes limit and request", "tooltip": { "msResolution": true, "shared": true, "sort": 2, "value_type": "cumulative" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [] }, "yaxes": [ { "$$hashKey": "object:666", "format": "none", "label": "cores", "logBase": 1, "max": null, "min": "0", "show": true }, { "$$hashKey": "object:667", "format": "short", "label": null, "logBase": 1, "max": null, "min": null, "show": false } ], "yaxis": { "align": false, "alignLevel": null } } ], "refresh": "1m", "schemaVersion": 26, "style": "dark", "tags": [ "tools", "loki" ], "templating": { "list": [] }, "time": { "from": "now-24h", "to": "now" }, "timepicker": { "refresh_intervals": [ "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "", "title": "Loki stack monitoring (Promtail, Loki)", "uid": "loki_stack_monitoring_quortex", "version": 5 }