79 lines
6.2 KiB
JSON
Raw Permalink Normal View History

{
"annotations": {"list": [{"builtIn": 1,"datasource": {"type": "datasource","uid": "grafana"},"enable": true,"hide": true,"iconColor": "rgba(0, 211, 255, 1)","name": "Annotations & Alerts","type": "dashboard"}]},
"description": "Per-container CPU, memory, and I/O for the OpenBMP stack — collected by the Telegraf docker input. Watch memory % to catch a container approaching its mem_limit before it OOM-crashes.",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [{"asDropdown": true,"icon": "external link","includeVars": true,"keepTime": true,"tags": ["obmp-nav"],"title": "OBMP Dashboards","type": "dashboards"}],
"liveNow": false,
"panels": [
{
"datasource": {"type": "influxdb","uid": "obmp_influxdb"},
"description": "Memory usage as a percentage of each container's mem_limit. Sustained values near 100% precede an OOM kill.",
"fieldConfig": {
"defaults": {"color": {"mode": "palette-classic"},"custom": {"axisPlacement": "auto","drawStyle": "line","fillOpacity": 10,"lineInterpolation": "smooth","lineWidth": 1,"pointSize": 5,"showPoints": "never","spanNulls": false,"stacking": {"group": "A","mode": "none"}},"unit": "percent","min": 0,"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "orange","value": 80},{"color": "red","value": 95}]}},
"overrides": []
},
"gridPos": {"h": 9,"w": 12,"x": 0,"y": 0},
"id": 1,
"options": {"legend": {"calcs": ["max"],"displayMode": "table","placement": "right","showLegend": true},"tooltip": {"mode": "multi","sort": "desc"}},
"targets": [{"datasource": {"type": "influxdb","uid": "obmp_influxdb"},"query": "from(bucket: \"telemetry\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._measurement == \"docker_container_mem\" and r._field == \"usage_percent\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> keep(columns: [\"_time\", \"_value\", \"container_name\"])\n |> group(columns: [\"container_name\"])","refId": "A"}],
"title": "Container Memory %",
"type": "timeseries"
},
{
"datasource": {"type": "influxdb","uid": "obmp_influxdb"},
"description": "CPU usage per container (cpu-total). Can exceed 100% — that is multiple cores.",
"fieldConfig": {
"defaults": {"color": {"mode": "palette-classic"},"custom": {"axisPlacement": "auto","drawStyle": "line","fillOpacity": 10,"lineInterpolation": "smooth","lineWidth": 1,"pointSize": 5,"showPoints": "never","spanNulls": false,"stacking": {"group": "A","mode": "none"}},"unit": "percent","min": 0},
"overrides": []
},
"gridPos": {"h": 9,"w": 12,"x": 12,"y": 0},
"id": 2,
"options": {"legend": {"calcs": ["max"],"displayMode": "table","placement": "right","showLegend": true},"tooltip": {"mode": "multi","sort": "desc"}},
"targets": [{"datasource": {"type": "influxdb","uid": "obmp_influxdb"},"query": "from(bucket: \"telemetry\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._measurement == \"docker_container_cpu\" and r._field == \"usage_percent\" and r.cpu == \"cpu-total\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> keep(columns: [\"_time\", \"_value\", \"container_name\"])\n |> group(columns: [\"container_name\"])","refId": "A"}],
"title": "Container CPU %",
"type": "timeseries"
},
{
"datasource": {"type": "influxdb","uid": "obmp_influxdb"},
"description": "Absolute memory usage per container.",
"fieldConfig": {
"defaults": {"color": {"mode": "palette-classic"},"custom": {"axisPlacement": "auto","drawStyle": "line","fillOpacity": 10,"lineInterpolation": "smooth","lineWidth": 1,"pointSize": 5,"showPoints": "never","spanNulls": false,"stacking": {"group": "A","mode": "none"}},"unit": "bytes","min": 0},
"overrides": []
},
"gridPos": {"h": 9,"w": 12,"x": 0,"y": 9},
"id": 3,
"options": {"legend": {"calcs": ["max"],"displayMode": "table","placement": "right","showLegend": true},"tooltip": {"mode": "multi","sort": "desc"}},
"targets": [{"datasource": {"type": "influxdb","uid": "obmp_influxdb"},"query": "from(bucket: \"telemetry\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._measurement == \"docker_container_mem\" and r._field == \"usage\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> keep(columns: [\"_time\", \"_value\", \"container_name\"])\n |> group(columns: [\"container_name\"])","refId": "A"}],
"title": "Container Memory Usage",
"type": "timeseries"
},
{
"datasource": {"type": "influxdb","uid": "obmp_influxdb"},
"description": "Current memory pressure per container. Anything in orange/red is close to its mem_limit.",
"fieldConfig": {
"defaults": {"custom": {"align": "auto","displayMode": "auto"},"unit": "percent"},
"overrides": [{"matcher": {"id": "byName","options": "Memory %"},"properties": [{"id": "custom.displayMode","value": "gradient-gauge"},{"id": "max","value": 100},{"id": "thresholds","value": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "orange","value": 80},{"color": "red","value": 95}]}}]}]
},
"gridPos": {"h": 9,"w": 12,"x": 12,"y": 9},
"id": 4,
"options": {"showHeader": true,"sortBy": [{"desc": true,"displayName": "Memory %"}]},
"targets": [{"datasource": {"type": "influxdb","uid": "obmp_influxdb"},"query": "from(bucket: \"telemetry\")\n |> range(start: -5m)\n |> filter(fn: (r) => r._measurement == \"docker_container_mem\" and r._field == \"usage_percent\")\n |> last()\n |> keep(columns: [\"container_name\", \"_value\"])\n |> group()\n |> rename(columns: {_value: \"Memory %\", container_name: \"Container\"})\n |> sort(columns: [\"Memory %\"], desc: true)","refId": "A"}],
"title": "Current Memory % by Container",
"type": "table"
}
],
"refresh": "30s",
"schemaVersion": 36,
"style": "dark",
"tags": ["obmp", "obmp-nav", "telemetry", "resources"],
"time": {"from": "now-1h","to": "now"},
"timepicker": {},
"timezone": "browser",
"title": "Stack Resources",
"uid": "obmp-stack-resources",
"version": 1
}