{
"annotations": {"list": [{"builtIn": 1,"datasource": {"type": "datasource","uid": "grafana"},"enable": true,"hide": true,"iconColor": "rgba(0, 211, 255, 1)","name": "Annotations & Alerts","type": "dashboard"}]},
"description": "Network-operations overview — answers 'is the network healthy right now?' at a glance. Update and churn counts come from the stats_chg_* aggregate tables so those panels stay fast at production scale; router, peer, RPKI and BGP-LS panels query their source tables directly.",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 1,
"id": null,
"links": [
{"asDropdown": true,"icon": "external link","includeVars": true,"keepTime": true,"tags": ["obmp-nav"],"title": "OBMP Dashboards","type": "dashboards"}
],
"liveNow": true,
"panels": [
{
"datasource": {"type": "postgres","uid": "obmp_postgres"},
"description": "Total routers reporting BMP to the collector.",
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "blue","value": null}]},"unit": "short"}},
"gridPos": {"h": 4,"w": 3,"x": 0,"y": 0},
"id": 1,
"options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "time_series","rawSql": "SELECT NOW() AS time, count(*) AS \"Routers\" FROM routers","refId": "A"}],
"title": "Routers Monitored",
"type": "stat"
},
{
"datasource": {"type": "postgres","uid": "obmp_postgres"},
"description": "Routers whose BMP session is not up. Should be 0.",
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "red","value": 1}]},"unit": "short"}},
"gridPos": {"h": 4,"w": 3,"x": 3,"y": 0},
"id": 2,
"options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "time_series","rawSql": "SELECT NOW() AS time, count(*) AS \"Routers Down\" FROM routers WHERE state != 'up'","refId": "A"}],
"title": "Routers Down",
"type": "stat"
},
{
"datasource": {"type": "postgres","uid": "obmp_postgres"},
"description": "BGP peers currently up (pre-policy Adj-RIB-In sessions).",
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "blue","value": null}]},"unit": "short"}},
"gridPos": {"h": 4,"w": 3,"x": 6,"y": 0},
"id": 3,
"options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "time_series","rawSql": "SELECT NOW() AS time, count(*) AS \"Peers Up\" FROM bgp_peers WHERE isprepolicy = true AND state = 'up'","refId": "A"}],
"title": "Peers Up",
"type": "stat"
},
{
"datasource": {"type": "postgres","uid": "obmp_postgres"},
"description": "BGP peers that went down within the selected time range. Investigate any non-zero value. (Removed/decommissioned peers fall outside the range and are not counted.)",
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "red","value": 1}]},"unit": "short"}},
"gridPos": {"h": 4,"w": 3,"x": 9,"y": 0},
"id": 4,
"options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "time_series","rawSql": "SELECT NOW() AS time, count(*) AS \"Peers Down\" FROM bgp_peers WHERE isprepolicy = true AND state != 'up' AND $__timeFilter(timestamp)","refId": "A"}],
"title": "Peers Down",
"type": "stat"
},
{
"datasource": {"type": "postgres","uid": "obmp_postgres"},
"description": "Peer session down-events in the last hour. Sustained flapping needs investigation.",
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 1},{"color": "red","value": 5}]},"unit": "short"}},
"gridPos": {"h": 4,"w": 3,"x": 12,"y": 0},
"id": 5,
"options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "time_series","rawSql": "SELECT NOW() AS time, count(*) AS \"Flaps (1h)\" FROM peer_event_log WHERE state = 'down' AND timestamp > NOW() - INTERVAL '1 hour'","refId": "A"}],
"title": "Flap Events (1h)",
"type": "stat"
},
{
"datasource": {"type": "postgres","uid": "obmp_postgres"},
"description": "Sum of the updates column across all peers for the last 5 minutes (from stats_chg_bypeer). Withdraws are not included.",
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "blue","value": null}]},"unit": "short"}},
"gridPos": {"h": 4,"w": 3,"x": 15,"y": 0},
"id": 6,
"options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "time_series","rawSql": "SELECT NOW() AS time, COALESCE(SUM(updates),0) AS \"RIB Updates (5m)\" FROM stats_chg_bypeer WHERE interval_time > NOW() - INTERVAL '5 minutes'","refId": "A"}],
"title": "RIB Updates (5m)",
"type": "stat"
},
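{
"datasource": {"type": "postgres","uid": "obmp_postgres"},
"description": "Active (non-withdrawn) IPv4 routes whose origin AS differs from that of a covering ROA and that no covering ROA authorizes for the observed origin and prefix length (RPKI-invalid). Potential hijacks or misconfigurations. IPv6 routes are not evaluated.",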
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "red","value": 1}]},"unit": "short"}},
"gridPos": {"h": 4,"w": 3,"x": 18,"y": 0},
"id": 7,
"options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "time_series","rawSql": "SELECT NOW() AS time, count(*) AS \"RPKI Invalid\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nWHERE r.iswithdrawn = false AND r.isipv4 = true\n AND EXISTS (SELECT 1 FROM rpki_validator rv WHERE rv.prefix >>= r.prefix AND rv.origin_as != ba.origin_as)\n AND NOT EXISTS (SELECT 1 FROM rpki_validator rv WHERE rv.prefix >>= r.prefix AND rv.origin_as = ba.origin_as AND r.prefix_len <= rv.prefix_len_max)","refId": "A"}],
"title": "RPKI Invalid Routes",
"type": "stat"
},
{
"datasource": {"type": "postgres","uid": "obmp_postgres"},
"description": "BGP-LS link and node changes in the last hour. A spike indicates topology instability.",
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 1},{"color": "red","value": 20}]},"unit": "short"}},
"gridPos": {"h": 4,"w": 3,"x": 21,"y": 0},
"id": 8,
"options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "time_series","rawSql": "SELECT NOW() AS time,\n (SELECT count(*) FROM ls_links_log WHERE timestamp > NOW() - INTERVAL '1 hour')\n + (SELECT count(*) FROM ls_nodes_log WHERE timestamp > NOW() - INTERVAL '1 hour') AS \"LS Changes (1h)\"","refId": "A"}],
"title": "LS Topology Changes (1h)",
"type": "stat"
},
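{
"datasource": {"type": "postgres","uid": "obmp_postgres"},
"description": "Per-peer session state over the selected range, built from peer_event_log events in 1-minute buckets. Red (DOWN) segments mark session drops; peers with no events in the range are not shown.",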
"fieldConfig": {
"defaults": {
"color": {"mode": "thresholds"},
"custom": {"fillOpacity": 70,"lineWidth": 0,"spanNulls": false},
"mappings": [{"options": {"0": {"color": "red","index": 1,"text": "DOWN"},"1": {"color": "green","index": 0,"text": "UP"}},"type": "value"}],
"thresholds": {"mode": "absolute","steps": [{"color": "red","value": null},{"color": "green","value": 1}]}
}
},
"gridPos": {"h": 9,"w": 12,"x": 0,"y": 4},
"id": 9,
"options": {"alignValue": "left","legend": {"displayMode": "list","placement": "bottom","showLegend": false},"mergeValues": true,"rowHeight": 0.9,"showValue": "never","tooltip": {"mode": "single"}},
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "time_series","rawSql": "SELECT\n $__timeGroupAlias(e.timestamp,'1m'),\n COALESCE(p.name, p.peer_addr::text) AS metric,\n CASE WHEN e.state = 'up' THEN 1 ELSE 0 END AS \"value\"\nFROM peer_event_log e\nJOIN bgp_peers p ON p.hash_id = e.peer_hash_id\nWHERE $__timeFilter(e.timestamp)\nORDER BY 1, 2","refId": "A"}],
"title": "Peer Session State",
"type": "state-timeline"
},
{
"datasource": {"type": "postgres","uid": "obmp_postgres"},
"description": "BGP updates vs withdraws summed across all peers per 5-minute bucket (from stats_chg_bypeer). Values are counts per bucket, not per-second rates.",
"fieldConfig": {
"defaults": {"color": {"mode": "palette-classic"},"custom": {"axisCenteredZero": false,"axisColorMode": "text","axisLabel": "","axisPlacement": "auto","barAlignment": 0,"drawStyle": "line","fillOpacity": 20,"gradientMode": "none","lineInterpolation": "smooth","lineWidth": 1,"pointSize": 5,"scaleDistribution": {"type": "linear"},"showPoints": "never","spanNulls": false,"stacking": {"group": "A","mode": "none"},"thresholdsStyle": {"mode": "off"}},"unit": "short"},
"overrides": [{"matcher": {"id": "byName","options": "Withdraws"},"properties": [{"id": "color","value": {"fixedColor": "red","mode": "fixed"}}]},{"matcher": {"id": "byName","options": "Updates"},"properties": [{"id": "color","value": {"fixedColor": "green","mode": "fixed"}}]}]
},
"gridPos": {"h": 9,"w": 12,"x": 12,"y": 4},
"id": 10,
"options": {"legend": {"calcs": ["sum"],"displayMode": "table","placement": "bottom","showLegend": true},"tooltip": {"mode": "multi","sort": "none"}},
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "time_series","rawSql": "SELECT\n $__timeGroupAlias(interval_time,'5m'),\n SUM(updates) AS \"Updates\",\n SUM(withdraws) AS \"Withdraws\"\nFROM stats_chg_bypeer\nWHERE $__timeFilter(interval_time)\nGROUP BY 1\nORDER BY 1","refId": "A"}],
"title": "BGP Update Rate",
"type": "timeseries"
},
{
"datasource": {"type": "postgres","uid": "obmp_postgres"},
"description": "Peers that went down within the selected time range. Empty is healthy. Widen the time range to see longer-standing issues. Click a peer to open Peer Detail.",
"fieldConfig": {
"defaults": {"custom": {"align": "auto","displayMode": "auto"}},
"overrides": [
{"matcher": {"id": "byName","options": "State"},"properties": [{"id": "custom.displayMode","value": "color-background"},{"id": "mappings","value": [{"options": {"down": {"color": "red","index": 0,"text": "DOWN"}},"type": "value"}]}]},
{"matcher": {"id": "byName","options": "Peer"},"properties": [{"id": "links","value": [{"title": "Open Peer Detail","url": "/d/obmp-peer-detail/peer-detail?var-peer_hash=${__data.fields[\"peer_hash_id\"]}"}]}]},
{"matcher": {"id": "byName","options": "peer_hash_id"},"properties": [{"id": "custom.hidden","value": true}]}
]
},
"gridPos": {"h": 9,"w": 12,"x": 0,"y": 13},
"id": 11,
"options": {"footer": {"countRows": false,"fields": "","reducer": ["sum"],"show": false},"showHeader": true,"sortBy": [{"desc": true,"displayName": "Last Change"}]},
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "table","rawSql": "SELECT\n p.hash_id AS peer_hash_id,\n COALESCE(p.name, p.peer_addr::text) AS \"Peer\",\n p.peer_addr AS \"Address\",\n p.peer_as AS \"AS\",\n p.state AS \"State\",\n p.timestamp AS \"Last Change\",\n p.error_text AS \"Reason\"\nFROM bgp_peers p\nWHERE p.isprepolicy = true AND p.state != 'up' AND $__timeFilter(p.timestamp)\nORDER BY p.timestamp DESC","refId": "A"}],
"title": "Peers Down",
"type": "table"
},
{
"datasource": {"type": "postgres","uid": "obmp_postgres"},
"description": "Top 25 most-churned prefixes (updates + withdraws) in the last hour (from stats_chg_byprefix). Click a prefix to open Prefix Explorer.",
"fieldConfig": {
"defaults": {"custom": {"align": "auto","displayMode": "auto"}},
"overrides": [
{"matcher": {"id": "byName","options": "Total Changes"},"properties": [{"id": "custom.displayMode","value": "gradient-gauge"},{"id": "thresholds","value": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 50},{"color": "red","value": 500}]}}]},
{"matcher": {"id": "byName","options": "Prefix"},"properties": [{"id": "links","value": [{"title": "Open in Prefix Explorer","url": "/d/prefix-hist/prefix-explorer?var-prefix=${__value.text}"}]}]}
]
},
"gridPos": {"h": 9,"w": 12,"x": 12,"y": 13},
"id": 12,
"options": {"footer": {"countRows": false,"fields": "","reducer": ["sum"],"show": false},"showHeader": true,"sortBy": [{"desc": true,"displayName": "Total Changes"}]},
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "table","rawSql": "SELECT\n (host(prefix) || '/' || prefix_len) AS \"Prefix\",\n SUM(updates) AS \"Updates\",\n SUM(withdraws) AS \"Withdraws\",\n SUM(updates + withdraws) AS \"Total Changes\"\nFROM stats_chg_byprefix\nWHERE interval_time > NOW() - INTERVAL '1 hour'\nGROUP BY prefix, prefix_len\nORDER BY \"Total Changes\" DESC\nLIMIT 25","refId": "A"}],
"title": "Top Churning Prefixes (1h)",
"type": "table"
},
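{
"datasource": {"type": "postgres","uid": "obmp_postgres"},
"description": "Active (non-withdrawn) IPv4 routes whose observed origin AS conflicts with a covering ROA — potential hijacks or leaks. One row is returned per conflicting ROA, so a route can appear more than once; limited to the first 50 rows ordered by prefix.",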
"fieldConfig": {
"defaults": {"custom": {"align": "auto","displayMode": "auto"}},
"overrides": [{"matcher": {"id": "byName","options": "Status"},"properties": [{"id": "custom.displayMode","value": "color-background"},{"id": "mappings","value": [{"options": {"Invalid": {"color": "red","index": 0}},"type": "value"}]}]}]
},
"gridPos": {"h": 9,"w": 12,"x": 0,"y": 22},
"id": 13,
"options": {"footer": {"countRows": false,"fields": "","reducer": ["sum"],"show": false},"showHeader": true},
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "table","rawSql": "SELECT\n r.prefix AS \"Prefix\",\n ba.origin_as AS \"Observed Origin AS\",\n rv.origin_as AS \"Authorized AS (ROA)\",\n 'Invalid' AS \"Status\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nJOIN rpki_validator rv ON rv.prefix >>= r.prefix AND rv.origin_as != ba.origin_as\nWHERE r.iswithdrawn = false AND r.isipv4 = true\n AND NOT EXISTS (SELECT 1 FROM rpki_validator rv2 WHERE rv2.prefix >>= r.prefix AND rv2.origin_as = ba.origin_as AND r.prefix_len <= rv2.prefix_len_max)\nORDER BY r.prefix\nLIMIT 50","refId": "A"}],
"title": "RPKI Invalid Routes — Potential Hijacks",
"type": "table"
},
{
"datasource": {"type": "postgres","uid": "obmp_postgres"},
"description": "Recent BGP-LS link changes — topology churn over the selected range. Shows the 50 most recent link events; node changes are not listed.",
"fieldConfig": {
"defaults": {"custom": {"align": "auto","displayMode": "auto"}},
"overrides": [{"matcher": {"id": "byName","options": "Action"},"properties": [{"id": "custom.displayMode","value": "color-background"},{"id": "mappings","value": [{"options": {"updated": {"color": "blue","index": 0},"withdrawn": {"color": "orange","index": 1}},"type": "value"}]}]}]
},
"gridPos": {"h": 9,"w": 12,"x": 12,"y": 22},
"id": 14,
"options": {"footer": {"countRows": false,"fields": "","reducer": ["sum"],"show": false},"showHeader": true,"sortBy": [{"desc": true,"displayName": "Time"}]},
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "table","rawSql": "SELECT\n timestamp AS \"Time\",\n COALESCE(interface_addr::text, '') AS \"Local\",\n COALESCE(neighbor_addr::text, '') AS \"Neighbor\",\n CASE WHEN iswithdrawn THEN 'withdrawn' ELSE 'updated' END AS \"Action\"\nFROM ls_links_log\nWHERE $__timeFilter(timestamp)\nORDER BY timestamp DESC\nLIMIT 50","refId": "A"}],
"title": "Recent LS Topology Changes",
"type": "table"
}
],
"refresh": "1m",
"schemaVersion": 36,
"style": "dark",
"tags": ["obmp","obmp-nav","noc","overview"],
"time": {"from": "now-6h","to": "now"},
"timepicker": {},
"timezone": "browser",
"title": "NOC Overview",
"uid": "obmp-noc-overview",
"version": 1
}