153 lines
9.6 KiB
JSON
153 lines
9.6 KiB
JSON
|
|
{
|
||
|
|
"annotations": {"list": [{"builtIn": 1,"datasource": {"type": "datasource","uid": "grafana"},"enable": true,"hide": true,"iconColor": "rgba(0, 211, 255, 1)","name": "Annotations & Alerts","target": {"limit": 100,"matchAny": false,"tags": [],"type": "dashboard"},"type": "dashboard"}]},
|
||
|
|
"description": "Prefix stability analysis and route churn visualization. Teaches how to identify unstable routes and understand BGP churn.",
|
||
|
|
"editable": true,
|
||
|
|
"fiscalYearStartMonth": 0,
|
||
|
|
"graphTooltip": 1,
|
||
|
|
"id": null,
|
||
|
|
"links": [],
|
||
|
|
"panels": [
|
||
|
|
{
|
||
|
|
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||
|
|
"description": "Learn: This chart shows BGP advertisements and withdrawals bucketed per hour. A healthy network has steady low churn. Spikes in withdrawals indicate route instability events — link failures, IBGP reconvergence, or policy changes. Run 'inject.py churn' to generate synthetic churn data and observe it here.",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"color": {"mode": "palette-classic"},
|
||
|
|
"custom": {"drawStyle": "bars","fillOpacity": 60,"lineWidth": 1,"spanNulls": false,"stacking": {"group": "A","mode": "none"}},
|
||
|
|
"unit": "short"
|
||
|
|
},
|
||
|
|
"overrides": [
|
||
|
|
{"matcher": {"id": "byName","options": "Advertisements"},"properties": [{"id": "color","value": {"fixedColor": "green","mode": "fixed"}}]},
|
||
|
|
{"matcher": {"id": "byName","options": "Withdrawals"},"properties": [{"id": "color","value": {"fixedColor": "red","mode": "fixed"}}]}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"gridPos": {"h": 9,"w": 24,"x": 0,"y": 0},
|
||
|
|
"id": 1,
|
||
|
|
"options": {"legend": {"calcs": ["sum","max"],"displayMode": "list","placement": "bottom"},"tooltip": {"mode": "multi"}},
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||
|
|
"format": "time_series",
|
||
|
|
"rawSql": "SELECT\n $__timeGroupAlias(timestamp,'1h'),\n SUM(CASE WHEN iswithdrawn = false THEN 1 ELSE 0 END) AS \"Advertisements\",\n SUM(CASE WHEN iswithdrawn = true THEN 1 ELSE 0 END) AS \"Withdrawals\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)\nGROUP BY 1\nORDER BY 1",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"title": "Advertisements vs Withdrawals Rate (per hour)",
|
||
|
|
"type": "timeseries"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||
|
|
"description": "Learn: A prefix with more than 30 updates per day is considered unstable — it is flapping or being re-announced frequently. The Stability column categorizes each prefix. Run 'inject.py churn' to generate churn data and observe it here. Sort by 'Total Updates' to find the most problematic prefixes.",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {"color": {"mode": "thresholds"},"custom": {"align": "auto","displayMode": "auto"},"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null}]}},
|
||
|
|
"overrides": [
|
||
|
|
{"matcher": {"id": "byName","options": "Stability"},"properties": [{"id": "custom.displayMode","value": "color-text"},{"id": "mappings","value": [{"options": {"Very Stable": {"color": "green","index": 0},"Stable": {"color": "blue","index": 1},"Moderate": {"color": "yellow","index": 2},"Unstable": {"color": "red","index": 3}},"type": "value"}]}]},
|
||
|
|
{"matcher": {"id": "byName","options": "Total Updates"},"properties": [{"id": "custom.displayMode","value": "lcd-gauge"},{"id": "color","value": {"mode": "thresholds"}},{"id": "thresholds","value": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 7},{"color": "red","value": 30}]}}]}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"gridPos": {"h": 12,"w": 24,"x": 0,"y": 9},
|
||
|
|
"id": 2,
|
||
|
|
"options": {"footer": {"fields": "","reducer": ["sum"],"show": false},"showHeader": true,"sortBy": [{"desc": true,"displayName": "Total Updates"}]},
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||
|
|
"format": "table",
|
||
|
|
"rawSql": "SELECT\n prefix::text AS \"Prefix\",\n COUNT(*) AS \"Total Updates\",\n SUM(CASE WHEN iswithdrawn THEN 1 ELSE 0 END) AS \"Withdrawals\",\n SUM(CASE WHEN NOT iswithdrawn THEN 1 ELSE 0 END) AS \"Announcements\",\n MAX(timestamp) AS \"Last Change\",\n CASE\n WHEN COUNT(*) = 1 THEN 'Very Stable'\n WHEN COUNT(*) <= 7 THEN 'Stable'\n WHEN COUNT(*) <= 30 THEN 'Moderate'\n ELSE 'Unstable'\n END AS \"Stability\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)\nGROUP BY prefix\nORDER BY \"Total Updates\" DESC\nLIMIT 100",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"title": "Top Churning Prefixes",
|
||
|
|
"type": "table"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||
|
|
"description": "Learn: This bar chart shows how many prefixes fall into each stability tier. In a healthy network, the vast majority of prefixes should be 'Very Stable' (only announced once during the window). A large 'Unstable' bar is a red flag. Run 'inject.py churn' to shift prefixes into the Unstable tier.",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"color": {"mode": "fixed","fixedColor": "blue"},
|
||
|
|
"custom": {"fillOpacity": 80,"lineWidth": 0},
|
||
|
|
"unit": "short"
|
||
|
|
},
|
||
|
|
"overrides": [
|
||
|
|
{"matcher": {"id": "byName","options": "1. Very Stable (1 update)"},"properties": [{"id": "color","value": {"fixedColor": "green","mode": "fixed"}}]},
|
||
|
|
{"matcher": {"id": "byName","options": "2. Stable (2-7 updates)"},"properties": [{"id": "color","value": {"fixedColor": "blue","mode": "fixed"}}]},
|
||
|
|
{"matcher": {"id": "byName","options": "3. Moderate (8-30 updates)"},"properties": [{"id": "color","value": {"fixedColor": "yellow","mode": "fixed"}}]},
|
||
|
|
{"matcher": {"id": "byName","options": "4. Unstable (31+ updates)"},"properties": [{"id": "color","value": {"fixedColor": "red","mode": "fixed"}}]}
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"gridPos": {"h": 9,"w": 14,"x": 0,"y": 21},
|
||
|
|
"id": 3,
|
||
|
|
"options": {"barRadius": 0.1,"barWidth": 0.6,"groupWidth": 0.7,"legend": {"displayMode": "list","placement": "bottom"},"orientation": "auto","text": {},"tooltip": {"mode": "single"},"xTickLabelRotation": 0,"xTickLabelSpacing": 200},
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||
|
|
"format": "table",
|
||
|
|
"rawSql": "SELECT\n CASE\n WHEN cnt = 1 THEN '1. Very Stable (1 update)'\n WHEN cnt <= 7 THEN '2. Stable (2-7 updates)'\n WHEN cnt <= 30 THEN '3. Moderate (8-30 updates)'\n ELSE '4. Unstable (31+ updates)'\n END AS \"Stability Tier\",\n COUNT(*) AS \"Prefix Count\"\nFROM (\n SELECT prefix, COUNT(*) as cnt\n FROM ip_rib_log\n WHERE $__timeFilter(timestamp)\n GROUP BY prefix\n) sub\nGROUP BY 1\nORDER BY 1",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"title": "Prefix Distribution by Stability Tier",
|
||
|
|
"type": "barchart"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||
|
|
"description": "Learn: This is the single most churning prefix in the selected time range. If a prefix appears here repeatedly across time ranges, it may warrant investigation — check the AS path and peers announcing it.",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"color": {"mode": "thresholds"},
|
||
|
|
"thresholds": {"mode": "absolute","steps": [{"color": "red","value": null}]},
|
||
|
|
"unit": "string",
|
||
|
|
"mappings": []
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"gridPos": {"h": 5,"w": 10,"x": 14,"y": 21},
|
||
|
|
"id": 4,
|
||
|
|
"options": {"colorMode": "background","graphMode": "none","justifyMode": "center","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {"titleSize": 14,"valueSize": 18}},
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||
|
|
"format": "time_series",
|
||
|
|
"rawSql": "SELECT NOW() AS time, prefix::text AS \"Most Churned Prefix\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)\nGROUP BY prefix\nORDER BY COUNT(*) DESC\nLIMIT 1",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"title": "Most Churned Prefix",
|
||
|
|
"type": "stat"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||
|
|
"description": "Learn: This counts how many distinct prefixes had at least one update event in the selected time window. During a normal steady state this number should be low. After a major routing event (e.g., upstream link failure) you may see thousands of prefixes change simultaneously.",
|
||
|
|
"fieldConfig": {
|
||
|
|
"defaults": {
|
||
|
|
"color": {"mode": "thresholds"},
|
||
|
|
"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 500},{"color": "red","value": 2000}]},
|
||
|
|
"unit": "short",
|
||
|
|
"mappings": []
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"gridPos": {"h": 4,"w": 10,"x": 14,"y": 26},
|
||
|
|
"id": 5,
|
||
|
|
"options": {"colorMode": "background","graphMode": "area","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}},
|
||
|
|
"targets": [
|
||
|
|
{
|
||
|
|
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||
|
|
"format": "time_series",
|
||
|
|
"rawSql": "SELECT NOW() AS time, COUNT(DISTINCT prefix) AS \"Prefixes with Updates\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)",
|
||
|
|
"refId": "A"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"title": "Total Unique Prefixes with Updates",
|
||
|
|
"type": "stat"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"schemaVersion": 36,
|
||
|
|
"style": "dark",
|
||
|
|
"tags": ["obmp","learning","bgp","churn","stability"],
|
||
|
|
"time": {"from": "now-24h","to": "now"},
|
||
|
|
"timepicker": {},
|
||
|
|
"timezone": "browser",
|
||
|
|
"title": "Route Churn & Stability Score",
|
||
|
|
"uid": "obmp-learn-05",
|
||
|
|
"version": 1
|
||
|
|
}
|