sam 7e3370b5a5 Rework Grafana dashboard information architecture
Reorganizes 31 dashboards into an operator-first structure with real
navigation. Adds Router Detail and Peer Detail drilldown dashboards; merges
LS Nodes+Links and the two L3VPN dashboards; modernizes all deprecated panels
(table-old/graph/worldmap). Every dashboard gets the obmp-nav dropdown so the
whole set is reachable from anywhere. Graduates the operational "Learning"
dashboards into Operations/Routing/LinkState folders, retires the Tops folder,
and relabels folders (Base->Operations, History->Routing, Learning->Reference).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-18 20:55:03 -07:00

369 lines
12 KiB
JSON

{
"uid": "obmp-learn-09",
"title": "Topology Change & Anomaly Detection",
"tags": [
"obmp-learning",
"obmp",
"obmp-nav"
],
"editable": true,
"schemaVersion": 39,
"time": {
"from": "now-6h",
"to": "now"
},
"templating": {
"list": [
{
"name": "peer_hash",
"label": "BGP Peer",
"type": "query",
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"query": "SELECT __text,__value FROM (\n select peername as __text, peer_hash_id as __value, count(*) as count\n from v_ls_nodes\n group by peername,peer_hash_id) d\nwhere count > 0",
"refresh": 1,
"multi": false
}
]
},
"panels": [
{
"id": 1,
"title": "Link State Changes Over Time",
"description": "Counts ls_links_log rows per 5-minute bucket for the selected BGP Peer. Rows with iswithdrawn = false are plotted as Links Up, rows with iswithdrawn = true as Links Down.",
"type": "timeseries",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"targets": [
{
"rawSql": "SELECT $__timeGroupAlias(timestamp, '5m'),\n SUM(CASE WHEN iswithdrawn = false THEN 1 ELSE 0 END) as \"Links Up\",\n SUM(CASE WHEN iswithdrawn = true THEN 1 ELSE 0 END) as \"Links Down\"\nFROM ls_links_log\nWHERE $__timeFilter(timestamp) AND peer_hash_id = '$peer_hash'\nGROUP BY 1 ORDER BY 1",
"format": "time_series",
"refId": "A"
}
]
},
{
"id": 2,
"title": "Node Changes Over Time",
"description": "Counts ls_nodes_log rows per 5-minute bucket for the selected BGP Peer. Rows with iswithdrawn = false are plotted as Nodes Appeared, rows with iswithdrawn = true as Nodes Withdrawn.",
"type": "timeseries",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"targets": [
{
"rawSql": "SELECT $__timeGroupAlias(timestamp, '5m'),\n SUM(CASE WHEN iswithdrawn = false THEN 1 ELSE 0 END) as \"Nodes Appeared\",\n SUM(CASE WHEN iswithdrawn = true THEN 1 ELSE 0 END) as \"Nodes Withdrawn\"\nFROM ls_nodes_log\nWHERE $__timeFilter(timestamp) AND peer_hash_id = '$peer_hash'\nGROUP BY 1 ORDER BY 1",
"format": "time_series",
"refId": "A"
}
]
},
{
"id": 3,
"title": "BGP Peer Session Events",
"description": "Counts peer_event_log rows per 5-minute bucket, split by state ('up' as Sessions Up, 'down' as Sessions Down). The query is not filtered by the BGP Peer variable, so it covers every peer in the log.",
"type": "timeseries",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"targets": [
{
"rawSql": "SELECT $__timeGroupAlias(pel.timestamp, '5m'),\n SUM(CASE WHEN pel.state = 'up' THEN 1 ELSE 0 END) as \"Sessions Up\",\n SUM(CASE WHEN pel.state = 'down' THEN 1 ELSE 0 END) as \"Sessions Down\"\nFROM peer_event_log pel\nWHERE $__timeFilter(pel.timestamp)\nGROUP BY 1 ORDER BY 1",
"format": "time_series",
"refId": "A"
}
]
},
{
"id": 4,
"title": "RIB Update Rate",
"description": "Counts ip_rib_log rows per 5-minute bucket. Rows with iswithdrawn = false are plotted as Advertisements, rows with iswithdrawn = true as Withdrawals. The query is not filtered by the BGP Peer variable.",
"type": "timeseries",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 8
},
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"targets": [
{
"rawSql": "SELECT $__timeGroupAlias(timestamp, '5m'),\n SUM(CASE WHEN iswithdrawn = false THEN 1 ELSE 0 END) as \"Advertisements\",\n SUM(CASE WHEN iswithdrawn = true THEN 1 ELSE 0 END) as \"Withdrawals\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)\nGROUP BY 1 ORDER BY 1",
"format": "time_series",
"refId": "A"
}
]
},
{
"id": 5,
"title": "Origin AS Changes (Potential Hijacks)",
"description": "For each prefix, shows the most recent ip_rib_log entry in the dashboard time range whose origin AS differs from an earlier entry for the same prefix, paired with the most recent such earlier origin. Rows are listed newest first and capped at 50. The query is not filtered by the BGP Peer variable.",
"type": "table",
"gridPos": {
"h": 10,
"w": 12,
"x": 0,
"y": 16
},
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"targets": [
{
"rawSql": "SELECT * FROM (\n SELECT DISTINCT ON (r1.prefix, r1.prefix_len)\n r1.prefix::text as \"Prefix\",\n r1.prefix_len as \"Len\",\n r1.origin_as as \"Current Origin AS\",\n r2.origin_as as \"Previous Origin AS\",\n r1.timestamp as \"Changed At\"\n FROM ip_rib_log r1\n JOIN ip_rib_log r2 ON r1.prefix = r2.prefix\n AND r1.prefix_len = r2.prefix_len\n AND r1.timestamp > r2.timestamp\n WHERE r1.origin_as != r2.origin_as\n AND $__timeFilter(r1.timestamp)\n ORDER BY r1.prefix, r1.prefix_len, r1.timestamp DESC, r2.timestamp DESC\n) changes\nORDER BY \"Changed At\" DESC\nLIMIT 50",
"format": "table",
"refId": "A"
}
]
},
{
"id": 6,
"title": "Most Churned Prefixes",
"type": "table",
"gridPos": {
"h": 10,
"w": 12,
"x": 12,
"y": 16
},
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"targets": [
{
"rawSql": "SELECT prefix::text as \"Prefix\",\n prefix_len as \"Len\",\n COUNT(*) as \"Total Updates\",\n SUM(CASE WHEN iswithdrawn THEN 1 ELSE 0 END) as \"Withdrawals\",\n MIN(timestamp) as \"First Seen\",\n MAX(timestamp) as \"Last Change\",\n CASE \n WHEN COUNT(*) <= 2 THEN 'Stable'\n WHEN COUNT(*) <= 10 THEN 'Moderate'\n ELSE 'Unstable'\n END as \"Stability\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)\nGROUP BY prefix, prefix_len\nHAVING COUNT(*) > 1\nORDER BY COUNT(*) DESC\nLIMIT 30",
"format": "table",
"refId": "A"
}
]
},
{
"id": 7,
"title": "Recent Link State Changes",
"type": "table",
"gridPos": {
"h": 10,
"w": 24,
"x": 0,
"y": 26
},
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"targets": [
{
"rawSql": "SELECT l.timestamp as \"Time\",\n CASE WHEN l.iswithdrawn THEN 'DOWN' ELSE 'UP' END as \"State\",\n ln.name as \"Local Node\",\n l.local_igp_router_id as \"Local IGP ID\",\n rn.name as \"Remote Node\",\n l.remote_igp_router_id as \"Remote IGP ID\",\n l.igp_metric as \"IGP Metric\",\n l.protocol::text as \"Protocol\"\nFROM ls_links_log l\nLEFT JOIN ls_nodes ln ON ln.hash_id = l.local_node_hash_id AND ln.peer_hash_id = l.peer_hash_id\nLEFT JOIN ls_nodes rn ON rn.hash_id = l.remote_node_hash_id AND rn.peer_hash_id = l.peer_hash_id\nWHERE $__timeFilter(l.timestamp) AND l.peer_hash_id = '$peer_hash'\nORDER BY l.timestamp DESC\nLIMIT 50",
"format": "table",
"refId": "A"
}
]
},
{
"id": 8,
"title": "Multi-Peer Route Consistency",
"type": "table",
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 36
},
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"targets": [
{
"rawSql": "SELECT r.prefix::text as \"Prefix\",\n r.prefix_len as \"Len\",\n COUNT(DISTINCT r.peer_hash_id) as \"Peer Count\",\n COUNT(DISTINCT ba.origin_as) as \"Distinct Origins\",\n COUNT(DISTINCT ba.as_path_count) as \"Distinct Path Lengths\",\n string_agg(DISTINCT ba.origin_as::text, ', ') as \"Origin ASNs\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nWHERE r.iswithdrawn = false AND r.isipv4 = true\nGROUP BY r.prefix, r.prefix_len\nHAVING COUNT(DISTINCT ba.origin_as) > 1\nORDER BY COUNT(DISTINCT ba.origin_as) DESC\nLIMIT 30",
"format": "table",
"refId": "A"
}
]
},
{
"id": 9,
"title": "Active Peers",
"type": "stat",
"gridPos": {
"h": 4,
"w": 4,
"x": 0,
"y": 44
},
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"targets": [
{
"rawSql": "SELECT COUNT(*) FROM bgp_peers WHERE state = 'up'",
"format": "table",
"refId": "A"
}
]
},
{
"id": 10,
"title": "Total LS Links",
"type": "stat",
"gridPos": {
"h": 4,
"w": 4,
"x": 4,
"y": 44
},
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"targets": [
{
"rawSql": "SELECT COUNT(*) FROM ls_links WHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false",
"format": "table",
"refId": "A"
}
]
},
{
"id": 11,
"title": "Total LS Nodes",
"type": "stat",
"gridPos": {
"h": 4,
"w": 4,
"x": 8,
"y": 44
},
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"targets": [
{
"rawSql": "SELECT COUNT(*) FROM ls_nodes WHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false",
"format": "table",
"refId": "A"
}
]
},
{
"id": 12,
"title": "RIB Updates (24h)",
"type": "stat",
"gridPos": {
"h": 4,
"w": 4,
"x": 12,
"y": 44
},
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"targets": [
{
"rawSql": "SELECT COUNT(*) FROM ip_rib_log WHERE timestamp > NOW() - INTERVAL '24 hours'",
"format": "table",
"refId": "A"
}
]
},
{
"id": 13,
"title": "Link Changes (24h)",
"type": "stat",
"gridPos": {
"h": 4,
"w": 4,
"x": 16,
"y": 44
},
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"targets": [
{
"rawSql": "SELECT COUNT(*) FROM ls_links_log WHERE timestamp > NOW() - INTERVAL '24 hours' AND peer_hash_id = '$peer_hash'",
"format": "table",
"refId": "A"
}
]
},
{
"id": 14,
"title": "Origin Changes (24h)",
"type": "stat",
"gridPos": {
"h": 4,
"w": 4,
"x": 20,
"y": 44
},
"datasource": {
"type": "postgres",
"uid": "obmp_postgres"
},
"targets": [
{
"rawSql": "SELECT COUNT(DISTINCT r1.prefix) FROM ip_rib_log r1\nJOIN ip_rib_log r2 ON r1.prefix = r2.prefix AND r1.prefix_len = r2.prefix_len AND r1.timestamp > r2.timestamp\nWHERE r1.origin_as != r2.origin_as AND r1.timestamp > NOW() - INTERVAL '24 hours'",
"format": "table",
"refId": "A"
}
]
},
{
"id": 15,
"title": "About This Dashboard",
"type": "text",
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 36
},
"options": {
"mode": "markdown",
"content": "## Topology Change & Anomaly Detection\n\nThis dashboard provides heuristic analysis of BMP data to detect network anomalies:\n\n### What to Watch For\n- **Link flaps**: Rapid up/down cycles in the Link State Changes panel indicate instability\n- **Origin AS changes**: Could indicate a route hijack or legitimate migration\n- **Multi-origin prefixes**: Same prefix seen from different origin ASNs across peers\n- **Correlated events**: Peer session drops followed by mass withdrawals indicate convergence events\n\n### Testing with ExaBGP Scenarios\n1. Load `origin_shift` scenario to simulate origin AS changes\n2. Load `hijack_simulation` to see how shorter paths override legitimate routes\n3. Load/unload `churn` scenario repeatedly to generate instability patterns\n\n### Data Sources\n- **ls_links_log / ls_nodes_log**: TimescaleDB hypertables tracking all BGP-LS topology changes\n- **ip_rib_log**: All BGP RIB updates and withdrawals with timestamps\n- **peer_event_log**: BGP session state changes (up/down events)"
}
}
],
"links": [
{
"asDropdown": true,
"icon": "external link",
"includeVars": true,
"keepTime": true,
"tags": [
"obmp-nav"
],
"title": "OBMP Dashboards",
"type": "dashboards"
}
]
}