Reorganizes 31 dashboards into an operator-first structure with real navigation. Adds Router Detail and Peer Detail drilldown dashboards; merges LS Nodes+Links and the two L3VPN dashboards; modernizes all deprecated panels (table-old/graph/worldmap). Every dashboard gets the obmp-nav dropdown so the whole set is reachable from anywhere. Graduates the operational "Learning" dashboards into Operations/Routing/LinkState folders, retires the Tops folder, and relabels folders (Base->Operations, History->Routing, Learning->Reference). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
369 lines
12 KiB
JSON
369 lines
12 KiB
JSON
{
|
|
"uid": "obmp-learn-09",
|
|
"title": "Topology Change & Anomaly Detection",
|
|
"tags": [
|
|
"obmp-learning",
|
|
"obmp",
|
|
"obmp-nav"
|
|
],
|
|
"editable": true,
|
|
"schemaVersion": 39,
|
|
"time": {
|
|
"from": "now-6h",
|
|
"to": "now"
|
|
},
|
|
"templating": {
|
|
"list": [
|
|
{
|
|
"name": "peer_hash",
|
|
"label": "BGP Peer",
|
|
"type": "query",
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"query": "SELECT __text,__value FROM (\n select peername as __text, peer_hash_id as __value, count(*) as count\n from v_ls_nodes\n group by peername,peer_hash_id) d\nwhere count > 0",
|
|
"refresh": 1,
|
|
"multi": false
|
|
}
|
|
]
|
|
},
|
|
"panels": [
|
|
{
|
|
"id": 1,
|
|
"title": "Link State Changes Over Time",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 0
|
|
},
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"targets": [
|
|
{
|
|
"rawSql": "SELECT $__timeGroupAlias(timestamp, '5m') as time,\n SUM(CASE WHEN iswithdrawn = false THEN 1 ELSE 0 END) as \"Links Up\",\n SUM(CASE WHEN iswithdrawn = true THEN 1 ELSE 0 END) as \"Links Down\"\nFROM ls_links_log\nWHERE $__timeFilter(timestamp) AND peer_hash_id = '$peer_hash'\nGROUP BY 1 ORDER BY 1",
|
|
"format": "time_series",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 2,
|
|
"title": "Node Changes Over Time",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 0
|
|
},
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"targets": [
|
|
{
|
|
"rawSql": "SELECT $__timeGroupAlias(timestamp, '5m') as time,\n SUM(CASE WHEN iswithdrawn = false THEN 1 ELSE 0 END) as \"Nodes Appeared\",\n SUM(CASE WHEN iswithdrawn = true THEN 1 ELSE 0 END) as \"Nodes Withdrawn\"\nFROM ls_nodes_log\nWHERE $__timeFilter(timestamp) AND peer_hash_id = '$peer_hash'\nGROUP BY 1 ORDER BY 1",
|
|
"format": "time_series",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 3,
|
|
"title": "BGP Peer Session Events",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 8
|
|
},
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"targets": [
|
|
{
|
|
"rawSql": "SELECT $__timeGroupAlias(pel.timestamp, '5m') as time,\n SUM(CASE WHEN pel.state = 'up' THEN 1 ELSE 0 END) as \"Sessions Up\",\n SUM(CASE WHEN pel.state = 'down' THEN 1 ELSE 0 END) as \"Sessions Down\"\nFROM peer_event_log pel\nWHERE $__timeFilter(pel.timestamp)\nGROUP BY 1 ORDER BY 1",
|
|
"format": "time_series",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 4,
|
|
"title": "RIB Update Rate",
|
|
"type": "timeseries",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 8
|
|
},
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"targets": [
|
|
{
|
|
"rawSql": "SELECT $__timeGroupAlias(timestamp, '5m') as time,\n SUM(CASE WHEN iswithdrawn = false THEN 1 ELSE 0 END) as \"Advertisements\",\n SUM(CASE WHEN iswithdrawn = true THEN 1 ELSE 0 END) as \"Withdrawals\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)\nGROUP BY 1 ORDER BY 1",
|
|
"format": "time_series",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 5,
|
|
"title": "Origin AS Changes (Potential Hijacks)",
|
|
"type": "table",
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 16
|
|
},
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"targets": [
|
|
{
|
|
"rawSql": "SELECT DISTINCT ON (r1.prefix, r1.prefix_len)\n r1.prefix::text as \"Prefix\",\n r1.prefix_len as \"Len\",\n r1.origin_as as \"Current Origin AS\",\n r2.origin_as as \"Previous Origin AS\",\n r1.timestamp as \"Changed At\"\nFROM ip_rib_log r1\nJOIN ip_rib_log r2 ON r1.prefix = r2.prefix \n AND r1.prefix_len = r2.prefix_len\n AND r1.timestamp > r2.timestamp\nWHERE r1.origin_as != r2.origin_as\n AND $__timeFilter(r1.timestamp)\nORDER BY r1.prefix, r1.prefix_len, r1.timestamp DESC\nLIMIT 50",
|
|
"format": "table",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 6,
|
|
"title": "Most Churned Prefixes",
|
|
"type": "table",
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 16
|
|
},
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"targets": [
|
|
{
|
|
"rawSql": "SELECT prefix::text as \"Prefix\",\n prefix_len as \"Len\",\n COUNT(*) as \"Total Updates\",\n SUM(CASE WHEN iswithdrawn THEN 1 ELSE 0 END) as \"Withdrawals\",\n MIN(timestamp) as \"First Seen\",\n MAX(timestamp) as \"Last Change\",\n CASE \n WHEN COUNT(*) <= 2 THEN 'Stable'\n WHEN COUNT(*) <= 10 THEN 'Moderate'\n ELSE 'Unstable'\n END as \"Stability\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)\nGROUP BY prefix, prefix_len\nHAVING COUNT(*) > 1\nORDER BY COUNT(*) DESC\nLIMIT 30",
|
|
"format": "table",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 7,
|
|
"title": "Recent Link State Changes",
|
|
"type": "table",
|
|
"gridPos": {
|
|
"h": 10,
|
|
"w": 24,
|
|
"x": 0,
|
|
"y": 26
|
|
},
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"targets": [
|
|
{
|
|
"rawSql": "SELECT l.timestamp as \"Time\",\n CASE WHEN l.iswithdrawn THEN 'DOWN' ELSE 'UP' END as \"State\",\n ln.name as \"Local Node\",\n l.local_igp_router_id as \"Local IGP ID\",\n rn.name as \"Remote Node\",\n l.remote_igp_router_id as \"Remote IGP ID\",\n l.igp_metric as \"IGP Metric\",\n l.protocol::text as \"Protocol\"\nFROM ls_links_log l\nLEFT JOIN ls_nodes ln ON ln.hash_id = l.local_node_hash_id AND ln.peer_hash_id = l.peer_hash_id\nLEFT JOIN ls_nodes rn ON rn.hash_id = l.remote_node_hash_id AND rn.peer_hash_id = l.peer_hash_id\nWHERE $__timeFilter(l.timestamp) AND l.peer_hash_id = '$peer_hash'\nORDER BY l.timestamp DESC\nLIMIT 50",
|
|
"format": "table",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 8,
|
|
"title": "Multi-Peer Route Consistency",
|
|
"type": "table",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 0,
|
|
"y": 36
|
|
},
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"targets": [
|
|
{
|
|
"rawSql": "SELECT r.prefix::text as \"Prefix\",\n r.prefix_len as \"Len\",\n COUNT(DISTINCT r.peer_hash_id) as \"Peer Count\",\n COUNT(DISTINCT ba.origin_as) as \"Distinct Origins\",\n COUNT(DISTINCT ba.as_path_count) as \"Distinct Path Lengths\",\n string_agg(DISTINCT ba.origin_as::text, ', ') as \"Origin ASNs\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nWHERE r.iswithdrawn = false AND r.isipv4 = true\nGROUP BY r.prefix, r.prefix_len\nHAVING COUNT(DISTINCT ba.origin_as) > 1\nORDER BY COUNT(DISTINCT ba.origin_as) DESC\nLIMIT 30",
|
|
"format": "table",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 9,
|
|
"title": "Active Peers",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 4,
|
|
"x": 0,
|
|
"y": 44
|
|
},
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"targets": [
|
|
{
|
|
"rawSql": "SELECT COUNT(*) FROM bgp_peers WHERE state = 'up'",
|
|
"format": "table",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 10,
|
|
"title": "Total LS Links",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 4,
|
|
"x": 4,
|
|
"y": 44
|
|
},
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"targets": [
|
|
{
|
|
"rawSql": "SELECT COUNT(*) FROM ls_links WHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false",
|
|
"format": "table",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 11,
|
|
"title": "Total LS Nodes",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 4,
|
|
"x": 8,
|
|
"y": 44
|
|
},
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"targets": [
|
|
{
|
|
"rawSql": "SELECT COUNT(*) FROM ls_nodes WHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false",
|
|
"format": "table",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 12,
|
|
"title": "RIB Updates (24h)",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 4,
|
|
"x": 12,
|
|
"y": 44
|
|
},
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"targets": [
|
|
{
|
|
"rawSql": "SELECT COUNT(*) FROM ip_rib_log WHERE timestamp > NOW() - INTERVAL '24 hours'",
|
|
"format": "table",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 13,
|
|
"title": "Link Changes (24h)",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 4,
|
|
"x": 16,
|
|
"y": 44
|
|
},
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"targets": [
|
|
{
|
|
"rawSql": "SELECT COUNT(*) FROM ls_links_log WHERE timestamp > NOW() - INTERVAL '24 hours' AND peer_hash_id = '$peer_hash'",
|
|
"format": "table",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 14,
|
|
"title": "Origin Changes (24h)",
|
|
"type": "stat",
|
|
"gridPos": {
|
|
"h": 4,
|
|
"w": 4,
|
|
"x": 20,
|
|
"y": 44
|
|
},
|
|
"datasource": {
|
|
"type": "postgres",
|
|
"uid": "obmp_postgres"
|
|
},
|
|
"targets": [
|
|
{
|
|
"rawSql": "SELECT COUNT(DISTINCT r1.prefix) FROM ip_rib_log r1\nJOIN ip_rib_log r2 ON r1.prefix = r2.prefix AND r1.prefix_len = r2.prefix_len AND r1.timestamp > r2.timestamp\nWHERE r1.origin_as != r2.origin_as AND r1.timestamp > NOW() - INTERVAL '24 hours'",
|
|
"format": "table",
|
|
"refId": "A"
|
|
}
|
|
]
|
|
},
|
|
{
|
|
"id": 15,
|
|
"title": "About This Dashboard",
|
|
"type": "text",
|
|
"gridPos": {
|
|
"h": 8,
|
|
"w": 12,
|
|
"x": 12,
|
|
"y": 36
|
|
},
|
|
"options": {
|
|
"mode": "markdown",
|
|
"content": "## Topology Change & Anomaly Detection\n\nThis dashboard provides heuristic analysis of BMP data to detect network anomalies:\n\n### What to Watch For\n- **Link flaps**: Rapid up/down cycles in the Link State Changes panel indicate instability\n- **Origin AS changes**: Could indicate a route hijack or legitimate migration\n- **Multi-origin prefixes**: Same prefix seen from different origin ASNs across peers\n- **Correlated events**: Peer session drops followed by mass withdrawals indicate convergence events\n\n### Testing with ExaBGP Scenarios\n1. Load `origin_shift` scenario to simulate origin AS changes\n2. Load `hijack_simulation` to see how shorter paths override legitimate routes\n3. Load/unload `churn` scenario repeatedly to generate instability patterns\n\n### Data Sources\n- **ls_links_log / ls_nodes_log**: TimescaleDB hypertables tracking all BGP-LS topology changes\n- **ip_rib_log**: All BGP RIB updates and withdrawals with timestamps\n- **peer_event_log**: BGP session state changes (up/down events)"
|
|
}
|
|
}
|
|
],
|
|
"links": [
|
|
{
|
|
"asDropdown": true,
|
|
"icon": "external link",
|
|
"includeVars": true,
|
|
"keepTime": true,
|
|
"tags": [
|
|
"obmp-nav"
|
|
],
|
|
"title": "OBMP Dashboards",
|
|
"type": "dashboards"
|
|
}
|
|
]
|
|
} |