From 66219420325f106f33810a3823fc439a04e6cde4 Mon Sep 17 00:00:00 2001 From: sam Date: Thu, 5 Mar 2026 15:37:16 -0700 Subject: [PATCH] Add Phase 2: Vue 3 control panel, 6 learning dashboards, new BGP scenarios - exabgp-ui/: Vue 3 + Vite SPA served by NGINX on :5001; proxies /api/ to ExaBGP Flask on :5050; includes StatusBar, ScenarioPanel, RouteTable, AnnounceForm, PeerStatus, ChurnControl components - docker-compose.yml: add obmp-exabgp-ui service (host network, port 5001) - exabgp/scenarios/__init__.py: add convergence_test, route_leak, hijack_simulation scenarios for structured BGP learning exercises - exabgp/inject.py: add 'peers' and 'monitor' subcommands; live-refresh terminal status view with ANSI cursor repositioning - obmp-grafana/dashboards/Learning/: 6 new OBMP-Learning dashboards (update rate, peer health, AS path, RPKI, churn, attributes) - obmp-grafana/provisioning/dashboards/openbmp-dashboards.yml: add OpenBMP-Learning folder provider pointing to dashboards/Learning/ - DOCS.md: document Web UI, 3 new scenarios, 6 learning dashboards; fix section numbering (10-14) and architecture diagram (23 dashboards) Co-Authored-By: Claude Sonnet 4.6 --- DOCS.md | 94 +++- docker-compose.yml | 10 + exabgp-ui/Dockerfile | 12 + exabgp-ui/index.html | 16 + exabgp-ui/nginx.conf | 15 + exabgp-ui/package.json | 17 + exabgp-ui/src/App.vue | 301 +++++++++++ exabgp-ui/src/api.js | 21 + exabgp-ui/src/components/AnnounceForm.vue | 383 ++++++++++++++ exabgp-ui/src/components/ChurnControl.vue | 481 ++++++++++++++++++ exabgp-ui/src/components/PeerStatus.vue | 214 ++++++++ exabgp-ui/src/components/RouteTable.vue | 362 +++++++++++++ exabgp-ui/src/components/ScenarioPanel.vue | 319 ++++++++++++ exabgp-ui/src/components/StatusBar.vue | 127 +++++ exabgp-ui/src/main.js | 3 + exabgp-ui/vite.config.js | 14 + exabgp/inject.py | 66 ++- exabgp/scenarios/__init__.py | 92 ++++ .../dashboards/Learning/learning_as_path.json | 160 ++++++ .../Learning/learning_attributes.json | 201 ++++++++ 
.../dashboards/Learning/learning_churn.json | 152 ++++++ .../Learning/learning_peer_health.json | 144 ++++++ .../dashboards/Learning/learning_rpki.json | 150 ++++++ .../Learning/learning_update_rate.json | 137 +++++ .../dashboards/openbmp-dashboards.yml | 11 + 25 files changed, 3487 insertions(+), 15 deletions(-) create mode 100644 exabgp-ui/Dockerfile create mode 100644 exabgp-ui/index.html create mode 100644 exabgp-ui/nginx.conf create mode 100644 exabgp-ui/package.json create mode 100644 exabgp-ui/src/App.vue create mode 100644 exabgp-ui/src/api.js create mode 100644 exabgp-ui/src/components/AnnounceForm.vue create mode 100644 exabgp-ui/src/components/ChurnControl.vue create mode 100644 exabgp-ui/src/components/PeerStatus.vue create mode 100644 exabgp-ui/src/components/RouteTable.vue create mode 100644 exabgp-ui/src/components/ScenarioPanel.vue create mode 100644 exabgp-ui/src/components/StatusBar.vue create mode 100644 exabgp-ui/src/main.js create mode 100644 exabgp-ui/vite.config.js create mode 100644 obmp-grafana/dashboards/Learning/learning_as_path.json create mode 100644 obmp-grafana/dashboards/Learning/learning_attributes.json create mode 100644 obmp-grafana/dashboards/Learning/learning_churn.json create mode 100644 obmp-grafana/dashboards/Learning/learning_peer_health.json create mode 100644 obmp-grafana/dashboards/Learning/learning_rpki.json create mode 100644 obmp-grafana/dashboards/Learning/learning_update_rate.json diff --git a/DOCS.md b/DOCS.md index a60f07d..0a63ec4 100644 --- a/DOCS.md +++ b/DOCS.md @@ -9,12 +9,14 @@ 5. [IOS-XR Router Configuration](#5-ios-xr-router-configuration) 6. [Starting and Stopping](#6-starting-and-stopping) 7. [Route Injection User Guide](#7-route-injection-user-guide) -8. [Grafana Dashboards](#8-grafana-dashboards) -9. [Sanity Checks](#9-sanity-checks) -10. [Relevant Commands Reference](#10-relevant-commands-reference) -11. [Troubleshooting](#11-troubleshooting) -12. [Data Retention](#12-data-retention) -13. 
[Environment Variables Reference](#13-environment-variables-reference) +8. [ExaBGP Control Panel (Web UI)](#8-exabgp-control-panel-web-ui) +9. [Grafana Dashboards](#9-grafana-dashboards) +10. [Sanity Checks](#10-sanity-checks) +11. [Relevant Commands Reference](#11-relevant-commands-reference) +12. [Troubleshooting](#12-troubleshooting) +13. [Data Retention](#13-data-retention) +14. [Environment Variables Reference](#14-environment-variables-reference) + --- @@ -26,8 +28,9 @@ This is a **BGP Monitoring Platform (BMP) lab stack** deployed via Docker Compos - Receives BMP (BGP Monitoring Protocol, RFC 7854) telemetry from routers on TCP port 5000 - Streams BMP data through Kafka into a TimescaleDB/PostgreSQL database -- Provides 17 Grafana dashboards for real-time and historical BGP analysis +- Provides **23 Grafana dashboards** (17 operational + 6 learning-focused) for real-time and historical BGP analysis - Includes an **ExaBGP route injector** that peers with the two CORE routers and injects synthetic BGP routes, enabling testing of BGP policy, route propagation, and Grafana dashboards without needing internet connectivity +- Provides a **Vue 3 web UI** at `:5001` for point-and-click scenario management, live route tables, and peer monitoring **The lab network:** @@ -61,7 +64,7 @@ IOS-XR Routers (9x, AS 65020) PostgreSQL 14 + TimescaleDB | +---------> obmp-grafana (grafana/grafana:9.1.7) :3000 - | 17 dashboards, PostgreSQL datasource + | 23 dashboards, PostgreSQL datasource +---------> obmp-whois (openbmp/whois:2.2.0) :4300 WHOIS query server backed by the DB @@ -84,6 +87,7 @@ ExaBGP (obmp-exabgp, built locally) | obmp-grafana | grafana/grafana:9.1.7 | 3000 | Visualization | | obmp-whois | openbmp/whois:2.2.0 | 4300 | WHOIS query server | | obmp-exabgp | local build | 5050 (host net) | BGP route injector | +| obmp-exabgp-ui | local build | 5001 (host net) | Vue 3 web control panel | --- @@ -305,6 +309,9 @@ python3 inject.py scenarios | `anycast` | 3 | Same 
prefixes with varying AS paths and MEDs (best-path testing) | | `full_table` | 500+ | Large partial internet table with synthetic /24s | | `lab_prefixes` | 8 | Enterprise/SP-style routes with communities and local-pref | +| `convergence_test` | 10 | Prefixes for timing BGP convergence — announce then check ip_rib_log timestamps | +| `route_leak` | 10 | Real prefixes re-announced with short AS paths — simulates a route leak (community 65100:999) | +| `hijack_simulation` | 10 | Prefixes claimed directly by AS 65100 — simulates a prefix hijack (community 65100:hijack) | ### 7.4 Load a scenario @@ -400,7 +407,51 @@ docker compose -p obmp restart exabgp --- -## 8. Grafana Dashboards +## 8. ExaBGP Control Panel (Web UI) + +Access: `http://10.40.40.202:5001` + +A Vue 3 single-page app served by NGINX that proxies `/api/` to the ExaBGP Flask API on port 5050. No login required. + +### Layout + +``` +┌─────────────────────────────────────────────────────────────┐ +│ OpenBMP Route Injector [API OK] [77 routes] [2/2 UP] │ +├──────────────────────┬──────────────────────────────────────┤ +│ SCENARIOS │ [Routes] [Inject] [Peers] tabs │ +│ │ │ +│ [internet_sample] │ Routes tab: searchable/paginated │ +│ [LOAD] [UNLOAD] │ table with per-row Withdraw button │ +│ │ │ +│ [churn] │ Inject tab: manual prefix form │ +│ [LOAD] [START CHURN]│ (prefix, AS path, communities, MED) │ +│ │ │ +│ [blackhole] ... 
│ Peers tab: per-peer UP/DOWN cards │ +├──────────────────────┴──────────────────────────────────────┤ +│ Refreshing every 5s │ +└─────────────────────────────────────────────────────────────┘ +``` + +### Features + +- **Live status bar** — API health, active route count, peer UP/DOWN badges; auto-refreshes every 5 seconds +- **Scenario panel** — Load/Unload buttons for all 9 scenarios with loading states and feedback +- **Churn control** — Start/stop churn cycles with configurable count and interval sliders directly in the browser +- **Route table** — Searchable, paginated (20/page) table of active routes; per-row Withdraw button; Withdraw All +- **Manual inject form** — Announce any prefix with custom AS path, communities, MED, local-pref +- **Peer cards** — Per-peer state display with UP (green) / DOWN (red pulsing) indicators + +### Rebuild after code changes + +```bash +docker compose -p obmp build exabgp-ui +docker compose -p obmp up -d exabgp-ui +``` + +--- + +## 9. Grafana Dashboards Access: `http://10.40.40.202:3000` Default credentials: `admin` / `openbmp` (anonymous access also enabled) @@ -429,9 +480,24 @@ Default credentials: `admin` / `openbmp` (anonymous access also enabled) > History dashboards require `ip_rib_log` and `stats_chg_*` table data. Run `inject.py churn` to populate these. +### OBMP-Learning Dashboards (folder: `OBMP-Learning`) + +Six learning-focused dashboards in a separate folder, designed to teach BGP concepts using live lab data. 
+ +| Dashboard | UID | What it teaches | +|-----------|-----|-----------------| +| BGP Update Rate & Churn | `obmp-learn-01` | Network stability — advertisements vs withdrawals over time from `ip_rib_log`; per-peer update counts | +| Peer Session Health & Flap Analysis | `obmp-learn-02` | BGP session stability — state timeline, flap count, uptime %, last reset reason | +| AS Path Analysis | `obmp-learn-03` | Internet topology — path length distribution, longest paths, top origin ASNs, transit frequency | +| RPKI Validation Status | `obmp-learn-04` | BGP security — Valid / Invalid / NotFound breakdown; invalid routes (potential hijacks) table | +| Route Churn & Stability Score | `obmp-learn-05` | Prefix stability — tiered churn score (Very Stable / Stable / Moderate / Unstable) per prefix | +| BGP Attribute Explorer | `obmp-learn-06` | BGP path attributes — community list distribution, MED values, local-pref spread per peer | + +> **RPKI note:** The `rpki_validator` table is populated by a cron job in `psql-app` every 2 hours. Dashboard `obmp-learn-04` will show zero counts until the cron runs — check `ENABLE_RPKI=1` in `docker-compose.yml`. + --- -## 9. Sanity Checks +## 10. Sanity Checks ### 9.1 All containers running @@ -517,7 +583,7 @@ Should show periodic cron job outputs (RPKI sync, IRR sync, global_ip_rib update --- -## 10. Relevant Commands Reference +## 11. Relevant Commands Reference ### Docker Compose @@ -622,7 +688,7 @@ show route 8.8.8.0/24 --- -## 11. Troubleshooting +## 12. Troubleshooting ### ExaBGP container keeps restarting @@ -692,7 +758,7 @@ docker compose -p obmp restart psql-app --- -## 12. Data Retention +## 13. Data Retention Configured in `docker-compose.yml` via `POSTGRES_DROP_*` environment variables: @@ -716,7 +782,7 @@ Adjust in `docker-compose.yml` under the `psql-app` service environment block. --- -## 13. Environment Variables Reference +## 14. 
Environment Variables Reference ### ExaBGP container diff --git a/docker-compose.yml b/docker-compose.yml index 28acc94..7fdac0a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -221,6 +221,16 @@ services: - ./exabgp/scenarios:/exabgp/scenarios # No ports: block needed — network_mode: host exposes directly + exabgp-ui: + restart: unless-stopped + container_name: obmp-exabgp-ui + build: + context: ./exabgp-ui + dockerfile: Dockerfile + # Host networking so NGINX can proxy /api to ExaBGP Flask on localhost:5050 + network_mode: host + # Serves on port 5001 (host network, defined in nginx.conf) + whois: restart: unless-stopped container_name: obmp-whois diff --git a/exabgp-ui/Dockerfile b/exabgp-ui/Dockerfile new file mode 100644 index 0000000..2c02609 --- /dev/null +++ b/exabgp-ui/Dockerfile @@ -0,0 +1,12 @@ +FROM node:20-alpine AS build +WORKDIR /app +COPY package.json ./ +RUN npm install +COPY . . +RUN npm run build + +FROM nginx:alpine +COPY --from=build /app/dist /usr/share/nginx/html +COPY nginx.conf /etc/nginx/conf.d/default.conf +EXPOSE 5001 +CMD ["nginx", "-g", "daemon off;"] diff --git a/exabgp-ui/index.html b/exabgp-ui/index.html new file mode 100644 index 0000000..124e24c --- /dev/null +++ b/exabgp-ui/index.html @@ -0,0 +1,16 @@ + + + + + + OpenBMP Route Injector + + + +
+ + + diff --git a/exabgp-ui/nginx.conf b/exabgp-ui/nginx.conf new file mode 100644 index 0000000..0d3a895 --- /dev/null +++ b/exabgp-ui/nginx.conf @@ -0,0 +1,15 @@ +server { + listen 5001; + root /usr/share/nginx/html; + index index.html; + + location /api/ { + proxy_pass http://localhost:5050/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + + location / { + try_files $uri $uri/ /index.html; + } +} diff --git a/exabgp-ui/package.json b/exabgp-ui/package.json new file mode 100644 index 0000000..4da3444 --- /dev/null +++ b/exabgp-ui/package.json @@ -0,0 +1,17 @@ +{ + "name": "exabgp-ui", + "version": "1.0.0", + "private": true, + "scripts": { + "dev": "vite", + "build": "vite build", + "preview": "vite preview" + }, + "dependencies": { + "vue": "^3.3.0" + }, + "devDependencies": { + "@vitejs/plugin-vue": "^4.2.0", + "vite": "^4.4.0" + } +} diff --git a/exabgp-ui/src/App.vue b/exabgp-ui/src/App.vue new file mode 100644 index 0000000..c031bec --- /dev/null +++ b/exabgp-ui/src/App.vue @@ -0,0 +1,301 @@ + + + + + + + diff --git a/exabgp-ui/src/api.js b/exabgp-ui/src/api.js new file mode 100644 index 0000000..23b33b8 --- /dev/null +++ b/exabgp-ui/src/api.js @@ -0,0 +1,21 @@ +const BASE = '/api' + +async function req(method, path, body) { + const opts = { method, headers: { 'Content-Type': 'application/json' } } + if (body) opts.body = JSON.stringify(body) + const r = await fetch(BASE + path, opts) + if (!r.ok) throw new Error(`${method} ${path} → ${r.status}`) + return r.json() +} + +export const api = { + health: () => req('GET', '/healthz'), + peers: () => req('GET', '/peers'), + routes: () => req('GET', '/routes'), + scenarios: () => req('GET', '/scenarios'), + loadScenario: name => req('POST', `/scenario/${name}`), + unloadScenario: name => req('DELETE', `/scenario/${name}`), + announce: payload => req('POST', '/announce', payload), + withdraw: prefixes => req('POST', '/withdraw', { prefixes }), + withdrawAll: () => req('POST', 
'/withdraw/all'), +} diff --git a/exabgp-ui/src/components/AnnounceForm.vue b/exabgp-ui/src/components/AnnounceForm.vue new file mode 100644 index 0000000..3c4636d --- /dev/null +++ b/exabgp-ui/src/components/AnnounceForm.vue @@ -0,0 +1,383 @@ + + + + + diff --git a/exabgp-ui/src/components/ChurnControl.vue b/exabgp-ui/src/components/ChurnControl.vue new file mode 100644 index 0000000..e8ea865 --- /dev/null +++ b/exabgp-ui/src/components/ChurnControl.vue @@ -0,0 +1,481 @@ + + + + + diff --git a/exabgp-ui/src/components/PeerStatus.vue b/exabgp-ui/src/components/PeerStatus.vue new file mode 100644 index 0000000..f02f929 --- /dev/null +++ b/exabgp-ui/src/components/PeerStatus.vue @@ -0,0 +1,214 @@ + + + + + diff --git a/exabgp-ui/src/components/RouteTable.vue b/exabgp-ui/src/components/RouteTable.vue new file mode 100644 index 0000000..f1754cb --- /dev/null +++ b/exabgp-ui/src/components/RouteTable.vue @@ -0,0 +1,362 @@ + + + + + diff --git a/exabgp-ui/src/components/ScenarioPanel.vue b/exabgp-ui/src/components/ScenarioPanel.vue new file mode 100644 index 0000000..e0964b1 --- /dev/null +++ b/exabgp-ui/src/components/ScenarioPanel.vue @@ -0,0 +1,319 @@ + + + + + diff --git a/exabgp-ui/src/components/StatusBar.vue b/exabgp-ui/src/components/StatusBar.vue new file mode 100644 index 0000000..802fcd6 --- /dev/null +++ b/exabgp-ui/src/components/StatusBar.vue @@ -0,0 +1,127 @@ + + + + + diff --git a/exabgp-ui/src/main.js b/exabgp-ui/src/main.js new file mode 100644 index 0000000..1d4acd7 --- /dev/null +++ b/exabgp-ui/src/main.js @@ -0,0 +1,3 @@ +import { createApp } from 'vue' +import App from './App.vue' +createApp(App).mount('#app') diff --git a/exabgp-ui/vite.config.js b/exabgp-ui/vite.config.js new file mode 100644 index 0000000..71b1406 --- /dev/null +++ b/exabgp-ui/vite.config.js @@ -0,0 +1,14 @@ +import { defineConfig } from 'vite' +import vue from '@vitejs/plugin-vue' + +export default defineConfig({ + plugins: [vue()], + server: { + proxy: { + '/api': { + target: 
'http://localhost:5050', + rewrite: path => path.replace(/^\/api/, '') + } + } + } +}) diff --git a/exabgp/inject.py b/exabgp/inject.py index f1fc7fd..370e20b 100644 --- a/exabgp/inject.py +++ b/exabgp/inject.py @@ -4,6 +4,7 @@ inject.py — CLI wrapper for the ExaBGP Route Injection API Usage: inject.py status + inject.py peers inject.py routes inject.py scenarios inject.py announce [...] [--as-path ASN...] [--community STR...] [--med N] [--next-hop IP] @@ -12,6 +13,7 @@ Usage: inject.py scenario inject.py withdraw-scenario inject.py churn [--count N] [--interval SEC] # cycle announce/withdraw for ip_rib_log population + inject.py monitor # live-refresh terminal view Environment: EXABGP_API=http://localhost:5050 API base URL @@ -53,6 +55,64 @@ def cmd_status(args): _pp(_get('/healthz')) +def cmd_peers(args): + data = _get('/peers') + peers = data.get('peers', {}) + if not peers: + print("No peer state received yet (ExaBGP may still be establishing sessions).") + return + print(f"{'Peer':<20} {'State':<8} {'Updated'}") + print('-' * 55) + for ip, info in peers.items(): + state = info.get('state', 'unknown') + updated = info.get('updated', '-') + indicator = 'UP' if state == 'up' else 'DOWN' + print(f"{ip:<20} {indicator:<8} {updated}") + + +def cmd_monitor(args): + """Live-refreshing terminal status view. 
Ctrl+C to exit.""" + import shutil + print("OpenBMP ExaBGP Monitor (Ctrl+C to exit)\n") + try: + while True: + try: + health = _get('/healthz') + peers = health.get('peers', {}) + active = health.get('active_routes', 0) + status = health.get('status', '?') + + # Clear to start of previous output using ANSI codes + cols, _ = shutil.get_terminal_size(fallback=(80, 24)) + peer_count = len(peers) + peers_up = sum(1 for p in peers.values() if p.get('state') == 'up') + + lines = [ + f" API: {status.upper():<8} Routes: {active:<6} Peers: {peers_up}/{peer_count} UP", + '', + ] + for ip, info in peers.items(): + state = info.get('state', 'unknown').upper() + updated = info.get('updated', '-') + lines.append(f" {ip:<22} {state:<6} {updated}") + + lines.append('') + lines.append(f" Refreshing every 5s ... {time.strftime('%H:%M:%S')}") + + output = '\n'.join(lines) + # Move cursor up to overwrite previous output + print(f"\033[{len(lines) + 1}A", end='') + print(output) + + except requests.exceptions.ConnectionError: + print("\033[1A API: UNREACHABLE") + + time.sleep(5) + + except KeyboardInterrupt: + print("\n\nMonitor stopped.") + + def cmd_routes(args): data = _get('/routes') print(f"Active routes: {data['count']}") @@ -153,9 +213,11 @@ def main(): ) sub = parser.add_subparsers(dest='command') - sub.add_parser('status', help='Show API health and peer states') + sub.add_parser('status', help='Show API health and peer states (JSON)') + sub.add_parser('peers', help='Show BGP peer states in a readable table') sub.add_parser('routes', help='List active announced routes') sub.add_parser('scenarios', help='List available scenarios') + sub.add_parser('monitor', help='Live-refreshing terminal status view') sub.add_parser('withdraw-all', help='Withdraw all active routes') p = sub.add_parser('announce', help='Announce one or more prefixes') @@ -184,8 +246,10 @@ def main(): cmds = { 'status': cmd_status, + 'peers': cmd_peers, 'routes': cmd_routes, 'scenarios': cmd_scenarios, + 
'monitor': cmd_monitor, 'announce': cmd_announce, 'withdraw': cmd_withdraw, 'withdraw-all': cmd_withdraw_all, diff --git a/exabgp/scenarios/__init__.py b/exabgp/scenarios/__init__.py index bde9ca8..a1c91a1 100644 --- a/exabgp/scenarios/__init__.py +++ b/exabgp/scenarios/__init__.py @@ -283,6 +283,86 @@ _LAB_ROUTES = [ ] +# --------------------------------------------------------------------------- +# Phase 2 learning scenarios +# --------------------------------------------------------------------------- + +# --------------------------------------------------------------------------- +# Scenario: convergence_test +# 10 prefixes for timing BGP convergence. +# Announce with inject.py, observe arrival in ip_rib_log, then withdraw. +# Convergence time = delta between first announcement and stable state. +# --------------------------------------------------------------------------- + +_CONVERGENCE_ROUTES = [ + _r('192.168.100.0/24', [65100, 65200], communities=['65100:convergence']), + _r('192.168.101.0/24', [65100, 65200], communities=['65100:convergence']), + _r('192.168.102.0/24', [65100, 65200], communities=['65100:convergence']), + _r('192.168.103.0/24', [65100, 65200], communities=['65100:convergence']), + _r('192.168.104.0/24', [65100, 65200], communities=['65100:convergence']), + _r('192.168.105.0/24', [65100, 65200], communities=['65100:convergence']), + _r('192.168.106.0/24', [65100, 65200], communities=['65100:convergence']), + _r('192.168.107.0/24', [65100, 65200], communities=['65100:convergence']), + _r('192.168.108.0/24', [65100, 65200], communities=['65100:convergence']), + _r('192.168.109.0/24', [65100, 65200], communities=['65100:convergence']), +] + + +# --------------------------------------------------------------------------- +# Scenario: route_leak +# Simulates a route leak: real internet prefixes re-announced with a short +# (direct) AS path, as if an intermediate AS leaked them without proper +# filtering. Community 65100:999 tags these as "leaked". 
+# Learning: shows how a shorter AS path wins best-path selection even when +# the route arrives via an unexpected AS. Watch the Grafana AS Path dashboard. +# --------------------------------------------------------------------------- + +_ROUTE_LEAK_ROUTES = [ + # Real prefixes with their legitimate origin AS, re-announced via AS 65100 as a two-AS path (leak simulation) + _r('8.8.8.0/24', [65100, 15169], communities=['65100:999']), # Google DNS — legit origin + _r('1.1.1.0/24', [65100, 13335], communities=['65100:999']), # Cloudflare — legit origin + _r('208.67.222.0/24', [65100, 36692], communities=['65100:999']), # OpenDNS + _r('9.9.9.0/24', [65100, 19281], communities=['65100:999']), # Quad9 + _r('4.2.2.0/24', [65100, 3356], communities=['65100:999']), # Level3 DNS (leaked from transit) + _r('64.6.64.0/24', [65100, 19262], communities=['65100:999']), # Verisign + _r('156.154.70.0/24', [65100, 19318], communities=['65100:999']), # Neustar + _r('195.46.39.0/24', [65100, 21414], communities=['65100:999']), # SafeDNS + _r('216.146.35.0/24', [65100, 36692], communities=['65100:999']), # Dyn/Oracle + _r('77.88.8.0/24', [65100, 13238], communities=['65100:999']), # Yandex DNS +] + + +# --------------------------------------------------------------------------- +# Scenario: hijack_simulation +# Simulates a BGP prefix hijack: ExaBGP (AS 65100) announces a subset of +# the internet_sample prefixes with a *shorter* AS path than the legitimate +# announcements, mimicking an attacker claiming ownership. +# Community 65100:hijack marks these entries. +# Learning: demonstrates why shorter AS paths win, how RPKI prevents this, +# and why origin AS validation matters. +# Watch ip_rib on the CORE routers: the hijack paths should become bestpaths +# if they have a shorter AS path length than the existing legitimate routes. 
+# --------------------------------------------------------------------------- + +_HIJACK_ROUTES = [ + # Announcing Google prefixes as if originated directly from AS 65100 + # (shorter path = wins best-path selection over the legitimate 3-hop paths) + _r('8.8.8.0/24', [65100], communities=['65100:hijack', '65100:999']), + _r('8.8.4.0/24', [65100], communities=['65100:hijack', '65100:999']), + _r('1.1.1.0/24', [65100], communities=['65100:hijack', '65100:999']), + _r('104.16.0.0/13', [65100], communities=['65100:hijack', '65100:999']), + _r('172.217.0.0/16', [65100], communities=['65100:hijack', '65100:999']), + # Announcing AWS prefixes + _r('52.0.0.0/14', [65100], communities=['65100:hijack', '65100:999']), + _r('54.64.0.0/13', [65100], communities=['65100:hijack', '65100:999']), + # Announcing Azure prefixes + _r('40.64.0.0/10', [65100], communities=['65100:hijack', '65100:999']), + _r('13.64.0.0/11', [65100], communities=['65100:hijack', '65100:999']), + # Announcing Cloudflare prefixes + _r('162.158.0.0/15', [65100], communities=['65100:hijack', '65100:999']), +] + + # --------------------------------------------------------------------------- # Registry # --------------------------------------------------------------------------- @@ -312,4 +392,16 @@ SCENARIOS = { 'description': 'Enterprise/SP-style routes with communities and local-pref for policy testing', 'routes': _LAB_ROUTES, }, + 'convergence_test': { + 'description': '10 prefixes for BGP convergence timing — announce, observe ip_rib_log, withdraw', + 'routes': _CONVERGENCE_ROUTES, + }, + 'route_leak': { + 'description': '10 real prefixes re-announced with short AS paths — simulates a route leak (community 65100:999)', + 'routes': _ROUTE_LEAK_ROUTES, + }, + 'hijack_simulation': { + 'description': '10 prefixes announced as if directly originated by AS 65100 — simulates a prefix hijack (community 65100:hijack)', + 'routes': _HIJACK_ROUTES, + }, } diff --git 
a/obmp-grafana/dashboards/Learning/learning_as_path.json b/obmp-grafana/dashboards/Learning/learning_as_path.json new file mode 100644 index 0000000..ad3fdb6 --- /dev/null +++ b/obmp-grafana/dashboards/Learning/learning_as_path.json @@ -0,0 +1,160 @@ +{ + "annotations": {"list": [{"builtIn": 1,"datasource": {"type": "datasource","uid": "grafana"},"enable": true,"hide": true,"iconColor": "rgba(0, 211, 255, 1)","name": "Annotations & Alerts","type": "dashboard"}]}, + "description": "AS path length distribution and analysis. Teaches how BGP AS paths reflect internet topology and how to detect anomalies like route leaks or AS path prepending.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: Internet routes typically have 2-5 hops. A /32 or /24 appearing with only 1-hop AS path from an unexpected ASN is a classic hijack indicator. Routes with 10+ hops may indicate prepending.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"fillOpacity": 80,"gradientMode": "none","lineWidth": 0}, + "unit": "short" + } + }, + "gridPos": {"h": 10,"w": 12,"x": 0,"y": 0}, + "id": 1, + "options": {"barRadius": 0,"barWidth": 0.7,"groupWidth": 0.7,"legend": {"calcs": [],"displayMode": "list","placement": "bottom"},"orientation": "auto","tooltip": {"mode": "single"},"xTickLabelRotation": 0,"xTickLabelSpacing": 200}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT\n ba.as_path_count AS \"AS Path Length (hops)\",\n COUNT(*) AS \"Prefix Count\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nWHERE r.iswithdrawn = false\n AND r.isipv4 = true\n AND ba.as_path_count > 0\nGROUP BY ba.as_path_count\nORDER BY ba.as_path_count", + "refId": "A" + } + ], + "title": "AS Path Length Distribution (Active IPv4 Routes)", 
+ "type": "barchart" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: Average AS path length on the internet is ~4-5 hops. Your lab has shorter paths since ExaBGP is a single eBGP hop away.", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 5},{"color": "red","value": 8}]}, + "unit": "short", + "decimals": 1 + } + }, + "gridPos": {"h": 5,"w": 6,"x": 12,"y": 0}, + "id": 2, + "options": {"colorMode": "value","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() AS time,\n ROUND(AVG(ba.as_path_count)::numeric, 1) AS \"Avg AS Path Length\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nWHERE r.iswithdrawn = false AND r.isipv4 = true AND ba.as_path_count > 0", + "refId": "A" + } + ], + "title": "Average AS Path Length", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: Routes with only 1-hop AS path are directly connected or possibly hijacked. 
In your lab, ExaBGP injects routes starting with AS 65100.", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 5},{"color": "red","value": 20}]}, + "unit": "short" + } + }, + "gridPos": {"h": 5,"w": 6,"x": 18,"y": 0}, + "id": 3, + "options": {"colorMode": "value","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() AS time,\n COUNT(*) AS \"Direct (1-hop) Routes\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nWHERE r.iswithdrawn = false AND r.isipv4 = true AND ba.as_path_count = 1", + "refId": "A" + } + ], + "title": "1-Hop Routes (Direct/Possible Hijack)", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: The longest paths reveal the most AS-level hops in your network. 
AS path prepending intentionally lengthens paths to make a route less preferred.", + "fieldConfig": { + "defaults": {"custom": {"align": "auto","displayMode": "auto"}}, + "overrides": [ + {"matcher": {"id": "byName","options": "AS Path Length"},"properties": [{"id": "custom.displayMode","value": "color-background"},{"id": "thresholds","value": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 5},{"color": "red","value": 10}]}}]}, + {"matcher": {"id": "byName","options": "AS Path"},"properties": [{"id": "custom.width","value": 400}]} + ] + }, + "gridPos": {"h": 10,"w": 24,"x": 0,"y": 10}, + "id": 4, + "options": {"footer": {"fields": "","reducer": ["sum"],"show": false},"showHeader": true,"sortBy": [{"desc": true,"displayName": "AS Path Length"}]}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT\n r.prefix AS \"Prefix\",\n ba.as_path_count AS \"AS Path Length\",\n ba.as_path::text AS \"AS Path\",\n ba.origin_as AS \"Origin AS\",\n ba.next_hop AS \"Next Hop\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nWHERE r.iswithdrawn = false AND r.isipv4 = true\nORDER BY ba.as_path_count DESC\nLIMIT 30", + "refId": "A" + } + ], + "title": "Longest AS Paths (Top 30)", + "type": "table" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: Origin AS is the rightmost ASN in the AS path — the network that first originated the prefix. 
Most internet prefixes are originated by their owning organization.", + "fieldConfig": { + "defaults": {"custom": {"align": "auto","displayMode": "auto"}}, + "overrides": [ + {"matcher": {"id": "byName","options": "Route Count"},"properties": [{"id": "custom.displayMode","value": "lcd-gauge"},{"id": "custom.width","value": 200}]} + ] + }, + "gridPos": {"h": 12,"w": 12,"x": 0,"y": 20}, + "id": 5, + "options": {"footer": {"fields": "","reducer": ["sum"],"show": false},"showHeader": true,"sortBy": [{"desc": true,"displayName": "Route Count"}]}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT\n ba.origin_as AS \"Origin AS\",\n COALESCE(ia.as_name, 'Unknown') AS \"AS Name\",\n COUNT(*) AS \"Route Count\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nLEFT JOIN info_asn ia ON ia.asn = ba.origin_as\nWHERE r.iswithdrawn = false AND r.isipv4 = true\nGROUP BY ba.origin_as, ia.as_name\nORDER BY COUNT(*) DESC\nLIMIT 20", + "refId": "A" + } + ], + "title": "Top Origin ASNs by Route Count", + "type": "table" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: A transit AS (appearing frequently in AS paths but not as origin) is a carrier. 
The most frequent transit ASNs in your lab correspond to simulated Tier-1 carriers (174=Cogent, 3356=Lumen, 1299=Telia, etc.)", + "fieldConfig": { + "defaults": {"color": {"mode": "palette-classic"},"custom": {"fillOpacity": 80,"lineWidth": 0},"unit": "short"} + }, + "gridPos": {"h": 12,"w": 12,"x": 12,"y": 20}, + "id": 6, + "options": {"barRadius": 0,"barWidth": 0.7,"groupWidth": 0.7,"legend": {"calcs": [],"displayMode": "list","placement": "bottom"},"orientation": "horizontal","tooltip": {"mode": "single"},"xTickLabelRotation": 0,"xTickLabelSpacing": 200}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT\n asn_val AS \"Transit ASN\",\n COUNT(*) AS \"Appearances in AS Paths\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nCROSS JOIN LATERAL unnest(ba.as_path) AS asn_val\nWHERE r.iswithdrawn = false AND asn_val != ba.origin_as\nGROUP BY asn_val\nORDER BY COUNT(*) DESC\nLIMIT 15", + "refId": "A" + } + ], + "title": "Most Common Transit ASNs", + "type": "barchart" + } + ], + "schemaVersion": 36, + "style": "dark", + "tags": ["obmp","learning","bgp","as-path","topology"], + "time": {"from": "now-1h","to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "AS Path Analysis", + "uid": "obmp-learn-03", + "version": 1 +} diff --git a/obmp-grafana/dashboards/Learning/learning_attributes.json b/obmp-grafana/dashboards/Learning/learning_attributes.json new file mode 100644 index 0000000..217d0b0 --- /dev/null +++ b/obmp-grafana/dashboards/Learning/learning_attributes.json @@ -0,0 +1,201 @@ +{ + "annotations": {"list": [{"builtIn": 1,"datasource": {"type": "datasource","uid": "grafana"},"enable": true,"hide": true,"iconColor": "rgba(0, 211, 255, 1)","name": "Annotations & Alerts","target": {"limit": 100,"matchAny": false,"tags": [],"type": "dashboard"},"type": "dashboard"}]}, + "description": "Explore BGP path attributes: communities, MED, local-pref and how they 
influence routing policy decisions.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": {"type": "datasource","uid": "grafana"}, + "gridPos": {"h": 8,"w": 24,"x": 0,"y": 0}, + "id": 1, + "options": { + "content": "## BGP Path Attributes — What They Mean\n\n### BGP Communities (RFC 1997)\nCommunities are 32-bit tags attached to routes, written as **ASN:value** (e.g., `65000:100`). They carry policy signals between routers and ASes.\n\n**Well-known communities:**\n| Community | Name | Meaning |\n|-----------|---------|----------|\n| `65535:65281` | NO_EXPORT | Do not advertise outside this AS or confederation |\n| `65535:65282` | NO_ADVERTISE | Do not advertise to any peer |\n| `65535:666` | BLACKHOLE | Drop traffic destined for this prefix (RFC 7999) |\n\nPrivate communities (e.g., `65001:200`) are operator-defined — they may encode region, customer tier, or traffic-engineering intent.\n\n### Local Preference (local-pref)\n- **Scope:** iBGP only — never sent to eBGP peers.\n- **Effect:** Higher local-pref wins. Default is **100**.\n- **Use case:** Prefer one upstream provider over another for all outbound traffic.\n\n### Multi-Exit Discriminator (MED)\n- **Scope:** Sent to directly connected eBGP peers to influence *inbound* traffic.\n- **Effect:** Lower MED wins (when comparing routes from the same AS).\n- **Use case:** Tell a peer which of your links to prefer when sending traffic to you.\n\n> **Tip:** Use the panels below to explore what communities and attributes are actually present in the current RIB. Run `inject.py attributes` to load routes with varied communities and MED values.", + "mode": "markdown" + }, + "title": "BGP Attribute Reference — Communities, Local-Pref, MED", + "type": "text" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: Each row is a unique community string (format ASN:value) seen across all active routes. 
High route counts for a community mean many routes share that policy tag. Look for well-known communities: 65535:65281 (NO_EXPORT), 65535:65282 (NO_ADVERTISE), 65535:666 (BLACKHOLE).", + "fieldConfig": { + "defaults": {"color": {"mode": "thresholds"},"custom": {"align": "auto","displayMode": "auto"},"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null}]}}, + "overrides": [ + {"matcher": {"id": "byName","options": "Routes Tagged"},"properties": [{"id": "custom.displayMode","value": "lcd-gauge"},{"id": "color","value": {"mode": "thresholds"}},{"id": "thresholds","value": {"mode": "absolute","steps": [{"color": "blue","value": null},{"color": "green","value": 10},{"color": "yellow","value": 100}]}}]} + ] + }, + "gridPos": {"h": 11,"w": 12,"x": 0,"y": 8}, + "id": 2, + "options": {"footer": {"fields": "","reducer": ["sum"],"show": false},"showHeader": true,"sortBy": [{"desc": true,"displayName": "Routes Tagged"}]}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT\n comm AS \"Community\",\n COUNT(*) AS \"Routes Tagged\"\nFROM base_attrs ba\nJOIN ip_rib r ON r.base_attr_hash_id = ba.hash_id\nCROSS JOIN LATERAL unnest(ba.community_list) AS comm\nWHERE r.iswithdrawn = false AND ba.community_list IS NOT NULL\nGROUP BY comm\nORDER BY COUNT(*) DESC\nLIMIT 30", + "refId": "A" + } + ], + "title": "Top BGP Communities in Current RIB", + "type": "table" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: Routes with notable BGP attributes — tagged with communities or using non-default local-pref / MED values. These routes carry explicit policy information. 
Examine the Communities column for operator-defined tags and the Local Pref column to see traffic engineering decisions.", + "fieldConfig": { + "defaults": {"color": {"mode": "thresholds"},"custom": {"align": "auto","displayMode": "auto"},"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null}]}}, + "overrides": [ + {"matcher": {"id": "byName","options": "Local Pref"},"properties": [{"id": "custom.displayMode","value": "color-text"},{"id": "color","value": {"mode": "thresholds"}},{"id": "thresholds","value": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 101},{"color": "red","value": 200}]}}]}, + {"matcher": {"id": "byName","options": "MED"},"properties": [{"id": "custom.displayMode","value": "color-text"},{"id": "color","value": {"mode": "thresholds"}},{"id": "thresholds","value": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 100}]}}]} + ] + }, + "gridPos": {"h": 11,"w": 12,"x": 12,"y": 8}, + "id": 3, + "options": {"footer": {"fields": "","reducer": ["sum"],"show": false},"showHeader": true}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT\n r.prefix::text AS \"Prefix\",\n ba.origin_as AS \"Origin AS\",\n ba.community_list::text AS \"Communities\",\n ba.local_pref AS \"Local Pref\",\n ba.med AS \"MED\",\n ba.as_path_count AS \"Path Length\"\nFROM base_attrs ba\nJOIN ip_rib r ON r.base_attr_hash_id = ba.hash_id\nWHERE r.iswithdrawn = false AND r.isipv4 = true\n AND (ba.community_list IS NOT NULL OR ba.med IS NOT NULL OR ba.local_pref IS NOT NULL)\nORDER BY r.prefix\nLIMIT 100", + "refId": "A" + } + ], + "title": "Routes with Notable Attributes", + "type": "table" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: MED (Multi-Exit Discriminator) is used to influence inbound traffic from a directly connected AS. Lower MED is preferred. 
If most routes show 'Not Set', MED is not being used for traffic engineering. A single dominant MED value means a simple policy; many different values indicate fine-grained control.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"fillOpacity": 80,"lineWidth": 0}, + "unit": "short" + } + }, + "gridPos": {"h": 9,"w": 12,"x": 0,"y": 19}, + "id": 4, + "options": {"barRadius": 0.1,"barWidth": 0.6,"groupWidth": 0.7,"legend": {"displayMode": "list","placement": "bottom"},"orientation": "auto","text": {},"tooltip": {"mode": "single"},"xTickLabelRotation": -30,"xTickLabelSpacing": 100}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT\n COALESCE(ba.med::text, 'Not Set') AS \"MED Value\",\n COUNT(*) AS \"Route Count\"\nFROM base_attrs ba\nJOIN ip_rib r ON r.base_attr_hash_id = ba.hash_id\nWHERE r.iswithdrawn = false AND r.isipv4 = true\nGROUP BY ba.med\nORDER BY ba.med NULLS LAST\nLIMIT 20", + "refId": "A" + } + ], + "title": "MED Value Distribution", + "type": "barchart" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: Local preference is an iBGP attribute — it never crosses AS boundaries. Default is 100. Routes with local-pref above 100 are preferred over the default path; below 100 they are used as last-resort. Non-100 values indicate active traffic-engineering policy. 
Run 'inject.py attributes' to inject routes with varied local-pref values.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"fillOpacity": 80,"lineWidth": 0}, + "unit": "short" + } + }, + "gridPos": {"h": 9,"w": 12,"x": 12,"y": 19}, + "id": 5, + "options": {"barRadius": 0.1,"barWidth": 0.6,"groupWidth": 0.7,"legend": {"displayMode": "list","placement": "bottom"},"orientation": "auto","text": {},"tooltip": {"mode": "single"},"xTickLabelRotation": -30,"xTickLabelSpacing": 100}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT\n COALESCE(ba.local_pref::text, 'Not Set') AS \"Local Pref\",\n COUNT(*) AS \"Route Count\"\nFROM base_attrs ba\nJOIN ip_rib r ON r.base_attr_hash_id = ba.hash_id\nWHERE r.iswithdrawn = false AND r.isipv4 = true\nGROUP BY ba.local_pref\nORDER BY ba.local_pref DESC NULLS LAST\nLIMIT 20", + "refId": "A" + } + ], + "title": "Local Preference Value Distribution", + "type": "barchart" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: This count tells you how widely BGP communities are used in your network. A value of 0 means no community tagging — communities are an opt-in feature. 
Run 'inject.py attributes' to add routes with community strings.", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "blue","value": null},{"color": "green","value": 1}]}, + "unit": "short", + "mappings": [] + } + }, + "gridPos": {"h": 5,"w": 8,"x": 0,"y": 28}, + "id": 6, + "options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() as time, COUNT(*) AS \"Routes with Communities\"\nFROM base_attrs ba\nJOIN ip_rib r ON r.base_attr_hash_id = ba.hash_id\nWHERE r.iswithdrawn = false\n AND ba.community_list IS NOT NULL\n AND array_length(ba.community_list, 1) > 0", + "refId": "A" + } + ], + "title": "Routes with Communities", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: The number of distinct community strings seen across all active routes. A diverse set indicates fine-grained policy tagging. 
A single value means one uniform policy tag is applied.", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "blue","value": null},{"color": "green","value": 1},{"color": "yellow","value": 50}]}, + "unit": "short", + "mappings": [] + } + }, + "gridPos": {"h": 5,"w": 8,"x": 8,"y": 28}, + "id": 7, + "options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() as time, COUNT(DISTINCT comm) AS \"Unique Communities\"\nFROM base_attrs ba\nJOIN ip_rib r ON r.base_attr_hash_id = ba.hash_id\nCROSS JOIN LATERAL unnest(ba.community_list) AS comm\nWHERE r.iswithdrawn = false", + "refId": "A" + } + ], + "title": "Unique Community Values", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: Routes with a local-pref other than the default (100) have been explicitly policy-engineered. A high count here means your network actively uses local-pref to prefer specific paths. 
A value of 0 means all paths are at default preference.", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 100},{"color": "red","value": 1000}]}, + "unit": "short", + "mappings": [] + } + }, + "gridPos": {"h": 5,"w": 8,"x": 16,"y": 28}, + "id": 8, + "options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() as time, COUNT(*) AS \"Custom Local-Pref Routes\"\nFROM base_attrs ba\nJOIN ip_rib r ON r.base_attr_hash_id = ba.hash_id\nWHERE r.iswithdrawn = false\n AND ba.local_pref IS NOT NULL\n AND ba.local_pref != 100", + "refId": "A" + } + ], + "title": "Routes with Non-Default Local-Pref", + "type": "stat" + } + ], + "schemaVersion": 36, + "style": "dark", + "tags": ["obmp","learning","bgp","communities","attributes","policy"], + "time": {"from": "now-1h","to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "BGP Attribute Explorer", + "uid": "obmp-learn-06", + "version": 1 +} diff --git a/obmp-grafana/dashboards/Learning/learning_churn.json b/obmp-grafana/dashboards/Learning/learning_churn.json new file mode 100644 index 0000000..e2188c1 --- /dev/null +++ b/obmp-grafana/dashboards/Learning/learning_churn.json @@ -0,0 +1,152 @@ +{ + "annotations": {"list": [{"builtIn": 1,"datasource": {"type": "datasource","uid": "grafana"},"enable": true,"hide": true,"iconColor": "rgba(0, 211, 255, 1)","name": "Annotations & Alerts","target": {"limit": 100,"matchAny": false,"tags": [],"type": "dashboard"},"type": "dashboard"}]}, + "description": "Prefix stability analysis and route churn visualization. 
Teaches how to identify unstable routes and understand BGP churn.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: This chart shows BGP advertisements and withdrawals bucketed per hour. A healthy network has steady low churn. Spikes in withdrawals indicate route instability events — link failures, iBGP reconvergence, or policy changes. Run 'inject.py churn' to generate synthetic churn data and observe it here.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"drawStyle": "bars","fillOpacity": 60,"lineWidth": 1,"spanNulls": false,"stacking": {"group": "A","mode": "none"}}, + "unit": "short" + }, + "overrides": [ + {"matcher": {"id": "byName","options": "Advertisements"},"properties": [{"id": "color","value": {"fixedColor": "green","mode": "fixed"}}]}, + {"matcher": {"id": "byName","options": "Withdrawals"},"properties": [{"id": "color","value": {"fixedColor": "red","mode": "fixed"}}]} + ] + }, + "gridPos": {"h": 9,"w": 24,"x": 0,"y": 0}, + "id": 1, + "options": {"legend": {"calcs": ["sum","max"],"displayMode": "list","placement": "bottom"},"tooltip": {"mode": "multi"}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT\n $__timeGroupAlias(timestamp,'1h'),\n SUM(CASE WHEN iswithdrawn = false THEN 1 ELSE 0 END) AS \"Advertisements\",\n SUM(CASE WHEN iswithdrawn = true THEN 1 ELSE 0 END) AS \"Withdrawals\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)\nGROUP BY 1\nORDER BY 1", + "refId": "A" + } + ], + "title": "Advertisements vs Withdrawals Rate (per hour)", + "type": "timeseries" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: A prefix with more than 30 updates per day is considered unstable — it is flapping or being re-announced frequently. 
The Stability column categorizes each prefix. Run 'inject.py churn' to generate churn data and observe it here. Sort by 'Total Updates' to find the most problematic prefixes.", + "fieldConfig": { + "defaults": {"color": {"mode": "thresholds"},"custom": {"align": "auto","displayMode": "auto"},"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null}]}}, + "overrides": [ + {"matcher": {"id": "byName","options": "Stability"},"properties": [{"id": "custom.displayMode","value": "color-text"},{"id": "mappings","value": [{"options": {"Very Stable": {"color": "green","index": 0},"Stable": {"color": "blue","index": 1},"Moderate": {"color": "yellow","index": 2},"Unstable": {"color": "red","index": 3}},"type": "value"}]}]}, + {"matcher": {"id": "byName","options": "Total Updates"},"properties": [{"id": "custom.displayMode","value": "lcd-gauge"},{"id": "color","value": {"mode": "thresholds"}},{"id": "thresholds","value": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 7},{"color": "red","value": 30}]}}]} + ] + }, + "gridPos": {"h": 12,"w": 24,"x": 0,"y": 9}, + "id": 2, + "options": {"footer": {"fields": "","reducer": ["sum"],"show": false},"showHeader": true,"sortBy": [{"desc": true,"displayName": "Total Updates"}]}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT\n prefix::text AS \"Prefix\",\n COUNT(*) AS \"Total Updates\",\n SUM(CASE WHEN iswithdrawn THEN 1 ELSE 0 END) AS \"Withdrawals\",\n SUM(CASE WHEN NOT iswithdrawn THEN 1 ELSE 0 END) AS \"Announcements\",\n MAX(timestamp) AS \"Last Change\",\n CASE\n WHEN COUNT(*) = 1 THEN 'Very Stable'\n WHEN COUNT(*) <= 7 THEN 'Stable'\n WHEN COUNT(*) <= 30 THEN 'Moderate'\n ELSE 'Unstable'\n END AS \"Stability\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)\nGROUP BY prefix\nORDER BY \"Total Updates\" DESC\nLIMIT 100", + "refId": "A" + } + ], + "title": "Top Churning Prefixes", + "type": "table" + 
}, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: This bar chart shows how many prefixes fall into each stability tier. In a healthy network, the vast majority of prefixes should be 'Very Stable' (only announced once during the window). A large 'Unstable' bar is a red flag. Run 'inject.py churn' to shift prefixes into the Unstable tier.", + "fieldConfig": { + "defaults": { + "color": {"mode": "fixed","fixedColor": "blue"}, + "custom": {"fillOpacity": 80,"lineWidth": 0}, + "unit": "short" + }, + "overrides": [ + {"matcher": {"id": "byName","options": "1. Very Stable (1 update)"},"properties": [{"id": "color","value": {"fixedColor": "green","mode": "fixed"}}]}, + {"matcher": {"id": "byName","options": "2. Stable (2-7 updates)"},"properties": [{"id": "color","value": {"fixedColor": "blue","mode": "fixed"}}]}, + {"matcher": {"id": "byName","options": "3. Moderate (8-30 updates)"},"properties": [{"id": "color","value": {"fixedColor": "yellow","mode": "fixed"}}]}, + {"matcher": {"id": "byName","options": "4. Unstable (31+ updates)"},"properties": [{"id": "color","value": {"fixedColor": "red","mode": "fixed"}}]} + ] + }, + "gridPos": {"h": 9,"w": 14,"x": 0,"y": 21}, + "id": 3, + "options": {"barRadius": 0.1,"barWidth": 0.6,"groupWidth": 0.7,"legend": {"displayMode": "list","placement": "bottom"},"orientation": "auto","text": {},"tooltip": {"mode": "single"},"xTickLabelRotation": 0,"xTickLabelSpacing": 200}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT\n CASE\n WHEN cnt = 1 THEN '1. Very Stable (1 update)'\n WHEN cnt <= 7 THEN '2. Stable (2-7 updates)'\n WHEN cnt <= 30 THEN '3. Moderate (8-30 updates)'\n ELSE '4. 
Unstable (31+ updates)'\n END AS \"Stability Tier\",\n COUNT(*) AS \"Prefix Count\"\nFROM (\n SELECT prefix, COUNT(*) as cnt\n FROM ip_rib_log\n WHERE $__timeFilter(timestamp)\n GROUP BY prefix\n) sub\nGROUP BY 1\nORDER BY 1", + "refId": "A" + } + ], + "title": "Prefix Distribution by Stability Tier", + "type": "barchart" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: This is the single most churning prefix in the selected time range. If a prefix appears here repeatedly across time ranges, it may warrant investigation — check the AS path and peers announcing it.", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "red","value": null}]}, + "unit": "string", + "mappings": [] + } + }, + "gridPos": {"h": 5,"w": 10,"x": 14,"y": 21}, + "id": 4, + "options": {"colorMode": "background","graphMode": "none","justifyMode": "center","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {"titleSize": 14,"valueSize": 18}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() AS time, prefix::text AS \"Most Churned Prefix\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)\nGROUP BY prefix\nORDER BY COUNT(*) DESC\nLIMIT 1", + "refId": "A" + } + ], + "title": "Most Churned Prefix", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: This counts how many distinct prefixes had at least one update event in the selected time window. During a normal steady state this number should be low. 
After a major routing event (e.g., upstream link failure) you may see thousands of prefixes change simultaneously.", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 500},{"color": "red","value": 2000}]}, + "unit": "short", + "mappings": [] + } + }, + "gridPos": {"h": 4,"w": 10,"x": 14,"y": 26}, + "id": 5, + "options": {"colorMode": "background","graphMode": "area","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() AS time, COUNT(DISTINCT prefix) AS \"Prefixes with Updates\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)", + "refId": "A" + } + ], + "title": "Total Unique Prefixes with Updates", + "type": "stat" + } + ], + "schemaVersion": 36, + "style": "dark", + "tags": ["obmp","learning","bgp","churn","stability"], + "time": {"from": "now-24h","to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "Route Churn & Stability Score", + "uid": "obmp-learn-05", + "version": 1 +} diff --git a/obmp-grafana/dashboards/Learning/learning_peer_health.json b/obmp-grafana/dashboards/Learning/learning_peer_health.json new file mode 100644 index 0000000..716ebdc --- /dev/null +++ b/obmp-grafana/dashboards/Learning/learning_peer_health.json @@ -0,0 +1,144 @@ +{ + "annotations": {"list": [{"builtIn": 1,"datasource": {"type": "datasource","uid": "grafana"},"enable": true,"hide": true,"iconColor": "rgba(0, 211, 255, 1)","name": "Annotations & Alerts","type": "dashboard"}]}, + "description": "BGP peer session health, uptime, and flap analysis. 
Teaches session stability and how to diagnose flapping peers.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: A healthy BGP mesh shows all peers UP continuously. Any gap in the UP state represents a session flap — investigate the reset reason.", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "custom": {"fillOpacity": 70,"lineWidth": 0,"spanNulls": false}, + "mappings": [{"options": {"down": {"color": "red","index": 1,"text": "DOWN"},"up": {"color": "green","index": 0,"text": "UP"}},"type": "value"}], + "thresholds": {"mode": "absolute","steps": [{"color": "red","value": null},{"color": "green","value": 1}]} + } + }, + "gridPos": {"h": 8,"w": 24,"x": 0,"y": 0}, + "id": 1, + "options": {"alignValue": "left","legend": {"displayMode": "list","placement": "bottom"},"mergeValues": true,"rowHeight": 0.9,"showValue": "auto","tooltip": {"mode": "single"}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT\n $__timeGroupAlias(e.timestamp,'1m'),\n COALESCE(p.name, p.peer_addr::text) AS metric,\n CASE WHEN e.state = 'up' THEN 1 ELSE 0 END AS \"value\"\nFROM peer_event_log e\nJOIN bgp_peers p ON p.hash_id = e.peer_hash_id\nWHERE $__timeFilter(e.timestamp)\nORDER BY 1, 2", + "refId": "A" + } + ], + "title": "Peer Session State Timeline", + "type": "state-timeline" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Current state of all BGP peers. Learn: 'bmp_reason' tells you why BMP reporting stopped. 
'bgp_err_code' shows BGP NOTIFICATION error codes.", + "fieldConfig": { + "defaults": {"custom": {"align": "auto","displayMode": "auto"}}, + "overrides": [ + {"matcher": {"id": "byName","options": "State"},"properties": [{"id": "custom.displayMode","value": "color-background"},{"id": "mappings","value": [{"options": {"down": {"color": "red","index": 1,"text": "DOWN"},"up": {"color": "green","index": 0,"text": "UP"}},"type": "value"}]}]}, + {"matcher": {"id": "byName","options": "Peer"},"properties": [{"id": "custom.width","value": 200}]}, + {"matcher": {"id": "byName","options": "AS"},"properties": [{"id": "custom.width","value": 80}]} + ] + }, + "gridPos": {"h": 12,"w": 24,"x": 0,"y": 8}, + "id": 2, + "options": {"footer": {"fields": "","reducer": ["sum"],"show": false},"showHeader": true,"sortBy": [{"desc": false,"displayName": "State"}]}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT\n COALESCE(p.name, p.peer_addr::text) AS \"Peer\",\n p.peer_addr AS \"Address\",\n p.peer_as AS \"AS\",\n p.state AS \"State\",\n p.timestamp AS \"Last State Change\",\n p.error_text AS \"Last Error\",\n p.local_hold_time AS \"Hold Time\"\nFROM bgp_peers p\nWHERE p.isprepolicy = true\nORDER BY p.state, p.peer_addr", + "refId": "A" + } + ], + "title": "Current Peer State", + "type": "table" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: Flap count = number of times a peer went from UP to DOWN. 
A peer flapping more than 2 times per hour needs investigation.", + "fieldConfig": { + "defaults": {"custom": {"align": "auto","displayMode": "auto"}}, + "overrides": [ + {"matcher": {"id": "byName","options": "Flap Count"},"properties": [{"id": "custom.displayMode","value": "color-background"},{"id": "thresholds","value": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 1},{"color": "red","value": 5}]}}]} + ] + }, + "gridPos": {"h": 10,"w": 24,"x": 0,"y": 20}, + "id": 3, + "options": {"footer": {"fields": "","reducer": ["sum"],"show": false},"showHeader": true,"sortBy": [{"desc": true,"displayName": "Flap Count"}]}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT\n COALESCE(p.name, p.peer_addr::text) AS \"Peer\",\n p.peer_addr AS \"Address\",\n p.peer_as AS \"AS\",\n COUNT(CASE WHEN e.state = 'down' THEN 1 END) AS \"Flap Count\",\n MIN(e.timestamp) AS \"First Event\",\n MAX(e.timestamp) AS \"Last Event\"\nFROM peer_event_log e\nJOIN bgp_peers p ON p.hash_id = e.peer_hash_id\nWHERE $__timeFilter(e.timestamp)\nGROUP BY p.name, p.peer_addr, p.peer_as\nORDER BY \"Flap Count\" DESC", + "refId": "A" + } + ], + "title": "Peer Flap Analysis", + "type": "table" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "red","value": null},{"color": "yellow","value": 50},{"color": "green","value": 90}]},"unit": "percent","max": 100,"min": 0}}, + "gridPos": {"h": 8,"w": 8,"x": 0,"y": 30}, + "id": 4, + "options": {"orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"showThresholdLabels": false,"showThresholdMarkers": true,"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() AS time,\n ROUND(100.0 * 
SUM(CASE WHEN state = 'up' THEN 1 ELSE 0 END) / NULLIF(COUNT(*),0), 1) AS \"Mesh Health %\"\nFROM bgp_peers WHERE isprepolicy = true", + "refId": "A" + } + ], + "title": "Overall Peer Mesh Health", + "type": "gauge" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "red","value": null},{"color": "green","value": 1}]},"unit": "short","mappings": [{"options": {"0": {"color": "red","index": 0,"text": "DOWN"}},"type": "value"}]}}, + "gridPos": {"h": 8,"w": 8,"x": 8,"y": 30}, + "id": 5, + "options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() AS time,\n SUM(CASE WHEN state = 'up' THEN 1 ELSE 0 END) AS \"Peers UP\"\nFROM bgp_peers WHERE isprepolicy = true", + "refId": "A" + } + ], + "title": "Peers Currently UP", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 1},{"color": "red","value": 5}]},"unit": "short"}}, + "gridPos": {"h": 8,"w": 8,"x": 16,"y": 30}, + "id": 6, + "options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() AS time,\n COUNT(CASE WHEN state = 'down' THEN 1 END) AS \"Flap Events (24h)\"\nFROM peer_event_log\nWHERE timestamp > NOW() - INTERVAL '24 hours' AND state = 'down'", + "refId": "A" + } + ], 
+ "title": "Flap Events (24h)", + "type": "stat" + } + ], + "schemaVersion": 36, + "style": "dark", + "tags": ["obmp","learning","bgp","peers","flap"], + "time": {"from": "now-24h","to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "Peer Session Health & Flap Analysis", + "uid": "obmp-learn-02", + "version": 1 +} diff --git a/obmp-grafana/dashboards/Learning/learning_rpki.json b/obmp-grafana/dashboards/Learning/learning_rpki.json new file mode 100644 index 0000000..3cdf79f --- /dev/null +++ b/obmp-grafana/dashboards/Learning/learning_rpki.json @@ -0,0 +1,150 @@ +{ + "annotations": {"list": [{"builtIn": 1,"datasource": {"type": "datasource","uid": "grafana"},"enable": true,"hide": true,"iconColor": "rgba(0, 211, 255, 1)","name": "Annotations & Alerts","type": "dashboard"}]}, + "description": "RPKI (Resource Public Key Infrastructure) validation status. Teaches BGP routing security and how RPKI prevents prefix hijacks by validating route origin.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "content": "## What is RPKI?\n\nRPKI (Resource Public Key Infrastructure) is a cryptographic security framework for BGP routing. 
It lets IP address holders publish **Route Origin Authorizations (ROAs)** stating which ASNs are authorized to originate their prefixes.\n\n### RPKI Validation States\n| State | Meaning |\n|-------|----------|\n| **Valid** | The route's origin AS matches a ROA for this prefix |\n| **Invalid** | A ROA exists but the origin AS or prefix length does NOT match — this route is potentially a hijack |\n| **NotFound** | No ROA exists for this prefix/origin — unprotected, can't be validated |\n\n### How to read this dashboard\n- **Valid %** should be as high as possible (target: 100%)\n- **Invalid routes** are critical — they indicate either a misconfiguration or a prefix hijack\n- Routes with no RPKI data show as **NotFound** — they are not necessarily invalid, just unprotected\n\n> **Lab note:** The RPKI validator table is populated by a cron job in psql-app every 2 hours. If the table shows 0 rows, wait for the cron to run or check `ENABLE_RPKI=1` in docker-compose.yml.", + "datasource": {"type": "datasource","uid": "grafana"}, + "gridPos": {"h": 10,"w": 8,"x": 0,"y": 0}, + "id": 1, + "options": {"content": "## What is RPKI?\n\nRPKI (Resource Public Key Infrastructure) is a cryptographic security framework for BGP routing. 
It lets IP address holders publish **Route Origin Authorizations (ROAs)** stating which ASNs are authorized to originate their prefixes.\n\n### RPKI Validation States\n| State | Meaning |\n|-------|----------|\n| **Valid** | The route's origin AS matches a ROA for this prefix |\n| **Invalid** | A ROA exists but the origin AS or prefix length does NOT match — this route is potentially a hijack |\n| **NotFound** | No ROA exists for this prefix/origin — unprotected, can't be validated |\n\n### How to read this dashboard\n- **Valid %** should be as high as possible (target: 100%)\n- **Invalid routes** are critical — they indicate either a misconfiguration or a prefix hijack\n- Routes with no RPKI data show as **NotFound** — they are not necessarily invalid, just unprotected\n\n> **Lab note:** The RPKI validator table is populated by a cron job in psql-app every 2 hours. If the table shows 0 rows, wait for the cron to run or check `ENABLE_RPKI=1` in docker-compose.yml.","mode": "markdown"}, + "pluginVersion": "9.1.7", + "title": "RPKI Learning Guide", + "type": "text" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Total ROAs (Route Origin Authorizations) loaded from the RPKI validator. 
If 0, the cron job has not yet run.", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "red","value": null},{"color": "yellow","value": 1},{"color": "green","value": 100000}]}, + "unit": "short" + } + }, + "gridPos": {"h": 5,"w": 4,"x": 8,"y": 0}, + "id": 2, + "options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() AS time, COUNT(*) AS \"RPKI ROAs Loaded\" FROM rpki_validator", + "refId": "A" + } + ], + "title": "RPKI ROAs Loaded", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Routes with a matching valid ROA — origin AS and prefix length both match.", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "red","value": null},{"color": "green","value": 1}]}, + "unit": "short" + } + }, + "gridPos": {"h": 5,"w": 4,"x": 12,"y": 0}, + "id": 3, + "options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() AS time, COUNT(*) AS \"Valid Routes\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nJOIN rpki_validator rv ON rv.prefix >>= r.prefix AND rv.origin_as = ba.origin_as AND r.prefix_len <= rv.prefix_len_max\nWHERE r.iswithdrawn = false AND r.isipv4 = true", + "refId": "A" + } + ], + "title": "RPKI Valid Routes", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Routes where 
a ROA exists but the origin AS does NOT match — high-priority investigation needed.", + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "red","value": 1}]}, + "unit": "short" + } + }, + "gridPos": {"h": 5,"w": 4,"x": 16,"y": 0}, + "id": 4, + "options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() AS time, COUNT(*) AS \"RPKI Invalid Routes\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nWHERE r.iswithdrawn = false AND r.isipv4 = true\n AND EXISTS (\n SELECT 1 FROM rpki_validator rv\n WHERE rv.prefix >>= r.prefix AND rv.origin_as != ba.origin_as\n )\n AND NOT EXISTS (\n SELECT 1 FROM rpki_validator rv\n WHERE rv.prefix >>= r.prefix AND rv.origin_as = ba.origin_as AND r.prefix_len <= rv.prefix_len_max\n )", + "refId": "A" + } + ], + "title": "RPKI Invalid Routes", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: ExaBGP-injected routes (AS 65100) will normally be NotFound because no ROA covers their prefixes; an injected prefix that is covered by a ROA for a different origin AS shows as Invalid instead.
Real internet prefixes with valid ROAs will appear as Valid.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"hideFrom": {"legend": false,"tooltip": false,"viz": false}}, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": {"h": 10,"w": 10,"x": 0,"y": 10}, + "id": 5, + "options": {"displayLabels": ["percent","name"],"legend": {"displayMode": "list","placement": "bottom"},"pieType": "donut","tooltip": {"mode": "single"}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT\n CASE\n WHEN rv_valid.prefix IS NOT NULL THEN 'Valid'\n WHEN rv_any.prefix IS NOT NULL THEN 'Invalid'\n ELSE 'NotFound'\n END AS \"RPKI Status\",\n COUNT(*) AS \"Route Count\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nLEFT JOIN rpki_validator rv_valid\n ON rv_valid.prefix >>= r.prefix AND rv_valid.origin_as = ba.origin_as AND r.prefix_len <= rv_valid.prefix_len_max\nLEFT JOIN rpki_validator rv_any\n ON rv_any.prefix >>= r.prefix AND rv_any.origin_as != ba.origin_as\nWHERE r.iswithdrawn = false AND r.isipv4 = true\nGROUP BY 1\nORDER BY 1", + "refId": "A" + } + ], + "title": "RPKI Validation Status Distribution", + "type": "piechart" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Prefixes that have a ROA but the observed origin AS does not match. 
These are the most security-critical routes — each one represents a potential hijack or misconfiguration.", + "fieldConfig": { + "defaults": {"custom": {"align": "auto","displayMode": "auto"}}, + "overrides": [ + {"matcher": {"id": "byName","options": "Status"},"properties": [{"id": "custom.displayMode","value": "color-background"},{"id": "mappings","value": [{"options": {"Invalid": {"color": "red","index": 0},"Valid": {"color": "green","index": 1},"NotFound": {"color": "yellow","index": 2}},"type": "value"}]}]} + ] + }, + "gridPos": {"h": 14,"w": 14,"x": 10,"y": 10}, + "id": 6, + "options": {"footer": {"fields": "","reducer": ["sum"],"show": false},"showHeader": true}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT\n r.prefix AS \"Prefix\",\n ba.origin_as AS \"Observed Origin AS\",\n rv.origin_as AS \"Authorized Origin AS (ROA)\",\n 'Invalid' AS \"Status\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nJOIN rpki_validator rv ON rv.prefix >>= r.prefix AND rv.origin_as != ba.origin_as\nWHERE r.iswithdrawn = false AND r.isipv4 = true\n AND NOT EXISTS (\n SELECT 1 FROM rpki_validator rv2\n WHERE rv2.prefix >>= r.prefix AND rv2.origin_as = ba.origin_as AND r.prefix_len <= rv2.prefix_len_max\n )\nORDER BY r.prefix\nLIMIT 50", + "refId": "A" + } + ], + "title": "RPKI Invalid Routes — Potential Hijacks", + "type": "table" + } + ], + "schemaVersion": 36, + "style": "dark", + "tags": ["obmp","learning","bgp","rpki","security"], + "time": {"from": "now-1h","to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "RPKI Validation Status", + "uid": "obmp-learn-04", + "version": 1 +} diff --git a/obmp-grafana/dashboards/Learning/learning_update_rate.json b/obmp-grafana/dashboards/Learning/learning_update_rate.json new file mode 100644 index 0000000..4a099bb --- /dev/null +++ b/obmp-grafana/dashboards/Learning/learning_update_rate.json @@ -0,0 +1,137 @@ +{ + 
"annotations": {"list": [{"builtIn": 1,"datasource": {"type": "datasource","uid": "grafana"},"enable": true,"hide": true,"iconColor": "rgba(0, 211, 255, 1)","name": "Annotations & Alerts","target": {"limit": 100,"matchAny": false,"tags": [],"type": "dashboard"},"type": "dashboard"}]}, + "description": "BGP update and withdrawal rates over time. Teaches what normal BGP traffic looks like and how to detect route churn or instability.", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: A healthy network has far more advertisements than withdrawals. A withdrawal spike often signals a link failure or route flap.", + "fieldConfig": { + "defaults": { + "color": {"mode": "palette-classic"}, + "custom": {"drawStyle": "bars","fillOpacity": 60,"lineWidth": 1,"spanNulls": false,"stacking": {"group": "A","mode": "none"}}, + "unit": "short" + }, + "overrides": [ + {"matcher": {"id": "byName","options": "Advertisements"},"properties": [{"id": "color","value": {"fixedColor": "green","mode": "fixed"}}]}, + {"matcher": {"id": "byName","options": "Withdrawals"},"properties": [{"id": "color","value": {"fixedColor": "red","mode": "fixed"}}]} + ] + }, + "gridPos": {"h": 10,"w": 24,"x": 0,"y": 0}, + "id": 1, + "options": {"legend": {"calcs": ["sum","max"],"displayMode": "list","placement": "bottom"},"tooltip": {"mode": "multi"}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT\n $__timeGroupAlias(timestamp,'5m'),\n SUM(CASE WHEN iswithdrawn = false THEN 1 ELSE 0 END) AS \"Advertisements\",\n SUM(CASE WHEN iswithdrawn = true THEN 1 ELSE 0 END) AS \"Withdrawals\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)\nGROUP BY 1\nORDER BY 1", + "refId": "A" + } + ], + "title": "BGP Updates Over Time — Advertisements vs Withdrawals", + "type": "timeseries" + }, + { + 
"datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 100},{"color": "red","value": 1000}]},"unit": "short","mappings": []}}, + "gridPos": {"h": 5,"w": 6,"x": 0,"y": 10}, + "id": 2, + "options": {"colorMode": "background","graphMode": "area","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() AS time, COUNT(*) AS \"Total Updates (24h)\" FROM ip_rib_log WHERE timestamp > NOW() - INTERVAL '24 hours'", + "refId": "A" + } + ], + "title": "Total Updates (24h)", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Learn: Withdrawal rate above 30% is unusual. Above 50% may indicate a route leak or oscillation event.", + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 20},{"color": "red","value": 50}]},"unit": "percent","max": 100}}, + "gridPos": {"h": 5,"w": 6,"x": 6,"y": 10}, + "id": 3, + "options": {"colorMode": "background","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() AS time,\n ROUND(100.0 * SUM(CASE WHEN iswithdrawn THEN 1 ELSE 0 END) / NULLIF(COUNT(*),0), 1) AS \"Withdrawal Rate %\"\nFROM ip_rib_log\nWHERE timestamp > NOW() - INTERVAL '24 hours'", + "refId": "A" + } + ], + "title": "Withdrawal Rate % (24h)", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": 
"obmp_postgres"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 1000},{"color": "red","value": 10000}]},"unit": "short"}}, + "gridPos": {"h": 5,"w": 6,"x": 12,"y": 10}, + "id": 4, + "options": {"colorMode": "value","graphMode": "area","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() AS time, COUNT(DISTINCT peer_hash_id) AS \"Active Peers\" FROM ip_rib_log WHERE timestamp > NOW() - INTERVAL '1 hour'", + "refId": "A" + } + ], + "title": "Active Reporting Peers (1h)", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "yellow","value": 500},{"color": "red","value": 2000}]},"unit": "short"}}, + "gridPos": {"h": 5,"w": 6,"x": 18,"y": 10}, + "id": 5, + "options": {"colorMode": "value","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"text": {}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT NOW() AS time, COUNT(DISTINCT prefix) AS \"Unique Prefixes Updated (24h)\" FROM ip_rib_log WHERE timestamp > NOW() - INTERVAL '24 hours'", + "refId": "A" + } + ], + "title": "Unique Prefixes Updated (24h)", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "description": "Updates per peer over time. Learn: Peers should have similar update rates. 
A peer with dramatically more updates may be experiencing instability or receiving a full BGP table with frequent changes.", + "fieldConfig": { + "defaults": {"color": {"mode": "palette-classic"},"custom": {"drawStyle": "line","fillOpacity": 10,"lineWidth": 1,"spanNulls": false},"unit": "short"} + }, + "gridPos": {"h": 9,"w": 24,"x": 0,"y": 15}, + "id": 6, + "options": {"legend": {"calcs": [],"displayMode": "list","placement": "right"},"tooltip": {"mode": "multi"}}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "time_series", + "rawSql": "SELECT\n $__timeGroupAlias(s.interval_time,'30m'),\n COALESCE(p.name, p.peer_addr::text) AS metric,\n SUM(s.advertise_avg + s.withdraw_avg) AS \"Updates\"\nFROM stats_peer_update_counts s\nJOIN bgp_peers p ON p.hash_id = s.peer_hash_id\nWHERE $__timeFilter(s.interval_time)\nGROUP BY 1, 2\nORDER BY 1", + "refId": "A" + } + ], + "title": "Update Rate by Peer (30-min buckets)", + "type": "timeseries" + } + ], + "schemaVersion": 36, + "style": "dark", + "tags": ["obmp","learning","bgp","churn"], + "time": {"from": "now-24h","to": "now"}, + "timepicker": {}, + "timezone": "browser", + "title": "BGP Update Rate & Churn", + "uid": "obmp-learn-01", + "version": 1 +} diff --git a/obmp-grafana/provisioning/dashboards/openbmp-dashboards.yml b/obmp-grafana/provisioning/dashboards/openbmp-dashboards.yml index 5a58345..3cbcea3 100644 --- a/obmp-grafana/provisioning/dashboards/openbmp-dashboards.yml +++ b/obmp-grafana/provisioning/dashboards/openbmp-dashboards.yml @@ -122,4 +122,15 @@ providers: # path to dashboard files on disk. 
Required when using the 'file' type path: /var/lib/grafana/dashboards/obmp/L3VPN-1005 # use folder names from filesystem to create folders in Grafana + foldersFromFilesStructure: false + - name: 'OpenBMP-Learning' + orgId: 1 + folder: 'OBMP-Learning' + folderUid: '2001' + type: file + disableDeletion: false + updateIntervalSeconds: 30 + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards/Learning foldersFromFilesStructure: false \ No newline at end of file