From f23e222bc0b542e818ee5beb1ff21413440d7f68 Mon Sep 17 00:00:00 2001 From: sam Date: Fri, 6 Mar 2026 13:31:03 -0700 Subject: [PATCH] Add Phase 3: TE/SR analytics, anomaly detection, DB schema reference - 4 new Grafana dashboards: - Database Schema Map (obmp-learn-07): interactive schema reference with live row counts, relationship diagrams, column details - TE & Segment Routing Analytics (obmp-learn-08): exposes BGP-LS TE/SR fields (bandwidth, admin groups, SRLG, SR SIDs, protection types) - Topology Change & Anomaly Detection (obmp-learn-09): link state change tracking, origin AS hijack detection, convergence timeline - Link Utilization & TE Thought Experiment (obmp-learn-10): capacity data from BGP-LS + streaming telemetry integration guide - DB_SCHEMA.md: standalone database reference (33 tables, 11 views) - 3 new ExaBGP scenarios: te_community_steering, origin_shift, path_diversity - Updated DOCS.md with Phase 3 dashboards and scenarios Co-Authored-By: Claude Opus 4.6 --- DB_SCHEMA.md | 387 ++++++++++++++++++ DOCS.md | 22 +- exabgp/scenarios/__init__.py | 86 ++++ .../dashboards/Learning/db_schema_map.json | 131 ++++++ .../Learning/link_utilization_te.json | 238 +++++++++++ .../dashboards/Learning/te_sr_analytics.json | 345 ++++++++++++++++ .../dashboards/Learning/topology_anomaly.json | 235 +++++++++++ 7 files changed, 1443 insertions(+), 1 deletion(-) create mode 100644 DB_SCHEMA.md create mode 100644 obmp-grafana/dashboards/Learning/db_schema_map.json create mode 100644 obmp-grafana/dashboards/Learning/link_utilization_te.json create mode 100644 obmp-grafana/dashboards/Learning/te_sr_analytics.json create mode 100644 obmp-grafana/dashboards/Learning/topology_anomaly.json diff --git a/DB_SCHEMA.md b/DB_SCHEMA.md new file mode 100644 index 0000000..9b302d9 --- /dev/null +++ b/DB_SCHEMA.md @@ -0,0 +1,387 @@ +# OpenBMP Database Schema Reference + +PostgreSQL database `openbmp` with TimescaleDB extension for time-series data. 
+ +## Entity Relationship Diagram + +``` +collectors + └── routers (collector_hash_id) + └── bgp_peers (router_hash_id) + ├── ip_rib (peer_hash_id) ──► base_attrs (base_attr_hash_id) + ├── ip_rib_log (peer_hash_id) + ├── l3vpn_rib (peer_hash_id) ──► base_attrs + ├── ls_nodes (peer_hash_id) + ├── ls_links (peer_hash_id) ──► ls_nodes (local/remote_node_hash_id) + ├── ls_prefixes (peer_hash_id) ──► ls_nodes (local_node_hash_id) + ├── peer_event_log (peer_hash_id) + ├── stat_reports (peer_hash_id) + └── stats_* tables (peer_hash_id) + +ip_rib.prefix ◄──► global_ip_rib.prefix (aggregated view) + ├── rpki_origin_as ◄── rpki_validator + └── irr_origin_as ◄── info_route + +base_attrs.origin_as ──► info_asn.asn (ASN enrichment) +routers.geo_ip_start ──► geo_ip.ip (geolocation) +``` + +--- + +## BMP Core Tables + +### routers +BMP-monitored routers (one row per monitored device). + +| Column | Type | Description | +|--------|------|-------------| +| hash_id | uuid | Primary key | +| name | varchar(200) | Router hostname | +| ip_address | inet | Router management IP | +| router_as | bigint | Router ASN | +| bgp_id | inet | BGP router-id | +| collector_hash_id | uuid | FK to collectors | +| state | opstate | up / down | +| timestamp | timestamp | Last update time | +| description | varchar(255) | Router description | +| init_data | text | BMP init message data | +| term_reason_code | int | BMP termination reason | + +### collectors +BMP collector instances. + +| Column | Type | Description | +|--------|------|-------------| +| hash_id | uuid | Primary key | +| admin_id | varchar(64) | Admin identifier | +| name | varchar(200) | Collector name | +| ip_address | varchar(40) | Collector IP | +| state | opstate | up / down | +| router_count | smallint | Number of monitored routers | + +### bgp_peers +BGP sessions per router (one row per peer per router). 
+ +| Column | Type | Description | +|--------|------|-------------| +| hash_id | uuid | Primary key (composite with router_hash_id) | +| router_hash_id | uuid | FK to routers | +| peer_addr | inet | Peer IP address | +| peer_as | bigint | Peer ASN | +| peer_bgp_id | inet | Peer BGP router-id | +| name | varchar(200) | Peer name | +| state | opstate | up / down | +| isl3vpnpeer | boolean | L3VPN peer flag | +| isipv4 | boolean | IPv4 peer | +| isprepolicy | boolean | Pre-policy RIB | +| islocrib | boolean | Local RIB | +| local_ip | inet | Local IP | +| local_asn | bigint | Local ASN | +| local_hold_time | smallint | Local hold time | +| remote_hold_time | smallint | Remote hold time | +| sent_capabilities | varchar(4096) | BGP capabilities sent | +| recv_capabilities | varchar(4096) | BGP capabilities received | +| table_name | varchar(255) | VRF/table name | + +### peer_event_log (TimescaleDB) +Historical BGP session state changes. + +| Column | Type | Description | +|--------|------|-------------| +| id | bigint | Event sequence | +| peer_hash_id | uuid | FK to bgp_peers | +| state | opstate | up / down | +| timestamp | timestamp | Event time (partition key) | +| bmp_reason | smallint | BMP reason code | +| bgp_err_code | smallint | BGP error code | +| bgp_err_subcode | smallint | BGP error subcode | +| error_text | varchar(255) | Error description | + +--- + +## BGP Path Attributes + +### base_attrs +BGP path attributes shared across routes. 
+ +| Column | Type | Description | +|--------|------|-------------| +| hash_id | uuid | Primary key | +| peer_hash_id | uuid | FK to bgp_peers | +| origin | varchar(16) | IGP / EGP / Incomplete | +| as_path | bigint[] | AS path array | +| as_path_count | smallint | AS path length | +| origin_as | bigint | Origin ASN | +| next_hop | inet | BGP next-hop | +| med | bigint | Multi-Exit Discriminator | +| local_pref | bigint | Local preference | +| community_list | varchar(15)[] | Standard communities | +| ext_community_list | varchar(50)[] | Extended communities (RT, etc.) | +| large_community_list | varchar(40)[] | Large communities (RFC 8092) | +| cluster_list | varchar(40)[] | Route reflector cluster list | +| isatomicagg | boolean | Atomic aggregate flag | +| originator_id | inet | RR originator ID | +| aggregator | varchar(64) | Aggregator | + +**Indexes**: GIN on as_path, community_list, ext_community_list, large_community_list + +--- + +## IP RIB Tables + +### ip_rib +Current IPv4/IPv6 unicast routing table. + +| Column | Type | Description | +|--------|------|-------------| +| hash_id | uuid | Route hash | +| peer_hash_id | uuid | FK to bgp_peers (composite PK) | +| base_attr_hash_id | uuid | FK to base_attrs | +| prefix | inet | IP prefix | +| prefix_len | smallint | Prefix length | +| origin_as | bigint | Origin ASN | +| isipv4 | boolean | IPv4 flag | +| iswithdrawn | boolean | Withdrawn flag | +| labels | varchar(255) | MPLS labels | +| path_id | bigint | Add-Path ID | +| isprepolicy | boolean | Pre-policy flag | +| isadjribin | boolean | Adj-RIB-In flag | +| timestamp | timestamp | Last update | +| first_added_timestamp | timestamp | First seen | + +### ip_rib_log (TimescaleDB) +Historical RIB changes — every advertisement and withdrawal. 
+ +| Column | Type | Description | +|--------|------|-------------| +| id | bigint | Change event ID | +| peer_hash_id | uuid | FK to bgp_peers | +| base_attr_hash_id | uuid | FK to base_attrs | +| prefix | inet | IP prefix | +| prefix_len | smallint | Prefix length | +| origin_as | bigint | Origin ASN | +| iswithdrawn | boolean | Withdrawal flag | +| timestamp | timestamp | Event time (partition key) | + +### global_ip_rib +Aggregated prefix summary across all peers. + +| Column | Type | Description | +|--------|------|-------------| +| prefix | inet | IP prefix (composite PK) | +| prefix_len | smallint | Prefix length | +| recv_origin_as | bigint | Received origin AS | +| rpki_origin_as | bigint | RPKI-validated origin AS | +| irr_origin_as | bigint | IRR-registered origin AS | +| irr_source | varchar(32) | IRR source (RADB, RIPE, etc.) | +| num_peers | int | Total advertising peers | +| iswithdrawn | boolean | Withdrawn flag | + +--- + +## L3VPN Tables + +### l3vpn_rib +L3VPN (RFC 4364) routes with Route Distinguisher. + +| Column | Type | Description | +|--------|------|-------------| +| hash_id | uuid | Route hash | +| peer_hash_id | uuid | FK to bgp_peers | +| base_attr_hash_id | uuid | FK to base_attrs | +| rd | varchar(128) | Route Distinguisher | +| prefix | inet | VPN prefix | +| prefix_len | smallint | Prefix length | +| origin_as | bigint | Origin ASN | +| labels | varchar(255) | MPLS VPN labels | +| ext_community_list | varchar(50)[] | Route Targets | +| path_id | bigint | Add-Path ID | +| iswithdrawn | boolean | Withdrawn flag | + +### l3vpn_rib_log (TimescaleDB) +Historical L3VPN route changes. + +--- + +## Link-State Tables (BGP-LS / RFC 7752) + +### ls_nodes +IS-IS / OSPF node information from BGP-LS. 
+ +| Column | Type | Description | +|--------|------|-------------| +| hash_id | uuid | Node hash | +| peer_hash_id | uuid | FK to bgp_peers (composite PK) | +| base_attr_hash_id | uuid | FK to base_attrs | +| asn | bigint | Node ASN | +| bgp_ls_id | bigint | BGP-LS Identifier | +| igp_router_id | varchar(46) | IGP Router ID | +| router_id | varchar(46) | BGP Router ID | +| protocol | ls_proto | IS-IS_L1, IS-IS_L2, OSPFv2, OSPFv3 | +| isis_area_id | varchar(46) | IS-IS area | +| ospf_area_id | varchar(16) | OSPF area | +| name | varchar(255) | Node hostname | +| flags | varchar(20) | Node flags | +| mt_ids | varchar(128) | Multi-Topology IDs | +| **sr_capabilities** | **varchar(255)** | **SR Global Block (SRGB) ranges** | +| iswithdrawn | boolean | Withdrawn flag | + +### ls_links +IS-IS / OSPF links with full TE and SR attributes. + +| Column | Type | Description | +|--------|------|-------------| +| hash_id | uuid | Link hash | +| peer_hash_id | uuid | FK to bgp_peers (composite PK) | +| local_node_hash_id | uuid | FK to ls_nodes (local end) | +| remote_node_hash_id | uuid | FK to ls_nodes (remote end) | +| local_router_id | varchar(46) | Local BGP Router ID | +| remote_router_id | varchar(46) | Remote BGP Router ID | +| local_igp_router_id | varchar(46) | Local IGP Router ID | +| remote_igp_router_id | varchar(46) | Remote IGP Router ID | +| interface_addr | inet | Local interface IP | +| neighbor_addr | inet | Remote interface IP | +| igp_metric | bigint | IGP metric | +| protocol | ls_proto | IGP protocol | +| mt_id | int | Multi-Topology ID | +| local_link_id | bigint | Local link identifier | +| remote_link_id | bigint | Remote link identifier | +| name | varchar(255) | Link name | +| **admin_group** | **bigint** | **TE admin group / link color bitmap** | +| **max_link_bw** | **bigint** | **Maximum link bandwidth (bytes/sec)** | +| **max_resv_bw** | **bigint** | **Maximum reservable bandwidth** | +| **unreserved_bw** | **varchar(128)** | **Unreserved BW per 
priority (8 values)** | +| **te_def_metric** | **bigint** | **TE default metric (for CSPF)** | +| **protection_type** | **varchar(60)** | **Link protection (FRR type)** | +| **mpls_proto_mask** | **ls_mpls_proto_mask** | **MPLS protocol support flags** | +| **srlg** | **varchar(128)** | **Shared Risk Link Group** | +| **peer_node_sid** | **varchar(128)** | **SR Peer Node SID (EPE, RFC 9086)** | +| **sr_adjacency_sids** | **varchar(255)** | **SR Adjacency SIDs** | +| iswithdrawn | boolean | Withdrawn flag | + +**Bold** = TE/SR fields available via BGP-LS but not used by default dashboards. + +### ls_prefixes +IS-IS / OSPF prefix information. + +| Column | Type | Description | +|--------|------|-------------| +| hash_id | uuid | Prefix hash | +| peer_hash_id | uuid | FK to bgp_peers (composite PK) | +| local_node_hash_id | uuid | FK to ls_nodes | +| prefix | inet | Advertised prefix | +| prefix_len | smallint | Prefix length | +| protocol | ls_proto | IGP protocol | +| metric | bigint | Prefix metric | +| mt_id | int | Multi-Topology ID | +| ospf_route_type | ospf_route_type | Intra/Inter/Ext-1/Ext-2/NSSA | +| igp_flags | varchar(20) | IGP flags | +| route_tag | bigint | Route tag | +| **sr_prefix_sids** | **varchar(255)** | **SR Prefix SIDs (node SIDs)** | +| iswithdrawn | boolean | Withdrawn flag | + +### ls_nodes_log, ls_links_log, ls_prefixes_log (TimescaleDB) +Historical link-state changes. Same columns as parent tables plus `id` (bigint) and timestamp as partition key. 
+ +--- + +## Statistics Tables (TimescaleDB) + +| Table | Purpose | Key Columns | +|-------|---------|-------------| +| **stat_reports** | BMP stat messages per peer | prefixes_rejected, known_dup_prefixes, num_routes_adj_rib_in, num_routes_local_rib | +| **stats_chg_byprefix** | Per-prefix update/withdrawal counts | interval_time, prefix, updates, withdraws | +| **stats_chg_byasn** | Per-ASN update/withdrawal counts | interval_time, origin_as, updates, withdraws | +| **stats_chg_bypeer** | Per-peer update/withdrawal counts | interval_time, updates, withdraws | +| **stats_peer_rib** | Per-peer RIB size over time | interval_time, v4_prefixes, v6_prefixes | +| **stats_peer_update_counts** | Update rate statistics | interval_time, advertise_avg/min/max, withdraw_avg/min/max | +| **stats_ip_origins** | Per-ASN IP prefix counts | interval_time, asn, v4_prefixes, v6_prefixes, v4_with_rpki, v4_with_irr | +| **stats_l3vpn_chg_byprefix** | L3VPN per-prefix stats | interval_time, rd, prefix, updates, withdraws | +| **stats_l3vpn_chg_bypeer** | L3VPN per-peer stats | interval_time, updates, withdraws | +| **stats_l3vpn_chg_byrd** | L3VPN per-RD stats | interval_time, rd, updates, withdraws | + +--- + +## Reference & Enrichment Tables + +| Table | Purpose | Key Columns | +|-------|---------|-------------| +| **rpki_validator** | RPKI ROAs | prefix, prefix_len, prefix_len_max, origin_as | +| **info_asn** | ASN WHOIS/IRR data | asn, as_name, org_name, country, source | +| **info_route** | Route IRR data | prefix, origin_as, descr, source | +| **geo_ip** | IP geolocation (DB-IP) | ip, country, city, latitude, longitude, isp_name | +| **pdb_exchange_peers** | PeeringDB IXP peering | ix_name, peer_name, peer_asn, speed, peer_ipv4/ipv6 | + +--- + +## Views + +| View | Joins | Purpose | +|------|-------|---------| +| **v_peers** | bgp_peers + routers + info_asn | Complete peer info with router name and ASN details | +| **v_ip_routes** | ip_rib + bgp_peers + base_attrs + routers | 
Full route detail with path attributes | +| **v_ip_routes_geo** | v_ip_routes + geo_ip | Routes with geolocation | +| **v_ip_routes_history** | ip_rib_log + base_attrs + bgp_peers + routers | Historical route changes with attributes | +| **v_l3vpn_routes** | l3vpn_rib + bgp_peers + base_attrs + routers | L3VPN routes with path attributes | +| **v_l3vpn_routes_history** | l3vpn_rib_log + base_attrs + bgp_peers + routers | Historical L3VPN changes | +| **v_ls_nodes** | ls_nodes + base_attrs + bgp_peers + routers | Link-state nodes with peer/router info | +| **v_ls_links** | ls_links + ls_nodes(x2) + routers | Links with local/remote node names + all TE/SR fields | +| **v_ls_prefixes** | ls_prefixes + ls_nodes + routers | LS prefixes with originating node info | + +--- + +## Custom Enum Types + +| Type | Values | +|------|--------| +| **opstate** | up, down | +| **ls_proto** | IS-IS_L1, IS-IS_L2, OSPFv2, OSPFv3, Direct, Static | +| **ospf_route_type** | Intra, Inter, Ext-1, Ext-2, NSSA-1, NSSA-2 | +| **ls_mpls_proto_mask** | MPLS protocol bitmask | +| **user_role** | admin, oper | + +--- + +## Key Query Patterns + +### Get all active routes with full attributes +```sql +SELECT r.prefix, r.prefix_len, ba.origin_as, ba.as_path, + ba.med, ba.local_pref, ba.community_list, ba.next_hop +FROM ip_rib r +JOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id +WHERE r.iswithdrawn = false AND r.isipv4 = true +``` + +### Get link-state topology with TE attributes +```sql +SELECT local_router_name, remote_router_name, + igp_metric, te_def_metric, max_link_bw, admin_group, srlg, + sr_adjacency_sids +FROM v_ls_links +WHERE peer_hash_id = '<peer_hash_id>' AND iswithdrawn = false +``` + +### Time-series RIB changes +```sql +SELECT date_trunc('minute', timestamp) as time, + SUM(CASE WHEN iswithdrawn = false THEN 1 ELSE 0 END) as ads, + SUM(CASE WHEN iswithdrawn = true THEN 1 ELSE 0 END) as withdrawals +FROM ip_rib_log +WHERE timestamp > NOW() - INTERVAL '24 hours' +GROUP BY 1 ORDER BY 1 +``` + +### 
RPKI validation status +```sql +SELECT CASE + WHEN rv.origin_as IS NOT NULL AND rv.origin_as = r.origin_as THEN 'Valid' + WHEN rv.origin_as IS NOT NULL THEN 'Invalid' + ELSE 'NotFound' + END as status, + COUNT(*) +FROM ip_rib r +LEFT JOIN rpki_validator rv ON rv.prefix = r.prefix AND rv.prefix_len = r.prefix_len +WHERE r.iswithdrawn = false +GROUP BY 1 +``` diff --git a/DOCS.md b/DOCS.md index 0a63ec4..7fee8cd 100644 --- a/DOCS.md +++ b/DOCS.md @@ -28,7 +28,7 @@ This is a **BGP Monitoring Platform (BMP) lab stack** deployed via Docker Compos - Receives BMP (BGP Monitoring Protocol, RFC 7854) telemetry from routers on TCP port 5000 - Streams BMP data through Kafka into a TimescaleDB/PostgreSQL database -- Provides **23 Grafana dashboards** (17 operational + 6 learning-focused) for real-time and historical BGP analysis +- Provides **27 Grafana dashboards** (17 operational + 6 learning + 4 advanced analytics) for real-time and historical BGP analysis - Includes an **ExaBGP route injector** that peers with the two CORE routers and injects synthetic BGP routes, enabling testing of BGP policy, route propagation, and Grafana dashboards without needing internet connectivity - Provides a **Vue 3 web UI** at `:5001` for point-and-click scenario management, live route tables, and peer monitoring @@ -312,6 +312,9 @@ python3 inject.py scenarios | `convergence_test` | 10 | Prefixes for timing BGP convergence — announce then check ip_rib_log timestamps | | `route_leak` | 10 | Real prefixes re-announced with short AS paths — simulates a route leak (community 65100:999) | | `hijack_simulation` | 10 | Prefixes claimed directly by AS 65100 — simulates a prefix hijack (community 65100:hijack) | +| `te_community_steering` | 15 | Routes tagged with TE communities for color-based steering (65020:100=red, 65020:200=blue, 65020:300=green) | +| `origin_shift` | 5 | Prefixes with changed origin AS — simulates origin migration for anomaly detection | +| `path_diversity` | 10 | Same prefixes 
with different AS paths/MEDs — demonstrates best-path selection | ### 7.4 Load a scenario @@ -495,6 +498,23 @@ Six learning-focused dashboards in a separate folder, designed to teach BGP conc > **RPKI note:** The `rpki_validator` table is populated by a cron job in `psql-app` every 2 hours. Dashboard `obmp-learn-04` will show zero counts until the cron runs — check `ENABLE_RPKI=1` in `docker-compose.yml`. +### Advanced Analytics Dashboards (folder: `OBMP-Learning`) + +Four advanced dashboards that go beyond basic BMP monitoring, unlocking TE/SR data and providing heuristic analysis. + +| Dashboard | UID | What it provides | +|-----------|-----|-----------------| +| Database Schema Map | `obmp-learn-07` | Interactive schema reference — live table row counts, entity relationships, column details for all 33 tables and 11 views | +| TE & Segment Routing Analytics | `obmp-learn-08` | Exposes TE/SR fields from BGP-LS: link bandwidth, admin groups, SRLG, SR SIDs, adjacency SIDs, protection types | +| Topology Change & Anomaly Detection | `obmp-learn-09` | Heuristic analysis: link state changes over time, origin AS hijack detection, convergence timeline, route consistency | +| Link Utilization & TE Thought Experiment | `obmp-learn-10` | BGP-LS capacity data (bandwidth, TE metrics) + integration guide for streaming telemetry (gNMI/MDT) | + +> **TE/SR data note:** Some TE fields (admin_group, max_link_bw, srlg, sr_adjacency_sids) may be NULL if routers don't advertise those TLVs. Enable `mpls traffic-eng` under IS-IS and `segment-routing mpls` for full data. + +### Database Schema Reference + +A standalone database schema reference is also available at `DB_SCHEMA.md` in the repo root. It documents all 33 tables, 11 views, TE/SR columns, enum types, and common query patterns. + --- ## 10. 
Sanity Checks diff --git a/exabgp/scenarios/__init__.py b/exabgp/scenarios/__init__.py index a1c91a1..3a7ed3a 100644 --- a/exabgp/scenarios/__init__.py +++ b/exabgp/scenarios/__init__.py @@ -363,6 +363,80 @@ _HIJACK_ROUTES = [ ] +# --------------------------------------------------------------------------- +# Scenario: te_community_steering +# Routes tagged with TE communities representing different "colors" for +# community-based TE policy steering. Shows how communities drive path +# selection when routers apply route-policy based on community values. +# --------------------------------------------------------------------------- + +_TE_COMMUNITY_ROUTES = [ + # Red paths (community 65020:100) — high-priority, low-latency + _r('10.210.0.0/24', [65100, 65020], communities=['65020:100'], med=10), + _r('10.210.1.0/24', [65100, 65020], communities=['65020:100'], med=10), + _r('10.210.2.0/24', [65100, 65020], communities=['65020:100'], med=10), + _r('10.210.3.0/24', [65100, 65020], communities=['65020:100'], med=10), + _r('10.210.4.0/24', [65100, 65020], communities=['65020:100'], med=10), + # Blue paths (community 65020:200) — bulk transfer, cost-optimized + _r('10.220.0.0/24', [65100, 65020, 3356], communities=['65020:200'], med=100), + _r('10.220.1.0/24', [65100, 65020, 3356], communities=['65020:200'], med=100), + _r('10.220.2.0/24', [65100, 65020, 3356], communities=['65020:200'], med=100), + _r('10.220.3.0/24', [65100, 65020, 3356], communities=['65020:200'], med=100), + _r('10.220.4.0/24', [65100, 65020, 3356], communities=['65020:200'], med=100), + # Green paths (community 65020:300) — backup/diverse paths + _r('10.230.0.0/24', [65100, 65020, 1299, 6762], communities=['65020:300'], med=200), + _r('10.230.1.0/24', [65100, 65020, 1299, 6762], communities=['65020:300'], med=200), + _r('10.230.2.0/24', [65100, 65020, 1299, 6762], communities=['65020:300'], med=200), + _r('10.230.3.0/24', [65100, 65020, 1299, 6762], communities=['65020:300'], med=200), + 
_r('10.230.4.0/24', [65100, 65020, 1299, 6762], communities=['65020:300'], med=200), +] + + +# --------------------------------------------------------------------------- +# Scenario: origin_shift +# Simulates an origin AS change: prefixes initially associated with +# well-known origin ASNs are re-announced with a different origin. +# Use: load internet_sample first, then load origin_shift to see the +# origin_as column change in ip_rib_log (visible on Anomaly dashboard). +# --------------------------------------------------------------------------- + +_ORIGIN_SHIFT_ROUTES = [ + # These prefixes overlap with internet_sample but have different origin ASNs + _r('8.8.8.0/24', [65100, 64999], communities=['65100:origin-shift']), # was 15169 (Google) + _r('1.1.1.0/24', [65100, 64998], communities=['65100:origin-shift']), # was 13335 (Cloudflare) + _r('9.9.9.0/24', [65100, 64997], communities=['65100:origin-shift']), # was 19281 (Quad9) + _r('208.67.222.0/24', [65100, 64996], communities=['65100:origin-shift']), # was 36692 (OpenDNS) + _r('156.154.70.0/24', [65100, 64995], communities=['65100:origin-shift']), # was 19318 (Neustar) +] + + +# --------------------------------------------------------------------------- +# Scenario: path_diversity +# Multiple announcements of the same prefix with different AS paths, +# MEDs, and communities. 
Demonstrates best-path selection: +# - Shorter AS path wins (unless local-pref overrides) +# - Lower MED preferred among paths from same neighbor AS +# - Communities tag paths for policy identification +# --------------------------------------------------------------------------- + +_PATH_DIVERSITY_ROUTES = [ + # Prefix 1: 3 paths with varying length and MED + _r('10.250.0.0/24', [65100, 174], communities=['65100:path-a'], med=50), + _r('10.250.0.0/24', [65100, 174, 3356], communities=['65100:path-b'], med=100), + _r('10.250.0.0/24', [65100, 174, 3356, 15169], communities=['65100:path-c'], med=150), + # Prefix 2: paths with same length but different MED + _r('10.250.1.0/24', [65100, 1299, 15169], communities=['65100:low-med'], med=10), + _r('10.250.1.0/24', [65100, 3356, 15169], communities=['65100:high-med'], med=500), + # Prefix 3: local-pref override (higher local-pref wins over shorter path) + _r('10.250.2.0/24', [65100, 2914], communities=['65100:low-lp'], local_pref=50), + _r('10.250.2.0/24', [65100, 2914, 7018], communities=['65100:high-lp'], local_pref=200), + # Prefix 4: transit diversity + _r('10.250.3.0/24', [65100, 174, 32934], communities=['65100:via-cogent']), + _r('10.250.3.0/24', [65100, 3356, 32934], communities=['65100:via-lumen']), + _r('10.250.3.0/24', [65100, 2914, 32934], communities=['65100:via-ntt']), +] + + # --------------------------------------------------------------------------- # Registry # --------------------------------------------------------------------------- @@ -404,4 +478,16 @@ SCENARIOS = { 'description': '10 prefixes announced as if directly originated by AS 65100 — simulates a prefix hijack (community 65100:hijack)', 'routes': _HIJACK_ROUTES, }, + 'te_community_steering': { + 'description': 'Routes tagged with TE communities for color-based steering (65020:100=red, 65020:200=blue, 65020:300=green)', + 'routes': _TE_COMMUNITY_ROUTES, + }, + 'origin_shift': { + 'description': '5 prefixes with changed origin AS — simulates 
origin migration/hijack for anomaly detection', + 'routes': _ORIGIN_SHIFT_ROUTES, + }, + 'path_diversity': { + 'description': 'Same prefixes with different AS paths and MEDs — demonstrates best-path selection and path diversity', + 'routes': _PATH_DIVERSITY_ROUTES, + }, } diff --git a/obmp-grafana/dashboards/Learning/db_schema_map.json b/obmp-grafana/dashboards/Learning/db_schema_map.json new file mode 100644 index 0000000..49a641d --- /dev/null +++ b/obmp-grafana/dashboards/Learning/db_schema_map.json @@ -0,0 +1,131 @@ +{ + "uid": "obmp-learn-07", + "title": "Database Schema Map", + "schemaVersion": 39, + "tags": ["obmp-learning"], + "editable": true, + "time": { + "from": "now-6h", + "to": "now" + }, + "templating": { + "list": [] + }, + "panels": [ + { + "id": 1, + "title": "Table Row Counts", + "type": "table", + "gridPos": { "h": 12, "w": 8, "x": 0, "y": 0 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "refId": "A", + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "rawSql": "SELECT 'routers' as table_name, count(*) as rows FROM routers\nUNION ALL SELECT 'collectors', count(*) FROM collectors\nUNION ALL SELECT 'bgp_peers', count(*) FROM bgp_peers\nUNION ALL SELECT 'peer_event_log', count(*) FROM peer_event_log\nUNION ALL SELECT 'base_attrs', count(*) FROM base_attrs\nUNION ALL SELECT 'ip_rib', count(*) FROM ip_rib\nUNION ALL SELECT 'ip_rib_log', count(*) FROM ip_rib_log\nUNION ALL SELECT 'l3vpn_rib', count(*) FROM l3vpn_rib\nUNION ALL SELECT 'global_ip_rib', count(*) FROM global_ip_rib\nUNION ALL SELECT 'ls_nodes', count(*) FROM ls_nodes\nUNION ALL SELECT 'ls_links', count(*) FROM ls_links\nUNION ALL SELECT 'ls_prefixes', count(*) FROM ls_prefixes\nUNION ALL SELECT 'ls_nodes_log', count(*) FROM ls_nodes_log\nUNION ALL SELECT 'ls_links_log', count(*) FROM ls_links_log\nUNION ALL SELECT 'ls_prefixes_log', count(*) FROM ls_prefixes_log\nUNION ALL SELECT 'rpki_validator', count(*) FROM rpki_validator\nUNION 
ALL SELECT 'info_asn', count(*) FROM info_asn\nUNION ALL SELECT 'info_route', count(*) FROM info_route\nUNION ALL SELECT 'stat_reports', count(*) FROM stat_reports\nUNION ALL SELECT 'geo_ip', count(*) FROM geo_ip\nORDER BY table_name", + "format": "table" + } + ] + }, + { + "id": 2, + "title": "Table Relationships", + "type": "text", + "gridPos": { "h": 12, "w": 8, "x": 8, "y": 0 }, + "options": { + "mode": "markdown", + "content": "## Entity Relationships\n\n### BMP Core Chain\n```\ncollectors\n └── routers (collector_hash_id)\n └── bgp_peers (router_hash_id)\n ├── ip_rib (peer_hash_id)\n ├── ip_rib_log (peer_hash_id)\n ├── l3vpn_rib (peer_hash_id)\n ├── ls_nodes (peer_hash_id)\n ├── ls_links (peer_hash_id)\n ├── ls_prefixes (peer_hash_id)\n ├── peer_event_log (peer_hash_id)\n └── stat_reports (peer_hash_id)\n```\n\n### Path Attributes\n```\nip_rib ──(base_attr_hash_id)──► base_attrs\n │ ├── as_path (bigint[])\n │ ├── origin_as\n │ ├── next_hop\n │ ├── med / local_pref\n │ ├── community_list[]\n │ ├── ext_community_list[]\n │ └── large_community_list[]\n │\n └──(prefix)──► global_ip_rib\n ├── rpki_origin_as\n ├── irr_origin_as\n └── num_peers\n```\n\n### Link-State Topology\n```\nls_nodes ◄── ls_links (local_node_hash_id, remote_node_hash_id)\nls_nodes ◄── ls_prefixes (local_node_hash_id)\n```\n\n### Reference Data\n```\nrpki_validator ──(prefix, origin_as)──► validates ip_rib\ninfo_asn ──(asn)──► enriches base_attrs.origin_as\ninfo_route ──(prefix)──► enriches ip_rib.prefix\ngeo_ip ──(ip)──► geolocates routers, peers\n```" + } + }, + { + "id": 3, + "title": "BMP Core Tables", + "type": "text", + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 0 }, + "options": { + "mode": "markdown", + "content": "## BMP Core Tables\n\n| Table | Purpose | Key Columns |\n|-------|---------|-------------|\n| **routers** | BMP-monitored routers | hash_id, name, ip_address, router_as, state, bgp_id |\n| **collectors** | BMP collector instances | hash_id, admin_id, name, ip_address, 
router_count |\n| **bgp_peers** | BGP sessions per router | hash_id, router_hash_id, peer_addr, peer_as, state, isl3vpnpeer |\n| **peer_event_log** | Session state history (TimescaleDB) | peer_hash_id, state, timestamp, bmp_reason, bgp_err_code |\n| **stat_reports** | BMP statistics messages | peer_hash_id, prefixes_rejected, num_routes_adj_rib_in, num_routes_local_rib |\n| **users** | Access control | username, password, type (admin/oper) |" + } + }, + { + "id": 4, + "title": "RIB & Path Attribute Tables", + "type": "text", + "gridPos": { "h": 8, "w": 8, "x": 16, "y": 8 }, + "options": { + "mode": "markdown", + "content": "## RIB & Path Attribute Tables\n\n| Table | Purpose | Key Columns |\n|-------|---------|-------------|\n| **base_attrs** | BGP path attributes | hash_id, as_path[], as_path_count, origin_as, next_hop, med, local_pref, community_list[], ext_community_list[], large_community_list[], cluster_list, originator_id |\n| **ip_rib** | IPv4/IPv6 unicast RIB | hash_id, peer_hash_id, prefix, prefix_len, origin_as, iswithdrawn, labels, path_id |\n| **ip_rib_log** | RIB change history (TimescaleDB) | peer_hash_id, prefix, prefix_len, origin_as, iswithdrawn, timestamp |\n| **l3vpn_rib** | L3VPN/MPLS VPN routes | hash_id, peer_hash_id, rd, prefix, labels, ext_community_list[] |\n| **l3vpn_rib_log** | L3VPN change history (TimescaleDB) | peer_hash_id, rd, prefix, iswithdrawn, timestamp |\n| **global_ip_rib** | Aggregated prefix summary | prefix, recv_origin_as, rpki_origin_as, irr_origin_as, num_peers |" + } + }, + { + "id": 5, + "title": "Link-State Tables", + "type": "text", + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 }, + "options": { + "mode": "markdown", + "content": "## Link-State Tables (BGP-LS / RFC 7752)\n\n| Table | Purpose | Key Columns |\n|-------|---------|-------------|\n| **ls_nodes** | IS-IS/OSPF nodes | hash_id, peer_hash_id, igp_router_id, name, protocol, asn, sr_capabilities, isis_area_id |\n| **ls_links** | IS-IS/OSPF links + TE/SR | 
hash_id, local/remote_node_hash_id, interface_addr, neighbor_addr, igp_metric, **te_def_metric**, **max_link_bw**, **max_resv_bw**, **unreserved_bw**, **admin_group**, **srlg**, **sr_adjacency_sids**, **peer_node_sid**, **protection_type**, **mpls_proto_mask** |\n| **ls_prefixes** | IS-IS/OSPF prefixes | hash_id, local_node_hash_id, prefix, metric, sr_prefix_sids, igp_flags |\n| **ls_nodes_log** | Node change history (TimescaleDB) | Same as ls_nodes + timestamp |\n| **ls_links_log** | Link change history (TimescaleDB) | Same as ls_links + timestamp |\n| **ls_prefixes_log** | Prefix change history (TimescaleDB) | Same as ls_prefixes + timestamp |\n\n**Bold columns** = TE/SR fields not used by any existing dashboard" + } + }, + { + "id": 6, + "title": "Statistics Tables", + "type": "text", + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 }, + "options": { + "mode": "markdown", + "content": "## Statistics Tables (TimescaleDB Hypertables)\n\n| Table | Purpose | Key Columns |\n|-------|---------|-------------|\n| **stat_reports** | BMP stat messages | peer_hash_id, prefixes_rejected, known_dup_prefixes, num_routes_adj_rib_in |\n| **stats_chg_byprefix** | Per-prefix churn stats | interval_time, peer_hash_id, prefix, updates, withdraws |\n| **stats_chg_byasn** | Per-ASN churn stats | interval_time, peer_hash_id, origin_as, updates, withdraws |\n| **stats_chg_bypeer** | Per-peer churn stats | interval_time, peer_hash_id, updates, withdraws |\n| **stats_peer_rib** | Per-peer RIB size | interval_time, peer_hash_id, v4_prefixes, v6_prefixes |\n| **stats_peer_update_counts** | Update rate statistics | interval_time, peer_hash_id, advertise_avg/min/max, withdraw_avg/min/max |\n| **stats_ip_origins** | Per-ASN prefix counts | interval_time, asn, v4_prefixes, v6_prefixes, v4_with_rpki, v4_with_irr |" + } + }, + { + "id": 7, + "title": "Reference & Enrichment Tables", + "type": "text", + "gridPos": { "h": 6, "w": 12, "x": 0, "y": 20 }, + "options": { + "mode": "markdown", + 
"content": "## Reference & Enrichment Tables\n\n| Table | Purpose | Key Columns |\n|-------|---------|-------------|\n| **rpki_validator** | RPKI ROAs | prefix, prefix_len, prefix_len_max, origin_as |\n| **info_asn** | ASN WHOIS/IRR data | asn, as_name, org_name, country, source |\n| **info_route** | Route IRR data | prefix, prefix_len, origin_as, descr, source |\n| **geo_ip** | IP geolocation (DB-IP) | ip, country, city, latitude, longitude, isp_name |\n| **pdb_exchange_peers** | PeeringDB IXP data | ix_name, peer_name, peer_asn, speed, peer_ipv4/ipv6 |" + } + }, + { + "id": 8, + "title": "Views Quick Reference", + "type": "text", + "gridPos": { "h": 6, "w": 12, "x": 12, "y": 20 }, + "options": { + "mode": "markdown", + "content": "## Database Views\n\n| View | Joins | Purpose |\n|------|-------|---------|\n| **v_peers** | bgp_peers + routers + info_asn | Complete peer info with router name and ASN details |\n| **v_ip_routes** | ip_rib + bgp_peers + base_attrs + routers | Full route detail with path attributes |\n| **v_ip_routes_geo** | v_ip_routes + geo_ip | Routes with geolocation |\n| **v_ip_routes_history** | ip_rib_log + base_attrs + bgp_peers + routers | Historical route changes with attributes |\n| **v_l3vpn_routes** | l3vpn_rib + bgp_peers + base_attrs + routers | L3VPN routes with path attributes |\n| **v_l3vpn_routes_history** | l3vpn_rib_log + base_attrs + bgp_peers + routers | Historical L3VPN changes |\n| **v_ls_nodes** | ls_nodes + base_attrs + bgp_peers + routers | Link-state nodes with peer/router info |\n| **v_ls_links** | ls_links + ls_nodes(x2) + routers | Links with local/remote node names + TE fields |\n| **v_ls_prefixes** | ls_prefixes + ls_nodes + routers | LS prefixes with originating node info |\n\n### Enum Types\n- **opstate**: up, down\n- **ls_proto**: IS-IS_L1, IS-IS_L2, OSPFv2, OSPFv3, Direct, Static\n- **ospf_route_type**: Intra, Inter, Ext-1, Ext-2, NSSA-1, NSSA-2\n- **ls_mpls_proto_mask**: MPLS protocol bitmask" + } + }, + { + "id": 
9, + "title": "LinkState Column Details", + "type": "table", + "gridPos": { "h": 10, "w": 12, "x": 0, "y": 26 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "refId": "A", + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "rawSql": "SELECT column_name, data_type, \n CASE \n WHEN column_name IN ('admin_group','max_link_bw','max_resv_bw','unreserved_bw','te_def_metric','protection_type','srlg','sr_adjacency_sids','peer_node_sid','mpls_proto_mask') THEN 'TE/SR'\n WHEN column_name IN ('hash_id','peer_hash_id','base_attr_hash_id','local_node_hash_id','remote_node_hash_id') THEN 'FK/Key'\n ELSE 'Core'\n END as category\nFROM information_schema.columns \nWHERE table_name = 'ls_links' AND table_schema = 'public'\nORDER BY ordinal_position", + "format": "table" + } + ] + }, + { + "id": 10, + "title": "ip_rib Column Details", + "type": "table", + "gridPos": { "h": 10, "w": 12, "x": 12, "y": 26 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "refId": "A", + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "rawSql": "SELECT column_name, data_type,\n CASE \n WHEN column_name IN ('hash_id','peer_hash_id','base_attr_hash_id') THEN 'FK/Key'\n ELSE 'Core'\n END as category\nFROM information_schema.columns \nWHERE table_name = 'ip_rib' AND table_schema = 'public'\nORDER BY ordinal_position", + "format": "table" + } + ] + } + ] +} diff --git a/obmp-grafana/dashboards/Learning/link_utilization_te.json b/obmp-grafana/dashboards/Learning/link_utilization_te.json new file mode 100644 index 0000000..9eceab8 --- /dev/null +++ b/obmp-grafana/dashboards/Learning/link_utilization_te.json @@ -0,0 +1,238 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { "type": "datasource", "uid": "grafana" }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + 
"fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { "type": "auto" }, + "inspect": false + } + }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Max BW (B/s)" }, + "properties": [{ "id": "unit", "value": "Bps" }] + }, + { + "matcher": { "id": "byName", "options": "Max Reservable BW" }, + "properties": [{ "id": "unit", "value": "Bps" }] + } + ] + }, + "gridPos": { "h": 10, "w": 24, "x": 0, "y": 0 }, + "id": 1, + "options": { + "showHeader": true, + "sortBy": [{ "desc": false, "displayName": "Local Router" }] + }, + "targets": [ + { + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "format": "table", + "rawQuery": true, + "rawSql": "SELECT local_router_name as \"Local Router\",\n remote_router_name as \"Remote Router\",\n interface_addr::text as \"Interface IP\",\n neighbor_addr::text as \"Neighbor IP\",\n max_link_bw as \"Max BW (B/s)\",\n max_resv_bw as \"Max Reservable BW\",\n unreserved_bw as \"Unreserved BW\",\n igp_metric as \"IGP Metric\",\n te_def_metric as \"TE Metric\"\nFROM v_ls_links\nWHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false\nORDER BY local_router_name, remote_router_name", + "refId": "A" + } + ], + "title": "Link Capacity Inventory (from BGP-LS)", + "type": "table" + }, + { + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisLabel": "Bandwidth (B/s)", + "fillOpacity": 80, + "gradientMode": "none", + "lineWidth": 1, + "scaleDistribution": { "type": "linear" }, + "showValue": "auto", + "stacking": { "group": "A", "mode": "none" } + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 }, + "id": 2, + 
"options": { + "barRadius": 0.1, + "barWidth": 0.8, + "groupWidth": 0.7, + "legend": { "calcs": [], "displayMode": "list", "placement": "bottom" }, + "orientation": "horizontal", + "tooltip": { "mode": "single", "sort": "none" }, + "xTickLabelRotation": 0 + }, + "targets": [ + { + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "format": "table", + "rawQuery": true, + "rawSql": "SELECT local_router_name || ' -> ' || remote_router_name as \"Link\",\n COALESCE(max_link_bw, 0) as \"Max Bandwidth\",\n COALESCE(max_resv_bw, 0) as \"Max Reservable\",\n COALESCE(max_link_bw, 0) - COALESCE(max_resv_bw, 0) as \"Unreserved Gap\"\nFROM v_ls_links\nWHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false\n AND max_link_bw IS NOT NULL AND max_link_bw > 0\nORDER BY max_link_bw DESC", + "refId": "A" + } + ], + "title": "Capacity vs Reservable Bandwidth", + "type": "barchart" + }, + { + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "thresholds" }, + "thresholds": { + "mode": "percentage", + "steps": [ + { "color": "green", "value": null }, + { "color": "yellow", "value": 50 }, + { "color": "orange", "value": 75 }, + { "color": "red", "value": 90 } + ] + }, + "unit": "percentunit", + "max": 1, + "min": 0 + }, + "overrides": [] + }, + "gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 }, + "id": 3, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": true + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "sizing": "auto" + }, + "targets": [ + { + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "format": "table", + "rawQuery": true, + "rawSql": "SELECT local_router_name || ' -> ' || remote_router_name as \"Link\",\n CASE WHEN max_link_bw > 0 \n THEN 1.0 - (COALESCE(max_resv_bw, 0)::float / max_link_bw::float)\n ELSE 0 END as \"Reservation Ratio\"\nFROM 
v_ls_links\nWHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false\n AND max_link_bw IS NOT NULL AND max_link_bw > 0\nORDER BY \"Reservation Ratio\" DESC\nLIMIT 10", + "refId": "A" + } + ], + "title": "Non-Reservable Bandwidth Ratio (Higher = Less Reservable)", + "type": "gauge" + }, + { + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisBorderShow": false, + "fillOpacity": 80, + "gradientMode": "none", + "lineWidth": 1, + "showValue": "auto", + "stacking": { "group": "A", "mode": "none" } + } + }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 20 }, + "id": 4, + "options": { + "barRadius": 0.1, + "barWidth": 0.8, + "groupWidth": 0.7, + "legend": { "calcs": [], "displayMode": "list", "placement": "bottom" }, + "orientation": "horizontal", + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "format": "table", + "rawQuery": true, + "rawSql": "SELECT local_router_name || ' -> ' || remote_router_name as \"Link\",\n igp_metric as \"IGP Metric\",\n COALESCE(te_def_metric, 0) as \"TE Default Metric\"\nFROM v_ls_links\nWHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false\nORDER BY igp_metric DESC\nLIMIT 20", + "refId": "A" + } + ], + "title": "IGP Metric vs TE Default Metric", + "type": "barchart" + }, + { + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 20 }, + "id": 5, + "options": { + "mode": "markdown", + "content": "## What-If: CSPF Path Computation\n\nIn a real MPLS-TE or SR-TE deployment, the headend router runs **Constrained Shortest Path First (CSPF)** to find paths that satisfy:\n\n1. **Bandwidth constraint** - Enough unreserved BW at the required priority\n2. **Admin group (affinity)** - Link colors must match include/exclude masks\n3. **SRLG diversity** - Backup path avoids shared risk with primary\n4. 
**TE metric optimization** - Minimize TE metric (not IGP metric)\n\n### How BGP-LS Enables This\n\nBGP-LS distributes the complete IGP topology **with TE attributes** to an external controller (PCE, SDN controller). The controller can:\n\n- Build a Traffic Engineering Database (TED)\n- Run CSPF with arbitrary constraints\n- Program SR-TE policies via PCEP or gRPC\n\n### Data Available in This Lab\n\n| Attribute | Source | Available? |\n|-----------|--------|------------|\n| Topology (nodes/links) | BGP-LS | Yes |\n| IGP Metric | BGP-LS | Yes |\n| TE Default Metric | BGP-LS TLV 1092 | Check TE table |\n| Max Link BW | BGP-LS TLV 1089 | Check TE table |\n| Max Reservable BW | BGP-LS TLV 1090 | Check TE table |\n| Unreserved BW | BGP-LS TLV 1091 | Check TE table |\n| Admin Group | BGP-LS TLV 1088 | Check TE table |\n| SRLG | BGP-LS TLV 1096 | Check TE table |\n| SR Node SID | BGP-LS Prefix-SID TLV 1158 | Check SR table |\n| SR Adj SID | BGP-LS TLV 1099 | Check SR table |" + }, + "title": "CSPF & Traffic Engineering Concepts", + "type": "text" + }, + { + "gridPos": { "h": 12, "w": 24, "x": 0, "y": 28 }, + "id": 6, + "options": { + "mode": "markdown", + "content": "## Integration Guide: Adding Real-Time Link Utilization\n\nBMP/BGP-LS provides **capacity** data (max bandwidth, reservable bandwidth) but NOT real-time **utilization**. 
To complete the traffic engineering picture, you need streaming telemetry.\n\n### Architecture\n\n```\n +------------------+\n IOS-XR Routers ---->| OpenBMP Collector|----> PostgreSQL (topology + capacity)\n | +------------------+\n | \n +-- MDT/gRPC ->| Telegraf |----> InfluxDB/Prometheus (utilization)\n +------------------+\n |\n +------------------+\n | Grafana | <-- Mixed datasource queries\n +------------------+\n```\n\n### Step 1: Enable Model-Driven Telemetry on IOS-XR\n\n```\ntelemetry model-driven\n sensor-group INTF-COUNTERS\n sensor-path Cisco-IOS-XR-infra-statsd-oper:infra-statistics/interfaces/interface/latest/generic-counters\n !\n subscription INTF-SUB\n sensor-group-id INTF-COUNTERS sample-interval 30000\n destination-id TELEGRAF\n !\n destination-group TELEGRAF\n address-family ipv4 10.40.40.202 port 57000\n encoding self-describing-gpb\n protocol grpc no-tls\n !\n !\n```\n\n### Step 2: Telegraf Configuration\n\n```toml\n[[inputs.cisco_telemetry_mdt]]\n transport = \"grpc\"\n service_address = \":57000\"\n\n[[outputs.influxdb_v2]]\n urls = [\"http://localhost:8086\"]\n token = \"your-token\"\n organization = \"openbmp\"\n bucket = \"telemetry\"\n```\n\n### Step 3: Grafana Mixed Datasource Query\n\nCombine BGP-LS capacity from PostgreSQL with utilization from InfluxDB:\n\n```\n-- PostgreSQL: Get link capacity\nSELECT interface_addr::text as interface, max_link_bw\nFROM v_ls_links WHERE peer_hash_id = '$peer_hash'\n\n-- InfluxDB: Get interface utilization\nfrom(bucket: \"telemetry\")\n |> range(start: -1h)\n |> filter(fn: (r) => r._measurement == \"Cisco-IOS-XR-infra-statsd-oper\")\n |> filter(fn: (r) => r._field == \"bytes-received\" or r._field == \"bytes-sent\")\n |> derivative(unit: 1s, nonNegative: true)\n```\n\n### Step 4: Calculate Utilization %\n\nIn Grafana, use **Transformations** to:\n1. Join PostgreSQL capacity with InfluxDB utilization by interface IP\n2. Add calculated field: `utilization_pct = bytes_per_sec / max_link_bw * 100`\n3. 
Set threshold alerts: >80% = warning, >95% = critical\n\n### Key Model-Driven Telemetry Sensor Paths for IOS-XR\n\n| Sensor Path | Data |\n|-------------|------|\n| `Cisco-IOS-XR-infra-statsd-oper:infra-statistics/interfaces/interface/latest/generic-counters` | Interface byte/packet counters |\n| `Cisco-IOS-XR-infra-statsd-oper:infra-statistics/interfaces/interface/latest/data-rate` | Current data rate (kbit/s) |\n| `Cisco-IOS-XR-mpls-te-oper:mpls-te/tunnels/summary` | MPLS-TE tunnel summary |\n| `Cisco-IOS-XR-ip-rsvp-oper:rsvp/interface-briefs` | RSVP interface reservations |\n| `Cisco-IOS-XR-segment-routing-ms-oper:srms/policy` | SR mapping server (SRMS) policy state |\n\n### RFC 8571: Performance Metrics via BGP-LS\n\nIf routers support RFC 8571, these metrics flow through BGP-LS automatically:\n- **Unidirectional Link Delay** (TLV 1114) - microseconds\n- **Min/Max Link Delay** (TLV 1115)\n- **Delay Variation (jitter)** (TLV 1116)\n- **Link Loss** (TLV 1117) - percentage\n- **Residual Bandwidth** (TLV 1118)\n- **Available Bandwidth** (TLV 1119)\n- **Utilized Bandwidth** (TLV 1120)\n\nThese would appear in the `ls_links` table if the OpenBMP parser supports them." 
+ }, + "title": "Integration Guide: Streaming Telemetry for Link Utilization", + "type": "text" + } + ], + "schemaVersion": 39, + "tags": ["obmp-learning"], + "templating": { + "list": [ + { + "current": {}, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "definition": "SELECT __text,__value FROM (\n select peername as __text, peer_hash_id as __value, count(*) as count\n from v_ls_nodes\n group by peername,peer_hash_id) d\nwhere count > 0", + "hide": 0, + "includeAll": false, + "label": "BGP Peer", + "multi": false, + "name": "peer_hash", + "options": [], + "query": "SELECT __text,__value FROM (\n select peername as __text, peer_hash_id as __value, count(*) as count\n from v_ls_nodes\n group by peername,peer_hash_id) d\nwhere count > 0", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { "from": "now-6h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "Link Utilization & TE Thought Experiment", + "uid": "obmp-learn-10", + "version": 1 +} diff --git a/obmp-grafana/dashboards/Learning/te_sr_analytics.json b/obmp-grafana/dashboards/Learning/te_sr_analytics.json new file mode 100644 index 0000000..4289d27 --- /dev/null +++ b/obmp-grafana/dashboards/Learning/te_sr_analytics.json @@ -0,0 +1,345 @@ +{ + "annotations": {"list": [{"builtIn": 1,"datasource": {"type": "datasource","uid": "grafana"},"enable": true,"hide": true,"iconColor": "rgba(0, 211, 255, 1)","name": "Annotations & Alerts","type": "dashboard"}]}, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": { + "defaults": {"custom": {"align": "auto","displayMode": "auto"}}, + "overrides": [] + }, + "gridPos": {"h": 10,"w": 24,"x": 0,"y": 0}, + "id": 1, + "options": {"footer": {"fields": "","reducer": ["sum"],"show": false},"showHeader": true}, + "targets": [ + { + "datasource": 
{"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT local_router_name as \"Local Router\", \n remote_router_name as \"Remote Router\",\n igp_metric as \"IGP Metric\",\n te_def_metric as \"TE Metric\",\n max_link_bw as \"Max BW (B/s)\",\n max_resv_bw as \"Max Reservable BW\",\n unreserved_bw as \"Unreserved BW\",\n admin_group as \"Admin Group\",\n protection_type as \"Protection\",\n srlg as \"SRLG\"\nFROM v_ls_links\nWHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false\nORDER BY local_router_name, remote_router_name", + "refId": "A" + } + ], + "title": "TE Link Capacity Map", + "type": "table" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": { + "defaults": {"color": {"mode": "palette-classic"}}, + "overrides": [] + }, + "gridPos": {"h": 10,"w": 12,"x": 0,"y": 10}, + "id": 2, + "options": { + "barRadius": 0, + "barWidth": 0.97, + "groupWidth": 0.7, + "legend": {"displayMode": "list","placement": "bottom"}, + "orientation": "auto", + "showValue": "auto", + "stacking": "none", + "tooltip": {"mode": "single","sort": "none"}, + "xTickLabelRotation": -45 + }, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT local_router_name || ' -> ' || remote_router_name as \"Link\",\n igp_metric as \"IGP Metric\",\n COALESCE(te_def_metric, igp_metric) as \"TE Metric\"\nFROM v_ls_links\nWHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false\nORDER BY igp_metric DESC", + "refId": "A" + } + ], + "title": "IGP Metric vs TE Metric Comparison", + "type": "barchart" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": { + "defaults": {"color": {"mode": "palette-classic"}}, + "overrides": [] + }, + "gridPos": {"h": 10,"w": 6,"x": 12,"y": 10}, + "id": 3, + "options": { + "legend": {"displayMode": "list","placement": "bottom"}, + "pieType": "pie", + "reduceOptions": {"calcs": ["lastNotNull"],"fields": 
"","values": true}, + "tooltip": {"mode": "single","sort": "none"} + }, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT COALESCE(admin_group::text, 'None') as \"Admin Group\",\n COUNT(*) as \"Link Count\"\nFROM v_ls_links\nWHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false\nGROUP BY admin_group\nORDER BY \"Link Count\" DESC", + "refId": "A" + } + ], + "title": "Admin Group Distribution", + "type": "piechart" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": { + "defaults": {"color": {"mode": "palette-classic"}}, + "overrides": [] + }, + "gridPos": {"h": 10,"w": 6,"x": 18,"y": 10}, + "id": 4, + "options": { + "legend": {"displayMode": "list","placement": "bottom"}, + "pieType": "pie", + "reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": true}, + "tooltip": {"mode": "single","sort": "none"} + }, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT COALESCE(protection_type, 'None') as \"Protection Type\",\n COUNT(*) as \"Link Count\"\nFROM v_ls_links\nWHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false\nGROUP BY protection_type\nORDER BY \"Link Count\" DESC", + "refId": "A" + } + ], + "title": "Link Protection Types", + "type": "piechart" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": { + "defaults": {"custom": {"align": "auto","displayMode": "auto"}}, + "overrides": [] + }, + "gridPos": {"h": 8,"w": 12,"x": 0,"y": 20}, + "id": 5, + "options": {"footer": {"fields": "","reducer": ["sum"],"show": false},"showHeader": true}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT nodename as \"Node\",\n routerid as \"Router ID\",\n protocol as \"Protocol\",\n sr_capabilities as \"SR Capabilities (SRGB)\"\nFROM v_ls_nodes\nWHERE peer_hash_id = '$peer_hash' AND 
iswithdrawn = false\nORDER BY nodename", + "refId": "A" + } + ], + "title": "SR Node Capabilities", + "type": "table" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": { + "defaults": {"custom": {"align": "auto","displayMode": "auto"}}, + "overrides": [] + }, + "gridPos": {"h": 8,"w": 12,"x": 12,"y": 20}, + "id": 6, + "options": {"footer": {"fields": "","reducer": ["sum"],"show": false},"showHeader": true}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT n.nodename as \"Node\",\n p.prefix::text as \"Prefix\",\n p.prefix_len as \"Len\",\n p.metric as \"Metric\",\n p.sr_prefix_sids as \"Prefix SID\",\n p.protocol::text as \"Protocol\"\nFROM ls_prefixes p\nJOIN ls_nodes n ON n.hash_id = p.local_node_hash_id \n AND n.peer_hash_id = p.peer_hash_id\nWHERE p.peer_hash_id = '$peer_hash' AND p.iswithdrawn = false\nORDER BY n.nodename, p.prefix", + "refId": "A" + } + ], + "title": "SR Prefix SIDs", + "type": "table" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": { + "defaults": {"custom": {"align": "auto","displayMode": "auto"}}, + "overrides": [] + }, + "gridPos": {"h": 8,"w": 12,"x": 0,"y": 28}, + "id": 7, + "options": {"footer": {"fields": "","reducer": ["sum"],"show": false},"showHeader": true}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT local_router_name as \"Local\",\n remote_router_name as \"Remote\",\n sr_adjacency_sids as \"Adjacency SIDs\",\n peer_node_sid as \"Peer Node SID\",\n mpls_proto_mask::text as \"MPLS Proto\"\nFROM v_ls_links\nWHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false\nORDER BY local_router_name, remote_router_name", + "refId": "A" + } + ], + "title": "SR Adjacency SIDs", + "type": "table" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": { + "defaults": {"custom": {"align": 
"auto","displayMode": "auto"}}, + "overrides": [] + }, + "gridPos": {"h": 8,"w": 12,"x": 12,"y": 28}, + "id": 8, + "options": {"footer": {"fields": "","reducer": ["sum"],"show": false},"showHeader": true}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT srlg as \"SRLG Value\",\n COUNT(*) as \"Link Count\",\n string_agg(DISTINCT local_router_name || ' -> ' || remote_router_name, ', ') as \"Links\"\nFROM v_ls_links\nWHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false \n AND srlg IS NOT NULL AND srlg != ''\nGROUP BY srlg\nORDER BY COUNT(*) DESC", + "refId": "A" + } + ], + "title": "SRLG Groups", + "type": "table" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "green","value": null}]} + }, + "overrides": [] + }, + "gridPos": {"h": 4,"w": 5,"x": 0,"y": 36}, + "id": 9, + "options": {"colorMode": "value","graphMode": "area","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT COUNT(*) FROM v_ls_links WHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false AND te_def_metric IS NOT NULL", + "refId": "A" + } + ], + "title": "Links with TE Metric", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "green","value": null}]} + }, + "overrides": [] + }, + "gridPos": {"h": 4,"w": 5,"x": 5,"y": 36}, + "id": 10, + "options": {"colorMode": "value","graphMode": "area","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": 
"auto"}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT COUNT(*) FROM v_ls_links WHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false AND max_link_bw IS NOT NULL AND max_link_bw > 0", + "refId": "A" + } + ], + "title": "Links with Bandwidth", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "green","value": null}]} + }, + "overrides": [] + }, + "gridPos": {"h": 4,"w": 5,"x": 10,"y": 36}, + "id": 11, + "options": {"colorMode": "value","graphMode": "area","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT COUNT(*) FROM v_ls_links WHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false AND srlg IS NOT NULL AND srlg != ''", + "refId": "A" + } + ], + "title": "Links with SRLG", + "type": "stat" + }, + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "green","value": null}]} + }, + "overrides": [] + }, + "gridPos": {"h": 4,"w": 5,"x": 15,"y": 36}, + "id": 12, + "options": {"colorMode": "value","graphMode": "area","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT COUNT(*) FROM v_ls_nodes WHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false AND sr_capabilities IS NOT NULL AND sr_capabilities != ''", + "refId": "A" + } + ], + "title": "Nodes with SR", + "type": "stat" + }, + { + 
"datasource": {"type": "postgres","uid": "obmp_postgres"}, + "fieldConfig": { + "defaults": { + "color": {"mode": "thresholds"}, + "thresholds": {"mode": "absolute","steps": [{"color": "green","value": null}]} + }, + "overrides": [] + }, + "gridPos": {"h": 4,"w": 4,"x": 20,"y": 36}, + "id": 13, + "options": {"colorMode": "value","graphMode": "area","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"}, + "targets": [ + { + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "format": "table", + "rawSql": "SELECT COUNT(*) FROM v_ls_links WHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false AND sr_adjacency_sids IS NOT NULL AND sr_adjacency_sids != ''", + "refId": "A" + } + ], + "title": "Links with Adj SID", + "type": "stat" + }, + { + "gridPos": {"h": 10,"w": 24,"x": 0,"y": 40}, + "id": 14, + "options": { + "code": {"language": "plaintext","showLineNumbers": false,"showMiniMap": false}, + "content": "## Traffic Engineering & Segment Routing Analytics\n\nThis dashboard exposes TE and SR attributes from BGP-LS (RFC 7752) that OpenBMP collects but existing dashboards don't display.\n\n### TE Fields (from ls_links)\n- **admin_group**: Link color/affinity bitmap for RSVP-TE constraints\n- **max_link_bw / max_resv_bw**: Link capacity in bytes/sec\n- **unreserved_bw**: Available bandwidth per priority level\n- **te_def_metric**: TE metric (may differ from IGP metric)\n- **protection_type**: FRR protection (unprotected, shared, dedicated, etc.)\n- **srlg**: Shared Risk Link Group for diverse path computation\n\n### SR Fields\n- **sr_capabilities**: Node SRGB (Segment Routing Global Block) range\n- **sr_prefix_sids**: Prefix SID for SR-MPLS forwarding\n- **sr_adjacency_sids**: Adjacency SIDs for SR-TE path steering\n- **peer_node_sid**: BGP EPE SID (RFC 9086)\n\n### Notes\n- NULL values indicate the router is not advertising that TLV\n- To enable TE metrics on IOS-XR: `mpls 
traffic-eng` under IS-IS\n- To enable SR: `segment-routing mpls` under the IS-IS address-family, plus a `prefix-sid` on the loopback interface", + "mode": "markdown" + }, + "title": "About This Dashboard", + "type": "text" + } + ], + "schemaVersion": 39, + "tags": ["obmp-learning"], + "templating": { + "list": [ + { + "current": {}, + "datasource": {"type": "postgres","uid": "obmp_postgres"}, + "definition": "SELECT __text,__value FROM (\n select peername as __text, peer_hash_id as __value, count(*) as count\n from v_ls_nodes\n group by peername,peer_hash_id) d\nwhere count > 0", + "hide": 0, + "includeAll": false, + "label": "BGP Peer", + "multi": false, + "name": "peer_hash", + "options": [], + "query": "SELECT __text,__value FROM (\n select peername as __text, peer_hash_id as __value, count(*) as count\n from v_ls_nodes\n group by peername,peer_hash_id) d\nwhere count > 0", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": {"from": "now-6h","to": "now"}, + "timepicker": {}, + "timezone": "", + "title": "TE & Segment Routing Analytics", + "uid": "obmp-learn-08", + "version": 1 +} diff --git a/obmp-grafana/dashboards/Learning/topology_anomaly.json b/obmp-grafana/dashboards/Learning/topology_anomaly.json new file mode 100644 index 0000000..e27a47d --- /dev/null +++ b/obmp-grafana/dashboards/Learning/topology_anomaly.json @@ -0,0 +1,235 @@ +{ + "uid": "obmp-learn-09", + "title": "Topology Change & Anomaly Detection", + "tags": ["obmp-learning"], + "editable": true, + "schemaVersion": 39, + "time": { + "from": "now-6h", + "to": "now" + }, + "templating": { + "list": [ + { + "name": "peer_hash", + "label": "BGP Peer", + "type": "query", + "datasource": { + "type": "postgres", + "uid": "obmp_postgres" + }, + "query": "SELECT __text,__value FROM (\n select peername as __text, peer_hash_id as __value, count(*) as count\n from v_ls_nodes\n group by peername,peer_hash_id) d\nwhere count > 0", + "refresh": 1, + "multi": false + } + ] + }, + "panels": [ + { + 
"id": 1, + "title": "Link State Changes Over Time", + "type": "timeseries", + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "rawSql": "SELECT $__timeGroupAlias(timestamp, '5m') as time,\n SUM(CASE WHEN iswithdrawn = false THEN 1 ELSE 0 END) as \"Links Up\",\n SUM(CASE WHEN iswithdrawn = true THEN 1 ELSE 0 END) as \"Links Down\"\nFROM ls_links_log\nWHERE $__timeFilter(timestamp) AND peer_hash_id = '$peer_hash'\nGROUP BY 1 ORDER BY 1", + "format": "time_series", + "refId": "A" + } + ] + }, + { + "id": 2, + "title": "Node Changes Over Time", + "type": "timeseries", + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "rawSql": "SELECT $__timeGroupAlias(timestamp, '5m') as time,\n SUM(CASE WHEN iswithdrawn = false THEN 1 ELSE 0 END) as \"Nodes Appeared\",\n SUM(CASE WHEN iswithdrawn = true THEN 1 ELSE 0 END) as \"Nodes Withdrawn\"\nFROM ls_nodes_log\nWHERE $__timeFilter(timestamp) AND peer_hash_id = '$peer_hash'\nGROUP BY 1 ORDER BY 1", + "format": "time_series", + "refId": "A" + } + ] + }, + { + "id": 3, + "title": "BGP Peer Session Events", + "type": "timeseries", + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "rawSql": "SELECT $__timeGroupAlias(pel.timestamp, '5m') as time,\n SUM(CASE WHEN pel.state = 'up' THEN 1 ELSE 0 END) as \"Sessions Up\",\n SUM(CASE WHEN pel.state = 'down' THEN 1 ELSE 0 END) as \"Sessions Down\"\nFROM peer_event_log pel\nWHERE $__timeFilter(pel.timestamp)\nGROUP BY 1 ORDER BY 1", + "format": "time_series", + "refId": "A" + } + ] + }, + { + "id": 4, + "title": "RIB Update Rate", + "type": "timeseries", + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "rawSql": "SELECT $__timeGroupAlias(timestamp, '5m') as 
time,\n SUM(CASE WHEN iswithdrawn = false THEN 1 ELSE 0 END) as \"Advertisements\",\n SUM(CASE WHEN iswithdrawn = true THEN 1 ELSE 0 END) as \"Withdrawals\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)\nGROUP BY 1 ORDER BY 1", + "format": "time_series", + "refId": "A" + } + ] + }, + { + "id": 5, + "title": "Origin AS Changes (Potential Hijacks)", + "type": "table", + "gridPos": { "h": 10, "w": 12, "x": 0, "y": 16 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "rawSql": "SELECT DISTINCT ON (r1.prefix, r1.prefix_len)\n r1.prefix::text as \"Prefix\",\n r1.prefix_len as \"Len\",\n r1.origin_as as \"Current Origin AS\",\n r2.origin_as as \"Previous Origin AS\",\n r1.timestamp as \"Changed At\"\nFROM ip_rib_log r1\nJOIN ip_rib_log r2 ON r1.prefix = r2.prefix \n AND r1.prefix_len = r2.prefix_len\n AND r1.timestamp > r2.timestamp\nWHERE r1.origin_as != r2.origin_as\n AND $__timeFilter(r1.timestamp)\nORDER BY r1.prefix, r1.prefix_len, r1.timestamp DESC\nLIMIT 50", + "format": "table", + "refId": "A" + } + ] + }, + { + "id": 6, + "title": "Most Churned Prefixes", + "type": "table", + "gridPos": { "h": 10, "w": 12, "x": 12, "y": 16 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "rawSql": "SELECT prefix::text as \"Prefix\",\n prefix_len as \"Len\",\n COUNT(*) as \"Total Updates\",\n SUM(CASE WHEN iswithdrawn THEN 1 ELSE 0 END) as \"Withdrawals\",\n MIN(timestamp) as \"First Seen\",\n MAX(timestamp) as \"Last Change\",\n CASE \n WHEN COUNT(*) <= 2 THEN 'Stable'\n WHEN COUNT(*) <= 10 THEN 'Moderate'\n ELSE 'Unstable'\n END as \"Stability\"\nFROM ip_rib_log\nWHERE $__timeFilter(timestamp)\nGROUP BY prefix, prefix_len\nHAVING COUNT(*) > 1\nORDER BY COUNT(*) DESC\nLIMIT 30", + "format": "table", + "refId": "A" + } + ] + }, + { + "id": 7, + "title": "Recent Link State Changes", + "type": "table", + "gridPos": { "h": 10, "w": 24, "x": 0, "y": 26 }, + "datasource": { "type": "postgres", "uid": 
"obmp_postgres" }, + "targets": [ + { + "rawSql": "SELECT l.timestamp as \"Time\",\n CASE WHEN l.iswithdrawn THEN 'DOWN' ELSE 'UP' END as \"State\",\n ln.name as \"Local Node\",\n l.local_igp_router_id as \"Local IGP ID\",\n rn.name as \"Remote Node\",\n l.remote_igp_router_id as \"Remote IGP ID\",\n l.igp_metric as \"IGP Metric\",\n l.protocol::text as \"Protocol\"\nFROM ls_links_log l\nLEFT JOIN ls_nodes ln ON ln.hash_id = l.local_node_hash_id AND ln.peer_hash_id = l.peer_hash_id\nLEFT JOIN ls_nodes rn ON rn.hash_id = l.remote_node_hash_id AND rn.peer_hash_id = l.peer_hash_id\nWHERE $__timeFilter(l.timestamp) AND l.peer_hash_id = '$peer_hash'\nORDER BY l.timestamp DESC\nLIMIT 50", + "format": "table", + "refId": "A" + } + ] + }, + { + "id": 8, + "title": "Multi-Peer Route Consistency", + "type": "table", + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 36 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "rawSql": "SELECT r.prefix::text as \"Prefix\",\n r.prefix_len as \"Len\",\n COUNT(DISTINCT r.peer_hash_id) as \"Peer Count\",\n COUNT(DISTINCT ba.origin_as) as \"Distinct Origins\",\n COUNT(DISTINCT ba.as_path_count) as \"Distinct Path Lengths\",\n string_agg(DISTINCT ba.origin_as::text, ', ') as \"Origin ASNs\"\nFROM ip_rib r\nJOIN base_attrs ba ON ba.hash_id = r.base_attr_hash_id\nWHERE r.iswithdrawn = false AND r.isipv4 = true\nGROUP BY r.prefix, r.prefix_len\nHAVING COUNT(DISTINCT ba.origin_as) > 1\nORDER BY COUNT(DISTINCT ba.origin_as) DESC\nLIMIT 30", + "format": "table", + "refId": "A" + } + ] + }, + { + "id": 9, + "title": "Active Peers", + "type": "stat", + "gridPos": { "h": 4, "w": 4, "x": 0, "y": 44 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) FROM bgp_peers WHERE state = 'up'", + "format": "table", + "refId": "A" + } + ] + }, + { + "id": 10, + "title": "Total LS Links", + "type": "stat", + "gridPos": { "h": 4, "w": 4, "x": 4, "y": 44 }, + 
"datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) FROM ls_links WHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false", + "format": "table", + "refId": "A" + } + ] + }, + { + "id": 11, + "title": "Total LS Nodes", + "type": "stat", + "gridPos": { "h": 4, "w": 4, "x": 8, "y": 44 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) FROM ls_nodes WHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false", + "format": "table", + "refId": "A" + } + ] + }, + { + "id": 12, + "title": "RIB Updates (24h)", + "type": "stat", + "gridPos": { "h": 4, "w": 4, "x": 12, "y": 44 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) FROM ip_rib_log WHERE timestamp > NOW() - INTERVAL '24 hours'", + "format": "table", + "refId": "A" + } + ] + }, + { + "id": 13, + "title": "Link Changes (24h)", + "type": "stat", + "gridPos": { "h": 4, "w": 4, "x": 16, "y": 44 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "rawSql": "SELECT COUNT(*) FROM ls_links_log WHERE timestamp > NOW() - INTERVAL '24 hours' AND peer_hash_id = '$peer_hash'", + "format": "table", + "refId": "A" + } + ] + }, + { + "id": 14, + "title": "Origin Changes (24h)", + "type": "stat", + "gridPos": { "h": 4, "w": 4, "x": 20, "y": 44 }, + "datasource": { "type": "postgres", "uid": "obmp_postgres" }, + "targets": [ + { + "rawSql": "SELECT COUNT(DISTINCT r1.prefix) FROM ip_rib_log r1\nJOIN ip_rib_log r2 ON r1.prefix = r2.prefix AND r1.prefix_len = r2.prefix_len AND r1.timestamp > r2.timestamp\nWHERE r1.origin_as != r2.origin_as AND r1.timestamp > NOW() - INTERVAL '24 hours'", + "format": "table", + "refId": "A" + } + ] + }, + { + "id": 15, + "title": "About This Dashboard", + "type": "text", + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 36 }, + "options": { + "mode": "markdown", + "content": "## Topology 
Change & Anomaly Detection\n\nThis dashboard provides heuristic analysis of BMP data to detect network anomalies.\n\n### What to Watch For\n- **Link flaps**: Rapid up/down cycles in the *Link State Changes Over Time* panel indicate instability\n- **Origin AS changes**: Could indicate a route hijack or a legitimate migration\n- **Multi-origin prefixes**: The same prefix seen from different origin ASNs across peers (the *Multi-Peer Route Consistency* panel covers IPv4 prefixes only)\n- **Correlated events**: Peer session drops followed by mass withdrawals indicate convergence events\n\n### Testing with ExaBGP Scenarios\n1. Load the `origin_shift` scenario to simulate origin AS changes\n2. Load the `hijack_simulation` scenario to see how shorter paths override legitimate routes\n3. Load/unload the `churn` scenario repeatedly to generate instability patterns\n\n### Data Sources\n- **ls_links_log / ls_nodes_log**: TimescaleDB hypertables tracking all BGP-LS topology changes\n- **ip_rib_log**: All BGP RIB updates and withdrawals with timestamps\n- **peer_event_log**: BGP session state changes (up/down events)" + } + } + ] +}