obmp-docker/obmp-grafana/dashboards/Learning/link_utilization_te.json
sam f23e222bc0 Add Phase 3: TE/SR analytics, anomaly detection, DB schema reference
- 4 new Grafana dashboards:
  - Database Schema Map (obmp-learn-07): interactive schema reference
    with live row counts, relationship diagrams, column details
  - TE & Segment Routing Analytics (obmp-learn-08): exposes BGP-LS TE/SR
    fields (bandwidth, admin groups, SRLG, SR SIDs, protection types)
  - Topology Change & Anomaly Detection (obmp-learn-09): link state
    change tracking, origin AS hijack detection, convergence timeline
  - Link Utilization & TE Thought Experiment (obmp-learn-10): capacity
    data from BGP-LS + streaming telemetry integration guide
- DB_SCHEMA.md: standalone database reference (33 tables, 11 views)
- 3 new ExaBGP scenarios: te_community_steering, origin_shift, path_diversity
- Updated DOCS.md with Phase 3 dashboards and scenarios

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-06 13:31:03 -07:00

239 lines
13 KiB
JSON

{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": { "type": "datasource", "uid": "grafana" },
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": { "type": "postgres", "uid": "obmp_postgres" },
"fieldConfig": {
"defaults": {
"custom": {
"align": "auto",
"cellOptions": { "type": "auto" },
"inspect": false
}
},
"overrides": [
{
"matcher": { "id": "byName", "options": "Max BW (B/s)" },
"properties": [{ "id": "unit", "value": "Bps" }]
},
{
"matcher": { "id": "byName", "options": "Max Reservable BW" },
"properties": [{ "id": "unit", "value": "Bps" }]
}
]
},
"gridPos": { "h": 10, "w": 24, "x": 0, "y": 0 },
"id": 1,
"options": {
"showHeader": true,
"sortBy": [{ "desc": false, "displayName": "Local Router" }]
},
"targets": [
{
"datasource": { "type": "postgres", "uid": "obmp_postgres" },
"format": "table",
"rawQuery": true,
"rawSql": "SELECT local_router_name as \"Local Router\",\n remote_router_name as \"Remote Router\",\n interface_addr::text as \"Interface IP\",\n neighbor_addr::text as \"Neighbor IP\",\n max_link_bw as \"Max BW (B/s)\",\n max_resv_bw as \"Max Reservable BW\",\n unreserved_bw as \"Unreserved BW\",\n igp_metric as \"IGP Metric\",\n te_def_metric as \"TE Metric\"\nFROM v_ls_links\nWHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false\nORDER BY local_router_name, remote_router_name",
"refId": "A"
}
],
"title": "Link Capacity Inventory (from BGP-LS)",
"type": "table"
},
{
"datasource": { "type": "postgres", "uid": "obmp_postgres" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisLabel": "Bandwidth (B/s)",
"fillOpacity": 80,
"gradientMode": "none",
"lineWidth": 1,
"scaleDistribution": { "type": "linear" },
"showValue": "auto",
"stacking": { "group": "A", "mode": "none" }
},
"unit": "Bps"
},
"overrides": []
},
"gridPos": { "h": 10, "w": 12, "x": 0, "y": 10 },
"id": 2,
"options": {
"barRadius": 0.1,
"barWidth": 0.8,
"groupWidth": 0.7,
"legend": { "calcs": [], "displayMode": "list", "placement": "bottom" },
"orientation": "horizontal",
"tooltip": { "mode": "single", "sort": "none" },
"xTickLabelRotation": 0
},
"targets": [
{
"datasource": { "type": "postgres", "uid": "obmp_postgres" },
"format": "table",
"rawQuery": true,
"rawSql": "SELECT local_router_name || ' -> ' || remote_router_name as \"Link\",\n COALESCE(max_link_bw, 0) as \"Max Bandwidth\",\n COALESCE(max_resv_bw, 0) as \"Max Reservable\",\n COALESCE(max_link_bw, 0) - COALESCE(max_resv_bw, 0) as \"Unreserved Gap\"\nFROM v_ls_links\nWHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false\n AND max_link_bw IS NOT NULL AND max_link_bw > 0\nORDER BY max_link_bw DESC",
"refId": "A"
}
],
"title": "Capacity vs Reservable Bandwidth",
"type": "barchart"
},
{
"datasource": { "type": "postgres", "uid": "obmp_postgres" },
"fieldConfig": {
"defaults": {
"color": { "mode": "thresholds" },
"thresholds": {
"mode": "percentage",
"steps": [
{ "color": "green", "value": null },
{ "color": "yellow", "value": 50 },
{ "color": "orange", "value": 75 },
{ "color": "red", "value": 90 }
]
},
"unit": "percentunit",
"max": 1,
"min": 0
},
"overrides": []
},
"gridPos": { "h": 10, "w": 12, "x": 12, "y": 10 },
"id": 3,
"options": {
"minVizHeight": 75,
"minVizWidth": 75,
"orientation": "auto",
"reduceOptions": {
"calcs": ["lastNotNull"],
"fields": "",
"values": true
},
"showThresholdLabels": false,
"showThresholdMarkers": true,
"sizing": "auto"
},
"targets": [
{
"datasource": { "type": "postgres", "uid": "obmp_postgres" },
"format": "table",
"rawQuery": true,
"rawSql": "SELECT local_router_name || ' -> ' || remote_router_name as \"Link\",\n CASE WHEN max_link_bw > 0 \n THEN 1.0 - (COALESCE(max_resv_bw, 0)::float / max_link_bw::float)\n ELSE 0 END as \"Reservation Ratio\"\nFROM v_ls_links\nWHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false\n AND max_link_bw IS NOT NULL AND max_link_bw > 0\nORDER BY \"Reservation Ratio\" DESC\nLIMIT 10",
"refId": "A"
}
],
"title": "Bandwidth Reservation Ratio (Higher = More Reserved)",
"type": "gauge"
},
{
"datasource": { "type": "postgres", "uid": "obmp_postgres" },
"fieldConfig": {
"defaults": {
"color": { "mode": "palette-classic" },
"custom": {
"axisBorderShow": false,
"fillOpacity": 80,
"gradientMode": "none",
"lineWidth": 1,
"showValue": "auto",
"stacking": { "group": "A", "mode": "none" }
}
},
"overrides": []
},
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 20 },
"id": 4,
"options": {
"barRadius": 0.1,
"barWidth": 0.8,
"groupWidth": 0.7,
"legend": { "calcs": [], "displayMode": "list", "placement": "bottom" },
"orientation": "horizontal",
"tooltip": { "mode": "single", "sort": "none" }
},
"targets": [
{
"datasource": { "type": "postgres", "uid": "obmp_postgres" },
"format": "table",
"rawQuery": true,
"rawSql": "SELECT local_router_name || ' -> ' || remote_router_name as \"Link\",\n igp_metric as \"IGP Metric\",\n COALESCE(te_def_metric, 0) as \"TE Default Metric\"\nFROM v_ls_links\nWHERE peer_hash_id = '$peer_hash' AND iswithdrawn = false\nORDER BY igp_metric DESC\nLIMIT 20",
"refId": "A"
}
],
"title": "IGP Metric vs TE Default Metric",
"type": "barchart"
},
{
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 20 },
"id": 5,
"options": {
"mode": "markdown",
"content": "## What-If: CSPF Path Computation\n\nIn a real MPLS-TE or SR-TE deployment, the headend router runs **Constrained Shortest Path First (CSPF)** to find paths that satisfy:\n\n1. **Bandwidth constraint** - Enough unreserved BW at the required priority\n2. **Admin group (affinity)** - Link colors must match include/exclude masks\n3. **SRLG diversity** - Backup path avoids shared risk with primary\n4. **TE metric optimization** - Minimize TE metric (not IGP metric)\n\n### How BGP-LS Enables This\n\nBGP-LS distributes the complete IGP topology **with TE attributes** to an external controller (PCE, SDN controller). The controller can:\n\n- Build a Traffic Engineering Database (TED)\n- Run CSPF with arbitrary constraints\n- Program SR-TE policies via PCEP or gRPC\n\n### Data Available in This Lab\n\n| Attribute | Source | Available? |\n|-----------|--------|------------|\n| Topology (nodes/links) | BGP-LS | Yes |\n| IGP Metric | BGP-LS | Yes |\n| TE Default Metric | BGP-LS TLV 1092 | Check TE table |\n| Max Link BW | BGP-LS TLV 1089 | Check TE table |\n| Max Reservable BW | BGP-LS TLV 1090 | Check TE table |\n| Unreserved BW | BGP-LS TLV 1091 | Check TE table |\n| Admin Group | BGP-LS TLV 1088 | Check TE table |\n| SRLG | BGP-LS TLV 1096 | Check TE table |\n| SR Node SID | BGP-LS TLV 1034 | Check SR table |\n| SR Adj SID | BGP-LS TLV 1099 | Check SR table |"
},
"title": "CSPF & Traffic Engineering Concepts",
"type": "text"
},
{
"gridPos": { "h": 12, "w": 24, "x": 0, "y": 28 },
"id": 6,
"options": {
"mode": "markdown",
"content": "## Integration Guide: Adding Real-Time Link Utilization\n\nBMP/BGP-LS provides **capacity** data (max bandwidth, reservable bandwidth) but NOT real-time **utilization**. To complete the traffic engineering picture, you need streaming telemetry.\n\n### Architecture\n\n```\n +------------------+\n IOS-XR Routers ---->| OpenBMP Collector|----> PostgreSQL (topology + capacity)\n | +------------------+\n | \n +-- gNMI ----->| Telegraf |----> InfluxDB/Prometheus (utilization)\n +------------------+\n |\n +------------------+\n | Grafana | <-- Mixed datasource queries\n +------------------+\n```\n\n### Step 1: Enable Model-Driven Telemetry on IOS-XR\n\n```\ntelemetry model-driven\n sensor-group INTF-COUNTERS\n sensor-path Cisco-IOS-XR-infra-statsd-oper:infra-statistics/interfaces/interface/latest/generic-counters\n !\n subscription INTF-SUB\n sensor-group-id INTF-COUNTERS sample-interval 30000\n destination-id TELEGRAF\n !\n destination-group TELEGRAF\n address-family ipv4 10.40.40.202 port 57000\n encoding self-describing-gpb\n protocol grpc no-tls\n !\n !\n```\n\n### Step 2: Telegraf Configuration\n\n```toml\n[[inputs.cisco_telemetry_mdt]]\n transport = \"grpc\"\n service_address = \":57000\"\n\n[[outputs.influxdb_v2]]\n urls = [\"http://localhost:8086\"]\n token = \"your-token\"\n organization = \"openbmp\"\n bucket = \"telemetry\"\n```\n\n### Step 3: Grafana Mixed Datasource Query\n\nCombine BGP-LS capacity from PostgreSQL with utilization from InfluxDB:\n\n```\n-- PostgreSQL: Get link capacity\nSELECT interface_addr::text as interface, max_link_bw\nFROM v_ls_links WHERE peer_hash_id = '$peer_hash'\n\n-- InfluxDB: Get interface utilization\nfrom(bucket: \"telemetry\")\n |> range(start: -1h)\n |> filter(fn: (r) => r._measurement == \"Cisco-IOS-XR-infra-statsd-oper\")\n |> filter(fn: (r) => r._field == \"bytes-received\" or r._field == \"bytes-sent\")\n |> derivative(unit: 1s, nonNegative: true)\n```\n\n### Step 4: Calculate Utilization %\n\nIn Grafana, use **Transformations** to:\n1. Join PostgreSQL capacity with InfluxDB utilization by interface IP\n2. Add calculated field: `utilization_pct = bytes_per_sec / max_link_bw * 100`\n3. Set threshold alerts: >80% = warning, >95% = critical\n\n### Key gNMI Sensor Paths for IOS-XR\n\n| Sensor Path | Data |\n|-------------|------|\n| `Cisco-IOS-XR-infra-statsd-oper:infra-statistics/interfaces/interface/latest/generic-counters` | Interface byte/packet counters |\n| `Cisco-IOS-XR-infra-statsd-oper:infra-statistics/interfaces/interface/latest/data-rate` | Current data rate (bits/sec) |\n| `Cisco-IOS-XR-mpls-te-oper:mpls-te/tunnels/summary` | MPLS-TE tunnel summary |\n| `Cisco-IOS-XR-ip-rsvp-oper:rsvp/interface-briefs` | RSVP interface reservations |\n| `Cisco-IOS-XR-segment-routing-ms-oper:srms/policy` | SR-MPLS policy state |\n\n### RFC 8571: Performance Metrics via BGP-LS\n\nIf routers support RFC 8571, these metrics flow through BGP-LS automatically:\n- **Unidirectional Link Delay** (TLV 1114) - microseconds\n- **Min/Max Link Delay** (TLV 1115)\n- **Delay Variation (jitter)** (TLV 1116)\n- **Link Loss** (TLV 1117) - percentage\n- **Residual Bandwidth** (TLV 1118)\n- **Available Bandwidth** (TLV 1119)\n- **Utilized Bandwidth** (TLV 1120)\n\nThese would appear in the `ls_links` table if the OpenBMP parser supports them."
},
"title": "Integration Guide: Streaming Telemetry for Link Utilization",
"type": "text"
}
],
"schemaVersion": 39,
"tags": ["obmp-learning"],
"templating": {
"list": [
{
"current": {},
"datasource": { "type": "postgres", "uid": "obmp_postgres" },
"definition": "SELECT __text,__value FROM (\n select peername as __text, peer_hash_id as __value, count(*) as count\n from v_ls_nodes\n group by peername,peer_hash_id) d\nwhere count > 0",
"hide": 0,
"includeAll": false,
"label": "BGP Peer",
"multi": false,
"name": "peer_hash",
"options": [],
"query": "SELECT __text,__value FROM (\n select peername as __text, peer_hash_id as __value, count(*) as count\n from v_ls_nodes\n group by peername,peer_hash_id) d\nwhere count > 0",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": { "from": "now-6h", "to": "now" },
"timepicker": {},
"timezone": "",
"title": "Link Utilization & TE Thought Experiment",
"uid": "obmp-learn-10",
"version": 1
}