From a662496e53a0c7d90e584e9aeaf9ac6ba953babc Mon Sep 17 00:00:00 2001 From: sam Date: Mon, 18 May 2026 21:10:57 -0700 Subject: [PATCH] Fix telemetry dashboard variables and parameterize gNMI targets The telemetry dashboards' router/interface variables used a keep|distinct Flux pattern that returned only one source; switch to schema.tagValues so all streaming routers and interfaces are listed. Parameterize telegraf.conf gNMI addresses and credentials via GNMI_ADDRESSES/GNMI_USERNAME/GNMI_PASSWORD so the telemetry fleet can scale without editing the config. Co-Authored-By: Claude Opus 4.7 (1M context) --- .env.example | 7 +++++++ docker-compose.yml | 5 +++++ .../Telemetry-3001/combined_bmp_telemetry.json | 2 +- .../Telemetry-3001/interface_errors.json | 4 ++-- .../Telemetry-3001/interface_utilization.json | 4 ++-- telegraf/telegraf.conf | 17 +++++++++-------- 6 files changed, 26 insertions(+), 13 deletions(-) diff --git a/.env.example b/.env.example index d10d5f3..b9175fc 100644 --- a/.env.example +++ b/.env.example @@ -25,6 +25,13 @@ PSQL_MEM_LIMIT=6g PSQL_APP_MEM_LIMIT=4g KAFKA_MEM_LIMIT=4g +# gNMI streaming telemetry (telegraf, test profile). GNMI_ADDRESSES is a +# quoted, comma-separated host:port list — add a router here once gNMI/grpc +# is enabled on it and the management path is reachable. +GNMI_ADDRESSES='"10.100.0.100:57400", "10.100.0.200:57400"' +GNMI_USERNAME=changeme +GNMI_PASSWORD=changeme + # --------------------------------------------------------------------------- # ExaBGP route injector (test profile) # --------------------------------------------------------------------------- diff --git a/docker-compose.yml b/docker-compose.yml index 790f2a6..dfeea97 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -284,6 +284,11 @@ services: - influxdb environment: - INFLUXDB_TOKEN=openbmp-telemetry-token + # gNMI fleet — quoted, comma-separated host:port list. 
Default = the two + # ESXi CORE routers; extend via GNMI_ADDRESSES in .env for more routers. + - 'GNMI_ADDRESSES=${GNMI_ADDRESSES:-"10.100.0.100:57400", "10.100.0.200:57400"}' + - GNMI_USERNAME=${GNMI_USERNAME:-webui} + - GNMI_PASSWORD=${GNMI_PASSWORD:-cisco} # --- Phase 4: Traffic Generator --- diff --git a/obmp-grafana/dashboards/Telemetry-3001/combined_bmp_telemetry.json b/obmp-grafana/dashboards/Telemetry-3001/combined_bmp_telemetry.json index 848f883..097bc87 100644 --- a/obmp-grafana/dashboards/Telemetry-3001/combined_bmp_telemetry.json +++ b/obmp-grafana/dashboards/Telemetry-3001/combined_bmp_telemetry.json @@ -48,7 +48,7 @@ "multi": true, "name": "router", "options": [], - "query": "from(bucket: \"telemetry\")\n |> range(start: -1h)\n |> filter(fn: (r) => r._measurement == \"interface_counters\")\n |> keep(columns: [\"source\"])\n |> distinct(column: \"source\")\n |> sort()", + "query": "import \"influxdata/influxdb/schema\"\nschema.tagValues(bucket: \"telemetry\", tag: \"source\", predicate: (r) => r._measurement == \"interface_counters\", start: -1h)", "refresh": 2, "regex": "", "type": "query" diff --git a/obmp-grafana/dashboards/Telemetry-3001/interface_errors.json b/obmp-grafana/dashboards/Telemetry-3001/interface_errors.json index 7d1bf60..b4307b6 100644 --- a/obmp-grafana/dashboards/Telemetry-3001/interface_errors.json +++ b/obmp-grafana/dashboards/Telemetry-3001/interface_errors.json @@ -48,7 +48,7 @@ "multi": true, "name": "router", "options": [], - "query": "from(bucket: \"telemetry\")\n |> range(start: -1h)\n |> filter(fn: (r) => r._measurement == \"interface_counters\")\n |> keep(columns: [\"source\"])\n |> distinct(column: \"source\")\n |> sort()", + "query": "import \"influxdata/influxdb/schema\"\nschema.tagValues(bucket: \"telemetry\", tag: \"source\", predicate: (r) => r._measurement == \"interface_counters\", start: -1h)", "refresh": 2, "regex": "", "type": "query" @@ -66,7 +66,7 @@ "multi": true, "name": "interface", "options": [], - 
"query": "from(bucket: \"telemetry\")\n |> range(start: -1h)\n |> filter(fn: (r) => r._measurement == \"interface_counters\")\n |> filter(fn: (r) => r.source =~ /${router:regex}/)\n |> keep(columns: [\"name\"])\n |> distinct(column: \"name\")\n |> sort()", + "query": "import \"influxdata/influxdb/schema\"\nschema.tagValues(bucket: \"telemetry\", tag: \"name\", predicate: (r) => r._measurement == \"interface_counters\" and r.source =~ /${router:regex}/, start: -1h)", "refresh": 2, "regex": "", "type": "query" diff --git a/obmp-grafana/dashboards/Telemetry-3001/interface_utilization.json b/obmp-grafana/dashboards/Telemetry-3001/interface_utilization.json index af7e1c7..dc4e3f4 100644 --- a/obmp-grafana/dashboards/Telemetry-3001/interface_utilization.json +++ b/obmp-grafana/dashboards/Telemetry-3001/interface_utilization.json @@ -48,7 +48,7 @@ "multi": true, "name": "router", "options": [], - "query": "from(bucket: \"telemetry\")\n |> range(start: -1h)\n |> filter(fn: (r) => r._measurement == \"interface_counters\")\n |> keep(columns: [\"source\"])\n |> distinct(column: \"source\")\n |> sort()", + "query": "import \"influxdata/influxdb/schema\"\nschema.tagValues(bucket: \"telemetry\", tag: \"source\", predicate: (r) => r._measurement == \"interface_counters\", start: -1h)", "refresh": 2, "regex": "", "type": "query" @@ -66,7 +66,7 @@ "multi": true, "name": "interface", "options": [], - "query": "from(bucket: \"telemetry\")\n |> range(start: -1h)\n |> filter(fn: (r) => r._measurement == \"interface_counters\")\n |> filter(fn: (r) => r.source =~ /${router:regex}/)\n |> keep(columns: [\"name\"])\n |> distinct(column: \"name\")\n |> sort()", + "query": "import \"influxdata/influxdb/schema\"\nschema.tagValues(bucket: \"telemetry\", tag: \"name\", predicate: (r) => r._measurement == \"interface_counters\" and r.source =~ /${router:regex}/, start: -1h)", "refresh": 2, "regex": "", "type": "query" diff --git a/telegraf/telegraf.conf b/telegraf/telegraf.conf index 
5aafc16..4df7338 100644 --- a/telegraf/telegraf.conf +++ b/telegraf/telegraf.conf @@ -17,15 +17,16 @@ # INPUT PLUGINS # ############################################################################### -## CORE routers (directly reachable on port 57400 from host) -## R9K routers (10.100.0.1-7) are blocked by CML management network filtering +## gNMI targets — driven by environment variables so the telemetry fleet can +## scale without editing this file. Set in .env: +## GNMI_ADDRESSES — quoted, comma-separated host:port list, e.g. +## GNMI_ADDRESSES='"10.0.0.1:57400", "10.0.0.2:57400"' +## GNMI_USERNAME / GNMI_PASSWORD — gNMI credentials (uniform across the fleet) +## Every target must have gNMI/grpc enabled and be reachable on the gRPC port. [[inputs.gnmi]] - addresses = [ - "10.100.0.100:57400", - "10.100.0.200:57400" - ] - username = "webui" - password = "cisco" + addresses = [ ${GNMI_ADDRESSES} ] + username = "${GNMI_USERNAME}" + password = "${GNMI_PASSWORD}" ## No TLS (lab environment) enable_tls = false