Compare commits
No commits in common. "565ebdbee01f8cfcf3e096a2fa7dc2fb540e455e" and "77665257875ebbfdbc7078546bc06e7d56a5b3ab" have entirely different histories.
565ebdbee0
...
7766525787
@ -339,20 +339,10 @@ services:
|
||||
entrypoint: ["telegraf"]
|
||||
volumes:
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
# Host root, read-only — lets [[inputs.disk]] report the real host
|
||||
# filesystems (Postgres/Kafka/InfluxDB data) instead of the container's.
|
||||
- /:/hostfs:ro
|
||||
depends_on:
|
||||
- influxdb
|
||||
environment:
|
||||
- INFLUXDB_TOKEN=openbmp-telemetry-token
|
||||
# Point gopsutil-based inputs (disk) at the host filesystem mount above.
|
||||
- HOST_MOUNT_PREFIX=/hostfs
|
||||
- HOST_PROC=/hostfs/proc
|
||||
- HOST_SYS=/hostfs/sys
|
||||
- HOST_ETC=/hostfs/etc
|
||||
# PostgreSQL credentials for [[inputs.postgresql_extensible]] (DB size).
|
||||
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-openbmp}
|
||||
# gNMI fleet — quoted, comma-separated host:port list. Default = the two
|
||||
# ESXi CORE routers; extend via GNMI_ADDRESSES in .env for more routers.
|
||||
- 'GNMI_ADDRESSES=${GNMI_ADDRESSES:-"10.100.0.100:57400", "10.100.0.200:57400"}'
|
||||
@ -437,11 +427,6 @@ services:
|
||||
restart: unless-stopped
|
||||
container_name: obmp-gobgp
|
||||
image: jauderho/gobgp:v4.5.0
|
||||
# Host networking: the daemon uses the host's real IPv4 + IPv6 stack, so
|
||||
# both the v4 and v6 eBGP sessions to AS57355 source from the host's
|
||||
# public addresses (no Docker IPv6/NAT plumbing). BMP still reaches the
|
||||
# collector on 10.40.40.202:5000 (its published port).
|
||||
network_mode: host
|
||||
depends_on:
|
||||
- collector
|
||||
# gobgpd reads /config/gobgpd.conf; the same mount carries mrt-refresh.sh
|
||||
@ -450,40 +435,6 @@ services:
|
||||
- ./gobgp:/config
|
||||
command: ["gobgpd", "-f", "/config/gobgpd.conf", "-t", "toml"]
|
||||
|
||||
# GoBGP -- modular EVPN test-route injector (roadmap E5).
# Gated behind the "evpn-test" profile, so a plain `docker compose up` never
# starts it. It originates synthetic BGP EVPN routes and BMP-exports them so
# the EVPN pipeline can be exercised. Start it only for testing:
#   docker compose --profile evpn-test up -d gobgp-evpn
#   bash gobgp-evpn/inject-evpn.sh
gobgp-evpn:
  container_name: obmp-gobgp-evpn
  image: jauderho/gobgp:v4.5.0
  profiles:
    - evpn-test
  restart: unless-stopped
  depends_on:
    - collector
  volumes:
    # gobgpd reads its configuration from this mount.
    - ./gobgp-evpn:/config
  command:
    - gobgpd
    - "-f"
    - /config/gobgpd.conf
    - "-t"
    - toml
|
||||
|
||||
# EVPN consumer -- reads the openbmp.parsed.evpn Kafka topic (already
# populated by the collector) and writes BGP EVPN routes into evpn_rib; the
# stock psql-app does not handle EVPN. Shares the "evpn-test" profile with the
# EVPN test injector:
#   docker compose --profile evpn-test up -d
evpn-consumer:
  container_name: obmp-evpn-consumer
  build:
    context: ./obmp-evpn-consumer
  profiles:
    - evpn-test
  restart: unless-stopped
  depends_on:
    - kafka
    - psql
  environment:
    KAFKA_BROKER: "obmp-kafka:29092"
    EVPN_TOPIC: "openbmp.parsed.evpn"
    PG_DSN: "host=obmp-psql port=5432 dbname=openbmp user=openbmp password=${POSTGRES_PASSWORD:-openbmp}"
|
||||
|
||||
whois:
|
||||
restart: unless-stopped
|
||||
container_name: obmp-whois
|
||||
|
||||
@ -300,45 +300,19 @@ Keep E4 scope minimal until there's a real L3VPN source.
|
||||
### E5. L2VPN / EVPN support — platform-level, not a dashboard task
|
||||
|
||||
L2VPN/EVPN was requested alongside L3VPN. **It cannot be done as a dashboard
|
||||
change.** Research findings on where the gap actually is:
|
||||
change**: this OpenBMP deployment (collector 2.2.3, psql-app 2.2.2) has *no
|
||||
EVPN/L2VPN schema* — the only RIB tables are `ip_rib` (unicast), `l3vpn_rib`
|
||||
(VPNv4/6) and `ls_*` (BGP-LS). EVPN routes (Type-2 MAC/IP, Type-3 IMET,
|
||||
Type-5 IP-prefix) have nowhere to land and are dropped by the consumer.
|
||||
|
||||
- **Collector** (`openbmp/collector`) — *already decodes EVPN*. It has an
|
||||
`EVPN.cpp` parser and emits a parsed `openbmp.parsed.evpn` Kafka topic
|
||||
(RD, ESI, MAC, ethernet-tag, IP, labels, route-targets). No work needed.
|
||||
- **psql-app** (`openbmp/psql-app`) — **drops it**. It never subscribes to
|
||||
`openbmp.parsed.evpn`, has no `EvpnQuery` handler, and the PostgreSQL
|
||||
schema has no EVPN table. This is the whole gap.
|
||||
- **L2VPN-VPLS (AFI 25 / SAFI 65)** — not supported anywhere; only EVPN (AFI 25 / SAFI 70).
|
||||
Supporting it means **upstream platform work**, not Grafana:
|
||||
- EVPN parsing in the collector + psql-app (these are upstream `openbmp/*`
|
||||
images — would need a fork or an upstream contribution).
|
||||
- A new `evpn_rib` / `l2vpn_rib` table + history + stats.
|
||||
- Only then: EVPN dashboards (per-EVI, MAC mobility, RT scoping).
|
||||
|
||||
Two viable paths:
|
||||
1. **Fork the psql-app** (Java): subscribe to the evpn topic, add an
|
||||
`EvpnQuery` class, add an `evpn_rib` table + history/stats. Keeps one
|
||||
unified schema; cost is owning a Java fork of a slow-moving upstream and
|
||||
inheriting the collector's older EVPN parser (likely no RFC 9251/9572
|
||||
route types).
|
||||
2. **Run GoBMP** (`sbezverk/gobmp`, Go) as a second collector — strongest,
|
||||
most current EVPN decoding — plus a thin Kafka→Postgres consumer landing
|
||||
an `evpn_rib` table. Less code than the Java fork, but two collectors and
|
||||
two ingest paths.
|
||||
|
||||
Recommended: path 2 for fastest EVPN visibility; path 1 if a single unified
|
||||
OpenBMP schema outweighs the extra effort. Either way, then build EVPN
|
||||
dashboards (per-EVI, MAC mobility, RT scoping).
|
||||
|
||||
**Status — lab-testable scope DONE (path 1, type-2/3):**
|
||||
- `evpn_rib` table — `postgres/scripts/007_obmp_evpn.sql`.
|
||||
- `gobgp-evpn` — profile-gated synthetic EVPN injector (`evpn-test` profile).
|
||||
- `obmp-evpn-consumer` — standalone Python consumer, `openbmp.parsed.evpn`
|
||||
→ `evpn_rib` (the gap path 1 describes, done without forking the Java
|
||||
psql-app — a small isolated container instead).
|
||||
- `EVPN RIB` Grafana dashboard (OBMP-L3VPN folder).
|
||||
- Verified end to end with synthetic type-2/type-3 routes.
|
||||
|
||||
**Known limitation:** collector 2.2.3 **mis-decodes EVPN type-5** (IP-prefix)
|
||||
— the prefix corrupts the RD field — so type-5 is not ingested. Full type-5
|
||||
support still needs path 2 (GoBMP) or a newer/fixed collector. Real EVPN
|
||||
(vs the synthetic injector) also needs an EVPN-capable BMP source — the CML
|
||||
IOS-XR lab has none.
|
||||
Recommend treating E5 as a research spike first: confirm whether any current
|
||||
OpenBMP release adds EVPN, versus the cost of carrying a fork.
|
||||
|
||||
### E-scale. PostgreSQL sizing for a full table
|
||||
|
||||
|
||||
@ -14,30 +14,11 @@ Derived from the OpenBMP `psql-app` sizing guidance and measured lab behavior.
|
||||
| Routes per full feed | ~1.2M (≈1M IPv4 + ~0.2M IPv6) |
|
||||
| **Estimated total NLRIs** | **~100–150M** in Adj-RIB-In |
|
||||
| Telemetry | gNMI via Telegraf → InfluxDB, ~50–200 interfaces/router, 10 s interval |
|
||||
| History retention | `ip_rib_log` 2 months, LS logs 8 weeks, `peer_event_log` 4 months (lab policy defaults; tunable) |
|
||||
| History retention | `ip_rib_log` 4 weeks, LS logs 4 months, `peer_event_log` 1 year |
|
||||
|
||||
The NLRI estimate (40 × ~2.5 feeds × 1.2M) places this deployment at the top
|
||||
of the OpenBMP `psql-app` guidance tier (150M NLRIs → 64 GB heap).
|
||||
|
||||
## Measured data point (lab, 2026)
|
||||
|
||||
Real numbers from the lab after adding **one** full-table feed (GoBGP →
|
||||
AS57355, ~1.04M IPv4 + ~0.25M IPv6 routes):
|
||||
|
||||
| Metric | Before feed | After 1 full feed |
|
||||
|--------|-------------|-------------------|
|
||||
| `openbmp` DB size | ~25 GB | **~30 GB** |
|
||||
| `ip_rib` (current state) | small | 5.3 GB |
|
||||
| `ip_rib_log` (history hypertable) | — | 7.75 GB, 82/97 chunks compressed |
|
||||
| `base_attrs` | ~1 GB | 2.3 GB |
|
||||
| `geo_ip` (fixed reference data) | 8.8 GB | 8.8 GB |
|
||||
|
||||
So **one full feed ≈ +5 GB current-state**, plus history that accrues against
|
||||
the 2-month `ip_rib_log` retention. The ~1.3M-route initial dump ingested in
|
||||
minutes with no Kafka consumer lag. Extrapolating linearly, 40 routers × ~2.5
|
||||
feeds ≈ 100 feed-equivalents → on the order of **0.5 TB current state** before
|
||||
history and indexes; the 2–4 TB storage target below holds with headroom.
|
||||
|
||||
## BMP RIB scope — recommendation
|
||||
|
||||
**Deploy with Adj-RIB-In only.** It is the OpenBMP default, is what every
|
||||
@ -67,15 +48,14 @@ advertises. Alternatives and their cost:
|
||||
|
||||
| Store | Lab today | Production target | Notes |
|
||||
|-------|-----------|-------------------|-------|
|
||||
| **PostgreSQL** | 30 GB | **2–4 TB NVMe SSD** | `ip_rib` current state (~100–150M rows) + `ip_rib_log` history (2-month retention, the dominant grower) + `base_attrs` + `geo_ip` (~9 GB fixed). OpenBMP guidance: 500 GB main + 1 TB TimescaleDB; add headroom. |
|
||||
| **PostgreSQL** | 25 GB | **2–4 TB NVMe SSD** | `ip_rib` current state (~100–150M rows) + `ip_rib_log` history (4-week retention, the dominant grower) + `base_attrs` + `geo_ip` (~7 GB fixed). OpenBMP guidance: 500 GB main + 1 TB TimescaleDB; add headroom. |
|
||||
| **Kafka** | 0.2 GB | **100–500 GB** | 12 h retention; sized for full-table initial-dump bursts × 40 routers |
|
||||
| **InfluxDB (telemetry)** | minimal | **50–200 GB** | 40 routers × ~50–200 interfaces × 10 s gNMI × 30 d; compresses well |
|
||||
| **Total** | — | **~3–5 TB fast NVMe** | Use NVMe; PostgreSQL random-IO under churn is the bottleneck on slow disks |
|
||||
|
||||
Put the PostgreSQL data directory and the TimescaleDB tablespace on NVMe.
|
||||
`ip_rib_log` retention (2 months in the lab) is the main storage tuning knob
|
||||
— revisit once production update volume is measured; halving it roughly
|
||||
halves the dominant history table.
|
||||
`ip_rib_log` 4-week retention is the main storage tuning knob — revisit once
|
||||
production update volume is measured.
|
||||
|
||||
## Architecture
|
||||
|
||||
@ -92,36 +72,6 @@ Whichever layout: every service already carries a Compose `mem_limit` — raise
|
||||
`PSQL_MEM_LIMIT` / `PSQL_APP_MEM_LIMIT` / `KAFKA_MEM_LIMIT` in `.env` for the
|
||||
production hosts.
|
||||
|
||||
## Horizontal scaling — where it actually helps
|
||||
|
||||
The ingestion bottleneck is **not** the collector or Kafka — it is the
|
||||
`psql-app` consumer writing to PostgreSQL, and ultimately **disk IOPS**.
|
||||
Plan scaling accordingly:
|
||||
|
||||
- **Scale `psql-app` as a Kafka consumer group.** Run multiple `psql-app`
|
||||
containers with the **same group ID**; Kafka rebalances partitions across
|
||||
them and fails over automatically. This is the real throughput lever and
|
||||
also provides HA. **Hard cap = Kafka partition count** — the compose sets
|
||||
`KAFKA_NUM_PARTITIONS: 8`, so ≤ 8 useful instances. **Raise the partition
|
||||
count before scaling past a few consumers** — it cannot easily be reduced
|
||||
later.
|
||||
- **Disk IOPS is the named bottleneck.** Target **≥ 5000 IOPS** (NVMe) for
|
||||
the PostgreSQL store; this buys more headroom than any container count.
|
||||
- **Multiple collectors are an HA / locality decision, not a throughput
|
||||
one.** A BMP session is one stateful TCP connection and cannot be load
|
||||
balanced — you distribute routers by pointing each router's `bmp server`
|
||||
config at a specific collector. All collectors feed one Kafka. Shard
|
||||
collectors for fault isolation / POP locality, not for performance, and
|
||||
note a dead collector's routers go dark until reconfigured (no automatic
|
||||
failover at the collector tier).
|
||||
- Within one `psql-app`, writer threads already auto-scale per type
|
||||
(`writer_max_threads_per_type`); the consumer-group is the across-instance
|
||||
layer on top.
|
||||
|
||||
Bursts (every collector restart triggers simultaneous full-table dumps from
|
||||
all peers) are absorbed by Kafka — size Kafka retention so a slow consumer
|
||||
never loses data during a convergence storm.
|
||||
|
||||
## PostgreSQL tuning
|
||||
|
||||
- `shared_buffers` ≈ 25% of host RAM; large `effective_cache_size`.
|
||||
|
||||
@ -1,45 +0,0 @@
|
||||
# gobgp-evpn — modular EVPN test-route injector
|
||||
|
||||
A **profile-gated, non-production** GoBGP instance for exercising the EVPN
|
||||
ingestion pipeline (roadmap E5). The CML IOS-XR lab cannot originate EVPN
|
||||
routes, so this container synthesises them.
|
||||
|
||||
## What it does
|
||||
|
||||
`gobgp-evpn` runs GoBGP with no BGP peers, BMP-exporting its local RIB
|
||||
(`route-monitoring-policy = local-rib`) to the OpenBMP collector. Routes
|
||||
injected with `inject-evpn.sh` are parsed by the collector and published to
|
||||
the `openbmp.parsed.evpn` Kafka topic, where the EVPN consumer picks them up
|
||||
and writes the `evpn_rib` table.
|
||||
|
||||
## Usage
|
||||
|
||||
```sh
|
||||
# start the injector (not started by a normal `docker compose up`)
|
||||
docker compose --profile evpn-test up -d gobgp-evpn
|
||||
|
||||
# push synthetic type-2 / type-3 EVPN routes (type-5 is intentionally not injected — see "Collector type-5 limitation" below)
|
||||
bash gobgp-evpn/inject-evpn.sh
|
||||
|
||||
# inspect what GoBGP holds
|
||||
docker exec obmp-gobgp-evpn gobgp global rib -a evpn
|
||||
|
||||
# stop it when done testing
|
||||
docker compose --profile evpn-test stop gobgp-evpn
|
||||
```
|
||||
|
||||
## Notes
|
||||
|
||||
- Local AS 65010, router-id 10.40.40.251 — distinct from the production
|
||||
`gobgp` global-table feed (AS 65001).
|
||||
- It is *not* part of the default stack: the `evpn-test` Compose profile
|
||||
keeps it out of production and lets it be started/stopped on demand.
|
||||
|
||||
## Collector type-5 limitation
|
||||
|
||||
The OpenBMP collector 2.2.3 parses EVPN **type-2 (MAC/IP)** and **type-3
|
||||
(inclusive multicast)** cleanly, but **mis-decodes type-5 (IP-prefix)**: the
|
||||
IP prefix bleeds into the RD field on the `openbmp.parsed.evpn` topic
|
||||
(observed garbage RDs such as `6154:3523870730`). `inject-evpn.sh` therefore
|
||||
injects only type-2 and type-3. Full type-5 support needs a newer collector
|
||||
or the GoBMP path — see `docs/ROADMAP.md` Track E (E5).
|
||||
@ -1,29 +0,0 @@
|
||||
# GoBGP -- modular EVPN test-route injector (roadmap E5)
|
||||
#
|
||||
# A profile-gated, throwaway GoBGP instance whose only job is to originate
|
||||
# synthetic BGP EVPN routes and BMP-export them to the OpenBMP collector, so
|
||||
# the EVPN ingestion pipeline (collector -> Kafka openbmp.parsed.evpn ->
|
||||
# evpn-consumer -> evpn_rib) can be exercised. NOT a production component --
|
||||
# start it only when testing:
|
||||
# docker compose --profile evpn-test up -d gobgp-evpn
|
||||
# bash gobgp-evpn/inject-evpn.sh
|
||||
#
|
||||
# It has no BGP peers; routes are injected straight into the local RIB, so
|
||||
# BMP export uses route-monitoring-policy = local-rib.
|
||||
|
||||
[global]
|
||||
[global.config]
|
||||
as = 65010
|
||||
router-id = "10.40.40.251"
|
||||
# No inbound BGP listener -- we only originate locally and BMP-export.
|
||||
port = -1
|
||||
|
||||
# --- BMP export to the OpenBMP collector ------------------------------------
|
||||
[[bmp-servers]]
|
||||
[bmp-servers.config]
|
||||
address = "10.40.40.202"
|
||||
port = 5000
|
||||
# local-rib: the injected EVPN routes live in the loc-rib (there are no
|
||||
# BGP peers / no adj-rib-in), so export the local RIB.
|
||||
route-monitoring-policy = "local-rib"
|
||||
statistics-timeout = 3600
|
||||
@ -1,34 +0,0 @@
|
||||
#!/usr/bin/env bash
#
# inject-evpn.sh -- seed the gobgp-evpn instance with synthetic BGP EVPN
# routes so the EVPN ingestion pipeline can be tested end to end.
#
# Usage (from the docker host, once the injector is running):
#   docker compose --profile evpn-test up -d gobgp-evpn
#   bash gobgp-evpn/inject-evpn.sh
#
# The routes are added to gobgp-evpn's local RIB, BMP-exported to the
# collector (route-monitoring-policy = local-rib) and parsed onto the
# openbmp.parsed.evpn Kafka topic. Running the script again is harmless
# (GoBGP de-dupes identical routes).

set -euo pipefail

# Add one EVPN route to the injector's global RIB; all arguments are passed
# through to `gobgp global rib add -a evpn`.
add_evpn_route() {
  docker exec obmp-gobgp-evpn gobgp global rib add -a evpn "$@"
}

echo "Injecting EVPN type-2 (MAC/IP advertisement) routes..."
add_evpn_route macadv aa:bb:cc:00:00:01 10.200.10.1 etag 100 label 10100 rd 65010:100 rt 65010:100 encap vxlan
add_evpn_route macadv aa:bb:cc:00:00:02 10.200.10.2 etag 100 label 10100 rd 65010:100 rt 65010:100 encap vxlan
add_evpn_route macadv aa:bb:cc:00:00:03 10.200.20.1 etag 200 label 10200 rd 65010:200 rt 65010:200 encap vxlan

echo "Injecting EVPN type-3 (inclusive multicast) routes..."
add_evpn_route multicast 10.40.40.251 etag 100 rd 65010:100 rt 65010:100
add_evpn_route multicast 10.40.40.251 etag 200 rd 65010:200 rt 65010:200

# NOTE: EVPN type-5 (IP-prefix) routes are deliberately left out.
# OpenBMP collector 2.2.3 handles type-2 (MAC/IP) and type-3 (multicast)
# cleanly but mis-decodes the type-5 NLRI: the IP prefix bleeds into the RD
# field (observed RDs like '6154:3523870730'). Type-5 visibility needs a
# newer collector or the GoBMP path -- see docs/ROADMAP.md E5.

echo
echo "Current EVPN RIB on gobgp-evpn:"
docker exec obmp-gobgp-evpn gobgp global rib -a evpn
|
||||
@ -14,10 +14,9 @@
|
||||
[global.config]
|
||||
as = 65001
|
||||
router-id = "10.40.40.250"
|
||||
# We only originate outbound sessions to the route server; disable the
|
||||
# inbound BGP listener (port -1) so the daemon needs no privileged
|
||||
# (<1024) bind -- required under docker network_mode: host.
|
||||
port = -1
|
||||
# Listen for inbound BGP on the standard port. We only originate
|
||||
# outbound sessions, but the daemon still needs a listen port.
|
||||
port = 179
|
||||
|
||||
# --- Neighbor: route server, IPv4 feed --------------------------------------
|
||||
# The IPv4 transport session carries the full IPv4 table only.
|
||||
|
||||
@ -1,8 +0,0 @@
|
||||
FROM python:3.12-slim
|
||||
|
||||
WORKDIR /app
|
||||
COPY requirements.txt .
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
COPY consumer.py .
|
||||
|
||||
CMD ["python", "-u", "consumer.py"]
|
||||
@ -1,223 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""obmp-evpn-consumer — OpenBMP EVPN -> PostgreSQL (roadmap E5).
|
||||
|
||||
Subscribes to the Kafka topic `openbmp.parsed.evpn` (the OpenBMP collector
|
||||
already decodes EVPN and publishes it there) and writes BGP EVPN routes into
|
||||
the `evpn_rib` table. The stock openbmp/psql-app never consumes this topic;
|
||||
this process fills that gap.
|
||||
|
||||
Field positions are pinned to the collector 2.2.3 / message-bus v1.7 layout,
|
||||
verified off the live topic. The collector parses EVPN type-2 (MAC/IP) and
|
||||
type-3 (inclusive multicast) cleanly; type-5 (IP-prefix) is mis-decoded
|
||||
upstream and is not relied on here.
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import psycopg2
|
||||
from psycopg2.extras import execute_values
|
||||
from confluent_kafka import Consumer, KafkaException
|
||||
|
||||
# --- Runtime configuration (each value can be overridden via the environment) --
KAFKA_BROKER = os.environ.get("KAFKA_BROKER", "obmp-kafka:29092")
TOPIC = os.environ.get("EVPN_TOPIC", "openbmp.parsed.evpn")
GROUP_ID = os.environ.get("EVPN_GROUP", "evpn-psql")
PG_DSN = os.environ.get(
    "PG_DSN", "host=obmp-psql port=5432 dbname=openbmp user=openbmp password=openbmp"
)
# Minimum time (seconds) between database flushes. Defaults to the previous
# fixed value of 2.0; set EVPN_BATCH_SECONDS to tune it without a rebuild.
BATCH_SECONDS = float(os.environ.get("EVPN_BATCH_SECONDS", "2.0"))

# 0-indexed field positions in a parsed EVPN data row (collector 2.2.3, v1.7).
F_ACTION = 0
F_HASH = 2
F_BASE_ATTR = 5
F_PEER_HASH = 6
F_TIMESTAMP = 9
F_ORIGIN_AS = 13
F_EXT_COMM = 19
F_PATH_ID = 24
F_RD = 27
F_RD_TYPE = 28
F_ORIG_RTR_IP = 30
F_ETH_TAG = 31
F_ESI = 32
F_MAC_LEN = 33
F_MAC = 34
F_IP_LEN = 35
F_IP = 36
F_LABEL1 = 37
F_LABEL2 = 38
# A row must reach the highest index read above to be usable (38 + 1 = 39).
MIN_FIELDS = F_LABEL2 + 1
|
||||
|
||||
|
||||
def log(msg):
    """Print ``msg`` to stdout, prefixed with the current UTC time, and flush."""
    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    line = f"[{stamp}] {msg}"
    print(line, flush=True)
|
||||
|
||||
|
||||
def nz(s):
    """Return ``s`` stripped of surrounding whitespace, or None when nothing is left.

    Falsy input (None, "") also yields None.
    """
    if not s:
        return None
    trimmed = s.strip()
    return trimmed if trimmed else None
|
||||
|
||||
|
||||
def to_int(s):
    """Parse a decimal integer field; return None for empty or non-numeric text."""
    text = (s or "").strip()
    if not text:
        return None
    try:
        value = int(text)
    except ValueError:
        return None
    return value
|
||||
|
||||
|
||||
def hex_to_int(s):
    """Parse a hexadecimal integer field; return None for empty or invalid text."""
    text = (s or "").strip()
    if not text:
        return None
    try:
        value = int(text, 16)
    except ValueError:
        return None
    return value
|
||||
|
||||
|
||||
def parse_rts(field):
    """Extract route targets from an ext-community field.

    The field looks like 'rt=65010:100 encap=8'; only the values of the
    'rt=' tokens are kept. Returns None when no route target is present.
    """
    prefix = "rt="
    targets = []
    for token in (field or "").split():
        if token.startswith(prefix):
            targets.append(token[len(prefix):])
    if not targets:
        return None
    return targets
|
||||
|
||||
|
||||
def derive_route_type(mac, orig_rtr_ip):
    """Infer the EVPN route type from which fields are populated.

    A MAC means type 2 (MAC/IP advertisement); otherwise an originating
    router IP means type 3 (inclusive multicast); everything else is
    labelled type 5 (IP-prefix).
    """
    return 2 if mac else (3 if orig_rtr_ip else 5)
|
||||
|
||||
|
||||
def parse_message(raw):
    """Split a raw OpenBMP message into its tab-separated data rows.

    The message is 'K: V' header lines, a blank line, then the rows. Returns
    a list of field lists; body lines without a tab are skipped, and a
    message with no blank-line separator yields an empty list.
    """
    decoded = raw.decode("utf-8", errors="replace")
    _headers, separator, payload = decoded.partition("\n\n")
    if not separator:
        return []
    rows = []
    for line in payload.splitlines():
        if "\t" in line:
            rows.append(line.split("\t"))
    return rows
|
||||
|
||||
|
||||
def row_to_record(r):
    """Map one split EVPN data row onto a dict keyed by evpn_rib column names.

    Returns None when the row has fewer than MIN_FIELDS fields. The extra
    'action' key carries the lower-cased action field so the caller can tell
    withdraws from adds.
    """
    if len(r) < MIN_FIELDS:
        return None

    mac_addr = nz(r[F_MAC])
    originator_ip = nz(r[F_ORIG_RTR_IP])
    rd_text = r[F_RD].strip()

    record = dict(
        action=r[F_ACTION].strip().lower(),
        hash_id=r[F_HASH].strip(),
        peer_hash_id=r[F_PEER_HASH].strip(),
        base_attr_hash_id=nz(r[F_BASE_ATTR]),
        # An empty RD falls back to the "0:0" placeholder.
        rd=rd_text if rd_text else "0:0",
        rd_type=to_int(r[F_RD_TYPE]),
        route_type=derive_route_type(mac_addr, originator_ip),
        origin_as=to_int(r[F_ORIGIN_AS]),
        eth_segment_id=nz(r[F_ESI]),
        # The ethernet tag is parsed as hexadecimal, unlike the other numbers.
        eth_tag_id=hex_to_int(r[F_ETH_TAG]),
        mac=mac_addr,
        mac_len=to_int(r[F_MAC_LEN]),
        ip=nz(r[F_IP]),
        ip_len=to_int(r[F_IP_LEN]),
        orig_router_ip=originator_ip,
        mpls_label1=to_int(r[F_LABEL1]),
        mpls_label2=to_int(r[F_LABEL2]),
        ext_community_list=parse_rts(r[F_EXT_COMM]),
        path_id=to_int(r[F_PATH_ID]),
        timestamp=nz(r[F_TIMESTAMP]),
    )
    return record
|
||||
|
||||
|
||||
# Columns written for every add/update, in the order the value tuples use.
INSERT_COLS = (
    "hash_id", "peer_hash_id", "base_attr_hash_id", "rd", "rd_type", "route_type",
    "origin_as", "eth_segment_id", "eth_tag_id", "mac", "mac_len", "ip", "ip_len",
    "orig_router_ip", "mpls_label1", "mpls_label2", "ext_community_list", "path_id",
    "timestamp",
)

# Conflict target of the upsert; these columns identify the row and are
# therefore never overwritten by DO UPDATE.
_CONFLICT_KEY = ("peer_hash_id", "hash_id")

# Generated from INSERT_COLS so the SET list cannot drift from the insert list
# when a column is added or removed.
_UPDATE_SET = ", ".join(
    f"{col} = EXCLUDED.{col}" for col in INSERT_COLS if col not in _CONFLICT_KEY
)

# Upsert: `%s` is the placeholder psycopg2's execute_values expands into the
# VALUES list. A re-announced route always clears the withdrawn flag.
INSERT_SQL = f"""
    INSERT INTO evpn_rib ({", ".join(INSERT_COLS)}, iswithdrawn)
    VALUES %s
    ON CONFLICT ({", ".join(_CONFLICT_KEY)}) DO UPDATE SET
        {_UPDATE_SET}, iswithdrawn = false
"""

# A withdraw keeps the row but flags it and drops its attribute reference.
# Parameters: (timestamp, peer_hash_id, hash_id).
DELETE_SQL = """
    UPDATE evpn_rib SET iswithdrawn = true, base_attr_hash_id = NULL, timestamp = %s
    WHERE peer_hash_id = %s AND hash_id = %s
"""
|
||||
|
||||
|
||||
def flush(conn, adds, dels):
    """Write one batch to evpn_rib in a single transaction.

    ``adds`` are upserted first, then every record in ``dels`` marks its row
    withdrawn. Does nothing when both lists are empty. Any psycopg2 error
    propagates to the caller with nothing committed.
    """
    if not adds and not dels:
        return

    # PostgreSQL refuses an INSERT ... ON CONFLICT DO UPDATE that would touch
    # the same row twice in one statement ("cannot affect row a second time").
    # A batch can hold several updates for one route, so keep only the newest
    # record per conflict key before building the VALUES list.
    newest = {}
    for rec in adds:
        newest[(rec["peer_hash_id"], rec["hash_id"])] = rec
    rows = [tuple(rec[c] for c in INSERT_COLS) + (False,) for rec in newest.values()]

    with conn.cursor() as cur:
        if rows:
            execute_values(cur, INSERT_SQL, rows)
        for rec in dels:
            cur.execute(DELETE_SQL, (rec["timestamp"], rec["peer_hash_id"], rec["hash_id"]))
    conn.commit()
    log(f"flushed {len(adds)} add/update, {len(dels)} withdraw")
|
||||
|
||||
|
||||
def connect_pg():
    """Block until PostgreSQL is reachable and evpn_rib exists; return the connection.

    Retries every 5 seconds on any psycopg2 error, including a missing
    evpn_rib table. The returned connection has autocommit disabled and no
    open transaction.
    """
    while True:
        conn = None
        try:
            conn = psycopg2.connect(PG_DSN)
            conn.autocommit = False
            with conn.cursor() as cur:
                # LIMIT 0 returns no rows; this only checks the table exists.
                cur.execute("SELECT 1 FROM evpn_rib LIMIT 0")
            # End the probe's implicit transaction so the session does not
            # sit idle-in-transaction until the first flush commits.
            conn.rollback()
            log("connected to PostgreSQL; evpn_rib present")
            return conn
        except psycopg2.Error as e:
            # The connect may have succeeded and the probe failed: close the
            # half-open connection instead of abandoning it on each retry.
            if conn is not None:
                try:
                    conn.close()
                except psycopg2.Error:
                    pass  # best-effort cleanup; the retry below is what matters
            log(f"PostgreSQL not ready ({e}); retrying in 5s")
            time.sleep(5)
|
||||
|
||||
|
||||
def main():
    """Consume the EVPN topic forever, batching rows into PostgreSQL.

    Rows are buffered and flushed once at least BATCH_SECONDS have passed
    since the previous flush. Kafka offsets are committed only after a
    successful flush, so a failed write is retried with the same batch after
    reconnecting. A Kafka error raises KafkaException; Ctrl-C shuts down
    cleanly.
    """
    log(f"starting — kafka={KAFKA_BROKER} topic={TOPIC} group={GROUP_ID}")
    conn = connect_pg()
    consumer = Consumer({
        "bootstrap.servers": KAFKA_BROKER,
        "group.id": GROUP_ID,
        "auto.offset.reset": "earliest",
        "enable.auto.commit": False,
    })
    consumer.subscribe([TOPIC])

    # Pending work is keyed by (peer_hash_id, hash_id) so the per-route order
    # of events inside a batch survives: flush() applies all adds before all
    # withdraws, which would otherwise leave a route that was withdrawn and
    # then re-announced marked as withdrawn.
    latest_add = {}  # key -> newest add/update record
    withdrawn = {}   # key -> withdraw record, only while it is the newest event
    last_flush = time.time()
    try:
        while True:
            msg = consumer.poll(1.0)
            if msg is not None:
                if msg.error():
                    raise KafkaException(msg.error())
                for row in parse_message(msg.value()):
                    rec = row_to_record(row)
                    if rec is None:
                        continue
                    key = (rec["peer_hash_id"], rec["hash_id"])
                    if rec["action"] == "del":
                        withdrawn[key] = rec
                    else:
                        latest_add[key] = rec
                        # A re-announcement supersedes an earlier withdraw.
                        withdrawn.pop(key, None)

            if (latest_add or withdrawn) and time.time() - last_flush >= BATCH_SECONDS:
                try:
                    flush(conn, list(latest_add.values()), list(withdrawn.values()))
                except psycopg2.Error as e:
                    log(f"DB write failed ({e}); reconnecting")
                    # Release the broken connection before replacing it.
                    try:
                        conn.close()
                    except psycopg2.Error:
                        pass  # already unusable; nothing more to clean up
                    conn = connect_pg()
                    # Batch is kept and offsets stay uncommitted, so it is retried.
                    continue
                consumer.commit(asynchronous=False)
                latest_add.clear()
                withdrawn.clear()
                last_flush = time.time()
    except KeyboardInterrupt:
        log("shutting down")
    finally:
        consumer.close()
        conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@ -1,2 +0,0 @@
|
||||
confluent-kafka==2.5.3
|
||||
psycopg2-binary==2.9.9
|
||||
@ -1,107 +0,0 @@
|
||||
{
|
||||
"annotations": {"list": [{"builtIn": 1,"datasource": {"type": "datasource","uid": "grafana"},"enable": true,"hide": true,"iconColor": "rgba(0, 211, 255, 1)","name": "Annotations & Alerts","type": "dashboard"}]},
|
||||
"description": "Disk-space, PostgreSQL database/table growth, and GoBGP global-feed health. Disk and DB metrics come from Telegraf -> InfluxDB; feed health is read live from PostgreSQL. Watch this when the full-table feed is ingesting — the RIB grows fast.",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 1,
|
||||
"id": null,
|
||||
"links": [{"asDropdown": true,"icon": "external link","includeVars": true,"keepTime": true,"tags": ["obmp-nav"],"title": "OBMP Dashboards","type": "dashboards"}],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {"type": "influxdb","uid": "obmp_influxdb"},
|
||||
"description": "Current size of the openbmp PostgreSQL database.",
|
||||
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"unit": "decbytes","thresholds": {"mode": "absolute","steps": [{"color": "blue","value": null}]}},"overrides": []},
|
||||
"gridPos": {"h": 4,"w": 8,"x": 0,"y": 0},
|
||||
"id": 1,
|
||||
"options": {"colorMode": "value","graphMode": "area","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
|
||||
"pluginVersion": "9.1.7",
|
||||
"targets": [{"datasource": {"type": "influxdb","uid": "obmp_influxdb"},"query": "from(bucket: \"telemetry\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._measurement == \"postgresql_db_size\" and r._field == \"bytes\")\n |> last()\n |> keep(columns: [\"_time\", \"_value\"])","refId": "A"}],
|
||||
"title": "openbmp Database Size","type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "influxdb","uid": "obmp_influxdb"},
|
||||
"description": "Highest filesystem utilisation across monitored host volumes. Orange >80%, red >95%.",
|
||||
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"unit": "percent","min": 0,"max": 100,"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "orange","value": 80},{"color": "red","value": 95}]}},"overrides": []},
|
||||
"gridPos": {"h": 4,"w": 8,"x": 8,"y": 0},
|
||||
"id": 2,
|
||||
"options": {"colorMode": "value","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
|
||||
"pluginVersion": "9.1.7",
|
||||
"targets": [{"datasource": {"type": "influxdb","uid": "obmp_influxdb"},"query": "from(bucket: \"telemetry\")\n |> range(start: -10m)\n |> filter(fn: (r) => r._measurement == \"disk\" and r._field == \"used_percent\")\n |> last()\n |> max()\n |> keep(columns: [\"_value\"])","refId": "A"}],
|
||||
"title": "Busiest Filesystem","type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||||
"description": "Routes currently held by the GoBGP global-table feed peer.",
|
||||
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"unit": "short","thresholds": {"mode": "absolute","steps": [{"color": "red","value": null},{"color": "green","value": 1}]}},"overrides": []},
|
||||
"gridPos": {"h": 4,"w": 8,"x": 16,"y": 0},
|
||||
"id": 3,
|
||||
"options": {"colorMode": "value","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
|
||||
"pluginVersion": "9.1.7",
|
||||
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "table","rawSql": "SELECT count(*) AS \"GoBGP Feed Routes\" FROM ip_rib r JOIN bgp_peers p ON p.hash_id = r.peer_hash_id JOIN routers rt ON rt.hash_id = p.router_hash_id WHERE rt.name = 'GoBGP' AND r.iswithdrawn = false","refId": "A"}],
|
||||
"title": "GoBGP Feed Routes","type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "influxdb","uid": "obmp_influxdb"},
|
||||
"description": "openbmp database size over time. A steady climb is expected while the global feed ingests; a plateau means it has converged.",
|
||||
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"},"custom": {"axisPlacement": "auto","drawStyle": "line","fillOpacity": 15,"lineInterpolation": "smooth","lineWidth": 2,"pointSize": 5,"showPoints": "never","spanNulls": true},"unit": "decbytes","min": 0},"overrides": []},
|
||||
"gridPos": {"h": 8,"w": 12,"x": 0,"y": 4},
|
||||
"id": 4,
|
||||
"options": {"legend": {"calcs": ["last","max"],"displayMode": "table","placement": "bottom","showLegend": true},"tooltip": {"mode": "single","sort": "none"}},
|
||||
"targets": [{"datasource": {"type": "influxdb","uid": "obmp_influxdb"},"query": "from(bucket: \"telemetry\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._measurement == \"postgresql_db_size\" and r._field == \"bytes\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> keep(columns: [\"_time\", \"_value\"])","refId": "A"}],
|
||||
"title": "Database Size Over Time","type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "influxdb","uid": "obmp_influxdb"},
|
||||
"description": "Filesystem utilisation per host volume. Threshold lines at 80% and 95%.",
|
||||
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"},"custom": {"axisPlacement": "auto","drawStyle": "line","fillOpacity": 10,"lineInterpolation": "linear","lineWidth": 2,"pointSize": 5,"showPoints": "never","spanNulls": true,"thresholdsStyle": {"mode": "line"}},"unit": "percent","min": 0,"max": 100,"thresholds": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "orange","value": 80},{"color": "red","value": 95}]}},"overrides": []},
|
||||
"gridPos": {"h": 8,"w": 12,"x": 12,"y": 4},
|
||||
"id": 5,
|
||||
"options": {"legend": {"calcs": ["last"],"displayMode": "table","placement": "bottom","showLegend": true},"tooltip": {"mode": "multi","sort": "desc"}},
|
||||
"targets": [{"datasource": {"type": "influxdb","uid": "obmp_influxdb"},"query": "from(bucket: \"telemetry\")\n |> range(start: v.timeRangeStart, stop: v.timeRangeStop)\n |> filter(fn: (r) => r._measurement == \"disk\" and r._field == \"used_percent\")\n |> aggregateWindow(every: v.windowPeriod, fn: mean, createEmpty: false)\n |> keep(columns: [\"_time\", \"_value\", \"path\"])\n |> group(columns: [\"path\"])","refId": "A"}],
|
||||
"title": "Filesystem Usage %","type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "influxdb","uid": "obmp_influxdb"},
|
||||
"description": "Largest tables in the openbmp database (total relation size, incl. indexes + TOAST).",
|
||||
"fieldConfig": {"defaults": {"custom": {"align": "auto","displayMode": "auto"},"unit": "decbytes"},"overrides": [{"matcher": {"id": "byName","options": "Size"},"properties": [{"id": "custom.displayMode","value": "gradient-gauge"},{"id": "color","value": {"mode": "continuous-BlPu"}}]}]},
|
||||
"gridPos": {"h": 9,"w": 12,"x": 0,"y": 12},
|
||||
"id": 6,
|
||||
"options": {"showHeader": true,"sortBy": [{"desc": true,"displayName": "Size"}]},
|
||||
"targets": [{"datasource": {"type": "influxdb","uid": "obmp_influxdb"},"query": "from(bucket: \"telemetry\")\n |> range(start: -15m)\n |> filter(fn: (r) => r._measurement == \"postgresql_table_size\" and r._field == \"bytes\")\n |> last()\n |> keep(columns: [\"tablename\", \"_value\"])\n |> group()\n |> rename(columns: {_value: \"Size\", tablename: \"Table\"})\n |> sort(columns: [\"Size\"], desc: true)","refId": "A"}],
|
||||
"title": "Largest Tables","type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "influxdb","uid": "obmp_influxdb"},
|
||||
"description": "Current free space and utilisation per host filesystem.",
|
||||
"fieldConfig": {"defaults": {"custom": {"align": "auto","displayMode": "auto"}},"overrides": [{"matcher": {"id": "byName","options": "Used %"},"properties": [{"id": "unit","value": "percent"},{"id": "custom.displayMode","value": "gradient-gauge"},{"id": "max","value": 100},{"id": "thresholds","value": {"mode": "absolute","steps": [{"color": "green","value": null},{"color": "orange","value": 80},{"color": "red","value": 95}]}}]},{"matcher": {"id": "byName","options": "Free"},"properties": [{"id": "unit","value": "decbytes"}]}]},
|
||||
"gridPos": {"h": 9,"w": 12,"x": 12,"y": 12},
|
||||
"id": 7,
|
||||
"options": {"showHeader": true,"sortBy": [{"desc": true,"displayName": "Used %"}]},
|
||||
"targets": [{"datasource": {"type": "influxdb","uid": "obmp_influxdb"},"query": "free = from(bucket: \"telemetry\")\n |> range(start: -15m)\n |> filter(fn: (r) => r._measurement == \"disk\" and r._field == \"free\")\n |> last()\n |> keep(columns: [\"path\", \"_value\"])\n |> rename(columns: {_value: \"Free\"})\npct = from(bucket: \"telemetry\")\n |> range(start: -15m)\n |> filter(fn: (r) => r._measurement == \"disk\" and r._field == \"used_percent\")\n |> last()\n |> keep(columns: [\"path\", \"_value\"])\n |> rename(columns: {_value: \"Used %\"})\njoin(tables: {f: free, p: pct}, on: [\"path\"])\n |> rename(columns: {path: \"Filesystem\"})\n |> group()","refId": "A"}],
|
||||
"title": "Filesystem Free Space","type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||||
"description": "BGP sessions of the GoBGP global-table feed and their state. Both the IPv4 and IPv6 sessions to AS57355 should read 'up'.",
|
||||
"fieldConfig": {"defaults": {"custom": {"align": "auto","displayMode": "auto"}},"overrides": [{"matcher": {"id": "byName","options": "State"},"properties": [{"id": "custom.displayMode","value": "color-background"},{"id": "mappings","value": [{"type": "value","options": {"up": {"color": "green","index": 0},"down": {"color": "red","index": 1}}}]}]}]},
|
||||
"gridPos": {"h": 6,"w": 24,"x": 0,"y": 21},
|
||||
"id": 8,
|
||||
"options": {"showHeader": true,"sortBy": [{"desc": false,"displayName": "Peer"}]},
|
||||
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "table","rawSql": "SELECT host(vp.peerip) AS \"Peer\",\n vp.peerasn AS \"Peer AS\",\n vp.peer_state AS \"State\",\n (SELECT count(*) FROM ip_rib r WHERE r.peer_hash_id = vp.peer_hash_id AND r.iswithdrawn = false) AS \"Routes\"\nFROM v_peers vp\nWHERE vp.routername = 'GoBGP'\nORDER BY vp.peerip","refId": "A"}],
|
||||
"title": "GoBGP Feed — BGP Sessions","type": "table"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"schemaVersion": 36,
|
||||
"style": "dark",
|
||||
"tags": ["obmp", "obmp-nav", "telemetry", "storage"],
|
||||
"templating": {"list": []},
|
||||
"time": {"from": "now-24h","to": "now"},
|
||||
"timepicker": {},
|
||||
"timezone": "browser",
|
||||
"title": "Storage & Feed Health",
|
||||
"uid": "obmp-storage-health",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
@ -1,112 +0,0 @@
|
||||
{
|
||||
"annotations": {"list": [{"builtIn": 1,"datasource": {"type": "datasource","uid": "grafana"},"enable": true,"hide": true,"iconColor": "rgba(0, 211, 255, 1)","name": "Annotations & Alerts","type": "dashboard"}]},
|
||||
"description": "BGP EVPN routes monitored over BMP and stored in evpn_rib by the obmp-evpn-consumer (roadmap E5). Covers type-2 (MAC/IP advertisement) and type-3 (inclusive multicast); collector 2.2.3 mis-decodes type-5 (IP-prefix) so it is not shown. Scope with the RD/EVI variable.",
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [{"asDropdown": true,"icon": "external link","includeVars": true,"keepTime": true,"tags": ["obmp-nav"],"title": "OBMP Dashboards","type": "dashboards"}],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||||
"description": "Active EVPN routes (not withdrawn) in the selected RD scope.",
|
||||
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"unit": "short","thresholds": {"mode": "absolute","steps": [{"color": "blue","value": null}]}},"overrides": []},
|
||||
"gridPos": {"h": 4,"w": 6,"x": 0,"y": 0},
|
||||
"id": 1,
|
||||
"options": {"colorMode": "value","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
|
||||
"pluginVersion": "9.1.7",
|
||||
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "table","rawSql": "SELECT count(*) AS \"EVPN Routes\" FROM evpn_rib WHERE iswithdrawn = false AND ('$rd' = '-- all --' OR rd = '$rd')","refId": "A"}],
|
||||
"title": "EVPN Routes","type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||||
"description": "Distinct route distinguishers (EVPN instances) in scope.",
|
||||
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"unit": "short","thresholds": {"mode": "absolute","steps": [{"color": "purple","value": null}]}},"overrides": []},
|
||||
"gridPos": {"h": 4,"w": 6,"x": 6,"y": 0},
|
||||
"id": 2,
|
||||
"options": {"colorMode": "value","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
|
||||
"pluginVersion": "9.1.7",
|
||||
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "table","rawSql": "SELECT count(DISTINCT rd) AS \"EVIs (RDs)\" FROM evpn_rib WHERE iswithdrawn = false AND ('$rd' = '-- all --' OR rd = '$rd')","refId": "A"}],
|
||||
"title": "EVIs (RDs)","type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||||
"description": "Type-2 MAC/IP advertisement routes — learned MAC addresses.",
|
||||
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"unit": "short","thresholds": {"mode": "absolute","steps": [{"color": "green","value": null}]}},"overrides": []},
|
||||
"gridPos": {"h": 4,"w": 6,"x": 12,"y": 0},
|
||||
"id": 3,
|
||||
"options": {"colorMode": "value","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
|
||||
"pluginVersion": "9.1.7",
|
||||
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "table","rawSql": "SELECT count(DISTINCT mac) AS \"MACs\" FROM evpn_rib WHERE iswithdrawn = false AND route_type = 2 AND mac IS NOT NULL AND ('$rd' = '-- all --' OR rd = '$rd')","refId": "A"}],
|
||||
"title": "Learned MACs","type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||||
"description": "Type-3 inclusive-multicast routes — per-EVI broadcast/unknown-unicast/multicast flood endpoints.",
|
||||
"fieldConfig": {"defaults": {"color": {"mode": "thresholds"},"unit": "short","thresholds": {"mode": "absolute","steps": [{"color": "orange","value": null}]}},"overrides": []},
|
||||
"gridPos": {"h": 4,"w": 6,"x": 18,"y": 0},
|
||||
"id": 4,
|
||||
"options": {"colorMode": "value","graphMode": "none","justifyMode": "auto","orientation": "auto","reduceOptions": {"calcs": ["lastNotNull"],"fields": "","values": false},"textMode": "auto"},
|
||||
"pluginVersion": "9.1.7",
|
||||
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "table","rawSql": "SELECT count(*) AS \"Multicast Routes\" FROM evpn_rib WHERE iswithdrawn = false AND route_type = 3 AND ('$rd' = '-- all --' OR rd = '$rd')","refId": "A"}],
|
||||
"title": "Multicast (T3)","type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||||
"description": "Route count by EVPN route type.",
|
||||
"fieldConfig": {"defaults": {"color": {"mode": "palette-classic"},"custom": {"lineWidth": 1,"fillOpacity": 80,"axisPlacement": "auto"}},"overrides": []},
|
||||
"gridPos": {"h": 8,"w": 8,"x": 0,"y": 4},
|
||||
"id": 5,
|
||||
"options": {"orientation": "horizontal","showValue": "auto","xField": "Type","legend": {"showLegend": false},"tooltip": {"mode": "single"}},
|
||||
"pluginVersion": "9.1.7",
|
||||
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "table","rawSql": "SELECT CASE route_type WHEN 1 THEN 'T1 Eth A-D' WHEN 2 THEN 'T2 MAC/IP' WHEN 3 THEN 'T3 Multicast' WHEN 4 THEN 'T4 Eth Segment' WHEN 5 THEN 'T5 IP-prefix' ELSE 'T' || route_type END AS \"Type\",\n count(*) AS \"Routes\"\nFROM evpn_rib\nWHERE iswithdrawn = false AND ('$rd' = '-- all --' OR rd = '$rd')\nGROUP BY route_type\nORDER BY route_type","refId": "A"}],
|
||||
"title": "Routes by Type","type": "barchart"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||||
"description": "Per-EVI summary — MAC/IP and multicast route counts and distinct MACs per route distinguisher.",
|
||||
"fieldConfig": {"defaults": {"custom": {"align": "auto","displayMode": "auto"}},"overrides": []},
|
||||
"gridPos": {"h": 8,"w": 16,"x": 8,"y": 4},
|
||||
"id": 6,
|
||||
"options": {"showHeader": true,"sortBy": [{"desc": false,"displayName": "RD / EVI"}]},
|
||||
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "table","rawSql": "SELECT rd AS \"RD / EVI\",\n count(*) FILTER (WHERE route_type = 2) AS \"MAC/IP\",\n count(*) FILTER (WHERE route_type = 3) AS \"Multicast\",\n count(DISTINCT mac) FILTER (WHERE mac IS NOT NULL) AS \"Distinct MACs\"\nFROM evpn_rib\nWHERE iswithdrawn = false AND ('$rd' = '-- all --' OR rd = '$rd')\nGROUP BY rd\nORDER BY rd","refId": "A"}],
|
||||
"title": "Per-EVI Summary","type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||||
"description": "Type-2 MAC/IP advertisements — every MAC (and host IP) learned in the selected EVPN instances.",
|
||||
"fieldConfig": {"defaults": {"custom": {"align": "auto","displayMode": "auto"}},"overrides": []},
|
||||
"gridPos": {"h": 10,"w": 24,"x": 0,"y": 12},
|
||||
"id": 7,
|
||||
"options": {"showHeader": true,"sortBy": [{"desc": false,"displayName": "RD"}]},
|
||||
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "table","rawSql": "SELECT rd AS \"RD\",\n eth_tag_id AS \"Eth Tag\",\n mac AS \"MAC\",\n host(ip) AS \"Host IP\",\n mpls_label1 AS \"VNI / Label\",\n array_to_string(ext_community_list, ', ') AS \"Route Targets\",\n eth_segment_id AS \"ESI\",\n timestamp AS \"Last Update\"\nFROM evpn_rib\nWHERE iswithdrawn = false AND route_type = 2 AND ('$rd' = '-- all --' OR rd = '$rd')\nORDER BY rd, mac","refId": "A"}],
|
||||
"title": "MAC/IP Advertisements (Type-2)","type": "table"
|
||||
},
|
||||
{
|
||||
"datasource": {"type": "postgres","uid": "obmp_postgres"},
|
||||
"description": "Type-3 inclusive-multicast routes — the flood list per EVPN instance.",
|
||||
"fieldConfig": {"defaults": {"custom": {"align": "auto","displayMode": "auto"}},"overrides": []},
|
||||
"gridPos": {"h": 8,"w": 24,"x": 0,"y": 22},
|
||||
"id": 8,
|
||||
"options": {"showHeader": true,"sortBy": [{"desc": false,"displayName": "RD"}]},
|
||||
"targets": [{"datasource": {"type": "postgres","uid": "obmp_postgres"},"format": "table","rawSql": "SELECT rd AS \"RD\",\n eth_tag_id AS \"Eth Tag\",\n host(orig_router_ip) AS \"Originating Router\",\n array_to_string(ext_community_list, ', ') AS \"Route Targets\",\n timestamp AS \"Last Update\"\nFROM evpn_rib\nWHERE iswithdrawn = false AND route_type = 3 AND ('$rd' = '-- all --' OR rd = '$rd')\nORDER BY rd","refId": "A"}],
|
||||
"title": "Inclusive Multicast (Type-3)","type": "table"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 36,
|
||||
"style": "dark",
|
||||
"tags": ["obmp", "obmp-nav", "bgp", "evpn"],
|
||||
"templating": {
|
||||
"list": [
|
||||
{"name": "rd","type": "query","label": "RD / EVI","datasource": {"type": "postgres","uid": "obmp_postgres"},"query": "SELECT '-- all --' AS rd UNION SELECT DISTINCT rd FROM evpn_rib WHERE iswithdrawn = false ORDER BY rd","definition": "SELECT '-- all --' AS rd UNION SELECT DISTINCT rd FROM evpn_rib WHERE iswithdrawn = false ORDER BY rd","refresh": 1,"includeAll": false,"multi": false,"current": {"selected": true,"text": "-- all --","value": "-- all --"},"options": [],"sort": 1,"hide": 0}
|
||||
]
|
||||
},
|
||||
"time": {"from": "now-6h","to": "now"},
|
||||
"timepicker": {},
|
||||
"timezone": "",
|
||||
"title": "EVPN RIB",
|
||||
"uid": "evpn-rib",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
@ -1,44 +0,0 @@
|
||||
-- BGP EVPN RIB table (roadmap E5)
|
||||
--
|
||||
-- The OpenBMP collector already decodes EVPN and emits the
|
||||
-- 'openbmp.parsed.evpn' Kafka topic, but the stock psql-app consumer never
|
||||
-- subscribes to it and the base schema has no table for it. This table is
|
||||
-- the landing zone; a dedicated consumer (obmp-evpn-consumer, separate)
|
||||
-- subscribes to the topic and writes here.
|
||||
--
|
||||
-- Mirrors l3vpn_rib conventions. route_type is derived by the consumer from
|
||||
-- which fields are populated (the parsed EVPN message has no explicit type),
|
||||
-- so it is nullable.
|
||||
-- evpn_rib: landing table for parsed EVPN routes.
-- A row is identified by (peer_hash_id, hash_id). Column order is significant
-- for any positional INSERT and must not be changed.
CREATE TABLE IF NOT EXISTS evpn_rib
(
    -- Identity / attribute linkage
    hash_id                 uuid            NOT NULL,
    base_attr_hash_id       uuid,
    peer_hash_id            uuid            NOT NULL,

    -- Route distinguisher and classification
    rd                      varchar(128)    NOT NULL,
    rd_type                 smallint,
    -- EVPN route type 1..5. Derived by the consumer (the parsed message
    -- carries no explicit type), which is why the column is nullable.
    route_type              smallint,
    origin_as               bigint,

    -- EVPN NLRI fields
    eth_segment_id          varchar(255),                   -- ESI
    eth_tag_id              bigint,
    mac                     macaddr,
    mac_len                 smallint,
    ip                      inet,
    ip_len                  smallint,
    orig_router_ip          inet,
    mpls_label1             bigint,                         -- VXLAN VNI when encap = vxlan
    mpls_label2             bigint,
    ext_community_list      varchar(50)[],                  -- route-targets
    path_id                 bigint,

    -- Bookkeeping; both timestamps default to the current time expressed in UTC
    "timestamp"             timestamp(6) without time zone  NOT NULL DEFAULT (now() AT TIME ZONE 'utc'),
    first_added_timestamp   timestamp(6) without time zone  NOT NULL DEFAULT (now() AT TIME ZONE 'utc'),
    iswithdrawn             boolean                         NOT NULL DEFAULT false,
    isprepolicy             boolean                         NOT NULL DEFAULT true,
    isadjribin              boolean                         NOT NULL DEFAULT true,

    PRIMARY KEY (peer_hash_id, hash_id)
);
|
||||
-- Secondary indexes on evpn_rib. Every statement uses IF NOT EXISTS, so the
-- script can be re-applied against a database that already has them.

-- Lookups by route hash alone (the primary key leads with peer_hash_id).
CREATE INDEX IF NOT EXISTS evpn_rib_hash_id_idx
    ON evpn_rib (hash_id);

-- Lookups by base attribute hash.
CREATE INDEX IF NOT EXISTS evpn_rib_base_attr_idx
    ON evpn_rib (base_attr_hash_id);

-- Filtering by route distinguisher and by EVPN route type.
CREATE INDEX IF NOT EXISTS evpn_rib_rd_idx
    ON evpn_rib (rd);
CREATE INDEX IF NOT EXISTS evpn_rib_route_type_idx
    ON evpn_rib (route_type);

-- Lookups by MAC address.
CREATE INDEX IF NOT EXISTS evpn_rib_mac_idx
    ON evpn_rib (mac);

-- GIN index over the route-target array column.
CREATE INDEX IF NOT EXISTS evpn_rib_extcomm_idx
    ON evpn_rib USING gin (ext_community_list);

-- Lookups / ordering by last-update time.
CREATE INDEX IF NOT EXISTS evpn_rib_timestamp_idx
    ON evpn_rib ("timestamp");
|
||||
@ -64,28 +64,6 @@
|
||||
total = true
|
||||
timeout = "10s"
|
||||
|
||||
## Disk usage (free / used space) of the host volumes that hold the Postgres
## data, Kafka, InfluxDB and the OpenBMP data root.
##
## The plugin is pointed at the host rather than at this container: the host
## root is bind-mounted read-only at /hostfs, and HOST_MOUNT_PREFIX / HOST_PROC
## are set accordingly in docker-compose.yml.
[[inputs.disk]]
  ## Filesystem types that are skipped instead of being reported as volumes.
  ignore_fs = [
    "tmpfs",
    "devtmpfs",
    "devfs",
    "overlay",
    "aufs",
    "squashfs",
    "ramfs",
    "nsfs",
    "iso9660"
  ]
  ## Collect once a minute.
  interval = "60s"
|
||||
|
||||
## Size of the openbmp database and of its largest tables. This is how RIB
## growth is tracked; it rises sharply once the full-table GoBGP feed
## (~1M routes) is being ingested.
[[inputs.postgresql_extensible]]
  ## Connection string. The password is substituted from the
  ## POSTGRES_PASSWORD environment variable.
  ## NOTE(review): host=localhost assumes PostgreSQL is reachable on this
  ## container's loopback - confirm against the compose network setup.
  address = "host=localhost port=5432 user=openbmp password=${POSTGRES_PASSWORD} dbname=openbmp sslmode=disable"
  ## Run both queries once a minute.
  interval = "60s"

  ## Measurement "postgresql_db_size": size of the openbmp database in the
  ## "bytes" field, tagged with the database name.
  [[inputs.postgresql_extensible.query]]
    measurement = "postgresql_db_size"
    tagvalue = "datname"
    sqlquery = "SELECT datname, pg_database_size(datname) AS bytes FROM pg_database WHERE datname = 'openbmp'"

  ## Measurement "postgresql_table_size": total relation size in the "bytes"
  ## field for the 20 largest ordinary tables (relkind 'r') of schema
  ## "public", tagged with the table name.
  [[inputs.postgresql_extensible.query]]
    measurement = "postgresql_table_size"
    tagvalue = "tablename"
    sqlquery = "SELECT c.relname AS tablename, pg_total_relation_size(c.oid) AS bytes FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE n.nspname = 'public' AND c.relkind = 'r' ORDER BY 2 DESC LIMIT 20"
|
||||
|
||||
###############################################################################
|
||||
# OUTPUT PLUGINS #
|
||||
###############################################################################
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user