obmp-docker/cml/proxmox_bmp_config.py
sam 2634aada24 Parameterize HOST_IP everywhere -- portable to another lab host
Removes hardcoded 10.40.40.202 references so a fresh clone + .env-only
edit can stand the stack up on a new compute node.

  * docker-compose.yml: rib-poller PG_DSN now uses ${HOST_IP:-...}.
  * obmp-rib-poller/poller.py: default PG_DSN host falls back to
    ${HOST_IP} env (compose passes it; manual runs honour $HOST_IP too).
  * cml/gobgp_peering_config.py: GOBGP_IP read from $HOST_IP or the
    HOST_IP= line in repo-root .env, with a small _env_default helper.
  * cml/proxmox_bmp_config.py: COLLECTOR_HOST resolved the same way.

For gobgp/gobgpd.conf and gobgp-evpn/gobgpd.conf -- jauderho/gobgp is
distroless (no shell), so we can't sed-substitute at container start.
Pattern instead:

  * gobgpd.conf is now gobgpd.conf.tmpl with __HOST_IP__ placeholders
    (committed). The rendered gobgpd.conf is gitignored.
  * setup.sh renders the .tmpl(s) to .conf using $HOST_IP from .env.
  * compose `command` stays the simple `gobgpd -f /config/gobgpd.conf`.

After cloning on a new host:  cp .env.example .env  -> edit HOST_IP ->
./setup.sh -> docker compose up -d. Verified locally by force-recreating
gobgp; all 6 sessions (4 cores + 2 Bromirski) re-established in <60s.

Known portability gaps still to address (separate work):
  * Hardcoded lab-router inventories in cml/*.py and
    obmp-rib-poller/poller.py.
  * The /etc/cron.d/openbmp */5 -> */15 edit inside obmp-psql-app is
    not persistent (regenerated by config_cron on every container start).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-19 18:34:51 -07:00

192 lines
6.9 KiB
Python

#!/usr/bin/env python3
"""Apply the OpenBMP `bmp server 1` config to the Proxmox CML lab routers.
IOS-XR BMP configuration is not exposed via the device's NETCONF YANG schema
on this release, so this applies config over the SSH CLI. It is idempotent —
re-applying an identical block commits no changes.
PROX-R9K-03 was built without `bmp-activate` on its BGP neighbor-group; this
script adds it (the other 8 routers already have it from the re-addressing).
Usage:
pip install paramiko
python3 cml/proxmox_bmp_config.py # all 9 routers
python3 cml/proxmox_bmp_config.py r9k-05 # one router (smoke test)
Verify afterwards in OpenBMP:
docker exec -i obmp-psql psql -U openbmp -d openbmp \\
-c "SELECT name, ip_address, bgp_id, isconnected FROM routers ORDER BY name;"
"""
import os
import sys
import time
import paramiko
def _env_default(key, default, dotenv=".env"):
    """Resolve a setting from the environment, a dotenv file, or a default.

    Lookup order:
      1. ``os.environ[key]``, when set and non-empty.
      2. The first ``key=value`` line in the file at ``dotenv`` that carries a
         non-empty value. Blank lines and ``#`` comments are skipped, a
         leading ``export `` and whitespace around ``=`` are tolerated, and
         surrounding single/double quotes are stripped from the value.
      3. ``default``.

    Args:
        key: Variable name to look up.
        default: Value returned when neither source provides a non-empty one.
        dotenv: Path of the dotenv file; a relative path is resolved against
            the current working directory by ``open``.

    Returns:
        The resolved string, or ``default``.
    """
    value = os.environ.get(key)
    if value:
        return value
    try:
        # errors="replace": a stray non-UTF-8 byte elsewhere in the file must
        # not prevent reading an ASCII key/value pair.
        with open(dotenv, encoding="utf-8", errors="replace") as fh:
            for raw in fh:
                entry = raw.strip()
                if not entry or entry.startswith("#"):
                    continue
                if entry.startswith("export "):
                    entry = entry[len("export "):].lstrip()
                name, sep, rest = entry.partition("=")
                if not sep or name.strip() != key:
                    continue
                rest = rest.strip().strip('"').strip("'")
                # An empty assignment (e.g. an unfilled template line) is
                # treated like an empty environment variable: keep looking,
                # then fall back to the default.
                if rest:
                    return rest
    except OSError:
        # Missing, unreadable, or not a regular file: this lookup is
        # best-effort, so fall through to the default instead of crashing
        # at import time.
        pass
    return default
# --- BMP collector ---------------------------------------------------------
# Resolved from $HOST_IP, else the HOST_IP= line in the repo-root .env, else
# the original lab host. The .env path is anchored to this file's location
# (<repo>/cml/proxmox_bmp_config.py -> <repo>/.env) rather than the current
# working directory, so running the script from outside the repo root does
# not silently fall back to the hardcoded address.
_REPO_DOTENV = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))), ".env"
)
COLLECTOR_HOST = _env_default("HOST_IP", "10.40.40.202", dotenv=_REPO_DOTENV)
# Kept as a string: it is only ever interpolated into CLI text.
COLLECTOR_PORT = "5000"
# Flat ("formal") `bmp server 1` statements, identical to the ESXi lab.
# Every entry is a complete command entered at the (config)# prompt. There is
# intentionally no bare "bmp server 1" entry: that would enter the bmp
# submode, where the flat statements that follow would be rejected.
BMP_LINES = f"""\
bmp server 1 host {COLLECTOR_HOST} port {COLLECTOR_PORT}
bmp server 1 description OpenBMP-Collector
bmp server 1 update-source MgmtEth0/RP0/CPU0/0
bmp server 1 initial-delay 60
bmp server 1 stats-reporting-period 300
bmp server 1 initial-refresh delay 60 spread 30
""".splitlines()
# Extra statement for PROX-R9K-03 only: its BMP-MONITORED neighbor-group was
# created without bmp-activate. AS 65021 is the Proxmox lab.
BMP_ACTIVATE_LINE = (
    "router bgp 65021 "
    "neighbor-group BMP-MONITORED "
    "bmp-activate server 1"
)
# --- router inventory ------------------------------------------------------
# Each entry is (name, mgmt_ip, user, password, needs_bmp_activate).
# All routers share the same lab password; only PROX-R9K-03 needs the
# bmp-activate line, so both fields are filled in while building the list.
ROUTERS = [
    (name, mgmt_ip, user, "cisco", name == "PROX-R9K-03")
    for name, mgmt_ip, user in (
        ("PROX-R9K-CORE-01", "10.100.1.100", "admin"),
        ("PROX-R9K-CORE-02", "10.100.1.200", "admin"),
        ("PROX-R9K-01", "10.100.1.1", "webui"),
        ("PROX-R9K-02", "10.100.1.2", "webui"),
        ("PROX-R9K-03", "10.100.1.3", "webui"),
        ("PROX-R9K-04", "10.100.1.4", "webui"),
        ("PROX-R9K-05", "10.100.1.5", "webui"),
        ("PROX-R9K-06", "10.100.1.6", "webui"),
        ("PROX-R9K-07", "10.100.1.7", "admin"),
    )
]
def _drain(shell, settle=1.0, limit=15.0, until=None):
    """Read pending output from an interactive shell channel.

    Polls every ``settle`` seconds for at most ``limit`` seconds.

    * With a non-empty ``until``: keep reading until that substring has
      appeared in the accumulated output, or the time limit elapses.
    * Without ``until`` (``None`` or empty): return as soon as a poll finds
      no data waiting, i.e. output stopped arriving for ``settle`` seconds.

    Args:
        shell: Object exposing ``recv_ready()`` and ``recv(nbytes) -> bytes``
            (the channel returned by ``invoke_shell`` in ``apply_router``).
        settle: Seconds to sleep before each poll.
        limit: Overall time budget in seconds.
        until: Optional marker substring to wait for.

    Returns:
        Everything read, decoded with undecodable bytes replaced. May be
        empty, and may lack ``until`` if the limit was reached first.
    """
    marker = until.encode() if until else None
    # Accumulate raw bytes and decode once at the end, so a multi-byte
    # character split across two recv() calls is not turned into
    # replacement characters.
    buf = bytearray()
    # A monotonic clock keeps the time budget immune to wall-clock changes.
    deadline = time.monotonic() + limit
    while time.monotonic() < deadline:
        time.sleep(settle)
        if shell.recv_ready():
            buf += shell.recv(65535)
            if marker and marker in buf:
                break
        elif marker is None:
            # Settle mode: nothing arrived during the last interval.
            break
    return buf.decode(errors="replace")
def apply_router(name, ip, user, pwd, needs_activate):
    """Apply the BMP config to one router over an interactive SSH shell.

    Steps: log in, disable paging, enter config mode, send each line of
    ``BMP_LINES`` (plus ``BMP_ACTIVATE_LINE`` when ``needs_activate``),
    inspect the candidate with ``show configuration``, commit if it holds
    BMP changes, then confirm the collector host/port via
    ``show run formal bmp``.

    Args:
        name: Router display name (used for logging only).
        ip: Management address to connect to.
        user: SSH username.
        pwd: SSH password.
        needs_activate: Also send ``BMP_ACTIVATE_LINE`` when true.

    Returns:
        True when the config was already present or was committed and
        verified; False on any failure. Exceptions are caught and reported
        so one unreachable router does not stop the whole run.
    """
    print(f"\n=== {name} ({ip}) ===")
    lines = list(BMP_LINES)
    if needs_activate:
        lines.append(BMP_ACTIVATE_LINE)

    # "(config)#" is the universal IOS-XR config-prompt suffix — used as
    # the wait marker so the device hostname is irrelevant.
    cfg_prompt = "(config)#"
    ssh = None
    try:
        ssh = paramiko.SSHClient()
        # NOTE(review): unknown host keys are accepted automatically; fine
        # for a lab, not for anything exposed.
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        ssh.connect(ip, username=user, password=pwd, timeout=15,
                    look_for_keys=False, allow_agent=False)
        shell = ssh.invoke_shell(width=220, height=1000)
        time.sleep(2)
        shell.recv(65535)  # banner
        shell.send("terminal length 0\n")
        _drain(shell, 0.5, 5)

        # Enter config mode. IOS-XR may print an active-session banner first,
        # so wait specifically for the (config) prompt.
        shell.send("configure terminal\n")
        out = _drain(shell, 0.4, 15, until=cfg_prompt)
        if cfg_prompt not in out:
            print(f" FAIL: could not enter config mode\n {out[-200:]}")
            return False

        # Send config lines, paced.
        for line in lines:
            shell.send(line + "\n")
            time.sleep(0.4)
            _drain(shell, 0.3, 8, until=cfg_prompt)

        # Confirm the candidate actually holds changes before committing.
        # Both markers are checked: when the `bmp server 1` block is already
        # present and only the neighbor-group line is new, the candidate
        # shows "bmp-activate" but no "bmp server" text.
        shell.send("show configuration\n")
        cand = _drain(shell, 0.3, 10, until=cfg_prompt)
        if "bmp server" not in cand and "bmp-activate" not in cand:
            print(" OK: no changes (config already present) — nothing to commit")
            shell.send("abort\n")
            _drain(shell, 0.5, 5)
            return True

        shell.send("commit\n")
        result = _drain(shell, 0.3, 25, until=cfg_prompt)
        lowered = result.lower()
        if "fail" in lowered or "error" in lowered:
            print(f" FAIL: commit error\n {result[-300:]}")
            shell.send("abort\n")
            _drain(shell, 0.5, 5)
            return False

        # Leave config mode and fully drain (settle-based, no marker) so the
        # verify output is clean — not contaminated by echoed config lines.
        shell.send("end\n")
        _drain(shell, 1.0, 10)
        shell.send("show run formal bmp\n")
        verify = _drain(shell, 1.0, 12)
        ok = f"host {COLLECTOR_HOST} port {COLLECTOR_PORT}" in verify
        print(f" {'OK' if ok else 'FAIL'}: bmp server 1 "
              f"{'present' if ok else 'NOT found'} in running config")
        return ok
    except Exception as e:
        print(f" FAIL: {e}")
        return False
    finally:
        # Close on every path, including exceptions raised after connect(),
        # so a failed run does not leave the SSH session open.
        if ssh is not None:
            ssh.close()
def main():
    """Apply the BMP config to every router, or to those matching a filter.

    The optional first command-line argument is a case-insensitive substring
    matched against router names (e.g. ``r9k-05``). Prints a per-router
    summary and exits 0 when every selected router succeeded, 1 when any
    failed, and 2 when the filter selected no router at all.
    """
    target = sys.argv[1].lower() if len(sys.argv) > 1 else None
    results = {}
    for name, ip, user, pwd, needs_activate in ROUTERS:
        if target and target not in name.lower():
            continue
        results[name] = apply_router(name, ip, user, pwd, needs_activate)

    if not results:
        # all() over an empty mapping is True, so a mistyped filter would
        # otherwise be reported as success without touching any router.
        known = ", ".join(entry[0] for entry in ROUTERS)
        wanted = repr(target) if target else "(no filter)"
        print(f"No router selected for {wanted}. Known routers: {known}",
              file=sys.stderr)
        sys.exit(2)

    print(f"\n{'='*48}\n SUMMARY")
    for name, ok in results.items():
        print(f" {name:22s} {'OK' if ok else 'FAILED'}")
    sys.exit(0 if all(results.values()) else 1)


if __name__ == "__main__":
    main()