psql-app-consumer: profile-gated (scale-out) horizontal scale-out for the Kafka->Postgres ingestion path. Shares the primary's /config read-only so it reuses obmp-psql.yml, whose fixed group.id makes Kafka rebalance partitions across the primary and every replica. Its command runs ONLY the consumer jar -- no cron, RPKI/IRR/DBIP or initdb -- so it does not duplicate the primary's DB-maintenance jobs (config_cron wires those up unconditionally in /usr/sbin/run). Each replica brings its own consumer and writer threads. Measured: one consumer-only replica took the post-storm backlog drain from a cold-start ~3.7k msg/s to ~48k msg/s; group membership 8->16. With 2 consumers feeding it, Postgres becomes the next bottleneck (~500% CPU) -- DB write capacity is the ceiling beyond ~2-3 consumers. docker compose --profile scale-out up -d --scale psql-app-consumer=2 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
639 lines
22 KiB
YAML
639 lines
22 KiB
YAML
---
# Compose project name.
name: obmp

# Named volumes for Postgres. Each one is a bind mount (type=none, o=bind)
# onto a host directory under ${OBMP_DATA_ROOT}.
volumes:
  # Mounted by the psql service at /var/lib/postgresql/data.
  data-volume:
    driver_opts:
      device: "${OBMP_DATA_ROOT}/postgres/data"
      o: bind
      type: "none"

  # Mounted by the psql service at /var/lib/postgresql/ts.
  ts-volume:
    driver_opts:
      device: "${OBMP_DATA_ROOT}/postgres/ts"
      o: bind
      type: "none"
|
|
|
|
services:
|
|
|
|
zookeeper:
  image: confluentinc/cp-zookeeper:7.1.1
  container_name: obmp-zookeeper
  restart: unless-stopped
  mem_limit: 1g
  # Data and transaction-log directories live on the host under OBMP_DATA_ROOT.
  volumes:
    - ${OBMP_DATA_ROOT}/zk-data:/var/lib/zookeeper/data
    - ${OBMP_DATA_ROOT}/zk-log:/var/lib/zookeeper/log
  environment:
    # Client port; the kafka service connects to obmp-zookeeper:2181.
    ZOOKEEPER_CLIENT_PORT: "2181"
    ZOOKEEPER_TICK_TIME: "2000"
  # Healthy once a TCP connection to the client port can be opened.
  healthcheck:
    test:
      - CMD-SHELL
      - "bash -c 'echo > /dev/tcp/localhost/2181'"
    start_period: 60s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
kafka:
  image: confluentinc/cp-kafka:7.1.1
  container_name: obmp-kafka
  restart: unless-stopped
  # Full-table initial dumps are bursty: raise KAFKA_MEM_LIMIT for production.
  mem_limit: ${KAFKA_MEM_LIMIT:-4g}
  depends_on:
    - zookeeper
  ports:
    - "9092:9092"
  # Kafka data directory. Point the host side at wherever Kafka data should
  # be stored -- normally 80GB or more.
  volumes:
    - ${OBMP_DATA_ROOT}/kafka-data:/var/lib/kafka/data
  environment:
    KAFKA_BROKER_ID: "1"
    KAFKA_ZOOKEEPER_CONNECT: "obmp-zookeeper:2181"

    # Change/add listeners to match the FQDN (an IP address also works) that
    # the host and other containers can reach. By default Kafka can only be
    # accessed from within the compose network/containers, on port 29092.
    # Outside access can be enabled, but you should use an FQDN listener.
    KAFKA_ADVERTISED_LISTENERS: "PLAINTEXT://obmp-kafka:29092,PLAINTEXT_HOST://${HOST_IP:-10.40.40.202}:9092"
    KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: "PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT"
    KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT

    # Single broker, so the offsets topic cannot be replicated.
    KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: "1"
    KAFKA_NUM_PARTITIONS: "8"

    # Log retention (720 minutes), roll interval (3600000 ms) and segment
    # size (1073741824 bytes).
    KAFKA_LOG_RETENTION_MINUTES: "720"
    KAFKA_LOG_ROLL_MS: "3600000"
    KAFKA_LOG_SEGMENT_BYTES: "1073741824"
    KAFKA_MESSAGE_MAX_BYTES: "100000000"
    KAFKA_LOG_CLEANER_THREADS: "2"
  # Healthy once a TCP connection to port 9092 can be opened.
  healthcheck:
    test:
      - CMD-SHELL
      - "bash -c 'echo > /dev/tcp/localhost/9092'"
    start_period: 90s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
# TODO: Move comment to documentation
|
|
# You can use SWAG and Let's Encrypt to front the Grafana HTTP port 3000 instance. Below is an example
|
|
# Create the SWAG directory using "sudo mkdir -m 777 ${OBMP_DATA_ROOT}/swag"
|
|
# swag:
|
|
# image: linuxserver/letsencrypt:version-1.11.0
|
|
# container_name: obmp-swag
|
|
# cap_add:
|
|
# - NET_ADMIN
|
|
# environment:
|
|
# - PUID=1000
|
|
# - PGID=1000
|
|
# - TZ=UTC
|
|
# - URL=<domain of grafana host, this should be the domain only, not the hostname>
|
|
# - SUBDOMAINS=<hostname, without the domain>,
|
|
# - VALIDATION=http
|
|
# - EMAIL=<your email>
|
|
# - ONLY_SUBDOMAINS=true
|
|
# - STAGING=false
|
|
# volumes:
|
|
# - /var/openbmp/swag:/config
|
|
# ports:
|
|
# - 443:443
|
|
# - 80:80 #optional
|
|
# restart: unless-stopped
|
|
|
|
grafana:
  image: grafana/grafana:9.1.7
  container_name: obmp-grafana
  restart: unless-stopped
  mem_limit: 1g
  ports:
    - "3000:3000"
  volumes:
    - ${OBMP_DATA_ROOT}/grafana:/var/lib/grafana
    - ${OBMP_DATA_ROOT}/grafana/provisioning:/etc/grafana/provisioning/
  environment:
    # Admin password. Defaults to the historical value; set
    # GRAFANA_ADMIN_PASSWORD in .env to keep the real secret out of this file.
    - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-openbmp}
    - GF_AUTH_ANONYMOUS_ENABLED=false
    # Grafana is served from the /grafana/ sub-path of OBMP_DOMAIN.
    - GF_SERVER_ROOT_URL=https://${OBMP_DOMAIN:-bmp.apodacalab.com}/grafana/
    - GF_SERVER_SERVE_FROM_SUB_PATH=true
    # Auth proxy: the user is taken from the Remote-User request header and
    # signed up automatically.
    # NOTE(review): port 3000 is also published on the host above. Confirm
    # that only the trusted reverse proxy can reach it (or restrict it with
    # GF_AUTH_PROXY_WHITELIST); otherwise any client able to set Remote-User
    # can sign in as that user.
    - GF_AUTH_PROXY_ENABLED=true
    - GF_AUTH_PROXY_HEADER_NAME=Remote-User
    - GF_AUTH_PROXY_HEADER_PROPERTY=username
    - GF_AUTH_PROXY_AUTO_SIGN_UP=true
    - GF_USERS_HOME_PAGE=d/obmp-home/obmp-home
    - GF_INSTALL_PLUGINS=agenty-flowcharting-panel,grafana-piechart-panel,grafana-worldmap-panel,grafana-simple-json-datasource,vonage-status-panel

    # TODO: Move comment block to documentation
    # The below is an example of how to enable OAuth
    # - GF_AUTH_GENERIC_OAUTH_NAME=MyCompany SSO
    # - GF_AUTH_GENERIC_OAUTH_ICON=signin
    # - GF_AUTH_GENERIC_OAUTH_ENABLED=true
    # - GF_AUTH_GENERIC_OAUTH_CLIENT_ID=<the client ID>
    # - GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET=<client secret>
    # - GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP=true
    # - GF_AUTH_GENERIC_OAUTH_AUTH_URL=<auth url, such as https://domain/as/authorization.oauth2>
    # - GF_AUTH_GENERIC_OAUTH_TOKEN_URL=<token url, such as https://domain/as/token.oauth2>
    # - GF_AUTH_GENERIC_OAUTH_API_URL=<userinfo url, such as https://domain/idp/userinfo.openid>
    # - GF_AUTH_GENERIC_OAUTH_SCOPES=email openid profile
    # - GF_AUTH_GENERIC_OAUTH_SEND_CLIENT_CREDENTIALS_VIA_POST=true
    # - GF_SERVER_ENFORCE_DOMAIN=true
    # - GF_SERVER_PROTOCOL=http
    # - GF_SERVER_HTTP_PORT=3000
    # - GF_SERVER_DOMAIN=<fqdn of grafana server>
    # - GF_SERVER_ROOT_URL=%(protocol)s://%(domain)s:%(http_port)s/
  # Healthy once Grafana's /api/health endpoint responds.
  healthcheck:
    test:
      - CMD-SHELL
      - "wget -q --spider http://localhost:3000/api/health || exit 1"
    start_period: 40s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
psql:
  image: openbmp/postgres:2.2.1
  container_name: obmp-psql
  restart: unless-stopped
  # Raise PSQL_MEM_LIMIT for production (see docs/production-sizing.md).
  mem_limit: ${PSQL_MEM_LIMIT:-6g}
  shm_size: 1536m
  privileged: true
  # TCP keepalive: first probe after 180s idle, then every 30s, 5 probes.
  sysctls:
    net.ipv4.tcp_keepalive_time: "180"
    net.ipv4.tcp_keepalive_intvl: "30"
    net.ipv4.tcp_keepalive_probes: "5"
  ports:
    - "5432:5432"
  # Both named volumes are bind mounts declared under the top-level volumes key.
  volumes:
    - data-volume:/var/lib/postgresql/data
    - ts-volume:/var/lib/postgresql/ts
  # Extra server arguments passed to the image entrypoint.
  # See also: alter_job max_runtime in _timescaledb_config.bgw_job
  # ( https://docs.timescale.com/latest/api#alter_job )
  command: ["-c", "max_wal_size=10GB"]
  # NOTE(review): the password is hard-coded here, while several other
  # services in this file read ${POSTGRES_PASSWORD:-openbmp}. Overriding
  # POSTGRES_PASSWORD in .env would therefore not reach this service.
  environment:
    POSTGRES_USER: openbmp
    POSTGRES_PASSWORD: openbmp
    POSTGRES_DB: openbmp
  # Healthy once pg_isready reports the openbmp database accepts connections.
  # Other services gate on this with depends_on condition service_healthy.
  healthcheck:
    test:
      - CMD-SHELL
      - "pg_isready -U openbmp -d openbmp"
    start_period: 60s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
collector:
  image: openbmp/collector:2.2.3
  container_name: obmp-collector
  restart: unless-stopped
  mem_limit: 2g
  # TCP keepalive: first probe after 180s idle, then every 30s, 5 probes.
  sysctls:
    net.ipv4.tcp_keepalive_time: "180"
    net.ipv4.tcp_keepalive_intvl: "30"
    net.ipv4.tcp_keepalive_probes: "5"
  # Port 5000 is published on the host.
  ports:
    - "5000:5000"
  volumes:
    - ${OBMP_DATA_ROOT}/config:/config
  environment:
    # Kafka's in-network listener.
    KAFKA_FQDN: "obmp-kafka:29092"
  # Healthy once a TCP connection to port 5000 can be opened.
  healthcheck:
    test:
      - CMD-SHELL
      - "bash -c 'echo > /dev/tcp/localhost/5000'"
    start_period: 40s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
psql-app:
  image: openbmp/psql-app:2.2.2
  container_name: obmp-psql-app
  restart: unless-stopped
  # mem_limit must exceed the JVM heap set through MEM below. Raise both for
  # production -- see docs/production-sizing.md.
  mem_limit: ${PSQL_APP_MEM_LIMIT:-4g}
  # Startup is gated on Postgres being healthy. The consumer connects to
  # Postgres once at startup; if it loses the cold-boot race (DB still
  # initialising -> "the database system is starting up"), ConsumerApp.main
  # throws and the consumer dies, but the container does NOT exit, so
  # restart: unless-stopped never fires. Waiting for service_healthy avoids
  # the race.
  depends_on:
    kafka:
      condition: service_started
    psql:
      condition: service_healthy
  # No healthcheck: the consumer exposes no health port. restart only helps
  # when the container itself exits, which is why the startup race above is
  # prevented rather than recovered from.
  # TCP keepalive: first probe after 180s idle, then every 30s, 5 probes.
  sysctls:
    net.ipv4.tcp_keepalive_time: "180"
    net.ipv4.tcp_keepalive_intvl: "30"
    net.ipv4.tcp_keepalive_probes: "5"
  ports:
    - "9005:9005"
  volumes:
    - ${OBMP_DATA_ROOT}/config:/config
  environment:
    # JVM memory in GB: at least 2, ideally 4.
    - MEM=3
    - KAFKA_FQDN=obmp-kafka:29092

    # URL to retrieve JSON-encoded RPKI data from, plus optional credentials.
    - RPKI_URL=https://rpki.cloudflare.com/rpki.json
    - RPKI_PASS=None
    - RPKI_USER=None

    # Feature toggles: 1 enables, 0 disables.
    # RPKI sync
    - ENABLE_RPKI=1
    # IRR sync
    - ENABLE_IRR=1
    # DBIP import
    - ENABLE_DBIP=1

    # Default Postgres window to select when building summary tables. For
    # deployments that absorb large bursts increase the value, e.g. 60 minute.
    - POSTGRES_REPORT_WINDOW='8 minute'

    # Postgres connection settings.
    - POSTGRES_PASSWORD=openbmp
    - POSTGRES_USER=openbmp
    - POSTGRES_DB=openbmp
    - POSTGRES_HOST=obmp-psql
    - POSTGRES_PORT=5432

    # Per-table POSTGRES_DROP_<table> intervals.
    - POSTGRES_DROP_peer_event_log='1 year'
    - POSTGRES_DROP_stat_reports='4 weeks'
    - POSTGRES_DROP_ip_rib_log='4 weeks'
    - POSTGRES_DROP_alerts='4 weeks'
    - POSTGRES_DROP_ls_nodes_log='4 months'
    - POSTGRES_DROP_ls_links_log='4 months'
    - POSTGRES_DROP_ls_prefixes_log='4 months'
    - POSTGRES_DROP_stats_chg_byprefix='4 weeks'
    - POSTGRES_DROP_stats_chg_byasn='4 weeks'
    - POSTGRES_DROP_stats_chg_bypeer='4 weeks'
    - POSTGRES_DROP_stats_ip_origins='4 weeks'
    - POSTGRES_DROP_stats_peer_rib='4 weeks'
    - POSTGRES_DROP_stats_peer_update_counts='4 weeks'
|
|
|
|
# Consumer-only psql-app replica, used to scale ingestion out horizontally.
# It is gated behind the "scale-out" profile; start it on demand (the host
# needs spare CPU and RAM for it):
#   docker compose --profile scale-out up -d --scale psql-app-consumer=2
#
# /config is the primary's config directory, mounted read-only, so the
# replica reuses obmp-psql.yml. That file's fixed group.id
# "obmp-psql-consumer" makes Kafka rebalance partitions across the primary
# and every replica.
#
# The command starts ONLY the consumer jar -- no cron, no RPKI/IRR/DBIP, no
# initdb. A replica therefore does NOT duplicate the primary's
# DB-maintenance jobs (update_global_ip_rib, update_chg_stats, retention,
# ...), which config_cron wires up unconditionally in /usr/sbin/run.
#
# Every replica brings its own consumer AND writer threads, so it adds real
# write throughput (the primary's writer_max_threads_per_type is 1).
psql-app-consumer:
  image: openbmp/psql-app:2.2.2
  # No container_name, so the service can be scaled to several replicas.
  profiles:
    - scale-out
  restart: unless-stopped
  # Container limit; the JVM heap cap is -Xmx3G in the command below.
  mem_limit: ${PSQL_APP_CONSUMER_MEM_LIMIT:-4g}
  # Same startup gating as psql-app: wait for Postgres to report healthy.
  depends_on:
    kafka:
      condition: service_started
    psql:
      condition: service_healthy
  volumes:
    - ${OBMP_DATA_ROOT}/config:/config:ro
  # The folded scalar joins the lines below with single spaces into one
  # shell command line for bash -c.
  command:
    - bash
    - "-c"
    - >-
      cd /var/log && exec java
      -Xmx3G -Xms128m
      -XX:+UseG1GC -XX:+UnlockExperimentalVMOptions
      -XX:InitiatingHeapOccupancyPercent=30
      -XX:G1MixedGCLiveThresholdPercent=30
      -XX:MaxGCPauseMillis=200
      -XX:ParallelGCThreads=20 -XX:ConcGCThreads=5
      -XX:+ExitOnOutOfMemoryError
      -Duser.timezone=UTC
      -jar /usr/local/openbmp/obmp-psql-consumer.jar
      -cf /config/obmp-psql.yml
|
|
|
|
exabgp:
  build:
    context: ./exabgp
    dockerfile: Dockerfile
  container_name: obmp-exabgp
  profiles:
    - test
  restart: unless-stopped
  # The full-table feature generates up to 900K route objects in memory and
  # was OOM-killed at 512m. Raise EXABGP_MEM_LIMIT in .env for larger tables.
  mem_limit: ${EXABGP_MEM_LIMIT:-6g}
  # Host networking so ExaBGP can reach CML routers directly on port 179.
  # No ports: block is needed -- host networking exposes the ports directly.
  network_mode: host
  volumes:
    # Scenarios are mounted so they can be edited/added without rebuilding.
    - ./exabgp/scenarios:/exabgp/scenarios
  environment:
    # IP on the host that CML routers reach (BGP peering source).
    EXABGP_LOCAL_IP: "${HOST_IP:-10.40.40.202}"
    # ExaBGP presents as AS 65100 (eBGP peer to the lab route reflectors).
    EXABGP_LOCAL_AS: "${EXABGP_LOCAL_AS:-65100}"
    # Peer list: ";"-separated entries of "ip:peer_as:description".
    # The default covers both labs: AS 65020 (ESXi) and AS 65021 (Proxmox).
    EXABGP_PEERS: "${EXABGP_PEERS:-10.100.0.100:65020:CML-R9K-CORE-01;10.100.0.200:65020:CML-R9K-CORE-02;10.100.1.100:65021:PROX-R9K-CORE-01;10.100.1.200:65021:PROX-R9K-CORE-02}"
    # Flask API port (also on the host network).
    EXABGP_API_PORT: "${EXABGP_API_PORT:-5050}"
  # Healthy once a TCP connection to port 5050 can be opened.
  healthcheck:
    test:
      - CMD-SHELL
      - "bash -c 'echo > /dev/tcp/localhost/5050'"
    start_period: 40s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
exabgp-ui:
  build:
    context: ./exabgp-ui
    dockerfile: Dockerfile
  container_name: obmp-exabgp-ui
  profiles:
    - test
  restart: unless-stopped
  mem_limit: 256m
  # Host networking so NGINX can proxy /api to the ExaBGP Flask API on
  # localhost:5050. The UI itself serves on port 5001 (set in nginx.conf).
  network_mode: host
  # Healthy once the UI answers on port 5001.
  healthcheck:
    test:
      - CMD-SHELL
      - "wget -q --spider http://localhost:5001/ || exit 1"
    start_period: 30s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
# --- Phase 4: gNMI Streaming Telemetry ---
|
|
|
|
influxdb:
  image: influxdb:2.7
  container_name: obmp-influxdb
  profiles:
    - test
  restart: unless-stopped
  mem_limit: 2g
  ports:
    - "8086:8086"
  volumes:
    - ${OBMP_DATA_ROOT}/influxdb:/var/lib/influxdb2
  # DOCKER_INFLUXDB_INIT_* settings for the image's setup mode: initial user,
  # org, bucket, admin token and retention.
  # The admin token must match INFLUXDB_TOKEN on the telegraf service.
  environment:
    DOCKER_INFLUXDB_INIT_MODE: setup
    DOCKER_INFLUXDB_INIT_USERNAME: openbmp
    DOCKER_INFLUXDB_INIT_PASSWORD: openbmp123
    DOCKER_INFLUXDB_INIT_ORG: openbmp
    DOCKER_INFLUXDB_INIT_BUCKET: telemetry
    DOCKER_INFLUXDB_INIT_ADMIN_TOKEN: openbmp-telemetry-token
    DOCKER_INFLUXDB_INIT_RETENTION: 30d
  # Healthy once the /health endpoint responds successfully.
  healthcheck:
    test:
      - CMD-SHELL
      - "curl -fsS http://localhost:8086/health || exit 1"
    start_period: 40s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
telegraf:
  build:
    context: ./telegraf
    dockerfile: Dockerfile
  container_name: obmp-telegraf
  profiles:
    - test
  restart: unless-stopped
  mem_limit: 512m
  network_mode: host
  depends_on:
    - influxdb
  # Telegraf runs as root with the image entrypoint overridden (the stock
  # entrypoint would drop back to the telegraf user), so [[inputs.docker]]
  # can read the Docker daemon socket for container resource metrics.
  user: root
  entrypoint:
    - telegraf
  volumes:
    - /var/run/docker.sock:/var/run/docker.sock
    # Host root, read-only: lets [[inputs.disk]] report the real host
    # filesystems (Postgres/Kafka/InfluxDB data) instead of the container's.
    - /:/hostfs:ro
  environment:
    # Same value as the influxdb service's DOCKER_INFLUXDB_INIT_ADMIN_TOKEN.
    - INFLUXDB_TOKEN=openbmp-telemetry-token
    # Point gopsutil-based inputs (disk) at the /hostfs mount above.
    - HOST_MOUNT_PREFIX=/hostfs
    - HOST_PROC=/hostfs/proc
    - HOST_SYS=/hostfs/sys
    - HOST_ETC=/hostfs/etc
    # PostgreSQL credentials for [[inputs.postgresql_extensible]] (DB size).
    - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-openbmp}
    # gNMI fleet: a quoted, comma-separated host:port list. The default is
    # the two ESXi CORE routers; extend via GNMI_ADDRESSES in .env.
    - 'GNMI_ADDRESSES=${GNMI_ADDRESSES:-"10.100.0.100:57400", "10.100.0.200:57400"}'
    - GNMI_USERNAME=${GNMI_USERNAME:-webui}
    - GNMI_PASSWORD=${GNMI_PASSWORD:-cisco}
|
|
|
|
# --- Phase 4: Traffic Generator ---
|
|
|
|
traffic-gen:
  build:
    context: ./traffic-gen
    dockerfile: Dockerfile
  container_name: obmp-traffic-gen
  profiles:
    - test
  restart: unless-stopped
  mem_limit: 1g
  network_mode: host
  cap_add:
    - NET_RAW
    - NET_ADMIN
  environment:
    # Sender mode, listening on port 5051.
    TRAFFIC_GEN_PORT: "5051"
    TRAFFIC_GEN_MODE: sender
    # Address and port of the traffic-gen-responder service on
    # traffic-test-net.
    RESPONDER_URL: "http://172.30.0.10:5053"
  # Healthy once a TCP connection to port 5051 can be opened.
  healthcheck:
    test:
      - CMD-SHELL
      - "bash -c 'echo > /dev/tcp/localhost/5051'"
    start_period: 30s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
traffic-gen-ui:
  build:
    context: ./traffic-gen-ui
    dockerfile: Dockerfile
  container_name: obmp-traffic-gen-ui
  profiles:
    - test
  restart: unless-stopped
  mem_limit: 256m
  # Host networking; the UI serves on port 5002 (set in nginx.conf).
  network_mode: host
  # Healthy once the UI answers on port 5002.
  healthcheck:
    test:
      - CMD-SHELL
      - "wget -q --spider http://localhost:5002/ || exit 1"
    start_period: 30s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
traffic-gen-responder:
  # Built from the same context as traffic-gen, run in responder mode.
  build:
    context: ./traffic-gen
    dockerfile: Dockerfile
  container_name: obmp-traffic-gen-responder
  profiles:
    - test
  restart: unless-stopped
  mem_limit: 1g
  cap_add:
    - NET_RAW
    - NET_ADMIN
  # Fixed address on the dedicated bridge network; traffic-gen's
  # RESPONDER_URL points at this address.
  networks:
    traffic-test-net:
      ipv4_address: 172.30.0.10
  ports:
    - "5053:5053"
  environment:
    TRAFFIC_GEN_PORT: "5053"
    TRAFFIC_GEN_MODE: responder
    TRAFFIC_GEN_RESPONDER_MODE: echo
    TRAFFIC_GEN_INTERFACE: eth0
  # Healthy once a TCP connection to port 5053 can be opened.
  healthcheck:
    test:
      - CMD-SHELL
      - "bash -c 'echo > /dev/tcp/localhost/5053'"
    start_period: 30s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
# GoBGP: pulls the full real Internet routing table (roadmap E1) from the
# AS57355 lab route server and BMP-exports it to the OpenBMP collector, where
# it lands in PostgreSQL ip_rib as a monitored peer. Receive-only, local
# AS 65001. The config and the MRT fallback script live in ./gobgp (see
# gobgp/README.md).
gobgp:
  image: jauderho/gobgp:v4.5.0
  container_name: obmp-gobgp
  restart: unless-stopped
  # Host networking: the daemon uses the host's real IPv4 + IPv6 stack, so
  # both the v4 and v6 eBGP sessions to AS57355 are sourced from the host's
  # public addresses (no Docker IPv6/NAT plumbing). BMP still reaches the
  # collector through its published port, 10.40.40.202:5000.
  network_mode: host
  depends_on:
    - collector
  # gobgpd reads /config/gobgpd.conf. The same mount also carries
  # mrt-refresh.sh and the cached MRT dumps that script downloads.
  volumes:
    - ./gobgp:/config
  command:
    - gobgpd
    - "-f"
    - /config/gobgpd.conf
    - "-t"
    - toml
|
|
|
|
# GoBGP: modular EVPN test-route injector (roadmap E5). It originates
# synthetic BGP EVPN routes and BMP-exports them so the EVPN pipeline can be
# exercised. Profile-gated, so it is NOT part of the normal stack. Start it
# only for testing:
#   docker compose --profile evpn-test up -d gobgp-evpn
# then:
#   bash gobgp-evpn/inject-evpn.sh
gobgp-evpn:
  image: jauderho/gobgp:v4.5.0
  container_name: obmp-gobgp-evpn
  profiles:
    - evpn-test
  restart: unless-stopped
  depends_on:
    - collector
  # gobgpd reads its TOML config from this mount.
  volumes:
    - ./gobgp-evpn:/config
  command:
    - gobgpd
    - "-f"
    - /config/gobgpd.conf
    - "-t"
    - toml
|
|
|
|
# EVPN consumer: subscribes to the openbmp.parsed.evpn Kafka topic (which the
# collector already populates) and writes BGP EVPN routes into evpn_rib; the
# stock psql-app does not handle EVPN. Profile-gated alongside the EVPN test
# injector:
#   docker compose --profile evpn-test up -d
evpn-consumer:
  build:
    context: ./obmp-evpn-consumer
  container_name: obmp-evpn-consumer
  profiles:
    - evpn-test
  restart: unless-stopped
  # Wait for the psql healthcheck (pg_isready) instead of mere container
  # start, so the first connection cannot race a cold-booting database --
  # the same gating psql-app uses.
  depends_on:
    kafka:
      condition: service_started
    psql:
      condition: service_healthy
  environment:
    - KAFKA_BROKER=obmp-kafka:29092
    - EVPN_TOPIC=openbmp.parsed.evpn
    - PG_DSN=host=obmp-psql port=5432 dbname=openbmp user=openbmp password=${POSTGRES_PASSWORD:-openbmp}
|
|
|
|
# Per-router BGP policy-diff collector. Pulls post-policy accepted/advertised
# prefix counts and route-policy bindings from the IOS-XR routers over CLI +
# NETCONF (BMP on XRv9000 24.3.1 only carries pre-policy Adj-RIB-In). Feeds
# the Policy Diff dashboard. Host networking: it must reach the lab
# management network (10.100.0.x) and the published Postgres port.
rib-poller:
  build:
    context: ./obmp-rib-poller
  container_name: obmp-rib-poller
  restart: unless-stopped
  network_mode: host
  depends_on:
    - psql
  environment:
    # Postgres is reached through its host-published port. The host address
    # comes from HOST_IP -- the same variable and default the kafka and
    # exabgp services use -- instead of a second hard-coded copy.
    - PG_DSN=host=${HOST_IP:-10.40.40.202} port=5432 dbname=openbmp user=openbmp password=${POSTGRES_PASSWORD:-openbmp}
    # Poll interval; presumably seconds -- confirm against the poller.
    - POLL_INTERVAL=900
    # Router login. Defaults are unchanged; override in .env to keep real
    # credentials out of this file.
    - ROUTER_USER=${ROUTER_USER:-webui}
    - ROUTER_PASS=${ROUTER_PASS:-cisco}
|
|
|
|
# Samples Kafka consumer-group lag into PostgreSQL every 30s for the Kafka
# Lag dashboard -- visibility into the ingestion path under load (e.g. a
# full-table BGP convergence storm) and a sanity check when scaling psql-app.
kafka-lag-monitor:
  build:
    context: ./kafka-lag-monitor
  container_name: obmp-kafka-lag-monitor
  restart: unless-stopped
  # Wait for the psql healthcheck (pg_isready) instead of mere container
  # start, so the first connection cannot race a cold-booting database --
  # the same gating psql-app uses.
  depends_on:
    kafka:
      condition: service_started
    psql:
      condition: service_healthy
  environment:
    - KAFKA_BROKER=obmp-kafka:29092
    - PG_DSN=host=obmp-psql port=5432 dbname=openbmp user=openbmp password=${POSTGRES_PASSWORD:-openbmp}
    - LAG_POLL_INTERVAL=30
    # Consumer groups whose lag is sampled.
    - CONSUMER_GROUPS=obmp-psql-consumer,evpn-psql
|
|
|
|
# Decoupled fast-path BGP churn monitor. Reads openbmp.parsed.unicast_prefix
# with its own consumer group and only counts announcements/withdrawals, so
# it stays real-time during a churn storm even while psql-app lags: counting
# is far cheaper than the relational RIB write. Featherweight.
churn-monitor:
  build:
    context: ./obmp-churn-monitor
  container_name: obmp-churn-monitor
  restart: unless-stopped
  # Wait for the psql healthcheck (pg_isready) instead of mere container
  # start, so the first connection cannot race a cold-booting database --
  # the same gating psql-app uses.
  depends_on:
    kafka:
      condition: service_started
    psql:
      condition: service_healthy
  environment:
    - KAFKA_BROKER=obmp-kafka:29092
    - PG_DSN=host=obmp-psql port=5432 dbname=openbmp user=openbmp password=${POSTGRES_PASSWORD:-openbmp}
    - CHURN_TOPIC=openbmp.parsed.unicast_prefix
    - FLUSH_INTERVAL=10
|
|
|
|
whois:
  image: openbmp/whois:2.2.0
  container_name: obmp-whois
  restart: unless-stopped
  mem_limit: 1g
  # TCP keepalive: first probe after 180s idle, then every 30s, 5 probes.
  sysctls:
    net.ipv4.tcp_keepalive_time: "180"
    net.ipv4.tcp_keepalive_intvl: "30"
    net.ipv4.tcp_keepalive_probes: "5"
  # Container port 43 is published on host port 4300.
  ports:
    - "4300:43"
  # volumes:
  #   - ${OBMP_DATA_ROOT}/config:/config
  # Postgres connection settings.
  environment:
    POSTGRES_USER: openbmp
    POSTGRES_PASSWORD: openbmp
    POSTGRES_DB: openbmp
    POSTGRES_HOST: obmp-psql
    POSTGRES_PORT: "5432"
  # Healthy once a TCP connection to port 43 can be opened.
  healthcheck:
    test:
      - CMD-SHELL
      - "bash -c 'echo > /dev/tcp/localhost/43'"
    start_period: 30s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
authelia:
  image: authelia/authelia:4.38
  container_name: obmp-authelia
  # Only started with the "auth" profile.
  profiles:
    - auth
  restart: unless-stopped
  mem_limit: 256m
  ports:
    - "9091:9091"
  # The container's /config is kept on the host under OBMP_DATA_ROOT.
  volumes:
    - ${OBMP_DATA_ROOT}/authelia:/config
  environment:
    TZ: UTC
|
|
|
|
portal:
  image: nginx:alpine
  container_name: obmp-portal
  # Only started with the "auth" profile.
  profiles:
    - auth
  restart: unless-stopped
  mem_limit: 128m
  # Container port 80 is published on host port 8080.
  ports:
    - "8080:80"
  # ./portal is mounted read-only as the NGINX html directory.
  volumes:
    - ./portal:/usr/share/nginx/html:ro
  # Healthy once NGINX answers on port 80.
  healthcheck:
    test:
      - CMD-SHELL
      - "wget -q --spider http://localhost:80/ || exit 1"
    start_period: 20s
    interval: 30s
    timeout: 10s
    retries: 3
|
|
|
|
# Dedicated bridge network for the traffic generator test path.
# traffic-gen-responder is pinned to 172.30.0.10 inside this subnet.
networks:
  traffic-test-net:
    driver: bridge
    ipam:
      config:
        - subnet: "172.30.0.0/24"
|