---
# Docker Compose project "obmp": ZooKeeper + Kafka, PostgreSQL, the OpenBMP
# collector and psql-app consumer, Grafana, GoBGP and whois, plus optional
# services behind the "test" and "auth" profiles.
#
# Variables are interpolated from the shell environment / .env file:
#   OBMP_DATA_ROOT  base directory for all bind-mounted data; it has no
#                   default in this file, so set it (e.g. in .env).
#   Everything written as ${NAME:-value} falls back to "value" when unset.
name: obmp

volumes:
  # Named volumes that bind-mount host directories under OBMP_DATA_ROOT.
  data-volume:
    driver_opts:
      type: none
      device: ${OBMP_DATA_ROOT}/postgres/data
      o: bind
  ts-volume:
    driver_opts:
      type: none
      device: ${OBMP_DATA_ROOT}/postgres/ts
      o: bind

services:
  zookeeper:
    restart: unless-stopped
    container_name: obmp-zookeeper
    healthcheck:
      test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/2181'"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    image: confluentinc/cp-zookeeper:7.1.1
    mem_limit: 1g
    volumes:
      - ${OBMP_DATA_ROOT}/zk-data:/var/lib/zookeeper/data
      - ${OBMP_DATA_ROOT}/zk-log:/var/lib/zookeeper/log
    environment:
      ZOOKEEPER_CLIENT_PORT: 2181
      ZOOKEEPER_TICK_TIME: 2000

  kafka:
    restart: unless-stopped
    container_name: obmp-kafka
    healthcheck:
      test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/9092'"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 90s
    image: confluentinc/cp-kafka:7.1.1
    # Raise KAFKA_MEM_LIMIT for production (full-table initial dumps are bursty).
    mem_limit: ${KAFKA_MEM_LIMIT:-4g}
    # Change the mount point to where you want to store Kafka data.
    # Normally 80GB or more.
    volumes:
      - ${OBMP_DATA_ROOT}/kafka-data:/var/lib/kafka/data
    depends_on:
      - zookeeper
    ports:
      - "9092:9092"
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ZOOKEEPER_CONNECT: obmp-zookeeper:2181
      # Change/add listeners based on your FQDN that the host and other
      # containers can access. You can use an IP address as well. By default,
      # Kafka can only be accessed from within the compose/containers using
      # port 29092. Outside access can be enabled, but you should use an FQDN
      # listener.
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://obmp-kafka:29092,PLAINTEXT_HOST://${HOST_IP:-10.40.40.202}:9092
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_NUM_PARTITIONS: 8
      KAFKA_LOG_RETENTION_MINUTES: 720
      KAFKA_LOG_ROLL_MS: 3600000
      KAFKA_LOG_SEGMENT_BYTES: 1073741824
      KAFKA_MESSAGE_MAX_BYTES: 100000000
      KAFKA_LOG_CLEANER_THREADS: 2

  # TODO: Move comment to documentation
  # You can use SWAG and Let's Encrypt to front the Grafana HTTP port 3000
  # instance. Below is an example.
  # Create the SWAG directory using "sudo mkdir -m 777 ${OBMP_DATA_ROOT}/swag"
  # swag:
  #   image: linuxserver/letsencrypt:version-1.11.0
  #   container_name: obmp-swag
  #   cap_add:
  #     - NET_ADMIN
  #   environment:
  #     - PUID=1000
  #     - PGID=1000
  #     - TZ=UTC
  #     - URL=
  #     - SUBDOMAINS=,
  #     - VALIDATION=http
  #     - EMAIL=
  #     - ONLY_SUBDOMAINS=true
  #     - STAGING=false
  #   volumes:
  #     - ${OBMP_DATA_ROOT}/swag:/config
  #   ports:
  #     - 443:443
  #     - 80:80 #optional
  #   restart: unless-stopped

  grafana:
    restart: unless-stopped
    container_name: obmp-grafana
    healthcheck:
      test: ["CMD-SHELL", "wget -q --spider http://localhost:3000/api/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    image: grafana/grafana:9.1.7
    mem_limit: 1g
    # NOTE(review): auth proxy is enabled below and port 3000 is also published
    # on the host, so any client that reaches this port directly can supply the
    # Remote-User header itself. Confirm access is restricted to the reverse
    # proxy (firewall, or GF_AUTH_PROXY_WHITELIST).
    ports:
      - "3000:3000"
    volumes:
      - ${OBMP_DATA_ROOT}/grafana:/var/lib/grafana
      - ${OBMP_DATA_ROOT}/grafana/provisioning:/etc/grafana/provisioning/
    environment:
      # Override GF_SECURITY_ADMIN_PASSWORD in .env; the default is unchanged.
      - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-openbmp}
      - GF_AUTH_ANONYMOUS_ENABLED=false
      - GF_SERVER_ROOT_URL=https://${OBMP_DOMAIN:-bmp.apodacalab.com}/grafana/
      - GF_SERVER_SERVE_FROM_SUB_PATH=true
      - GF_AUTH_PROXY_ENABLED=true
      - GF_AUTH_PROXY_HEADER_NAME=Remote-User
      - GF_AUTH_PROXY_HEADER_PROPERTY=username
      - GF_AUTH_PROXY_AUTO_SIGN_UP=true
      - GF_USERS_HOME_PAGE=d/obmp-home/obmp-home
      - GF_INSTALL_PLUGINS=agenty-flowcharting-panel,grafana-piechart-panel,grafana-worldmap-panel,grafana-simple-json-datasource,vonage-status-panel
      # TODO: Move comment block to documentation
      # The below is an example of how to enable OAuth
      #- GF_AUTH_GENERIC_OAUTH_NAME=MyCompany SSO
      #- GF_AUTH_GENERIC_OAUTH_ICON=signin
      #- GF_AUTH_GENERIC_OAUTH_ENABLED=true
      #- GF_AUTH_GENERIC_OAUTH_CLIENT_ID=
      #- GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET=
      #- GF_AUTH_GENERIC_OAUTH_ALLOW_SIGN_UP=true
      #- GF_AUTH_GENERIC_OAUTH_AUTH_URL=
      #- GF_AUTH_GENERIC_OAUTH_TOKEN_URL=
      #- GF_SERVER_ROOT_URL=%(protocol)s://%(domain)s:%(http_port)s/

  psql:
    restart: unless-stopped
    container_name: obmp-psql
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U openbmp -d openbmp"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s
    image: openbmp/postgres:2.2.1
    # Raise PSQL_MEM_LIMIT for production (see docs/production-sizing.md).
    mem_limit: ${PSQL_MEM_LIMIT:-6g}
    privileged: true
    shm_size: 1536m
    sysctls:
      - net.ipv4.tcp_keepalive_intvl=30
      - net.ipv4.tcp_keepalive_probes=5
      - net.ipv4.tcp_keepalive_time=180
    ports:
      - "5432:5432"
    volumes:
      - data-volume:/var/lib/postgresql/data
      - ts-volume:/var/lib/postgresql/ts
    # alter_job max_runtime in _timescaledb_config.bgw_job ( https://docs.timescale.com/latest/api#alter_job )
    command: >
      -c max_wal_size=10GB
    environment:
      # POSTGRES_PASSWORD is shared with psql-app and whois below, so an
      # override in .env applies to all three.
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-openbmp}
      - POSTGRES_USER=openbmp
      - POSTGRES_DB=openbmp

  collector:
    restart: unless-stopped
    container_name: obmp-collector
    healthcheck:
      test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/5000'"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    image: openbmp/collector:2.2.3
    mem_limit: 2g
    sysctls:
      - net.ipv4.tcp_keepalive_intvl=30
      - net.ipv4.tcp_keepalive_probes=5
      - net.ipv4.tcp_keepalive_time=180
    ports:
      - "5000:5000"
    volumes:
      - ${OBMP_DATA_ROOT}/config:/config
    environment:
      - KAFKA_FQDN=obmp-kafka:29092

  psql-app:
    restart: unless-stopped
    container_name: obmp-psql-app
    # No healthcheck — the consumer exposes no health port; Docker's
    # restart-on-exit covers process death.
    image: openbmp/psql-app:2.2.2
    # mem_limit must exceed the MEM (JVM heap) env below. Raise both for
    # production — see docs/production-sizing.md.
    mem_limit: ${PSQL_APP_MEM_LIMIT:-4g}
    sysctls:
      - net.ipv4.tcp_keepalive_intvl=30
      - net.ipv4.tcp_keepalive_probes=5
      - net.ipv4.tcp_keepalive_time=180
    ports:
      - "9005:9005"
    volumes:
      - ${OBMP_DATA_ROOT}/config:/config
    environment:
      - MEM=3  # Set memory to at least 2GB but ideally 4GB
      - KAFKA_FQDN=obmp-kafka:29092
      - RPKI_URL=https://rpki.cloudflare.com/rpki.json  # define the URL to retrieve JSON-encoded RPKI data
      - RPKI_PASS=None
      - RPKI_USER=None
      - ENABLE_RPKI=1  # 1 enables, 0 disables RPKI sync
      - ENABLE_IRR=1  # 1 enables, 0 disables IRR sync
      - ENABLE_DBIP=1  # 1 enables, 0 disables DBIP import
      # Default Postgres window to select when building summary tables. For
      # deployments that absorb large bursts, increase the value, e.g. 60 minute.
      - POSTGRES_REPORT_WINDOW='8 minute'
      # Same variable as the psql service, so both sides stay in sync.
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-openbmp}
      - POSTGRES_USER=openbmp
      - POSTGRES_DB=openbmp
      - POSTGRES_HOST=obmp-psql
      - POSTGRES_PORT=5432
      - POSTGRES_DROP_peer_event_log='1 year'
      - POSTGRES_DROP_stat_reports='4 weeks'
      - POSTGRES_DROP_ip_rib_log='4 weeks'
      - POSTGRES_DROP_alerts='4 weeks'
      - POSTGRES_DROP_ls_nodes_log='4 months'
      - POSTGRES_DROP_ls_links_log='4 months'
      - POSTGRES_DROP_ls_prefixes_log='4 months'
      - POSTGRES_DROP_stats_chg_byprefix='4 weeks'
      - POSTGRES_DROP_stats_chg_byasn='4 weeks'
      - POSTGRES_DROP_stats_chg_bypeer='4 weeks'
      - POSTGRES_DROP_stats_ip_origins='4 weeks'
      - POSTGRES_DROP_stats_peer_rib='4 weeks'
      - POSTGRES_DROP_stats_peer_update_counts='4 weeks'

  exabgp:
    restart: unless-stopped
    container_name: obmp-exabgp
    healthcheck:
      test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/5050'"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    profiles: ["test"]
    # The full-table feature generates up to 900K route objects in memory;
    # 512m OOM-killed it. Raise EXABGP_MEM_LIMIT in .env for larger tables.
    mem_limit: ${EXABGP_MEM_LIMIT:-6g}
    build:
      context: ./exabgp
      dockerfile: Dockerfile
    # Host networking so ExaBGP can reach CML routers directly on port 179
    network_mode: host
    environment:
      # IP on the host that CML routers reach (BGP peering source)
      - EXABGP_LOCAL_IP=${HOST_IP:-10.40.40.202}
      # ExaBGP presents as AS 65100 (eBGP peer to the lab route reflectors)
      - EXABGP_LOCAL_AS=${EXABGP_LOCAL_AS:-65100}
      # Peer list — ";"-separated entries of "ip:peer_as:description".
      # Default covers both labs: AS 65020 (ESXi) and AS 65021 (Proxmox).
      - EXABGP_PEERS=${EXABGP_PEERS:-10.100.0.100:65020:CML-R9K-CORE-01;10.100.0.200:65020:CML-R9K-CORE-02;10.100.1.100:65021:PROX-R9K-CORE-01;10.100.1.200:65021:PROX-R9K-CORE-02}
      # Flask API port (also on host network)
      - EXABGP_API_PORT=${EXABGP_API_PORT:-5050}
    volumes:
      # Mount scenarios dir so you can edit/add scenarios without rebuilding
      - ./exabgp/scenarios:/exabgp/scenarios
    # No ports: block needed — network_mode: host exposes directly

  exabgp-ui:
    restart: unless-stopped
    container_name: obmp-exabgp-ui
    healthcheck:
      test: ["CMD-SHELL", "wget -q --spider http://localhost:5001/ || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    profiles: ["test"]
    mem_limit: 256m
    build:
      context: ./exabgp-ui
      dockerfile: Dockerfile
    # Host networking so NGINX can proxy /api to ExaBGP Flask on localhost:5050
    network_mode: host
    # Serves on port 5001 (host network, defined in nginx.conf)

  # --- Phase 4: gNMI Streaming Telemetry ---
  influxdb:
    restart: unless-stopped
    container_name: obmp-influxdb
    healthcheck:
      test: ["CMD-SHELL", "curl -fsS http://localhost:8086/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    profiles: ["test"]
    image: influxdb:2.7
    mem_limit: 2g
    ports:
      - "8086:8086"
    volumes:
      - ${OBMP_DATA_ROOT}/influxdb:/var/lib/influxdb2
    environment:
      - DOCKER_INFLUXDB_INIT_MODE=setup
      - DOCKER_INFLUXDB_INIT_USERNAME=openbmp
      # Override INFLUXDB_PASSWORD / INFLUXDB_TOKEN in .env; defaults unchanged.
      - DOCKER_INFLUXDB_INIT_PASSWORD=${INFLUXDB_PASSWORD:-openbmp123}
      - DOCKER_INFLUXDB_INIT_ORG=openbmp
      - DOCKER_INFLUXDB_INIT_BUCKET=telemetry
      # INFLUXDB_TOKEN is shared with the telegraf service below.
      - DOCKER_INFLUXDB_INIT_ADMIN_TOKEN=${INFLUXDB_TOKEN:-openbmp-telemetry-token}
      - DOCKER_INFLUXDB_INIT_RETENTION=30d

  telegraf:
    restart: unless-stopped
    container_name: obmp-telegraf
    profiles: ["test"]
    mem_limit: 512m
    build:
      context: ./telegraf
      dockerfile: Dockerfile
    network_mode: host
    # Run telegraf as root and override the image entrypoint (which otherwise
    # drops back to the telegraf user) so [[inputs.docker]] can read the
    # Docker daemon socket for container resource metrics.
    user: root
    entrypoint: ["telegraf"]
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
    depends_on:
      - influxdb
    environment:
      # Same variable as DOCKER_INFLUXDB_INIT_ADMIN_TOKEN on the influxdb service.
      - INFLUXDB_TOKEN=${INFLUXDB_TOKEN:-openbmp-telemetry-token}
      # gNMI fleet — quoted, comma-separated host:port list. Default = the two
      # ESXi CORE routers; extend via GNMI_ADDRESSES in .env for more routers.
      - 'GNMI_ADDRESSES=${GNMI_ADDRESSES:-"10.100.0.100:57400", "10.100.0.200:57400"}'
      - GNMI_USERNAME=${GNMI_USERNAME:-webui}
      - GNMI_PASSWORD=${GNMI_PASSWORD:-cisco}

  # --- Phase 4: Traffic Generator ---
  traffic-gen:
    restart: unless-stopped
    container_name: obmp-traffic-gen
    healthcheck:
      test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/5051'"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    profiles: ["test"]
    mem_limit: 1g
    build:
      context: ./traffic-gen
      dockerfile: Dockerfile
    network_mode: host
    cap_add:
      - NET_RAW
      - NET_ADMIN
    environment:
      - TRAFFIC_GEN_PORT=5051
      - TRAFFIC_GEN_MODE=sender
      # Address of the traffic-gen-responder container on traffic-test-net.
      - RESPONDER_URL=http://172.30.0.10:5053

  traffic-gen-ui:
    restart: unless-stopped
    container_name: obmp-traffic-gen-ui
    healthcheck:
      test: ["CMD-SHELL", "wget -q --spider http://localhost:5002/ || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    profiles: ["test"]
    mem_limit: 256m
    build:
      context: ./traffic-gen-ui
      dockerfile: Dockerfile
    network_mode: host
    # Serves on port 5002 (host network, defined in nginx.conf)

  traffic-gen-responder:
    restart: unless-stopped
    container_name: obmp-traffic-gen-responder
    healthcheck:
      test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/5053'"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    profiles: ["test"]
    mem_limit: 1g
    build:
      context: ./traffic-gen
      dockerfile: Dockerfile
    cap_add:
      - NET_RAW
      - NET_ADMIN
    environment:
      - TRAFFIC_GEN_PORT=5053
      - TRAFFIC_GEN_MODE=responder
      - TRAFFIC_GEN_RESPONDER_MODE=echo
      - TRAFFIC_GEN_INTERFACE=eth0
    networks:
      traffic-test-net:
        # Fixed address; must match RESPONDER_URL on the traffic-gen service.
        ipv4_address: 172.30.0.10
    ports:
      - "5053:5053"

  # GoBGP -- pulls the full real Internet routing table (roadmap E1) from the
  # AS57355 lab route server and BMP-exports it to the OpenBMP collector, where
  # it lands in PostgreSQL ip_rib as a monitored peer. Config + MRT fallback
  # script live in ./gobgp (see gobgp/README.md). Receive-only, local AS 65001.
  gobgp:
    restart: unless-stopped
    container_name: obmp-gobgp
    image: jauderho/gobgp:v4.5.0
    depends_on:
      - collector
    # gobgpd reads /config/gobgpd.conf; the same mount carries mrt-refresh.sh
    # and the cached MRT dumps it downloads.
    volumes:
      - ./gobgp:/config
    command: ["gobgpd", "-f", "/config/gobgpd.conf", "-t", "toml"]

  whois:
    restart: unless-stopped
    container_name: obmp-whois
    healthcheck:
      test: ["CMD-SHELL", "bash -c 'echo > /dev/tcp/localhost/43'"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 30s
    image: openbmp/whois:2.2.0
    mem_limit: 1g
    sysctls:
      - net.ipv4.tcp_keepalive_intvl=30
      - net.ipv4.tcp_keepalive_probes=5
      - net.ipv4.tcp_keepalive_time=180
    ports:
      - "4300:43"
    # volumes:
    #   - ${OBMP_DATA_ROOT}/config:/config
    environment:
      # Same variable as the psql service, so both sides stay in sync.
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-openbmp}
      - POSTGRES_USER=openbmp
      - POSTGRES_DB=openbmp
      - POSTGRES_HOST=obmp-psql
      - POSTGRES_PORT=5432

  authelia:
    restart: unless-stopped
    container_name: obmp-authelia
    profiles: ["auth"]
    mem_limit: 256m
    image: authelia/authelia:4.38
    ports:
      - "9091:9091"
    volumes:
      - ${OBMP_DATA_ROOT}/authelia:/config
    environment:
      - TZ=UTC

  portal:
    restart: unless-stopped
    container_name: obmp-portal
    healthcheck:
      test: ["CMD-SHELL", "wget -q --spider http://localhost:80/ || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 20s
    profiles: ["auth"]
    mem_limit: 128m
    image: nginx:alpine
    ports:
      - "8080:80"
    volumes:
      - ./portal:/usr/share/nginx/html:ro

networks:
  # Bridge network giving traffic-gen-responder its fixed 172.30.0.10 address.
  traffic-test-net:
    driver: bridge
    ipam:
      config:
        - subnet: 172.30.0.0/24