From 4e9bd7cc5a8af42687c69694de67cd87a2d3118a Mon Sep 17 00:00:00 2001 From: sam Date: Mon, 18 May 2026 20:04:37 -0700 Subject: [PATCH] Add container memory limits to all services Sets mem_limit on every service to cap the OOM/swap-exhaustion risk (the lab host had only 5 MiB swap free). The three heavy services (psql, kafka, psql-app) read their limits from .env so production can raise them; the rest use lab-appropriate fixed values. The limits total ~25 GB with every profile enabled, leaving headroom on the 31 GB lab host. Co-Authored-By: Claude Opus 4.7 (1M context) --- .env.example | 6 ++++++ docker-compose.yml | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/.env.example b/.env.example index ce01e88..d10d5f3 100644 --- a/.env.example +++ b/.env.example @@ -19,6 +19,12 @@ OBMP_DOMAIN=changeme.example.com # cookie is valid across subpaths/subdomains. OBMP_COOKIE_DOMAIN=example.com +# Container memory limits. Lab defaults shown; raise for production +# (see docs/production-sizing.md). PSQL_APP_MEM_LIMIT must exceed psql-app's MEM (JVM heap). +PSQL_MEM_LIMIT=6g +PSQL_APP_MEM_LIMIT=4g +KAFKA_MEM_LIMIT=4g + # --------------------------------------------------------------------------- # ExaBGP route injector (test profile) # --------------------------------------------------------------------------- diff --git a/docker-compose.yml b/docker-compose.yml index b9d9c17..790f2a6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,6 +18,7 @@ services: restart: unless-stopped container_name: obmp-zookeeper image: confluentinc/cp-zookeeper:7.1.1 + mem_limit: 1g volumes: - ${OBMP_DATA_ROOT}/zk-data:/var/lib/zookeeper/data - ${OBMP_DATA_ROOT}/zk-log:/var/lib/zookeeper/log @@ -29,6 +30,8 @@ services: restart: unless-stopped container_name: obmp-kafka image: confluentinc/cp-kafka:7.1.1 + # Raise KAFKA_MEM_LIMIT for production (full-table initial dumps are bursty). + mem_limit: ${KAFKA_MEM_LIMIT:-4g} # Change the mount point to where you want to store Kafka data. 
# Normally 80GB or more @@ -85,6 +88,7 @@ services: restart: unless-stopped container_name: obmp-grafana image: grafana/grafana:9.1.7 + mem_limit: 1g ports: - "3000:3000" volumes: @@ -125,6 +129,8 @@ services: restart: unless-stopped container_name: obmp-psql image: openbmp/postgres:2.2.1 + # Raise PSQL_MEM_LIMIT for production (see docs/production-sizing.md). + mem_limit: ${PSQL_MEM_LIMIT:-6g} privileged: true shm_size: 1536m sysctls: @@ -148,6 +154,7 @@ services: restart: unless-stopped container_name: obmp-collector image: openbmp/collector:2.2.3 + mem_limit: 2g sysctls: - net.ipv4.tcp_keepalive_intvl=30 - net.ipv4.tcp_keepalive_probes=5 @@ -163,6 +170,9 @@ services: restart: unless-stopped container_name: obmp-psql-app image: openbmp/psql-app:2.2.2 + # mem_limit must exceed the MEM (JVM heap) env below. Raise both for + # production — see docs/production-sizing.md. + mem_limit: ${PSQL_APP_MEM_LIMIT:-4g} sysctls: - net.ipv4.tcp_keepalive_intvl=30 - net.ipv4.tcp_keepalive_probes=5 @@ -207,6 +217,7 @@ services: restart: unless-stopped container_name: obmp-exabgp profiles: ["test"] + mem_limit: 512m build: context: ./exabgp dockerfile: Dockerfile @@ -231,6 +242,7 @@ services: restart: unless-stopped container_name: obmp-exabgp-ui profiles: ["test"] + mem_limit: 256m build: context: ./exabgp-ui dockerfile: Dockerfile @@ -245,6 +257,7 @@ services: container_name: obmp-influxdb profiles: ["test"] image: influxdb:2.7 + mem_limit: 2g ports: - "8086:8086" volumes: @@ -262,6 +275,7 @@ services: restart: unless-stopped container_name: obmp-telegraf profiles: ["test"] + mem_limit: 512m build: context: ./telegraf dockerfile: Dockerfile @@ -277,6 +291,7 @@ services: restart: unless-stopped container_name: obmp-traffic-gen profiles: ["test"] + mem_limit: 1g build: context: ./traffic-gen dockerfile: Dockerfile @@ -293,6 +308,7 @@ services: restart: unless-stopped container_name: obmp-traffic-gen-ui profiles: ["test"] + mem_limit: 256m build: context: ./traffic-gen-ui 
dockerfile: Dockerfile @@ -303,6 +319,7 @@ services: restart: unless-stopped container_name: obmp-traffic-gen-responder profiles: ["test"] + mem_limit: 1g build: context: ./traffic-gen dockerfile: Dockerfile @@ -324,6 +341,7 @@ services: restart: unless-stopped container_name: obmp-whois image: openbmp/whois:2.2.0 + mem_limit: 1g sysctls: - net.ipv4.tcp_keepalive_intvl=30 - net.ipv4.tcp_keepalive_probes=5 @@ -343,6 +361,7 @@ services: restart: unless-stopped container_name: obmp-authelia profiles: ["auth"] + mem_limit: 256m image: authelia/authelia:4.38 ports: - "9091:9091" @@ -355,6 +374,7 @@ services: restart: unless-stopped container_name: obmp-portal profiles: ["auth"] + mem_limit: 128m image: nginx:alpine ports: - "8080:80"