From 06019ef74c64e3c9c092b911231df347e4516c3c Mon Sep 17 00:00:00 2001 From: sam Date: Tue, 19 May 2026 14:04:34 -0700 Subject: [PATCH] Add consumer-only psql-app replica for ingestion scale-out psql-app-consumer: profile-gated (scale-out) horizontal scale-out for the Kafka->Postgres ingestion path. Shares the primary's /config read-only so it reuses obmp-psql.yml, whose fixed group.id makes Kafka rebalance partitions across the primary and every replica. Its command runs ONLY the consumer jar -- no cron, RPKI/IRR/DBIP or initdb -- so it does not duplicate the primary's DB-maintenance jobs (config_cron wires those up unconditionally in /usr/sbin/run). Each replica brings its own consumer and writer threads. Measured: one consumer-only replica took the post-storm backlog drain from a cold-start ~3.7k msg/s to ~48k msg/s; group membership 8->16. With 2 consumers feeding it, Postgres becomes the next bottleneck (~500% CPU) -- DB write capacity is the ceiling beyond ~2-3 consumers. docker compose --profile scale-out up -d --scale psql-app-consumer=2 Co-Authored-By: Claude Opus 4.7 --- docker-compose.yml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 2205f17..411d251 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -255,6 +255,32 @@ services: - POSTGRES_DROP_stats_peer_rib='4 weeks' - POSTGRES_DROP_stats_peer_update_counts='4 weeks' + # Consumer-only psql-app replica -- horizontal ingestion scale-out. + # Profile-gated; bring up on demand (the host needs spare CPU+RAM for it): + # docker compose --profile scale-out up -d --scale psql-app-consumer=2 + # It shares the primary's /config (read-only) so it reuses obmp-psql.yml, + # whose fixed group.id "obmp-psql-consumer" makes Kafka rebalance partitions + # across the primary and every replica. 
The command runs ONLY the consumer
+  # jar -- no cron, no RPKI/IRR/DBIP, no initdb -- so a replica does NOT
+  # duplicate the primary's DB-maintenance jobs (update_global_ip_rib,
+  # update_chg_stats, retention, ...), which config_cron wires up
+  # unconditionally in /usr/sbin/run. Each replica brings its own consumer
+  # AND writer threads, so it adds real write throughput (the primary's
+  # writer_max_threads_per_type is 1). JVM heap: PSQL_APP_CONSUMER_HEAP (3G).
+  psql-app-consumer:
+    profiles: ["scale-out"]
+    restart: unless-stopped
+    image: openbmp/psql-app:2.2.2
+    mem_limit: ${PSQL_APP_CONSUMER_MEM_LIMIT:-4g}  # keep above the JVM heap (-Xmx) plus off-heap overhead
+    depends_on:
+      psql:
+        condition: service_healthy
+      kafka:
+        condition: service_started
+    volumes:
+      - ${OBMP_DATA_ROOT}/config:/config:ro
+    command: ["bash","-c","cd /var/log && exec java -Xmx${PSQL_APP_CONSUMER_HEAP:-3G} -Xms128m -XX:+UseG1GC -XX:+UnlockExperimentalVMOptions -XX:InitiatingHeapOccupancyPercent=30 -XX:G1MixedGCLiveThresholdPercent=30 -XX:MaxGCPauseMillis=200 -XX:ParallelGCThreads=20 -XX:ConcGCThreads=5 -XX:+ExitOnOutOfMemoryError -Duser.timezone=UTC -jar /usr/local/openbmp/obmp-psql-consumer.jar -cf /config/obmp-psql.yml"]
+
   exabgp:
     restart: unless-stopped
     container_name: obmp-exabgp