From 2a82bd9a9496add3a26eb358a97c934e05661a75 Mon Sep 17 00:00:00 2001
From: sam
Date: Tue, 19 May 2026 16:50:15 -0700
Subject: [PATCH] ip_rib perf tuning: per-table autovacuum + drop 4 unused indexes

Derived from the 2026-05-19 ingestion stress-test session. psql-app's
unicast_prefix drain rate caps at a few hundred msg/s under continuous
Postgres maintenance (autovacuum on ip_rib + update_global_ip_rib() /
update_chg_stats() / update_peer_rib_counts() crons) competing for ip_rib
disk I/O.

ALTER TABLE ip_rib SET autovacuum_vacuum_scale_factor=0.02 (and
autovacuum_analyze_scale_factor=0.02) -- run more often on smaller chunks.
cost_limit kept at its OpenBMP-default 3000 so each run finishes fast; the
consumer runs flat out between bursts instead of being throttled
continuously.

DROP INDEX for four unused/redundant indexes (every INSERT updates every
index; these all had 0 scans in ~2h of heavy activity):
  - ip_rib_hash_id_idx            (907 MB)
  - ip_rib_base_attr_hash_id_idx  (558 MB)
  - ip_rib_prefix_idx             (1538 MB, GiST)
  - ip_rib_origin_as_idx          (364 MB)

9 -> 5 indexes; ~3.4 GB freed (6,715 MB -> 3,348 MB). Reduces index
write-amplification per UPSERT by ~45% and shortens autovacuum on ip_rib by
roughly the same proportion.

Measurement note: the 25-minute runs compared across cycles were
inconclusive on the sustained-rate effect (inflow was near-zero by then --
gobgp stopped -- so the consumer was largely idle). The real test is
re-enabling the fleet-wide feed with the consumer-replica + 62 GiB RAM and
seeing whether unicast_prefix keeps up.

Co-Authored-By: Claude Opus 4.7
---
 postgres/scripts/011_ip_rib_perf_tuning.sql | 51 +++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 postgres/scripts/011_ip_rib_perf_tuning.sql

diff --git a/postgres/scripts/011_ip_rib_perf_tuning.sql b/postgres/scripts/011_ip_rib_perf_tuning.sql
new file mode 100644
index 0000000..08d70b7
--- /dev/null
+++ b/postgres/scripts/011_ip_rib_perf_tuning.sql
@@ -0,0 +1,51 @@
+-- 011_ip_rib_perf_tuning.sql
+-- Runtime performance tuning for ip_rib, derived from the 2026-05-19 ingestion
+-- stress-test session. Idempotent.
+--
+-- Background: psql-app's unicast_prefix drain rate caps at a few hundred msg/s
+-- under continuous Postgres maintenance (autovacuum on ip_rib +
+-- update_global_ip_rib() / update_chg_stats() / update_peer_rib_counts()
+-- crons) competing for ip_rib disk I/O. Two levers applied here:
+--
+--   1) Per-table autovacuum on ip_rib: lower scale_factor so autovacuum runs
+--      more often on smaller chunks instead of one long disk-storm per cycle
+--      (cost_limit kept high so each run finishes fast -- short intense
+--      bursts let the consumer run flat out between them).
+--
+--   2) Drop four unused/redundant ip_rib indexes (every INSERT updates every
+--      index; these had 0 scans across ~2h of fleet convergence + storms +
+--      operator activity). 9 -> 5 indexes; ~3.4 GB freed.
+--        - ip_rib_hash_id_idx            (907 MB, btree hash_id; the composite
+--                                         pkey is (peer_hash_id, hash_id) so a
+--                                         hash_id-alone index has no caller)
+--        - ip_rib_base_attr_hash_id_idx  (558 MB, no reverse-join caller)
+--        - ip_rib_prefix_idx             (1538 MB, GiST prefix inet_ops; only
+--                                         used by ad-hoc prefix-containment
+--                                         queries -- recreate if needed)
+--        - ip_rib_origin_as_idx          (364 MB, btree origin_as; the composite
+--                                         (peer_hash_id, origin_as) handles
+--                                         most ASN queries)
+--
+-- Locking: indexes are dropped with CONCURRENTLY so the drop does not take an
+-- ACCESS EXCLUSIVE lock on ip_rib and stall the ingest path. DROP INDEX
+-- CONCURRENTLY cannot run inside a transaction block, so apply this script
+-- in autocommit mode (e.g. psql -f without -1 / --single-transaction).
+--
+-- Recovery for dropped indexes (no downtime):
+--   CREATE INDEX CONCURRENTLY ip_rib_hash_id_idx ON ip_rib (hash_id);
+--   CREATE INDEX CONCURRENTLY ip_rib_base_attr_hash_id_idx ON ip_rib (base_attr_hash_id);
+--     (column inferred from the index name -- confirm against the base schema)
+--   CREATE INDEX CONCURRENTLY ip_rib_prefix_idx ON ip_rib USING gist (prefix inet_ops);
+--   CREATE INDEX CONCURRENTLY ip_rib_origin_as_idx ON ip_rib (origin_as);
+
+-- Start autovacuum / autoanalyze once changed rows exceed ~2% of ip_rib
+-- (plus the base threshold) rather than waiting for the global scale factor.
+ALTER TABLE ip_rib SET (
+    autovacuum_vacuum_scale_factor = 0.02,
+    autovacuum_analyze_scale_factor = 0.02
+);
+
+DROP INDEX CONCURRENTLY IF EXISTS ip_rib_hash_id_idx;
+DROP INDEX CONCURRENTLY IF EXISTS ip_rib_base_attr_hash_id_idx;
+DROP INDEX CONCURRENTLY IF EXISTS ip_rib_prefix_idx;
+DROP INDEX CONCURRENTLY IF EXISTS ip_rib_origin_as_idx;