feat(infra): add PgBouncer connection pooling for production PostgreSQL

Introduces PgBouncer as a connection pooler between the API service and
PostgreSQL in docker-compose.prod.yml, reducing connection overhead and
improving concurrency under production load.

- Add PgBouncer service (edoburu/pgbouncer:1.23.1-p2) with transaction
  pool mode, max_client_conn=200, default_pool_size=20
- Route API DATABASE_URL through PgBouncer (port 6432), keep direct
  connection (DATABASE_URL_DIRECT) for Prisma migrations/introspection
- Create infra/pgbouncer/ config: pgbouncer.ini, userlist template,
  and entrypoint script with runtime env-var substitution
- Update prisma.config.ts to prefer DATABASE_URL_DIRECT for migrations
- Add K6 load test (e2e/load/pgbouncer-pool-test.js) with ramp-up to
  200 VUs, pool exhaustion detection, and p95 < 2s threshold
- Add PgBouncer env vars to .env.example

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Ho Ngoc Hai
2026-04-10 20:15:21 +07:00
parent f5ef9d8c86
commit 05abbc5250
7 changed files with 335 additions and 3 deletions

View File

@@ -13,6 +13,17 @@ DB_USER=goodgo
DB_PASSWORD=CHANGE_ME
DATABASE_URL=postgresql://${DB_USER}:${DB_PASSWORD}@${DB_HOST}:${DB_PORT}/${DB_NAME}?schema=public
# Direct connection (bypasses PgBouncer — used for migrations/introspection)
DATABASE_URL_DIRECT=postgresql://${DB_USER}:${DB_PASSWORD}@${DB_HOST}:${DB_PORT}/${DB_NAME}?schema=public
# -----------------------------------------------------------------------------
# PgBouncer (Connection Pooling — production only)
# -----------------------------------------------------------------------------
PGBOUNCER_POOL_SIZE=20
PGBOUNCER_MAX_CLIENT_CONN=200
PGBOUNCER_ADMIN_PASSWORD=CHANGE_ME
PGBOUNCER_STATS_PASSWORD=CHANGE_ME
# -----------------------------------------------------------------------------
# Redis
# -----------------------------------------------------------------------------

View File

@@ -12,7 +12,9 @@ services:
- '${API_PORT:-3001}:3001'
environment:
NODE_ENV: production
DATABASE_URL: postgresql://${DB_USER}:${DB_PASSWORD}@postgres:5432/${DB_NAME}
DATABASE_URL: postgresql://${DB_USER}:${DB_PASSWORD}@pgbouncer:6432/${DB_NAME}
# Direct connection for migrations (bypasses PgBouncer — required for DDL)
DATABASE_URL_DIRECT: postgresql://${DB_USER}:${DB_PASSWORD}@postgres:5432/${DB_NAME}
REDIS_URL: redis://:${REDIS_PASSWORD}@redis:6379
TYPESENSE_HOST: typesense
TYPESENSE_PORT: 8108
@@ -27,7 +29,7 @@ services:
AI_SERVICES_API_KEY: ${AI_API_KEY}
RUN_MIGRATIONS: ${RUN_MIGRATIONS:-false}
depends_on:
postgres:
pgbouncer:
condition: service_healthy
redis:
condition: service_healthy
@@ -165,6 +167,49 @@ services:
networks:
- goodgo-net
# ── Connection Pooling ─────────────────────────────────────────────────────
pgbouncer:
  image: edoburu/pgbouncer:1.23.1-p2
  container_name: goodgo-pgbouncer
  restart: unless-stopped
  # Custom entrypoint renders userlist.txt from the environment before
  # starting PgBouncer (see infra/pgbouncer/entrypoint.sh).
  entrypoint: ['/bin/sh', '/etc/pgbouncer/entrypoint.sh']
  environment:
    DB_USER: ${DB_USER}
    DB_PASSWORD: ${DB_PASSWORD}
    PGBOUNCER_POOL_SIZE: ${PGBOUNCER_POOL_SIZE:-20}
    PGBOUNCER_MAX_CLIENT_CONN: ${PGBOUNCER_MAX_CLIENT_CONN:-200}
    # Console credentials have no built-in fallback: a default baked into this
    # file would give every deployment the same well-known admin/stats
    # password. Compose aborts with the message below if either is missing.
    PGBOUNCER_ADMIN_PASSWORD: ${PGBOUNCER_ADMIN_PASSWORD:?PGBOUNCER_ADMIN_PASSWORD must be set}
    PGBOUNCER_STATS_PASSWORD: ${PGBOUNCER_STATS_PASSWORD:?PGBOUNCER_STATS_PASSWORD must be set}
  volumes:
    - ./infra/pgbouncer/pgbouncer.ini:/etc/pgbouncer/pgbouncer.ini:ro
    - ./infra/pgbouncer/userlist.txt.template:/etc/pgbouncer/userlist.txt.template:ro
    - ./infra/pgbouncer/entrypoint.sh:/etc/pgbouncer/entrypoint.sh:ro
  depends_on:
    postgres:
      condition: service_healthy
  healthcheck:
    # Probes PgBouncer's own listener (6432), not PostgreSQL directly.
    test: ['CMD-SHELL', 'pg_isready -h 127.0.0.1 -p 6432 -U ${DB_USER}']
    interval: 10s
    timeout: 5s
    retries: 5
    start_period: 10s
  deploy:
    resources:
      limits:
        memory: 256m
        cpus: '0.5'
      reservations:
        memory: 64m
  security_opt:
    - no-new-privileges:true
  logging:
    driver: json-file
    options:
      max-size: '5m'
      max-file: '3'
  networks:
    - goodgo-net
redis:
image: redis:7-alpine
container_name: goodgo-redis

View File

@@ -0,0 +1,162 @@
// =============================================================================
// K6 Load Test — PgBouncer Connection Pooling Validation
//
// This script validates that PgBouncer handles concurrent database connections
// correctly under production-like load. It targets the GoodGo API endpoints
// that trigger database queries (health check and listings search).
//
// Usage:
// k6 run e2e/load/pgbouncer-pool-test.js
// k6 run --env BASE_URL=http://localhost:3001 e2e/load/pgbouncer-pool-test.js
//
// Requirements:
// - GoodGo API running (docker-compose.prod.yml or local dev)
// - PgBouncer service running and healthy
// - Seeded database (pnpm db:seed)
// =============================================================================
import http from 'k6/http';
import { check, group, sleep } from 'k6';
import { Rate, Trend, Counter } from 'k6/metrics';
// ── Custom Metrics ───────────────────────────────────────────────────────────
// Share of checkResponse() calls in which at least one check failed.
const errorRate = new Rate('errors');
// Duration of each HTTP response as measured by k6 (time-valued Trend).
// Despite the name, this is the whole request round trip, not isolated DB time.
const dbQueryDuration = new Trend('db_query_duration', true);
// Number of responses that checkResponse() classified as suspected pool exhaustion.
const poolExhaustedErrors = new Counter('pool_exhausted_errors');
// ── Configuration ────────────────────────────────────────────────────────────
// API origin under test. Trailing slashes are stripped so that
// `--env BASE_URL=http://host:3001/` does not produce `//health` once request
// paths are appended.
const BASE_URL = (__ENV.BASE_URL || 'http://localhost:3001').replace(/\/+$/, '');
/**
 * k6 run options: a staged virtual-user (VU) ramp plus pass/fail thresholds.
 *
 * k6 interpolates linearly from the previous stage's target to the current
 * one, so a stage only *holds* a level when the stage before it already ended
 * at that level. The peak is therefore split into a ramp and a hold; total
 * run time stays at 3m30s.
 */
export const options = {
  // Ramp-up pattern to stress-test connection pooling
  stages: [
    // Warm-up: gradually increase to 50 concurrent users
    { duration: '30s', target: 50 },
    // Sustained load: hold at 50 users
    { duration: '1m', target: 50 },
    // Spike: climb to 150 users (should exceed default_pool_size=20)
    { duration: '30s', target: 150 },
    // Peak ramp: climb to 200 users (matches max_client_conn=200)
    { duration: '30s', target: 200 },
    // Peak hold: stay at 200 users so the peak load is actually sustained
    { duration: '30s', target: 200 },
    // Cool-down: ramp back to 0
    { duration: '30s', target: 0 },
  ],
  thresholds: {
    // 95th percentile response time under 2 seconds
    http_req_duration: ['p(95)<2000'],
    // Error rate under 5%
    errors: ['rate<0.05'],
    // No pool-exhaustion errors
    pool_exhausted_errors: ['count<1'],
    // 99% of requests succeed
    http_req_failed: ['rate<0.01'],
  },
};
// ── Helper Functions ─────────────────────────────────────────────────────────
/**
 * Build the request headers sent with every API call in this test.
 *
 * @returns {{'Content-Type': string, Accept: string}} A new object on each
 *   call, declaring JSON for both the request body and the accepted response.
 */
function getHeaders() {
  const jsonMime = 'application/json';
  const headers = {};
  headers['Content-Type'] = jsonMime;
  headers.Accept = jsonMime;
  return headers;
}
/**
 * Run the standard checks on one response and feed the custom metrics.
 *
 * Two checks are evaluated (2xx status, duration below 2000 ms); the `errors`
 * rate receives `true` when either fails. A status of 503 or 0 is counted as a
 * suspected pool-exhaustion error, and any truthy duration is recorded in the
 * `db_query_duration` trend.
 *
 * @param {object} res - k6 HTTP response (reads `status` and `timings.duration`).
 * @param {string} name - Label prefixed to the check names.
 * @returns {boolean} The result returned by `check` for this response.
 */
function checkResponse(res, name) {
  const hasSuccessStatus = (r) => r.status >= 200 && r.status < 300;
  const isWithinBudget = (r) => r.timings.duration < 2000;

  const passed = check(res, {
    [`${name}: status 2xx`]: hasSuccessStatus,
    [`${name}: response time < 2s`]: isWithinBudget,
  });
  errorRate.add(!passed);

  // Heuristic for pool exhaustion: service unavailable, or no HTTP response at all.
  switch (res.status) {
    case 503:
    case 0:
      poolExhaustedErrors.add(1);
      break;
    default:
      break;
  }

  const elapsedMs = res.timings.duration;
  if (elapsedMs) {
    dbQueryDuration.add(elapsedMs);
  }

  return passed;
}
// ── Test Scenarios ───────────────────────────────────────────────────────────
/**
 * One VU iteration: a health probe, two listings reads, then a parallel burst
 * of three requests, followed by a randomized pause.
 *
 * Every response goes through checkResponse(), so each one contributes to the
 * custom metrics and thresholds.
 */
export default function () {
  const healthUrl = `${BASE_URL}/health`;
  const listingsUrl = (page, limit) => `${BASE_URL}/api/v1/listings?page=${page}&limit=${limit}`;
  // Sequential GET with the shared headers, checked under the given label.
  const getAndCheck = (url, label) => {
    const response = http.get(url, { headers: getHeaders() });
    return checkResponse(response, label);
  };

  group('Health Check (DB connectivity)', () => {
    getAndCheck(healthUrl, 'health');
  });

  group('Listings Search (read-heavy, geo queries)', () => {
    // First page of the listings collection, 20 items.
    const page = 1;
    const limit = 20;
    getAndCheck(listingsUrl(page, limit), 'listings-search');
  });

  group('Listings Detail (single record lookup)', () => {
    // Approximates a detail view by requesting the collection with limit=1;
    // no per-listing endpoint is called here.
    getAndCheck(listingsUrl(1, 1), 'listings-detail');
  });

  group('Concurrent Burst (pool stress)', () => {
    // Three requests issued in parallel by http.batch.
    const burstUrls = [healthUrl, listingsUrl(1, 5), listingsUrl(2, 5)];
    const burstRequests = burstUrls.map((url) => ['GET', url, null, { headers: getHeaders() }]);
    const burstResponses = http.batch(burstRequests);
    for (let i = 0; i < burstResponses.length; i += 1) {
      checkResponse(burstResponses[i], `burst-request-${i}`);
    }
  });

  // Think time between iterations: 0.5s to 2s, uniformly random.
  sleep(0.5 + Math.random() * 1.5);
}
// ── Lifecycle Hooks ──────────────────────────────────────────────────────────
/**
 * k6 setup hook: probe the health endpoint once before any VU starts.
 *
 * @returns {{baseUrl: string}} Data handed to teardown().
 * @throws {Error} When the health endpoint answers with anything other than 200.
 */
export function setup() {
  const healthUrl = `${BASE_URL}/health`;
  const { status } = http.get(healthUrl);

  if (status === 200) {
    console.log(`API healthy at ${BASE_URL}. Starting PgBouncer pool load test...`);
    return { baseUrl: BASE_URL };
  }

  const message = [
    `API is not healthy at ${healthUrl} — got status ${status}. `,
    'Ensure docker-compose.prod.yml services are running.',
  ].join('');
  throw new Error(message);
}
/**
 * k6 teardown hook: print a short closing summary.
 *
 * @param {{baseUrl: string}} data - The object returned by setup().
 */
export function teardown(data) {
  const { baseUrl } = data;
  const closingLines = [
    `Load test complete against ${baseUrl}.`,
    'Review k6 output for pool_exhausted_errors and db_query_duration metrics.',
  ];
  for (const line of closingLines) {
    console.log(line);
  }
}

31
infra/pgbouncer/entrypoint.sh Executable file
View File

@@ -0,0 +1,31 @@
#!/bin/sh
# =============================================================================
# PgBouncer entrypoint — render the userlist and a runtime copy of the config
# from environment variables, then start PgBouncer.
#
# Required env: DB_USER, DB_PASSWORD
# Optional env: PGBOUNCER_ADMIN_PASSWORD, PGBOUNCER_STATS_PASSWORD,
#               PGBOUNCER_POOL_SIZE, PGBOUNCER_MAX_CLIENT_CONN
# =============================================================================
set -eu

USERLIST_TEMPLATE="/etc/pgbouncer/userlist.txt.template"
USERLIST="/etc/pgbouncer/userlist.txt"
CONFIG_SOURCE="/etc/pgbouncer/pgbouncer.ini"
# pgbouncer.ini is bind-mounted read-only, and `sed -i` cannot replace a
# bind-mounted file in any case (it renames a temp file over the target).
# Overrides are therefore applied to a writable copy.
CONFIG_RUNTIME="/etc/pgbouncer/pgbouncer.runtime.ini"

# Abort unless $2 consists only of digits; $1 is the variable name for the message.
# Also keeps arbitrary text out of the sed programs below.
require_uint() {
    case "$2" in
        ''|*[!0-9]*)
            echo "ERROR: $1 must be a non-negative integer, got '$2'" >&2
            exit 1
            ;;
    esac
}

if [ -z "${DB_USER:-}" ] || [ -z "${DB_PASSWORD:-}" ]; then
    echo "ERROR: DB_USER and DB_PASSWORD must be set" >&2
    exit 1
fi

if ! command -v envsubst >/dev/null 2>&1; then
    echo "ERROR: envsubst not found in PATH; it is required to render ${USERLIST_TEMPLATE}" >&2
    exit 1
fi

# Files created below contain credentials; keep them private from the start.
umask 077

# Render userlist from template, substituting env vars. The template's ';;'
# header lines are dropped afterwards: the documented auth_file format is only
# quoted "user" "password" lines, so the output does not rely on the parser
# tolerating comments. Two steps (not a pipeline) so `set -e` sees either failure.
envsubst < "$USERLIST_TEMPLATE" > "${USERLIST}.tmp"
sed '/^[[:space:]]*;/d' "${USERLIST}.tmp" > "$USERLIST"
rm -f "${USERLIST}.tmp"
chmod 600 "$USERLIST"
echo "PgBouncer userlist rendered for user: ${DB_USER}"

# Start from a fresh writable copy of the mounted config on every boot.
rm -f "$CONFIG_RUNTIME"
cp "$CONFIG_SOURCE" "$CONFIG_RUNTIME"
chmod 600 "$CONFIG_RUNTIME"

# Override pool settings via env vars if provided
if [ -n "${PGBOUNCER_POOL_SIZE:-}" ]; then
    require_uint PGBOUNCER_POOL_SIZE "$PGBOUNCER_POOL_SIZE"
    sed -i "s/^default_pool_size = .*/default_pool_size = ${PGBOUNCER_POOL_SIZE}/" "$CONFIG_RUNTIME"
fi
if [ -n "${PGBOUNCER_MAX_CLIENT_CONN:-}" ]; then
    require_uint PGBOUNCER_MAX_CLIENT_CONN "$PGBOUNCER_MAX_CLIENT_CONN"
    sed -i "s/^max_client_conn = .*/max_client_conn = ${PGBOUNCER_MAX_CLIENT_CONN}/" "$CONFIG_RUNTIME"
fi

echo "Starting PgBouncer on port 6432 (pool_mode=transaction, pool_size=${PGBOUNCER_POOL_SIZE:-20})..."
exec pgbouncer "$CONFIG_RUNTIME"

View File

@@ -0,0 +1,69 @@
;; =============================================================================
;; PgBouncer Configuration for GoodGo Platform
;; Docs: https://www.pgbouncer.org/config.html
;; =============================================================================
[databases]
;; Route all connections to the upstream PostgreSQL container.
;; Clients are authenticated against userlist.txt (see auth_file below), which
;; the entrypoint renders at container start by substituting environment
;; variables into userlist.txt.template.
* = host=postgres port=5432
[pgbouncer]
;; ── Listening ────────────────────────────────────────────────────────────────
listen_addr = 0.0.0.0
listen_port = 6432
unix_socket_dir =
;; ── Authentication ───────────────────────────────────────────────────────────
auth_type = md5
auth_file = /etc/pgbouncer/userlist.txt
;; ── Pool Mode ────────────────────────────────────────────────────────────────
;; "transaction" is recommended for short-lived, stateless web/API workloads.
;; Each server connection is returned to the pool after every transaction.
;; NOTE: session-level features (LISTEN/NOTIFY, SQL-level PREPARE, advisory
;; locks held across transactions) will NOT work in this mode. Protocol-level
;; prepared statements require PgBouncer >= 1.21 with max_prepared_statements > 0.
pool_mode = transaction
;; ── Pool Sizing ──────────────────────────────────────────────────────────────
;; max_client_conn — total client connections PgBouncer will accept.
;; default_pool_size — server connections per user/database pair.
;; min_pool_size — pre-warmed connections kept open even when idle.
;; reserve_pool_size — extra connections allowed when the pool is exhausted.
;; reserve_pool_timeout — seconds to wait before using reserve connections.
max_client_conn = 200
default_pool_size = 20
min_pool_size = 5
reserve_pool_size = 5
reserve_pool_timeout = 3
;; ── Timeouts ─────────────────────────────────────────────────────────────────
;; server_connect_timeout — abort if backend doesn't accept within N seconds.
;; server_idle_timeout — close idle backend connections after N seconds.
;; server_lifetime — recycle backend connections after N seconds.
;; client_idle_timeout — disconnect idle clients after N seconds (0 = off).
;; query_timeout — cancel queries running longer than N seconds (0 = off).
;; query_wait_timeout — error if a client waits this long for a server.
server_connect_timeout = 15
server_idle_timeout = 600
server_lifetime = 3600
client_idle_timeout = 0
query_timeout = 0
query_wait_timeout = 120
;; ── Logging ──────────────────────────────────────────────────────────────────
log_connections = 1
log_disconnections = 1
log_pooler_errors = 1
stats_period = 60
;; ── Admin Console ────────────────────────────────────────────────────────────
admin_users = pgbouncer_admin
stats_users = pgbouncer_stats
;; ── TLS (disabled — traffic stays within Docker network) ─────────────────────
;; Uncomment and configure if PgBouncer is exposed outside the Docker network.
;; client_tls_sslmode = prefer
;; client_tls_key_file = /etc/pgbouncer/tls/server.key
;; client_tls_cert_file = /etc/pgbouncer/tls/server.crt

View File

@@ -0,0 +1,12 @@
;; =============================================================================
;; PgBouncer userlist — injected at container startup
;;
;; This is a TEMPLATE file. The entrypoint script substitutes environment
;; variables to produce the real /etc/pgbouncer/userlist.txt at runtime.
;;
;; Format: "username" "password"
;; Passwords can be plaintext, md5, or scram-sha-256 hashed.
;; =============================================================================
"${DB_USER}" "${DB_PASSWORD}"
"pgbouncer_admin" "${PGBOUNCER_ADMIN_PASSWORD}"
"pgbouncer_stats" "${PGBOUNCER_STATS_PASSWORD}"

View File

@@ -6,8 +6,10 @@ export default defineConfig({
schema: path.join(__dirname, 'schema.prisma'),
migrate: {
async development() {
// Use DATABASE_URL_DIRECT (bypasses PgBouncer) for migrations/introspection
// when available; fall back to DATABASE_URL for local dev without PgBouncer.
return {
url: process.env.DATABASE_URL!,
url: process.env.DATABASE_URL_DIRECT || process.env.DATABASE_URL!,
};
},
},