feat(ops): add database backup strategy and log aggregation stack
- Add pg-backup container with daily automated pg_dump (02:00 UTC) and 7-day retention
- Add backup/restore scripts with documented recovery procedure
- Add Loki + Promtail for centralized log aggregation from all Docker containers
- Add Loki as Grafana datasource with correlation ID derived fields
- Add Grafana logs dashboard with volume, error rate, HTTP request, and log viewer panels
- Configure Promtail to parse Pino structured JSON logs with level/context labels
- Enhance LoggerService with string-level formatter and service base field
- Configure 15-day log retention in Loki

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
@@ -101,6 +101,70 @@ services:
|
||||
networks:
|
||||
- goodgo-net
|
||||
|
||||
# ── Database Backup ──
|
||||
pg-backup:
  # Sidecar that runs /scripts/pg-backup.sh once at start-up and then on a
  # daily cron schedule, writing into the pg_backups volume.
  image: postgis/postgis:16-3.4
  container_name: goodgo-pg-backup
  restart: unless-stopped
  entrypoint: /bin/bash
  # The start-up script below deliberately contains no "$" characters, so
  # Compose variable interpolation never touches it; all settings reach the
  # script through the `environment` mapping further down.
  command:
    - -c
    - |
      # Abort on the first failing step so a broken install is visible in the
      # container log instead of surfacing later as "cron: command not found".
      set -eo pipefail

      # cron is installed at container start; stderr is left attached so
      # network or repository failures can be diagnosed.
      apt-get update -qq
      apt-get install -y -qq --no-install-recommends cron > /dev/null

      # cron starts jobs with a minimal environment, so snapshot the settings
      # the backup script needs into a root-only file. `declare -p` emits
      # shell-quoted assignments, so passwords containing "%", spaces or
      # quotes survive (an inline crontab assignment would break on them).
      (
        umask 077
        declare -p PGHOST PGPORT PGUSER PGDATABASE PGPASSWORD \
          BACKUP_DIR RETENTION_DAYS > /etc/pg-backup.env
      )

      # Daily at 02:00 in the container's time zone. Output is appended to
      # /var/log/pg-backup.log and also copied to PID 1's stdout so that
      # `docker logs` and log shippers reading container output can see it.
      printf '%s\n' \
        'SHELL=/bin/bash' \
        '0 2 * * * . /etc/pg-backup.env && /scripts/pg-backup.sh 2>&1 | tee -a /var/log/pg-backup.log > /proc/1/fd/1' \
        | crontab -

      # Take one backup immediately; a failure here is reported but does not
      # prevent the schedule from starting.
      /scripts/pg-backup.sh || echo 'pg-backup: initial backup failed; cron schedule remains active' >&2

      # Replace the shell with cron so it runs in the foreground as the
      # container's main process and receives stop signals directly.
      exec cron -f
  environment:
    # Single source of truth for connection and retention settings; the
    # start-up script snapshots these for the cron job.
    PGHOST: postgres
    PGPORT: '5432'
    PGUSER: ${DB_USER:-goodgo}
    PGDATABASE: ${DB_NAME:-goodgo}
    # NOTE(review): the fallback password is a development default; set
    # DB_PASSWORD explicitly outside local environments.
    PGPASSWORD: ${DB_PASSWORD:-goodgo_secret}
    BACKUP_DIR: /backups
    RETENTION_DAYS: ${BACKUP_RETENTION_DAYS:-7}
  volumes:
    # Backup/restore scripts are mounted read-only from the repository.
    - ./scripts/backup:/scripts:ro
    # Named volume that receives the dump files.
    - pg_backups:/backups
  depends_on:
    postgres:
      condition: service_healthy
  networks:
    - goodgo-net
|
||||
|
||||
# ── Log Aggregation ──
|
||||
loki:
  # Loki log store; configuration is read from the mounted loki-config.yml
  # and data is kept in the loki_data volume.
  container_name: goodgo-loki
  image: grafana/loki:3.0.0
  restart: unless-stopped
  command:
    - '-config.file=/etc/loki/loki-config.yml'
  # NOTE(review): this publishes Loki on the host at LOKI_PORT (default 3100).
  # Confirm host access is actually required; services on goodgo-net can
  # reach it by service name without a published port.
  ports:
    - "${LOKI_PORT:-3100}:3100"
  volumes:
    - loki_data:/loki
    - ./monitoring/loki/loki-config.yml:/etc/loki/loki-config.yml:ro
  networks:
    - goodgo-net
  # Healthy once the /ready endpoint answers; dependants gate on this via
  # `condition: service_healthy`.
  healthcheck:
    test:
      - CMD
      - wget
      - '--spider'
      - '-q'
      - 'http://localhost:3100/ready'
    start_period: 20s
    interval: 15s
    timeout: 5s
    retries: 5
|
||||
|
||||
promtail:
  # Promtail log shipper; its scrape and pipeline settings live in the
  # mounted promtail-config.yml (not visible from this file).
  container_name: goodgo-promtail
  image: grafana/promtail:3.0.0
  restart: unless-stopped
  command:
    - '-config.file=/etc/promtail/promtail-config.yml'
  volumes:
    # NOTE(review): `:ro` protects the socket file itself; it does not limit
    # which Docker API calls can be made over the socket. Confirm that this
    # level of access is acceptable.
    - /var/run/docker.sock:/var/run/docker.sock:ro
    - ./monitoring/promtail/promtail-config.yml:/etc/promtail/promtail-config.yml:ro
  networks:
    - goodgo-net
  # Start only after Loki reports healthy.
  depends_on:
    loki:
      condition: service_healthy
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:v2.51.0
|
||||
container_name: goodgo-prometheus
|
||||
@@ -142,6 +206,8 @@ services:
|
||||
depends_on:
|
||||
prometheus:
|
||||
condition: service_healthy
|
||||
loki:
|
||||
condition: service_healthy
|
||||
healthcheck:
|
||||
test: ['CMD', 'wget', '--spider', '-q', 'http://localhost:3000/api/health']
|
||||
interval: 15s
|
||||
@@ -160,6 +226,10 @@ volumes:
|
||||
driver: local
|
||||
minio_data:
|
||||
driver: local
|
||||
pg_backups:
|
||||
driver: local
|
||||
loki_data:
|
||||
driver: local
|
||||
prometheus_data:
|
||||
driver: local
|
||||
grafana_data:
|
||||
|
||||
Reference in New Issue
Block a user