feat(ops): add database backup strategy and log aggregation stack

- Add pg-backup container with daily automated pg_dump (02:00 UTC) and 7-day retention
- Add backup/restore scripts with documented recovery procedure
- Add Loki + Promtail for centralized log aggregation from all Docker containers
- Add Loki as Grafana datasource with correlation ID derived fields
- Add Grafana logs dashboard with volume, error rate, HTTP request, and log viewer panels
- Configure Promtail to parse Pino structured JSON logs with level/context labels
- Enhance LoggerService with string-level formatter and service base field
- Configure 15-day log retention in Loki

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Ho Ngoc Hai
2026-04-08 04:04:32 +07:00
parent 7c9f682046
commit 775eb7b374
9 changed files with 563 additions and 0 deletions

42
scripts/backup/pg-backup.sh Executable file
View File

@@ -0,0 +1,42 @@
#!/bin/bash
set -euo pipefail
# ── PostgreSQL Automated Backup Script ──
# Runs daily via cron inside the pg-backup container.
# Dumps the database and manages retention.
#
# Environment (all optional):
#   BACKUP_DIR      directory that receives the dumps    (default: /backups)
#   RETENTION_DAYS  N passed to `find -mtime +N` pruning  (default: 7)
#   PGHOST, PGPORT, PGUSER, PGDATABASE
#                   connection settings
#                   (defaults: postgres, 5432, goodgo, goodgo)
#
# NOTE: the dump uses pg_dump's *custom* archive format (compressed by
# pg_dump itself). Despite the ".sql.gz" suffix it is neither plain SQL nor
# a gzip stream; restore it with pg_restore. The suffix is kept because the
# retention glob below matches on it.
BACKUP_DIR="${BACKUP_DIR:-/backups}"
RETENTION_DAYS="${RETENTION_DAYS:-7}"

# Reject a malformed retention value before doing any work; otherwise the
# failure would only surface at the pruning step, after the dump was taken.
if [[ ! "${RETENTION_DAYS}" =~ ^[0-9]+$ ]]; then
  echo "[backup] ERROR: RETENTION_DAYS must be a non-negative integer, got '${RETENTION_DAYS}'" >&2
  exit 1
fi

TIMESTAMP=$(date +%Y%m%d_%H%M%S)
BACKUP_FILE="${BACKUP_DIR}/goodgo_${TIMESTAMP}.sql.gz"
# The dump is written here first and renamed once complete. The extra suffix
# keeps an unfinished dump out of the "goodgo_*.sql.gz" glob, so a failed run
# never leaves something that looks like a usable backup.
PARTIAL_FILE="${BACKUP_FILE}.partial"

# Remove the unfinished dump on every exit path (error, signal, success).
# After a successful rename the file no longer exists and rm -f is a no-op.
trap 'rm -f -- "${PARTIAL_FILE}"' EXIT

echo "[backup] Starting PostgreSQL backup at $(date -Iseconds)"

# Ensure backup directory exists
mkdir -p "${BACKUP_DIR}"

# Run pg_dump with compression
pg_dump \
  -h "${PGHOST:-postgres}" \
  -p "${PGPORT:-5432}" \
  -U "${PGUSER:-goodgo}" \
  -d "${PGDATABASE:-goodgo}" \
  --no-owner \
  --no-privileges \
  --format=custom \
  --compress=6 \
  -f "${PARTIAL_FILE}"

# A zero-byte archive is never a valid dump; refuse to publish it.
if [[ ! -s "${PARTIAL_FILE}" ]]; then
  echo "[backup] ERROR: pg_dump produced an empty file: ${PARTIAL_FILE}" >&2
  exit 1
fi

# Same-directory rename, so the final name appears only when fully written.
mv -f -- "${PARTIAL_FILE}" "${BACKUP_FILE}"

FILESIZE=$(du -h "${BACKUP_FILE}" | cut -f1)
echo "[backup] Backup completed: ${BACKUP_FILE} (${FILESIZE})"

# Prune old backups beyond retention period
echo "[backup] Pruning backups older than ${RETENTION_DAYS} days..."
PRUNED=$(find "${BACKUP_DIR}" -name "goodgo_*.sql.gz" -type f -mtime "+${RETENTION_DAYS}" -print -delete | wc -l)
echo "[backup] Pruned ${PRUNED} old backup(s)"

# List current backups
echo "[backup] Current backups:"
ls -lh "${BACKUP_DIR}"/goodgo_*.sql.gz 2>/dev/null || echo " (none)"
echo "[backup] Done at $(date -Iseconds)"

72
scripts/backup/pg-restore.sh Executable file
View File

@@ -0,0 +1,72 @@
#!/bin/bash
set -euo pipefail
# ── PostgreSQL Restore Script ──
# Restores a database from a backup file.
#
# Usage:
# ./pg-restore.sh <backup-file>
# ./pg-restore.sh /backups/goodgo_20260408_020000.sql.gz
#
# Inside Docker:
# docker exec -it goodgo-pg-backup /scripts/pg-restore.sh /backups/<file>
#
# Environment (all optional):
#   BACKUP_DIR  directory listed when no argument is given (default: /backups)
#   PGHOST, PGPORT, PGUSER, PGDATABASE
#               connection settings (defaults: postgres, 5432, goodgo, goodgo)
#
# Exit status:
#   0  restore succeeded, or the operator declined the confirmation prompt
#   1  usage error, missing file, or unreadable archive
#   N  pg_restore's own non-zero status when the restore reported errors
#
# DESTRUCTIVE: the target database is dropped and recreated. The prompt is
# shown only when stdin is a terminal; non-interactive runs proceed directly.
BACKUP_FILE="${1:-}"
if [[ -z "${BACKUP_FILE}" ]]; then
  echo "Usage: $0 <backup-file>"
  echo ""
  echo "Available backups:"
  ls -lht "${BACKUP_DIR:-/backups}"/goodgo_*.sql.gz 2>/dev/null || echo " (none found)"
  exit 1
fi
if [[ ! -f "${BACKUP_FILE}" ]]; then
  echo "[restore] ERROR: Backup file not found: ${BACKUP_FILE}" >&2
  exit 1
fi

# Read the archive's table of contents BEFORE anything destructive happens.
# A truncated or non-archive file fails here, while the database still exists.
if ! pg_restore --list "${BACKUP_FILE}" >/dev/null; then
  echo "[restore] ERROR: Backup file is not a readable pg_dump archive: ${BACKUP_FILE}" >&2
  exit 1
fi

PGHOST="${PGHOST:-postgres}"
PGPORT="${PGPORT:-5432}"
PGUSER="${PGUSER:-goodgo}"
PGDATABASE="${PGDATABASE:-goodgo}"

echo "[restore] WARNING: This will DROP and recreate the '${PGDATABASE}' database."
echo "[restore] Backup file: ${BACKUP_FILE}"
echo "[restore] Target: ${PGHOST}:${PGPORT}/${PGDATABASE}"
echo ""

# If running interactively, prompt for confirmation
if [[ -t 0 ]]; then
  read -rp "Continue? (yes/no): " CONFIRM
  if [[ "${CONFIRM}" != "yes" ]]; then
    echo "[restore] Aborted."
    exit 0
  fi
fi

echo "[restore] Starting restore at $(date -Iseconds)..."

# Terminate existing connections.
# Deliberately best-effort: if this fails, the DROP DATABASE below fails
# loudly anyway, so the error is intentionally suppressed here.
psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d postgres -c \
  "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${PGDATABASE}' AND pid <> pg_backend_pid();" \
  2>/dev/null || true

# Drop and recreate database (run against the "postgres" maintenance database,
# since a database cannot be dropped while connected to it).
psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d postgres -c "DROP DATABASE IF EXISTS \"${PGDATABASE}\";"
psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d postgres -c "CREATE DATABASE \"${PGDATABASE}\";"

# Restore from backup.
# The status is captured instead of being discarded so that a failed or
# partial restore is reported rather than announced as "completed".
RESTORE_STATUS=0
pg_restore \
  -h "${PGHOST}" \
  -p "${PGPORT}" \
  -U "${PGUSER}" \
  -d "${PGDATABASE}" \
  --no-owner \
  --no-privileges \
  --clean \
  --if-exists \
  "${BACKUP_FILE}" || RESTORE_STATUS=$?

if ((RESTORE_STATUS != 0)); then
  echo "[restore] ERROR: pg_restore exited with status ${RESTORE_STATUS} at $(date -Iseconds); the '${PGDATABASE}' database may be incomplete." >&2
  echo "[restore] Verify with: psql -h ${PGHOST} -U ${PGUSER} -d ${PGDATABASE} -c '\\dt'" >&2
  exit "${RESTORE_STATUS}"
fi

echo "[restore] Restore completed at $(date -Iseconds)"
echo "[restore] Verify with: psql -h ${PGHOST} -U ${PGUSER} -d ${PGDATABASE} -c '\\dt'"