diff --git a/.github/workflows/backup-verify.yml b/.github/workflows/backup-verify.yml
new file mode 100644
index 0000000..1edc8a8
--- /dev/null
+++ b/.github/workflows/backup-verify.yml
@@ -0,0 +1,108 @@
+name: Backup Verification
+
+on:
+  # Run weekly on Sundays at 05:00 UTC
+  schedule:
+    - cron: '0 5 * * 0'
+  # Manual trigger
+  workflow_dispatch:
+    inputs:
+      skip_cleanup:
+        description: 'Keep test database for debugging'
+        required: false
+        default: 'false'
+        type: choice
+        options:
+          - 'false'
+          - 'true'
+
+concurrency:
+  group: backup-verify
+  cancel-in-progress: false
+
+jobs:
+  verify-backup:
+    name: Backup Restore Verification
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+
+    services:
+      postgres:
+        image: postgis/postgis:16-3.4
+        env:
+          POSTGRES_DB: goodgo
+          POSTGRES_USER: goodgo
+          POSTGRES_PASSWORD: goodgo_ci_secret
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd "pg_isready -U goodgo -d goodgo"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+          --health-start-period 30s
+
+    env:
+      DATABASE_URL: postgresql://goodgo:goodgo_ci_secret@localhost:5432/goodgo
+      PGHOST: localhost
+      PGPORT: '5432'
+      PGUSER: goodgo
+      PGPASSWORD: goodgo_ci_secret
+      PGDATABASE: goodgo
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: 22
+
+      - name: Install pnpm
+        uses: pnpm/action-setup@v4
+        with:
+          version: 10
+
+      - name: Install dependencies
+        run: pnpm install --frozen-lockfile
+
+      - name: Generate Prisma client
+        run: pnpm db:generate
+
+      - name: Run migrations
+        run: pnpm db:migrate:dev
+
+      - name: Seed database
+        run: pnpm db:seed
+
+      - name: Create backup
+        run: |
+          mkdir -p /tmp/backups
+          # Custom-format archive for pg_restore; named goodgo_*.sql.gz because that is the glob pg-verify-backup.sh searches.
+          pg_dump \
+            -h localhost \
+            -p 5432 \
+            -U goodgo \
+            -d goodgo \
+            --no-owner \
+            --no-privileges \
+            --format=custom \
+            --compress=6 \
+            -f /tmp/backups/goodgo_ci_test.sql.gz
+
+      - name: Run backup verification
+        run: |
+          chmod +x scripts/backup/pg-verify-backup.sh
+          # pg-verify-backup.sh only treats SKIP_CLEANUP=1 as "keep", so map the 'true'/'false' input to 1/0.
+          BACKUP_DIR=/tmp/backups \
+          REPORT_FILE=/tmp/backups/verify-report.json \
+          SKIP_CLEANUP=${{ github.event.inputs.skip_cleanup == 'true' && '1' || '0' }} \
+          scripts/backup/pg-verify-backup.sh
+
+      - name: Upload verification report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: backup-verify-report
+          path: /tmp/backups/verify-report.json
+          retention-days: 30
diff --git a/docker-compose.yml b/docker-compose.yml
index dab1981..b4c41f3 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -111,7 +111,7 @@ services:
       - -c
       - |
         apt-get update -qq && apt-get install -y -qq cron > /dev/null 2>&1
-        echo "0 2 * * * PGHOST=postgres PGPORT=5432 PGUSER=${DB_USER:-goodgo} PGDATABASE=${DB_NAME:-goodgo} PGPASSWORD=${DB_PASSWORD:-goodgo_secret} BACKUP_DIR=/backups RETENTION_DAYS=${BACKUP_RETENTION_DAYS:-7} /scripts/pg-backup.sh >> /var/log/pg-backup.log 2>&1" | crontab -
+        (echo "0 2 * * * PGHOST=postgres PGPORT=5432 PGUSER=${DB_USER:-goodgo} PGDATABASE=${DB_NAME:-goodgo} PGPASSWORD=${DB_PASSWORD:-goodgo_secret} BACKUP_DIR=/backups RETENTION_DAYS=${BACKUP_RETENTION_DAYS:-7} /scripts/pg-backup.sh >> /var/log/pg-backup.log 2>&1"; echo "0 4 * * * PGHOST=postgres PGPORT=5432 PGUSER=${DB_USER:-goodgo} PGDATABASE=${DB_NAME:-goodgo} PGPASSWORD=${DB_PASSWORD:-goodgo_secret} BACKUP_DIR=/backups REPORT_FILE=/backups/verify-latest.json /scripts/pg-verify-backup.sh >> /var/log/pg-verify-backup.log 2>&1") | crontab -
         /scripts/pg-backup.sh
         cron -f
     environment:
@@ -131,6 +131,34 @@ services:
     networks:
       - goodgo-net
 
+  # ── Backup Verification (on-demand) ──
+  # Run manually: docker compose run --rm pg-verify-backup
+  pg-verify-backup:
+    image: postgis/postgis:16-3.4
+    container_name: goodgo-pg-verify-backup
+    profiles:
+      - tools
+    entrypoint: /bin/bash
+    command:
+      - -c
+      - /scripts/pg-verify-backup.sh
+    environment:
+      PGHOST: postgres
+      PGPORT: '5432'
+      PGUSER: ${DB_USER:-goodgo}
+      PGDATABASE: ${DB_NAME:-goodgo}
+      PGPASSWORD: ${DB_PASSWORD:-goodgo_secret}
+      BACKUP_DIR: /backups
+      REPORT_FILE: /backups/verify-report.json
+    volumes:
+      - ./scripts/backup:/scripts:ro
+      - pg_backups:/backups
+    depends_on:
+      postgres:
+        condition: service_healthy
+    networks:
+      - goodgo-net
+
   # ── Log Aggregation ──
   loki:
     image: grafana/loki:3.0.0
diff --git a/scripts/backup/pg-verify-backup.sh b/scripts/backup/pg-verify-backup.sh
new file mode 100755
index 0000000..a4376a6
--- /dev/null
+++ b/scripts/backup/pg-verify-backup.sh
@@ -0,0 +1,421 @@
+#!/bin/bash
+set -euo pipefail
+
+# ── PostgreSQL Backup Restore Verification Script ──
+# Restores the latest backup to an isolated test database, verifies data
+# integrity (table existence, row counts, key checksums), and reports pass/fail.
+#
+# SAFETY: Never writes to the source database (it is only read for comparison).
+# Creates a temporary "goodgo_verify_<timestamp>" database and drops it on exit.
+#
+# Usage:
+#   ./pg-verify-backup.sh                        # Verify latest backup
+#   ./pg-verify-backup.sh <backup_file>          # Verify specific backup
+#   SKIP_CLEANUP=1 ./pg-verify-backup.sh         # Keep test DB for inspection
+#
+# Environment variables:
+#   BACKUP_DIR    — Directory containing backups (default: /backups)
+#   PGHOST        — PostgreSQL host (default: postgres)
+#   PGPORT        — PostgreSQL port (default: 5432)
+#   PGUSER        — PostgreSQL user (default: goodgo)
+#   PGPASSWORD    — PostgreSQL password (from environment)
+#   PGDATABASE    — Source/production database name (default: goodgo)
+#   SKIP_CLEANUP  — Set to 1 to keep test database after verification
+#   REPORT_FILE   — Path for JSON report output (optional)
+#
+# Exit codes:
+#   0 — All checks passed
+#   1 — One or more checks failed
+#   2 — Setup error (no backups found, restore failed, etc.)
+
+# ── Configuration (every value can be overridden from the environment) ──
+BACKUP_DIR="${BACKUP_DIR:-/backups}"
+PGHOST="${PGHOST:-postgres}"
+PGPORT="${PGPORT:-5432}"
+PGUSER="${PGUSER:-goodgo}"
+PGDATABASE="${PGDATABASE:-goodgo}"
+SKIP_CLEANUP="${SKIP_CLEANUP:-0}"
+REPORT_FILE="${REPORT_FILE:-}"
+
+TIMESTAMP=$(date +%Y%m%d_%H%M%S)
+VERIFY_DB="goodgo_verify_${TIMESTAMP}"  # throwaway database the backup is restored into
+PASSED=0
+FAILED=0
+WARNINGS=0
+RESULTS=()  # one JSON object string per recorded check, joined into the report at the end
+
+# ── Color output (only when stdout is a terminal) ──
+if [ -t 1 ]; then
+  GREEN='\033[0;32m'
+  RED='\033[0;31m'
+  YELLOW='\033[0;33m'
+  BLUE='\033[0;34m'
+  NC='\033[0m'
+else
+  GREEN=''
+  RED=''
+  YELLOW=''
+  BLUE=''
+  NC=''
+fi
+
+log_pass() {  # $1 = check name, $2 = detail; counts a pass and records it for the JSON report
+  PASSED=$((PASSED + 1))
+  RESULTS+=("{\"check\":\"$1\",\"status\":\"pass\",\"detail\":\"$2\"}")
+  echo -e "${GREEN}[PASS]${NC} $1: $2"
+}
+
+log_fail() {  # $1 = check name, $2 = detail; counts a failure and records it for the JSON report
+  FAILED=$((FAILED + 1))
+  RESULTS+=("{\"check\":\"$1\",\"status\":\"fail\",\"detail\":\"$2\"}")
+  echo -e "${RED}[FAIL]${NC} $1: $2"
+}
+
+log_warn() {  # $1 = check name, $2 = detail; counts a warning and records it for the JSON report
+  WARNINGS=$((WARNINGS + 1))
+  RESULTS+=("{\"check\":\"$1\",\"status\":\"warn\",\"detail\":\"$2\"}")
+  echo -e "${YELLOW}[WARN]${NC} $1: $2"
+}
+
+log_info() {  # $1 = message; printed only, never recorded in RESULTS
+  echo -e "${BLUE}[INFO]${NC} $1"
+}
+
+# ── Cleanup function (EXIT trap): drops the test database unless SKIP_CLEANUP is 1 or true ──
+cleanup() {
+  local exit_code=$?
+  if [ "${SKIP_CLEANUP}" = "1" ] || [ "${SKIP_CLEANUP}" = "true" ]; then
+    log_info "SKIP_CLEANUP=${SKIP_CLEANUP} — keeping test database '${VERIFY_DB}' for inspection"
+    log_info "Drop manually: psql -h ${PGHOST} -p ${PGPORT} -U ${PGUSER} -d postgres -c 'DROP DATABASE IF EXISTS \"${VERIFY_DB}\";'"
+  else
+    log_info "Cleaning up test database '${VERIFY_DB}'..."
+    psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d postgres -c \
+      "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${VERIFY_DB}' AND pid <> pg_backend_pid();" \
+      > /dev/null 2>&1 || true
+    psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d postgres -c \
+      "DROP DATABASE IF EXISTS \"${VERIFY_DB}\";" \
+      > /dev/null 2>&1 || true
+    log_info "Test database dropped."
+  fi
+  return $exit_code
+}
+
+trap cleanup EXIT
+
+# ── Determine backup file: first argument, else the newest goodgo_*.sql.gz in BACKUP_DIR ──
+BACKUP_FILE="${1:-}"
+
+if [ -z "${BACKUP_FILE}" ]; then
+  log_info "Finding latest backup in ${BACKUP_DIR}..."
+  BACKUP_FILE=$(ls -t "${BACKUP_DIR}"/goodgo_*.sql.gz 2>/dev/null | head -n1 || true)
+  if [ -z "${BACKUP_FILE}" ]; then
+    echo -e "${RED}[ERROR]${NC} No backup files found in ${BACKUP_DIR}"
+    exit 2
+  fi
+fi
+
+if [ ! -f "${BACKUP_FILE}" ]; then
+  echo -e "${RED}[ERROR]${NC} Backup file not found: ${BACKUP_FILE}"
+  exit 2
+fi
+
+BACKUP_SIZE=$(du -h "${BACKUP_FILE}" | cut -f1)
+BACKUP_MTIME=$(stat -c '%Y' "${BACKUP_FILE}" 2>/dev/null || stat -f '%m' "${BACKUP_FILE}" 2>/dev/null || echo "unknown")  # NOTE(review): appears unused below
+
+echo ""
+echo "================================================================"
+echo " GoodGo Backup Restore Verification"
+echo "================================================================"
+echo " Backup file : ${BACKUP_FILE}"
+echo " Backup size : ${BACKUP_SIZE}"
+echo " Source DB : ${PGHOST}:${PGPORT}/${PGDATABASE}"
+echo " Test DB : ${VERIFY_DB}"
+echo " Started at : $(date -Iseconds)"
+echo "================================================================"
+echo ""
+
+# ── Step 1: Create isolated test database ──
+log_info "Step 1/5: Creating isolated test database '${VERIFY_DB}'..."
+psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d postgres -c \
+  "CREATE DATABASE \"${VERIFY_DB}\";" > /dev/null 2>&1 || true  # a failure is reported by the existence check below (exit 2) instead of aborting via set -e
+
+if [ "$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d postgres -t -A -c \
+  "SELECT 1 FROM pg_database WHERE datname = '${VERIFY_DB}';" 2>/dev/null)" != "1" ]; then
+  echo -e "${RED}[ERROR]${NC} Failed to create test database"
+  exit 2
+fi
+log_pass "Database creation" "Test database '${VERIFY_DB}' created"
+
+# ── Step 2: Restore backup into test database ──
+log_info "Step 2/5: Restoring backup to test database..."
+RESTORE_START=$(date +%s)
+
+# Enable PostGIS extension before restore (required for geometry columns); a failure is recorded as a warning rather than aborting via set -e
+psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${VERIFY_DB}" -c \
+  "CREATE EXTENSION IF NOT EXISTS postgis;" > /dev/null 2>&1 || log_warn "PostGIS pre-create" "CREATE EXTENSION postgis failed before restore"
+
+RESTORE_OUTPUT=$(pg_restore \
+  -h "${PGHOST}" \
+  -p "${PGPORT}" \
+  -U "${PGUSER}" \
+  -d "${VERIFY_DB}" \
+  --no-owner \
+  --no-privileges \
+  --clean \
+  --if-exists \
+  "${BACKUP_FILE}" 2>&1) || true
+
+RESTORE_END=$(date +%s)
+RESTORE_DURATION=$((RESTORE_END - RESTORE_START))
+
+# Collect error lines, dropping pg_restore's "errors ignored on restore: N" summary (it matches "error" but is only a tally)
+ERROR_LINES=$(echo "${RESTORE_OUTPUT}" | grep -i "ERROR" | grep -v "errors ignored on restore" || true)
+if [ -n "${ERROR_LINES}" ]; then
+  # Filter out harmless "does not exist" errors from --clean --if-exists
+  CRITICAL_LINES=$(echo "${ERROR_LINES}" | grep -v "does not exist" || true)
+  if [ -n "${CRITICAL_LINES}" ]; then
+    log_fail "Restore" "pg_restore completed with $(echo "${CRITICAL_LINES}" | grep -c .) critical error(s) in ${RESTORE_DURATION}s"
+    echo "${CRITICAL_LINES}" | head -5 || true  # head closing early must not abort the script under pipefail
+  else
+    log_pass "Restore" "pg_restore completed in ${RESTORE_DURATION}s (non-critical warnings only)"
+  fi
+else
+  log_pass "Restore" "pg_restore completed cleanly in ${RESTORE_DURATION}s"
+fi
+
+# ── Step 3: Verify table existence ──
+log_info "Step 3/5: Verifying table existence..."
+
+# Expected tables from Prisma schema (21 model tables + Prisma migration tracking)
+EXPECTED_TABLES=(
+  "User"
+  "RefreshToken"
+  "OAuthAccount"
+  "Agent"
+  "Property"
+  "PropertyMedia"
+  "Listing"
+  "SavedSearch"
+  "Transaction"
+  "Inquiry"
+  "Lead"
+  "Payment"
+  "Plan"
+  "Subscription"
+  "UsageRecord"
+  "Valuation"
+  "MarketIndex"
+  "NotificationLog"
+  "NotificationPreference"
+  "AdminAuditLog"
+  "Review"
+  "_prisma_migrations"
+)
+
+ACTUAL_TABLES=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${VERIFY_DB}" \
+  -t -A -c "SELECT tablename FROM pg_tables WHERE schemaname = 'public' ORDER BY tablename;" 2>/dev/null) || { echo -e "${RED}[ERROR]${NC} Could not list tables in '${VERIFY_DB}'"; exit 2; }
+
+MISSING_TABLES=()
+for table in "${EXPECTED_TABLES[@]}"; do
+  # -F/-x: exact whole-line match. The here-string avoids a pipeline, so an
+  # early grep -q exit cannot raise SIGPIPE and be misread under pipefail.
+  if ! grep -Fqx -- "${table}" <<< "${ACTUAL_TABLES}"; then
+    MISSING_TABLES+=("${table}")
+  fi
+done
+
+ACTUAL_COUNT=$(echo "${ACTUAL_TABLES}" | grep -c . || true)
+EXPECTED_COUNT=${#EXPECTED_TABLES[@]}
+
+if [ ${#MISSING_TABLES[@]} -eq 0 ]; then
+  log_pass "Table existence" "All ${EXPECTED_COUNT} expected tables present (${ACTUAL_COUNT} total)"
+else
+  log_fail "Table existence" "Missing ${#MISSING_TABLES[@]} table(s): ${MISSING_TABLES[*]}"
+fi
+
+# ── Step 4: Row count comparison ──
+log_info "Step 4/5: Comparing row counts between source and restored databases..."
+
+ROW_MISMATCHES=0
+ROW_REPORT=""
+
+for table in "${EXPECTED_TABLES[@]}"; do
+  # The migration tracking table is left out of the row comparison.
+  [ "${table}" != "_prisma_migrations" ] || continue
+
+  # Tables already recorded as missing have nothing to compare.
+  case " ${MISSING_TABLES[*]} " in
+    *" ${table} "*) continue ;;
+  esac
+
+  COUNT_SQL="SELECT count(*) FROM \"${table}\";"
+  SOURCE_COUNT=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${PGDATABASE}" \
+    -t -A -c "${COUNT_SQL}" 2>/dev/null || echo "ERROR")
+  VERIFY_COUNT=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${VERIFY_DB}" \
+    -t -A -c "${COUNT_SQL}" 2>/dev/null || echo "ERROR")
+
+  if [ "${SOURCE_COUNT}" = "ERROR" ] || [ "${VERIFY_COUNT}" = "ERROR" ]; then
+    log_warn "Row count: ${table}" "Could not query row count"
+    continue
+  fi
+
+  ROW_REPORT+="\n ${table}: source=${SOURCE_COUNT} restored=${VERIFY_COUNT}"
+
+  if [ "${SOURCE_COUNT}" != "${VERIFY_COUNT}" ]; then
+    ROW_MISMATCHES=$((ROW_MISMATCHES + 1))
+    log_fail "Row count: ${table}" "Mismatch — source=${SOURCE_COUNT} restored=${VERIFY_COUNT}"
+  fi
+done
+
+if [ "${ROW_MISMATCHES}" -ne 0 ]; then
+  log_fail "Row counts" "${ROW_MISMATCHES} table(s) have row count mismatches"
+else
+  log_pass "Row counts" "All tables match between source and restored database"
+fi
+
+# ── Step 5: Key data checksums ──
+log_info "Step 5/5: Verifying data checksums on critical tables..."
+
+# verify_checksum TABLE QUERY LABEL — run QUERY (expected to return one md5 text value) on the source and restored DBs and compare.
+# Logs pass on equal hashes, fail on a mismatch, and warn when TABLE is missing or either query errors.
+verify_checksum() {
+  local table="$1" query="$2" label="$3"
+  local source_hash verify_hash
+
+  # Skip if table is missing
+  if [[ " ${MISSING_TABLES[*]} " =~ " ${table} " ]]; then
+    log_warn "Checksum: ${label}" "Table '${table}' missing, skipped"
+    return
+  fi
+
+  source_hash=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${PGDATABASE}" \
+    -t -A -c "${query}" 2>/dev/null || echo "ERROR")
+  verify_hash=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${VERIFY_DB}" \
+    -t -A -c "${query}" 2>/dev/null || echo "ERROR")
+
+  if [ "${source_hash}" = "ERROR" ] || [ "${verify_hash}" = "ERROR" ]; then
+    log_warn "Checksum: ${label}" "Could not compute checksum"
+    return
+  fi
+
+  if [ "${source_hash}" = "${verify_hash}" ]; then
+    log_pass "Checksum: ${label}" "Hashes match (${source_hash:0:16}...)"
+  else
+    log_fail "Checksum: ${label}" "Hash mismatch — source=${source_hash:0:16}... restored=${verify_hash:0:16}..."
+  fi
+}
+
+# Checksum queries for critical tables. concat_ws() skips NULL fields; with plain || one NULL column nulls the whole row and string_agg drops it.
+verify_checksum "User" \
+  "SELECT md5(string_agg(concat_ws(':', id, email, phone, \"fullName\", role::text, \"kycStatus\"::text), '|' ORDER BY id)) FROM \"User\";" \
+  "User identities"
+
+verify_checksum "Property" \
+  "SELECT md5(string_agg(concat_ws(':', id, title, district, city, \"propertyType\"::text, \"areaM2\"::text), '|' ORDER BY id)) FROM \"Property\";" \
+  "Property records"
+
+verify_checksum "Listing" \
+  "SELECT md5(string_agg(concat_ws(':', id, \"propertyId\", \"sellerId\", status::text, \"priceVND\"::text, \"transactionType\"::text), '|' ORDER BY id)) FROM \"Listing\";" \
+  "Listing records"
+
+verify_checksum "Payment" \
+  "SELECT md5(string_agg(concat_ws(':', id, \"userId\", provider::text, type::text, \"amountVND\"::text, status::text), '|' ORDER BY id)) FROM \"Payment\";" \
+  "Payment records"
+
+verify_checksum "Subscription" \
+  "SELECT md5(string_agg(concat_ws(':', id, \"userId\", \"planId\", status::text), '|' ORDER BY id)) FROM \"Subscription\";" \
+  "Subscription records"
+
+verify_checksum "Transaction" \
+  "SELECT md5(string_agg(concat_ws(':', id, \"listingId\", \"buyerId\", status::text), '|' ORDER BY id)) FROM \"Transaction\";" \
+  "Transaction records"
+
+verify_checksum "Plan" \
+  "SELECT md5(string_agg(concat_ws(':', id, tier::text, name, \"priceMonthlyVND\"::text), '|' ORDER BY id)) FROM \"Plan\";" \
+  "Plan records"
+
+verify_checksum "_prisma_migrations" \
+  "SELECT md5(string_agg(concat_ws(':', id, migration_name, checksum), '|' ORDER BY started_at)) FROM \"_prisma_migrations\";" \
+  "Migration history"
+
+# ── PostGIS extension check ──
+POSTGIS_CHECK=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${VERIFY_DB}" \
+  -t -A -c "SELECT extname FROM pg_extension WHERE extname = 'postgis';" 2>/dev/null || echo "")
+
+if [ "${POSTGIS_CHECK}" = "postgis" ]; then
+  log_pass "PostGIS extension" "PostGIS is available in restored database"
+else
+  log_fail "PostGIS extension" "PostGIS extension not found in restored database"
+fi
+
+# ── Index verification (a count difference is only a warning) ──
+SOURCE_INDEX_COUNT=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${PGDATABASE}" \
+  -t -A -c "SELECT count(*) FROM pg_indexes WHERE schemaname = 'public';" 2>/dev/null || echo "0")
+VERIFY_INDEX_COUNT=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${VERIFY_DB}" \
+  -t -A -c "SELECT count(*) FROM pg_indexes WHERE schemaname = 'public';" 2>/dev/null || echo "0")
+
+if [ "${SOURCE_INDEX_COUNT}" = "${VERIFY_INDEX_COUNT}" ]; then
+  log_pass "Index count" "All ${SOURCE_INDEX_COUNT} indexes restored"
+else
+  log_warn "Index count" "Source has ${SOURCE_INDEX_COUNT} indexes, restored has ${VERIFY_INDEX_COUNT}"
+fi
+
+# ── Enum type verification (a count difference is a failure) ──
+SOURCE_ENUM_COUNT=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${PGDATABASE}" \
+  -t -A -c "SELECT count(*) FROM pg_type WHERE typtype = 'e';" 2>/dev/null || echo "0")
+VERIFY_ENUM_COUNT=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${VERIFY_DB}" \
+  -t -A -c "SELECT count(*) FROM pg_type WHERE typtype = 'e';" 2>/dev/null || echo "0")
+
+if [ "${SOURCE_ENUM_COUNT}" = "${VERIFY_ENUM_COUNT}" ]; then
+  log_pass "Enum types" "All ${SOURCE_ENUM_COUNT} enum types restored"
+else
+  log_fail "Enum types" "Source has ${SOURCE_ENUM_COUNT} enums, restored has ${VERIFY_ENUM_COUNT}"
+fi
+
+# ── Summary (TOTAL counts passes and failures; warnings are reported separately) ──
+echo ""
+echo "================================================================"
+echo " Verification Summary"
+echo "================================================================"
+echo " Backup file : ${BACKUP_FILE}"
+echo " Restore time : ${RESTORE_DURATION}s"
+TOTAL=$((PASSED + FAILED))
+echo " Checks : ${TOTAL} total, ${PASSED} passed, ${FAILED} failed, ${WARNINGS} warnings"
+echo " Finished at : $(date -Iseconds)"
+
+if [ "${FAILED}" -eq 0 ]; then
+  echo -e " Result : ${GREEN}ALL CHECKS PASSED${NC}"
+  echo "================================================================"
+else
+  echo -e " Result : ${RED}${FAILED} CHECK(S) FAILED${NC}"
+  echo "================================================================"
+fi
+echo ""
+
+# ── Write JSON report (if requested); RESULTS entries are joined with commas into the "checks" array ──
+if [ -n "${REPORT_FILE}" ]; then
+  RESULTS_JSON=$(printf '%s,' "${RESULTS[@]}")
+  RESULTS_JSON="[${RESULTS_JSON%,}]"
+
+  cat > "${REPORT_FILE}" << JSONEOF
+{
+  "timestamp": "$(date -Iseconds)",
+  "backupFile": "${BACKUP_FILE}",
+  "backupSize": "${BACKUP_SIZE}",
+  "testDatabase": "${VERIFY_DB}",
+  "restoreDurationSeconds": ${RESTORE_DURATION},
+  "passed": ${PASSED},
+  "failed": ${FAILED},
+  "warnings": ${WARNINGS},
+  "result": "$([ ${FAILED} -eq 0 ] && echo "pass" || echo "fail")",
+  "checks": ${RESULTS_JSON}
+}
+JSONEOF
+  log_info "JSON report written to ${REPORT_FILE}"
+fi
+
+# Exit with appropriate code
+if [ "${FAILED}" -gt 0 ]; then
+  exit 1
+fi
+exit 0