Adds pg-verify-backup.sh that restores the latest backup to an isolated test database and verifies integrity (table existence, row counts, key checksums, PostGIS extension, indexes, enum types). Reports pass/fail with optional JSON output. - Cron schedule: daily at 04:00 UTC (2h after backup) - On-demand: docker compose run --rm pg-verify-backup - CI: weekly GitHub Actions workflow with artifact upload Co-Authored-By: Paperclip <noreply@paperclip.ing>
422 lines
14 KiB
Bash
Executable File
422 lines
14 KiB
Bash
Executable File
#!/bin/bash
|
|
set -euo pipefail
|
|
|
|
# ── PostgreSQL Backup Restore Verification Script ──
|
|
# Restores the latest backup to an isolated test database, verifies data
|
|
# integrity (table existence, row counts, key checksums), and reports pass/fail.
|
|
#
|
|
# SAFETY: Never touches the production database. Creates a temporary
|
|
# "goodgo_verify_<timestamp>" database and drops it on exit.
|
|
#
|
|
# Usage:
|
|
# ./pg-verify-backup.sh # Verify latest backup
|
|
# ./pg-verify-backup.sh <backup-file> # Verify specific backup
|
|
# SKIP_CLEANUP=1 ./pg-verify-backup.sh # Keep test DB for inspection
|
|
#
|
|
# Environment variables:
|
|
# BACKUP_DIR — Directory containing backups (default: /backups)
|
|
# PGHOST — PostgreSQL host (default: postgres)
|
|
# PGPORT — PostgreSQL port (default: 5432)
|
|
# PGUSER — PostgreSQL user (default: goodgo)
|
|
# PGPASSWORD — PostgreSQL password (from environment)
|
|
# PGDATABASE — Source/production database name (default: goodgo)
|
|
# SKIP_CLEANUP — Set to 1 to keep test database after verification
|
|
# REPORT_FILE — Path for JSON report output (optional)
|
|
#
|
|
# Exit codes:
|
|
# 0 — All checks passed
|
|
# 1 — One or more checks failed
|
|
# 2 — Setup error (no backups found, restore failed, etc.)
|
|
|
|
# ── Configuration ──
|
|
BACKUP_DIR="${BACKUP_DIR:-/backups}"
|
|
PGHOST="${PGHOST:-postgres}"
|
|
PGPORT="${PGPORT:-5432}"
|
|
PGUSER="${PGUSER:-goodgo}"
|
|
PGDATABASE="${PGDATABASE:-goodgo}"
|
|
SKIP_CLEANUP="${SKIP_CLEANUP:-0}"
|
|
REPORT_FILE="${REPORT_FILE:-}"
|
|
|
|
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
|
VERIFY_DB="goodgo_verify_${TIMESTAMP}"
|
|
PASSED=0
|
|
FAILED=0
|
|
WARNINGS=0
|
|
RESULTS=()
|
|
|
|
# ── Color output (if terminal) ──
|
|
if [ -t 1 ]; then
|
|
GREEN='\033[0;32m'
|
|
RED='\033[0;31m'
|
|
YELLOW='\033[0;33m'
|
|
BLUE='\033[0;34m'
|
|
NC='\033[0m'
|
|
else
|
|
GREEN=''
|
|
RED=''
|
|
YELLOW=''
|
|
BLUE=''
|
|
NC=''
|
|
fi
|
|
|
|
log_pass() {
|
|
PASSED=$((PASSED + 1))
|
|
RESULTS+=("{\"check\":\"$1\",\"status\":\"pass\",\"detail\":\"$2\"}")
|
|
echo -e "${GREEN}[PASS]${NC} $1: $2"
|
|
}
|
|
|
|
log_fail() {
|
|
FAILED=$((FAILED + 1))
|
|
RESULTS+=("{\"check\":\"$1\",\"status\":\"fail\",\"detail\":\"$2\"}")
|
|
echo -e "${RED}[FAIL]${NC} $1: $2"
|
|
}
|
|
|
|
log_warn() {
|
|
WARNINGS=$((WARNINGS + 1))
|
|
RESULTS+=("{\"check\":\"$1\",\"status\":\"warn\",\"detail\":\"$2\"}")
|
|
echo -e "${YELLOW}[WARN]${NC} $1: $2"
|
|
}
|
|
|
|
log_info() {
|
|
echo -e "${BLUE}[INFO]${NC} $1"
|
|
}
|
|
|
|
# ── Cleanup function ──
|
|
cleanup() {
|
|
local exit_code=$?
|
|
if [ "${SKIP_CLEANUP}" = "1" ]; then
|
|
log_info "SKIP_CLEANUP=1 — keeping test database '${VERIFY_DB}' for inspection"
|
|
log_info "Drop manually: psql -h ${PGHOST} -U ${PGUSER} -d postgres -c 'DROP DATABASE IF EXISTS \"${VERIFY_DB}\";'"
|
|
else
|
|
log_info "Cleaning up test database '${VERIFY_DB}'..."
|
|
psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d postgres -c \
|
|
"SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE datname = '${VERIFY_DB}' AND pid <> pg_backend_pid();" \
|
|
> /dev/null 2>&1 || true
|
|
psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d postgres -c \
|
|
"DROP DATABASE IF EXISTS \"${VERIFY_DB}\";" \
|
|
> /dev/null 2>&1 || true
|
|
log_info "Test database dropped."
|
|
fi
|
|
return $exit_code
|
|
}
|
|
|
|
trap cleanup EXIT
|
|
|
|
# ── Determine backup file ──
|
|
BACKUP_FILE="${1:-}"
|
|
|
|
if [ -z "${BACKUP_FILE}" ]; then
|
|
log_info "Finding latest backup in ${BACKUP_DIR}..."
|
|
BACKUP_FILE=$(ls -t "${BACKUP_DIR}"/goodgo_*.sql.gz 2>/dev/null | head -n1 || true)
|
|
if [ -z "${BACKUP_FILE}" ]; then
|
|
echo -e "${RED}[ERROR]${NC} No backup files found in ${BACKUP_DIR}"
|
|
exit 2
|
|
fi
|
|
fi
|
|
|
|
if [ ! -f "${BACKUP_FILE}" ]; then
|
|
echo -e "${RED}[ERROR]${NC} Backup file not found: ${BACKUP_FILE}"
|
|
exit 2
|
|
fi
|
|
|
|
BACKUP_SIZE=$(du -h "${BACKUP_FILE}" | cut -f1)
|
|
BACKUP_MTIME=$(stat -c '%Y' "${BACKUP_FILE}" 2>/dev/null || stat -f '%m' "${BACKUP_FILE}" 2>/dev/null || echo "unknown")
|
|
|
|
echo ""
|
|
echo "================================================================"
|
|
echo " GoodGo Backup Restore Verification"
|
|
echo "================================================================"
|
|
echo " Backup file : ${BACKUP_FILE}"
|
|
echo " Backup size : ${BACKUP_SIZE}"
|
|
echo " Source DB : ${PGHOST}:${PGPORT}/${PGDATABASE}"
|
|
echo " Test DB : ${VERIFY_DB}"
|
|
echo " Started at : $(date -Iseconds)"
|
|
echo "================================================================"
|
|
echo ""
|
|
|
|
# ── Step 1: Create isolated test database ──
|
|
log_info "Step 1/5: Creating isolated test database '${VERIFY_DB}'..."
|
|
psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d postgres -c \
|
|
"CREATE DATABASE \"${VERIFY_DB}\";" > /dev/null 2>&1
|
|
|
|
if ! psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d postgres -c \
|
|
"SELECT 1 FROM pg_database WHERE datname = '${VERIFY_DB}';" 2>/dev/null | grep -q 1; then
|
|
echo -e "${RED}[ERROR]${NC} Failed to create test database"
|
|
exit 2
|
|
fi
|
|
log_pass "Database creation" "Test database '${VERIFY_DB}' created"
|
|
|
|
# ── Step 2: Restore backup into test database ──
|
|
log_info "Step 2/5: Restoring backup to test database..."
|
|
RESTORE_START=$(date +%s)
|
|
|
|
# Enable PostGIS extension before restore (required for geometry columns)
|
|
psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${VERIFY_DB}" -c \
|
|
"CREATE EXTENSION IF NOT EXISTS postgis;" > /dev/null 2>&1
|
|
|
|
RESTORE_OUTPUT=$(pg_restore \
|
|
-h "${PGHOST}" \
|
|
-p "${PGPORT}" \
|
|
-U "${PGUSER}" \
|
|
-d "${VERIFY_DB}" \
|
|
--no-owner \
|
|
--no-privileges \
|
|
--clean \
|
|
--if-exists \
|
|
"${BACKUP_FILE}" 2>&1) || true
|
|
|
|
RESTORE_END=$(date +%s)
|
|
RESTORE_DURATION=$((RESTORE_END - RESTORE_START))
|
|
|
|
# Check if any critical errors occurred (ignore warnings about objects not existing)
|
|
RESTORE_ERRORS=$(echo "${RESTORE_OUTPUT}" | grep -ci "ERROR" || true)
|
|
if [ "${RESTORE_ERRORS}" -gt 0 ]; then
|
|
# Filter out harmless "does not exist" errors from --clean --if-exists
|
|
CRITICAL_ERRORS=$(echo "${RESTORE_OUTPUT}" | grep -i "ERROR" | grep -cv "does not exist" || true)
|
|
if [ "${CRITICAL_ERRORS}" -gt 0 ]; then
|
|
log_fail "Restore" "pg_restore completed with ${CRITICAL_ERRORS} critical error(s) in ${RESTORE_DURATION}s"
|
|
echo "${RESTORE_OUTPUT}" | grep -i "ERROR" | grep -v "does not exist" | head -5
|
|
else
|
|
log_pass "Restore" "pg_restore completed in ${RESTORE_DURATION}s (non-critical warnings only)"
|
|
fi
|
|
else
|
|
log_pass "Restore" "pg_restore completed cleanly in ${RESTORE_DURATION}s"
|
|
fi
|
|
|
|
# ── Step 3: Verify table existence ──
|
|
log_info "Step 3/5: Verifying table existence..."
|
|
|
|
# Expected tables from Prisma schema (22 models + Prisma migration tracking)
|
|
EXPECTED_TABLES=(
|
|
"User"
|
|
"RefreshToken"
|
|
"OAuthAccount"
|
|
"Agent"
|
|
"Property"
|
|
"PropertyMedia"
|
|
"Listing"
|
|
"SavedSearch"
|
|
"Transaction"
|
|
"Inquiry"
|
|
"Lead"
|
|
"Payment"
|
|
"Plan"
|
|
"Subscription"
|
|
"UsageRecord"
|
|
"Valuation"
|
|
"MarketIndex"
|
|
"NotificationLog"
|
|
"NotificationPreference"
|
|
"AdminAuditLog"
|
|
"Review"
|
|
"_prisma_migrations"
|
|
)
|
|
|
|
ACTUAL_TABLES=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${VERIFY_DB}" \
|
|
-t -A -c "SELECT tablename FROM pg_tables WHERE schemaname = 'public' ORDER BY tablename;" 2>/dev/null)
|
|
|
|
MISSING_TABLES=()
|
|
for table in "${EXPECTED_TABLES[@]}"; do
|
|
if echo "${ACTUAL_TABLES}" | grep -qx "${table}"; then
|
|
: # Table exists
|
|
else
|
|
MISSING_TABLES+=("${table}")
|
|
fi
|
|
done
|
|
|
|
ACTUAL_COUNT=$(echo "${ACTUAL_TABLES}" | grep -c . || true)
|
|
EXPECTED_COUNT=${#EXPECTED_TABLES[@]}
|
|
|
|
if [ ${#MISSING_TABLES[@]} -eq 0 ]; then
|
|
log_pass "Table existence" "All ${EXPECTED_COUNT} expected tables present (${ACTUAL_COUNT} total)"
|
|
else
|
|
log_fail "Table existence" "Missing ${#MISSING_TABLES[@]} table(s): ${MISSING_TABLES[*]}"
|
|
fi
|
|
|
|
# ── Step 4: Row count comparison ──
|
|
log_info "Step 4/5: Comparing row counts between source and restored databases..."
|
|
|
|
ROW_MISMATCHES=0
|
|
ROW_REPORT=""
|
|
|
|
for table in "${EXPECTED_TABLES[@]}"; do
|
|
if [ "${table}" = "_prisma_migrations" ]; then
|
|
continue # Skip migration tracking table for row comparison
|
|
fi
|
|
|
|
# Skip missing tables
|
|
if [[ " ${MISSING_TABLES[*]} " =~ " ${table} " ]]; then
|
|
continue
|
|
fi
|
|
|
|
SOURCE_COUNT=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${PGDATABASE}" \
|
|
-t -A -c "SELECT count(*) FROM \"${table}\";" 2>/dev/null || echo "ERROR")
|
|
VERIFY_COUNT=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${VERIFY_DB}" \
|
|
-t -A -c "SELECT count(*) FROM \"${table}\";" 2>/dev/null || echo "ERROR")
|
|
|
|
if [ "${SOURCE_COUNT}" = "ERROR" ] || [ "${VERIFY_COUNT}" = "ERROR" ]; then
|
|
log_warn "Row count: ${table}" "Could not query row count"
|
|
continue
|
|
fi
|
|
|
|
ROW_REPORT="${ROW_REPORT}\n ${table}: source=${SOURCE_COUNT} restored=${VERIFY_COUNT}"
|
|
|
|
if [ "${SOURCE_COUNT}" != "${VERIFY_COUNT}" ]; then
|
|
ROW_MISMATCHES=$((ROW_MISMATCHES + 1))
|
|
log_fail "Row count: ${table}" "Mismatch — source=${SOURCE_COUNT} restored=${VERIFY_COUNT}"
|
|
fi
|
|
done
|
|
|
|
if [ "${ROW_MISMATCHES}" -eq 0 ]; then
|
|
log_pass "Row counts" "All tables match between source and restored database"
|
|
else
|
|
log_fail "Row counts" "${ROW_MISMATCHES} table(s) have row count mismatches"
|
|
fi
|
|
|
|
# ── Step 5: Key data checksums ──
|
|
log_info "Step 5/5: Verifying data checksums on critical tables..."
|
|
|
|
# Checksum key tables by hashing sorted IDs + key fields
|
|
verify_checksum() {
|
|
local table="$1"
|
|
local query="$2"
|
|
local label="$3"
|
|
|
|
# Skip if table is missing
|
|
if [[ " ${MISSING_TABLES[*]} " =~ " ${table} " ]]; then
|
|
log_warn "Checksum: ${label}" "Table '${table}' missing, skipped"
|
|
return
|
|
fi
|
|
|
|
SOURCE_HASH=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${PGDATABASE}" \
|
|
-t -A -c "${query}" 2>/dev/null || echo "ERROR")
|
|
VERIFY_HASH=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${VERIFY_DB}" \
|
|
-t -A -c "${query}" 2>/dev/null || echo "ERROR")
|
|
|
|
if [ "${SOURCE_HASH}" = "ERROR" ] || [ "${VERIFY_HASH}" = "ERROR" ]; then
|
|
log_warn "Checksum: ${label}" "Could not compute checksum"
|
|
return
|
|
fi
|
|
|
|
if [ "${SOURCE_HASH}" = "${VERIFY_HASH}" ]; then
|
|
log_pass "Checksum: ${label}" "Hashes match (${SOURCE_HASH:0:16}...)"
|
|
else
|
|
log_fail "Checksum: ${label}" "Hash mismatch — source=${SOURCE_HASH:0:16}... restored=${VERIFY_HASH:0:16}..."
|
|
fi
|
|
}
|
|
|
|
# Checksum queries for critical tables
|
|
verify_checksum "User" \
|
|
"SELECT md5(string_agg(id || email || phone || \"fullName\" || role::text || \"kycStatus\"::text, '|' ORDER BY id)) FROM \"User\";" \
|
|
"User identities"
|
|
|
|
verify_checksum "Property" \
|
|
"SELECT md5(string_agg(id || title || district || city || \"propertyType\"::text || \"areaM2\"::text, '|' ORDER BY id)) FROM \"Property\";" \
|
|
"Property records"
|
|
|
|
verify_checksum "Listing" \
|
|
"SELECT md5(string_agg(id || \"propertyId\" || \"sellerId\" || status::text || \"priceVND\"::text || \"transactionType\"::text, '|' ORDER BY id)) FROM \"Listing\";" \
|
|
"Listing records"
|
|
|
|
verify_checksum "Payment" \
|
|
"SELECT md5(string_agg(id || \"userId\" || provider::text || type::text || \"amountVND\"::text || status::text, '|' ORDER BY id)) FROM \"Payment\";" \
|
|
"Payment records"
|
|
|
|
verify_checksum "Subscription" \
|
|
"SELECT md5(string_agg(id || \"userId\" || \"planId\" || status::text, '|' ORDER BY id)) FROM \"Subscription\";" \
|
|
"Subscription records"
|
|
|
|
verify_checksum "Transaction" \
|
|
"SELECT md5(string_agg(id || \"listingId\" || \"buyerId\" || status::text, '|' ORDER BY id)) FROM \"Transaction\";" \
|
|
"Transaction records"
|
|
|
|
verify_checksum "Plan" \
|
|
"SELECT md5(string_agg(id || tier::text || name || \"priceMonthlyVND\"::text, '|' ORDER BY id)) FROM \"Plan\";" \
|
|
"Plan records"
|
|
|
|
verify_checksum "_prisma_migrations" \
|
|
"SELECT md5(string_agg(id || migration_name || checksum, '|' ORDER BY started_at)) FROM \"_prisma_migrations\";" \
|
|
"Migration history"
|
|
|
|
# ── PostGIS extension check ──
|
|
POSTGIS_CHECK=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${VERIFY_DB}" \
|
|
-t -A -c "SELECT extname FROM pg_extension WHERE extname = 'postgis';" 2>/dev/null || echo "")
|
|
|
|
if [ "${POSTGIS_CHECK}" = "postgis" ]; then
|
|
log_pass "PostGIS extension" "PostGIS is available in restored database"
|
|
else
|
|
log_fail "PostGIS extension" "PostGIS extension not found in restored database"
|
|
fi
|
|
|
|
# ── Index verification ──
|
|
SOURCE_INDEX_COUNT=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${PGDATABASE}" \
|
|
-t -A -c "SELECT count(*) FROM pg_indexes WHERE schemaname = 'public';" 2>/dev/null || echo "0")
|
|
VERIFY_INDEX_COUNT=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${VERIFY_DB}" \
|
|
-t -A -c "SELECT count(*) FROM pg_indexes WHERE schemaname = 'public';" 2>/dev/null || echo "0")
|
|
|
|
if [ "${SOURCE_INDEX_COUNT}" = "${VERIFY_INDEX_COUNT}" ]; then
|
|
log_pass "Index count" "All ${SOURCE_INDEX_COUNT} indexes restored"
|
|
else
|
|
log_warn "Index count" "Source has ${SOURCE_INDEX_COUNT} indexes, restored has ${VERIFY_INDEX_COUNT}"
|
|
fi
|
|
|
|
# ── Enum type verification ──
|
|
SOURCE_ENUM_COUNT=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${PGDATABASE}" \
|
|
-t -A -c "SELECT count(*) FROM pg_type WHERE typtype = 'e';" 2>/dev/null || echo "0")
|
|
VERIFY_ENUM_COUNT=$(psql -h "${PGHOST}" -p "${PGPORT}" -U "${PGUSER}" -d "${VERIFY_DB}" \
|
|
-t -A -c "SELECT count(*) FROM pg_type WHERE typtype = 'e';" 2>/dev/null || echo "0")
|
|
|
|
if [ "${SOURCE_ENUM_COUNT}" = "${VERIFY_ENUM_COUNT}" ]; then
|
|
log_pass "Enum types" "All ${SOURCE_ENUM_COUNT} enum types restored"
|
|
else
|
|
log_fail "Enum types" "Source has ${SOURCE_ENUM_COUNT} enums, restored has ${VERIFY_ENUM_COUNT}"
|
|
fi
|
|
|
|
# ── Summary ──
|
|
echo ""
|
|
echo "================================================================"
|
|
echo " Verification Summary"
|
|
echo "================================================================"
|
|
echo " Backup file : ${BACKUP_FILE}"
|
|
echo " Restore time : ${RESTORE_DURATION}s"
|
|
TOTAL=$((PASSED + FAILED))
|
|
echo " Checks : ${TOTAL} total, ${PASSED} passed, ${FAILED} failed, ${WARNINGS} warnings"
|
|
echo " Finished at : $(date -Iseconds)"
|
|
|
|
if [ "${FAILED}" -eq 0 ]; then
|
|
echo -e " Result : ${GREEN}ALL CHECKS PASSED${NC}"
|
|
echo "================================================================"
|
|
else
|
|
echo -e " Result : ${RED}${FAILED} CHECK(S) FAILED${NC}"
|
|
echo "================================================================"
|
|
fi
|
|
echo ""
|
|
|
|
# ── Write JSON report (if requested) ──
|
|
if [ -n "${REPORT_FILE}" ]; then
|
|
RESULTS_JSON=$(printf '%s,' "${RESULTS[@]}")
|
|
RESULTS_JSON="[${RESULTS_JSON%,}]"
|
|
|
|
cat > "${REPORT_FILE}" << JSONEOF
|
|
{
|
|
"timestamp": "$(date -Iseconds)",
|
|
"backupFile": "${BACKUP_FILE}",
|
|
"backupSize": "${BACKUP_SIZE}",
|
|
"testDatabase": "${VERIFY_DB}",
|
|
"restoreDurationSeconds": ${RESTORE_DURATION},
|
|
"passed": ${PASSED},
|
|
"failed": ${FAILED},
|
|
"warnings": ${WARNINGS},
|
|
"result": "$([ ${FAILED} -eq 0 ] && echo "pass" || echo "fail")",
|
|
"checks": ${RESULTS_JSON}
|
|
}
|
|
JSONEOF
|
|
log_info "JSON report written to ${REPORT_FILE}"
|
|
fi
|
|
|
|
# Exit with appropriate code
|
|
if [ "${FAILED}" -gt 0 ]; then
|
|
exit 1
|
|
fi
|
|
exit 0
|