Files
goodgo-platform/scripts/deploy-production.sh
Ho Ngoc Hai 20b79acf08 fix(deploy): tag rollback images before pull, prune after smoke test
Previously, `docker image prune` ran immediately after deploying new
containers, potentially deleting the old images needed for rollback
if smoke tests subsequently failed. Now the deploy pipeline:

1. Tags current images as :rollback before pulling new versions
2. Only runs `docker image prune` after smoke tests pass
3. Uses explicit :rollback tags for rollback instead of relying on
   Docker layer cache (which is fragile)

Applied to:
- scripts/deploy-production.sh (manual deploy script)
- .github/workflows/deploy.yml (staging + production CI jobs)
- docs/deployment.md (updated rollback documentation)

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-15 11:17:32 +07:00

230 lines
9.3 KiB
Bash
Executable File

#!/usr/bin/env bash
# ==============================================================================
# GoodGo Platform — Manual Production Deploy Script
# Backup for CI/CD pipeline. Use when GitHub Actions is unavailable.
#
# Usage (from the server):
# cd ~/goodgo
# ./deploy-production.sh [image-tag]   (image-tag defaults to "latest")
#
# Usage (from local machine):
# ssh ubuntu@185.225.232.65 'cd ~/goodgo && ./deploy-production.sh abc1234'
# ==============================================================================
# Strict mode: stop on a failed command, an unset variable, or a failed
# pipeline stage.
set -o errexit -o nounset -o pipefail

# ── Configuration ─────────────────────────────────────────────────────────────
# Compose file, looked up relative to the current working directory.
COMPOSE_FILE='docker-compose.prod.yml'

# Image tag to deploy: the first CLI argument, or "latest" when it is missing
# or empty.
if [ -n "${1:-}" ]; then
  IMAGE_TAG="$1"
else
  IMAGE_TAG='latest'
fi

# Health endpoint polled after the rollout, with the polling budget.
HEALTH_URL='http://127.0.0.1:3001/health'
HEALTH_RETRIES=15  # maximum number of polls
HEALTH_INTERVAL=5  # seconds slept between polls

# When "true", a failed health check or smoke test triggers a rollback.
ROLLBACK_ON_FAIL=true
# ── Colors ────────────────────────────────────────────────────────────────────
# ANSI colour sequences, kept in escaped form and expanded only when printed.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# Shared formatter behind log/warn/err/info.
# Arguments: $1 - colour sequence (escaped form), $2 - label shown in brackets,
#            remaining arguments - the message words.
# Outputs:   one line "<colour>[LABEL]<reset> HH:MM:SS message" on stdout.
# Only the colour sequences go through %b; the message is printed with %s so a
# backslash inside a logged path or image name is shown literally instead of
# being expanded (or, for "\c", silently truncating the line).
_log_line() {
  local color=$1 label=$2
  shift 2
  printf '%b[%s]%b %s %s\n' "$color" "$label" "$NC" "$(date +%H:%M:%S)" "$*"
}

# Progress message for a deploy step (stdout).
log() { _log_line "$GREEN" "DEPLOY" "$@"; }
# Non-fatal problem (stdout).
warn() { _log_line "$YELLOW" "WARN" "$@"; }
# Error message (stderr).
err() { _log_line "$RED" "ERROR" "$@" >&2; }
# Supplementary detail (stdout).
info() { _log_line "$CYAN" "INFO" "$@"; }
# ── Pre-flight Checks ────────────────────────────────────────────────────────
# Both the compose file and .env are looked up in the current directory, so
# the script refuses to continue when either is missing.
[[ -f "$COMPOSE_FILE" ]] || {
  err "Compose file not found: $COMPOSE_FILE"
  err "Are you in the ~/goodgo directory?"
  exit 1
}

[[ -f ".env" ]] || {
  err ".env file not found. Copy from infra/env.production.example"
  exit 1
}

# Banner showing what is about to be deployed.
log "=========================================="
log " GoodGo Platform — Production Deploy"
log " Image tag: ${IMAGE_TAG}"
log " Compose: ${COMPOSE_FILE}"
log "=========================================="
printf '\n'
# ── Step 1: Record Current State & Tag for Rollback ──────────────────────────
log "Step 1/7: Recording current state and tagging rollback images..."

# Image reference each existing container was created from, or "none" when
# `docker inspect` fails (for example because the container does not exist).
PREV_API=$(docker inspect --format='{{.Config.Image}}' goodgo-api 2>/dev/null || echo "none")
PREV_WEB=$(docker inspect --format='{{.Config.Image}}' goodgo-web 2>/dev/null || echo "none")
PREV_AI=$(docker inspect --format='{{.Config.Image}}' goodgo-ai-services 2>/dev/null || echo "none")
info "Previous API: ${PREV_API}"
info "Previous Web: ${PREV_WEB}"
info "Previous AI: ${PREV_AI}"

# Tag one currently deployed image as its :rollback copy.
# Arguments: $1 - label used in log output
#            $2 - image reference recorded above ("none" = nothing to tag)
#            $3 - rollback tag to create
# The success line is printed only when `docker tag` actually succeeded, so
# the log never claims a rollback image exists when tagging failed.
tag_rollback_image() {
  local label=$1 source_ref=$2 rollback_ref=$3
  if [ "$source_ref" = "none" ]; then
    return 0
  fi
  if docker tag "$source_ref" "$rollback_ref" 2>/dev/null; then
    info "Tagged ${label} rollback: ${rollback_ref}"
  else
    warn "Could not tag ${label} rollback image"
  fi
}

# Tag current images as :rollback so they survive docker image prune
tag_rollback_image "API" "$PREV_API" goodgo-api:rollback
tag_rollback_image "Web" "$PREV_WEB" goodgo-web:rollback
tag_rollback_image "AI" "$PREV_AI" goodgo-ai-services:rollback
# NOTE(review): every command in steps 2-4 runs under errexit, so a failed
# pull, rollout or migration ends the script right here; the rollback logic
# further down is only reached once these steps have all succeeded.

# Common prefix for every compose invocation in steps 2-4.
compose_cmd=(docker compose -f "$COMPOSE_FILE")

# ── Step 2: Pull New Images ──────────────────────────────────────────────────
log "Step 2/7: Pulling new images (tag: ${IMAGE_TAG})..."
# Exported so child processes (docker compose) can see the tag; presumably the
# compose file interpolates it into the image names — confirm there.
export IMAGE_TAG
"${compose_cmd[@]}" pull api web ai-services
log "Images pulled successfully."

# ── Step 3: Rolling Update ───────────────────────────────────────────────────
log "Step 3/7: Rolling update (zero-downtime)..."
# One service at a time, in a fixed order. Each entry is
# "<compose service>:<display name>".
for rollout in "api:API" "web:Web" "ai-services:AI Services"; do
  info "Updating ${rollout#*:}..."
  "${compose_cmd[@]}" up -d --no-deps --wait "${rollout%%:*}"
  info "${rollout#*:} updated and healthy."
done
log "Rolling update complete."

# ── Step 4: Database Migrations ──────────────────────────────────────────────
log "Step 4/7: Running database migrations..."
# Runs inside the already updated api container; -T disables TTY allocation.
"${compose_cmd[@]}" exec -T api npx prisma migrate deploy
log "Migrations complete."
# ── Rollback helpers (shared by steps 5 and 6) ──────────────────────────────

# Re-point a previously deployed image reference at its :rollback copy.
# Arguments: $1 - label used in log output
#            $2 - rollback tag created in step 1
#            $3 - image reference recorded in step 1 ("none" = no container)
restore_rollback_image() {
  local label=$1 rollback_ref=$2 previous_ref=$3
  if [ "$previous_ref" = "none" ]; then
    # Nothing was running before this deploy; never create an image tag
    # literally called "none".
    return 0
  fi
  if ! docker image inspect "$rollback_ref" > /dev/null 2>&1; then
    warn "No rollback image ${rollback_ref} for ${label}; leaving ${previous_ref} untouched"
    return 0
  fi
  info "Restoring ${label} from ${rollback_ref}"
  docker tag "$rollback_ref" "$previous_ref" 2>/dev/null \
    || warn "Could not re-tag ${rollback_ref} as ${previous_ref}"
}

# Print the tag shared by all previously deployed images.
# Returns non-zero when there was no previous image, when a reference is
# pinned by digest, or when the previous images do not share one tag.
previous_release_tag() {
  local ref name tag common=""
  for ref in "$PREV_API" "$PREV_WEB" "$PREV_AI"; do
    [ "$ref" != "none" ] || continue
    case "$ref" in
      *@*) return 1 ;;
    esac
    # Look only at the last path component so a registry "host:port" prefix
    # is not mistaken for a tag.
    name=${ref##*/}
    case "$name" in
      *:*) tag=${name##*:} ;;
      *) tag="latest" ;;
    esac
    if [ -z "$common" ]; then
      common=$tag
    elif [ "$common" != "$tag" ]; then
      return 1
    fi
  done
  [ -n "$common" ] || return 1
  printf '%s\n' "$common"
}

# Stop the new containers, restore the images recorded in step 1 and bring
# the services back up, then probe the health endpoint once.
# Arguments: $1 - message announcing why the rollback starts
# Returns:   0 when the services answer the health check afterwards, 1 otherwise.
# Every docker command is checked explicitly so a failure during rollback is
# reported instead of silently ending the script via errexit.
# Only container images are rolled back; database migrations already applied
# in step 4 are not reverted.
rollback_deployment() {
  local message=$1 rollback_tag
  warn "$message"

  docker compose -f "$COMPOSE_FILE" stop api web ai-services \
    || warn "Could not stop all services cleanly; continuing with rollback"

  restore_rollback_image "API" goodgo-api:rollback "$PREV_API"
  restore_rollback_image "Web" goodgo-web:rollback "$PREV_WEB"
  restore_rollback_image "AI" goodgo-ai-services:rollback "$PREV_AI"

  # Bring the services up under the PREVIOUS tag. Re-using the new IMAGE_TAG
  # would start the just-deployed images again whenever the deploy used an
  # explicit tag rather than a moving one such as "latest".
  # NOTE(review): assumes the compose file derives image names from
  # IMAGE_TAG — confirm against docker-compose.prod.yml.
  if rollback_tag=$(previous_release_tag); then
    info "Rolling back to image tag: ${rollback_tag}"
  else
    rollback_tag=$IMAGE_TAG
    warn "Could not determine a single previous image tag; keeping IMAGE_TAG=${IMAGE_TAG}"
  fi

  if IMAGE_TAG="$rollback_tag" docker compose -f "$COMPOSE_FILE" up -d --wait api web ai-services; then
    warn "Rollback complete. Verifying..."
  else
    err "CRITICAL: docker compose could not bring the previous services up!"
    err "Manual intervention required."
    return 1
  fi

  sleep 5
  if curl -sf "$HEALTH_URL" > /dev/null 2>&1; then
    warn "Services recovered after rollback."
    return 0
  fi
  err "CRITICAL: Services still unhealthy after rollback!"
  err "Manual intervention required."
  return 1
}

# ── Step 5: Health Check Verification ────────────────────────────────────────
log "Step 5/7: Verifying deployment health..."
HEALTHY=false
# Poll the health endpoint up to HEALTH_RETRIES times, HEALTH_INTERVAL seconds
# apart, and stop at the first successful response.
for ((attempt = 1; attempt <= HEALTH_RETRIES; attempt++)); do
  if curl -sf "$HEALTH_URL" > /dev/null 2>&1; then
    HEALTHY=true
    break
  fi
  info "Waiting for health check... (${attempt}/${HEALTH_RETRIES})"
  sleep "$HEALTH_INTERVAL"
done

if [ "$HEALTHY" != true ]; then
  err "Health check failed after ${HEALTH_RETRIES} attempts!"
  if [ "$ROLLBACK_ON_FAIL" = true ]; then
    # The deploy has failed either way; the rollback result is only logged.
    rollback_deployment "Initiating rollback using tagged rollback images..." || true
  fi
  exit 1
fi
log "Health check passed!"

# ── Step 6: Smoke Tests ─────────────────────────────────────────────────────
log "Step 6/7: Running smoke tests..."
# Stays false unless the smoke tests actually ran and passed, so the final
# summary never reports "PASSED" for a skipped run.
SMOKE_PASSED=false
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
if [ -x "$SCRIPT_DIR/smoke-test.sh" ]; then
  if "$SCRIPT_DIR/smoke-test.sh" "http://127.0.0.1:3001"; then
    SMOKE_PASSED=true
    log "Smoke tests passed!"
  else
    err "Smoke tests FAILED!"
    if [ "$ROLLBACK_ON_FAIL" = true ]; then
      rollback_deployment "Initiating rollback due to smoke test failure..." || true
    fi
    exit 1
  fi
else
  warn "Smoke test script not found at $SCRIPT_DIR/smoke-test.sh — skipping."
  warn "Run manually: ./scripts/smoke-test.sh https://api.goodgo.vn"
fi
# ── Step 7: Cleanup (only after smoke tests pass) ───────────────────────────
log "Step 7/7: Cleaning up old images..."
# Remove the :rollback tags first (they are no longer needed after a successful deploy).
# Best-effort: some or all of the tags may not exist.
docker rmi goodgo-api:rollback goodgo-web:rollback goodgo-ai-services:rollback 2>/dev/null || true
# Pruning is housekeeping. Under errexit an unchecked failure here would end
# an already verified deploy with a non-zero status and skip the summary, so
# it is reported as a warning instead.
if docker image prune -f; then
  log "Cleanup complete."
else
  warn "docker image prune failed; old images were left in place."
fi
# ── Summary ──────────────────────────────────────────────────────────────────
# Final report: image of each service container, public endpoints, and the
# smoke-test outcome.
printf '\n'
log "=========================================="
log " Deployment successful!"
log "=========================================="
log ""
log " Services:"
# Each entry is "<display name>:<container name>".
for deployed in "API:goodgo-api" "Web:goodgo-web" "AI:goodgo-ai-services"; do
  info " ${deployed%%:*}: $(docker inspect --format='{{.Config.Image}}' "${deployed#*:}")"
done
log ""
log " Endpoints:"
info " Web: https://platform.goodgo.vn"
info " API: https://api.goodgo.vn"
info " Grafana: https://grafana.goodgo.vn"
log ""
case "$SMOKE_PASSED" in
  true)
    log " Smoke tests: PASSED"
    ;;
  *)
    log " Run smoke tests against public URL:"
    info " ./scripts/smoke-test.sh https://api.goodgo.vn"
    ;;
esac
log "=========================================="