Previously, `docker image prune` ran immediately after deploying new containers, potentially deleting the old images needed for rollback if smoke tests subsequently failed.

Now the deploy pipeline:
1. Tags current images as :rollback before pulling new versions.
2. Only runs `docker image prune` after smoke tests pass.
3. Uses explicit :rollback tags for rollback instead of relying on the Docker layer cache (which is fragile).

Applied to:
- scripts/deploy-production.sh (manual deploy script)
- .github/workflows/deploy.yml (staging + production CI jobs)
- docs/deployment.md (updated rollback documentation)

Co-Authored-By: Paperclip <noreply@paperclip.ing>
230 lines
9.3 KiB
Bash
Executable File
230 lines
9.3 KiB
Bash
Executable File
#!/usr/bin/env bash
# ==============================================================================
# GoodGo Platform — Manual Production Deploy Script
# Backup for CI/CD pipeline. Use when GitHub Actions is unavailable.
#
# Usage (from the server):
#   cd ~/goodgo
#   ./deploy-production.sh [image-tag]
#
# Usage (from local machine):
#   ssh ubuntu@185.225.232.65 'cd ~/goodgo && ./deploy-production.sh abc1234'
# ==============================================================================

# Abort on unhandled errors, unset variables, and failures inside pipelines.
set -euo pipefail

# ── Configuration ─────────────────────────────────────────────────────────────
# Compose file, resolved relative to the current working directory.
COMPOSE_FILE="docker-compose.prod.yml"
# Image tag to deploy: first positional argument, "latest" when omitted.
IMAGE_TAG="${1:-latest}"
# Local endpoint polled to decide whether the deployment is healthy.
HEALTH_URL="http://127.0.0.1:3001/health"
# Maximum number of health probes, and the pause (seconds) between them.
HEALTH_RETRIES=15
HEALTH_INTERVAL=5
# "true" → attempt an automatic rollback when verification fails.
ROLLBACK_ON_FAIL=true

# None of the fixed settings is reassigned later; guard against accidents.
readonly COMPOSE_FILE HEALTH_URL HEALTH_RETRIES HEALTH_INTERVAL ROLLBACK_ON_FAIL

# ── Colors ────────────────────────────────────────────────────────────────────
# ANSI escape sequences, stored with a literal backslash; the logging helpers
# expand them via printf's %b.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# ── Logging helpers ──────────────────────────────────────────────────────────
# Each helper prints "<colored tag> HH:MM:SS <message>". Only the color codes
# are escape-expanded (%b); the message itself is printed verbatim (%s), so
# backslashes in image names or tags are never interpreted.
# Arguments: $* - message text
# Outputs:   log/warn/info write to stdout, err writes to stderr

log() {
  printf '%b[DEPLOY]%b %s %s\n' "$GREEN" "$NC" "$(date +%H:%M:%S)" "$*"
}

warn() {
  printf '%b[WARN]%b %s %s\n' "$YELLOW" "$NC" "$(date +%H:%M:%S)" "$*"
}

err() {
  printf '%b[ERROR]%b %s %s\n' "$RED" "$NC" "$(date +%H:%M:%S)" "$*" >&2
}

info() {
  printf '%b[INFO]%b %s %s\n' "$CYAN" "$NC" "$(date +%H:%M:%S)" "$*"
}

# ── Pre-flight Checks ────────────────────────────────────────────────────────
# Both files are looked up relative to the current working directory, so the
# script must be started from the deployment directory.
if [[ ! -f "$COMPOSE_FILE" ]]; then
  err "Compose file not found: $COMPOSE_FILE"
  err "Are you in the ~/goodgo directory?"
  exit 1
fi

if [[ ! -f ".env" ]]; then
  err ".env file not found. Copy from infra/env.production.example"
  exit 1
fi

# Banner: show what is about to be deployed.
log "=========================================="
log " GoodGo Platform — Production Deploy"
log " Image tag: ${IMAGE_TAG}"
log " Compose: ${COMPOSE_FILE}"
log "=========================================="
echo ""

# ── Step 1: Record Current State & Tag for Rollback ──────────────────────────
log "Step 1/7: Recording current state and tagging rollback images..."

# Image reference each running container was created from; "none" when the
# container cannot be inspected (e.g. it does not exist yet).
PREV_API=$(docker inspect --format='{{.Config.Image}}' goodgo-api 2>/dev/null || echo "none")
PREV_WEB=$(docker inspect --format='{{.Config.Image}}' goodgo-web 2>/dev/null || echo "none")
PREV_AI=$(docker inspect --format='{{.Config.Image}}' goodgo-ai-services 2>/dev/null || echo "none")
info "Previous API: ${PREV_API}"
info "Previous Web: ${PREV_WEB}"
info "Previous AI: ${PREV_AI}"

#######################################
# Tag a previously deployed image as <name>:rollback so it survives
# docker image prune. Tagging is best-effort: a failure is reported as a
# warning and the success message is only printed when the tag was created.
# Arguments: $1 - label used in log messages (API, Web, AI)
#            $2 - previous image reference, or "none" to skip
#            $3 - rollback reference to create
#######################################
tag_rollback_image() {
  local label=$1
  local prev_ref=$2
  local rollback_ref=$3

  if [[ "$prev_ref" == "none" ]]; then
    return 0
  fi

  if docker tag "$prev_ref" "$rollback_ref" 2>/dev/null; then
    info "Tagged ${label} rollback: ${rollback_ref}"
  else
    warn "Could not tag ${label} rollback image"
  fi
}

tag_rollback_image "API" "$PREV_API" goodgo-api:rollback
tag_rollback_image "Web" "$PREV_WEB" goodgo-web:rollback
tag_rollback_image "AI" "$PREV_AI" goodgo-ai-services:rollback

# ── Step 2: Pull New Images ──────────────────────────────────────────────────
log "Step 2/7: Pulling new images (tag: ${IMAGE_TAG})..."
# Exported so child processes (docker compose) can read it.
# NOTE(review): presumably the compose file interpolates IMAGE_TAG into the
# image references — confirm against docker-compose.prod.yml.
export IMAGE_TAG
# Under `set -e` a failed pull aborts the deploy before anything is restarted.
docker compose -f "$COMPOSE_FILE" pull api web ai-services
log "Images pulled successfully."

# ── Step 3: Rolling Update ───────────────────────────────────────────────────
log "Step 3/7: Rolling update (zero-downtime)..."

#######################################
# Recreate one compose service on its own (--no-deps) and block until
# compose reports it running/healthy (--wait). A failure aborts the script
# through `set -e`.
# Arguments: $1 - compose service name
#            $2 - label used in log messages
#######################################
update_service() {
  local service=$1
  local label=$2

  info "Updating ${label}..."
  docker compose -f "$COMPOSE_FILE" up -d --no-deps --wait "$service"
  info "${label} updated and healthy."
}

# Services are updated one at a time, in this order.
update_service api "API"
update_service web "Web"
update_service ai-services "AI Services"

log "Rolling update complete."

# ── Step 4: Database Migrations ──────────────────────────────────────────────
log "Step 4/7: Running database migrations..."
# Runs inside the already-updated api container; -T disables TTY allocation so
# the command also works over a non-interactive SSH session.
# A failing migration aborts the script here (set -e) — no automatic rollback
# is attempted at this stage.
docker compose -f "$COMPOSE_FILE" exec -T api npx prisma migrate deploy
log "Migrations complete."

# ── Verification helpers ─────────────────────────────────────────────────────

#######################################
# Probe the health endpoint once.
# Globals:  HEALTH_URL (read)
# Returns:  0 when the endpoint answers successfully, non-zero otherwise
#           (including when no answer arrives within 10 seconds).
#######################################
service_is_healthy() {
  curl -sf --max-time 10 "$HEALTH_URL" > /dev/null 2>&1
}

#######################################
# Point a previous image reference back at its :rollback image.
# Skipped when no previous image was recorded ("none") or when the :rollback
# image does not exist, so a stale tag never produces a bogus "none" image.
# Arguments: $1 - label used in log messages
#            $2 - rollback image reference
#            $3 - previous image reference, or "none"
#######################################
restore_rollback_image() {
  local label=$1
  local rollback_ref=$2
  local prev_ref=$3

  if [[ "$prev_ref" == "none" ]]; then
    return 0
  fi

  if docker image inspect "$rollback_ref" > /dev/null 2>&1; then
    info "Restoring ${label} from ${rollback_ref}"
    docker tag "$rollback_ref" "$prev_ref" 2>/dev/null \
      || warn "Could not restore ${label} image tag: ${prev_ref}"
  fi
}

#######################################
# Stop the application services, restore the :rollback images and start the
# services again, then probe the health endpoint once. Never aborts the
# script itself; the caller decides the exit status.
# NOTE(review): compose still resolves images from the exported IMAGE_TAG, so
# the old image only comes back when the previous reference is the one
# compose resolves — confirm against docker-compose.prod.yml.
# Globals:   COMPOSE_FILE, PREV_API, PREV_WEB, PREV_AI (read)
# Arguments: $1 - message announcing why the rollback starts
#######################################
rollback_services() {
  local reason=$1

  warn "$reason"

  docker compose -f "$COMPOSE_FILE" stop api web ai-services \
    || warn "Could not stop services cleanly; continuing rollback."

  restore_rollback_image "API" goodgo-api:rollback "$PREV_API"
  restore_rollback_image "Web" goodgo-web:rollback "$PREV_WEB"
  restore_rollback_image "AI" goodgo-ai-services:rollback "$PREV_AI"

  if ! docker compose -f "$COMPOSE_FILE" up -d --wait api web ai-services; then
    err "CRITICAL: Could not restart services during rollback!"
    err "Manual intervention required."
    return 0
  fi

  warn "Rollback complete. Verifying..."
  sleep 5
  if service_is_healthy; then
    warn "Services recovered after rollback."
  else
    err "CRITICAL: Services still unhealthy after rollback!"
    err "Manual intervention required."
  fi
}

# ── Step 5: Health Check Verification ────────────────────────────────────────
log "Step 5/7: Verifying deployment health..."
HEALTHY=false
for (( attempt = 1; attempt <= HEALTH_RETRIES; attempt++ )); do
  if service_is_healthy; then
    HEALTHY=true
    break
  fi
  info "Waiting for health check... (${attempt}/${HEALTH_RETRIES})"
  sleep "$HEALTH_INTERVAL"
done

if [[ "$HEALTHY" == true ]]; then
  log "Health check passed!"
else
  err "Health check failed after ${HEALTH_RETRIES} attempts!"
  if [[ "$ROLLBACK_ON_FAIL" == true ]]; then
    rollback_services "Initiating rollback using tagged rollback images..."
  fi
  # The deploy failed whether or not the rollback recovered the services.
  exit 1
fi

# ── Step 6: Smoke Tests ─────────────────────────────────────────────────────
log "Step 6/7: Running smoke tests..."
SMOKE_PASSED=false
# The smoke-test script is expected next to this script, not in the CWD.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
if [[ -x "$SCRIPT_DIR/smoke-test.sh" ]]; then
  if "$SCRIPT_DIR/smoke-test.sh" "http://127.0.0.1:3001"; then
    SMOKE_PASSED=true
    log "Smoke tests passed!"
  else
    err "Smoke tests FAILED!"
    if [[ "$ROLLBACK_ON_FAIL" == true ]]; then
      rollback_services "Initiating rollback due to smoke test failure..."
    fi
    exit 1
  fi
else
  # Skipped is not passed: SMOKE_PASSED stays false so the final summary
  # prints the manual-run hint instead of claiming the tests passed.
  warn "Smoke test script not found at $SCRIPT_DIR/smoke-test.sh — skipping."
  warn "Run manually: ./scripts/smoke-test.sh https://api.goodgo.vn"
fi

# ── Step 7: Cleanup (only after smoke tests pass) ───────────────────────────
log "Step 7/7: Cleaning up old images..."
# Remove the :rollback tags first (they are no longer needed after a successful deploy).
# Best-effort: some or all of the tags may not exist (e.g. on a first deploy).
docker rmi goodgo-api:rollback goodgo-web:rollback goodgo-ai-services:rollback 2>/dev/null || true
# Housekeeping only: a failed prune must not turn a successful deploy into a
# failure, so it is downgraded to a warning instead of tripping `set -e`.
docker image prune -f || warn "docker image prune failed (non-fatal)."
log "Cleanup complete."

# ── Summary ──────────────────────────────────────────────────────────────────
# Final report: image reference of each running container, the public
# endpoints, and the smoke-test outcome.
echo ""
log "=========================================="
log " Deployment successful!"
log "=========================================="
log ""
log " Services:"
info " API: $(docker inspect --format='{{.Config.Image}}' goodgo-api)"
info " Web: $(docker inspect --format='{{.Config.Image}}' goodgo-web)"
info " AI: $(docker inspect --format='{{.Config.Image}}' goodgo-ai-services)"
log ""
log " Endpoints:"
info " Web: https://platform.goodgo.vn"
info " API: https://api.goodgo.vn"
info " Grafana: https://grafana.goodgo.vn"
log ""
# Compare as a string rather than executing the variable's value as a command.
if [[ "$SMOKE_PASSED" == true ]]; then
  log " Smoke tests: PASSED"
else
  log " Run smoke tests against public URL:"
  info " ./scripts/smoke-test.sh https://api.goodgo.vn"
fi
log "=========================================="