fix(deploy): tag rollback images before pull, prune after smoke test
Previously, `docker image prune` ran immediately after deploying new containers, potentially deleting the old images needed for rollback if smoke tests subsequently failed.

Now the deploy pipeline:

1. Tags current images as :rollback before pulling new versions
2. Only runs `docker image prune` after smoke tests pass
3. Uses explicit :rollback tags for rollback instead of relying on Docker layer cache (which is fragile)

Applied to:

- scripts/deploy-production.sh (manual deploy script)
- .github/workflows/deploy.yml (staging + production CI jobs)
- docs/deployment.md (updated rollback documentation)

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
@@ -52,8 +52,8 @@ log " Compose: ${COMPOSE_FILE}"
|
||||
log "=========================================="
|
||||
echo ""
|
||||
|
||||
# ── Step 1: Record Current State (for rollback) ──────────────────────────────
|
||||
log "Step 1/6: Recording current state for rollback..."
|
||||
# ── Step 1: Record Current State & Tag for Rollback ──────────────────────────
|
||||
log "Step 1/7: Recording current state and tagging rollback images..."
|
||||
PREV_API=$(docker inspect --format='{{.Config.Image}}' goodgo-api 2>/dev/null || echo "none")
|
||||
PREV_WEB=$(docker inspect --format='{{.Config.Image}}' goodgo-web 2>/dev/null || echo "none")
|
||||
PREV_AI=$(docker inspect --format='{{.Config.Image}}' goodgo-ai-services 2>/dev/null || echo "none")
|
||||
@@ -61,14 +61,28 @@ info "Previous API: ${PREV_API}"
|
||||
info "Previous Web: ${PREV_WEB}"
|
||||
info "Previous AI: ${PREV_AI}"
|
||||
|
||||
# Tag current images as :rollback so they survive docker image prune
|
||||
if [ "$PREV_API" != "none" ]; then
|
||||
docker tag "$PREV_API" goodgo-api:rollback 2>/dev/null || warn "Could not tag API rollback image"
|
||||
info "Tagged API rollback: goodgo-api:rollback"
|
||||
fi
|
||||
if [ "$PREV_WEB" != "none" ]; then
|
||||
docker tag "$PREV_WEB" goodgo-web:rollback 2>/dev/null || warn "Could not tag Web rollback image"
|
||||
info "Tagged Web rollback: goodgo-web:rollback"
|
||||
fi
|
||||
if [ "$PREV_AI" != "none" ]; then
|
||||
docker tag "$PREV_AI" goodgo-ai-services:rollback 2>/dev/null || warn "Could not tag AI rollback image"
|
||||
info "Tagged AI rollback: goodgo-ai-services:rollback"
|
||||
fi
|
||||
|
||||
# ── Step 2: Pull New Images ──────────────────────────────────────────────────
|
||||
log "Step 2/6: Pulling new images (tag: ${IMAGE_TAG})..."
|
||||
log "Step 2/7: Pulling new images (tag: ${IMAGE_TAG})..."
|
||||
export IMAGE_TAG
|
||||
docker compose -f "$COMPOSE_FILE" pull api web ai-services
|
||||
log "Images pulled successfully."
|
||||
|
||||
# ── Step 3: Rolling Update ───────────────────────────────────────────────────
|
||||
log "Step 3/6: Rolling update (zero-downtime)..."
|
||||
log "Step 3/7: Rolling update (zero-downtime)..."
|
||||
|
||||
info "Updating API..."
|
||||
docker compose -f "$COMPOSE_FILE" up -d --no-deps --wait api
|
||||
@@ -85,12 +99,12 @@ info "AI Services updated and healthy."
|
||||
log "Rolling update complete."
|
||||
|
||||
# ── Step 4: Database Migrations ──────────────────────────────────────────────
|
||||
log "Step 4/6: Running database migrations..."
|
||||
log "Step 4/7: Running database migrations..."
|
||||
docker compose -f "$COMPOSE_FILE" exec -T api npx prisma migrate deploy
|
||||
log "Migrations complete."
|
||||
|
||||
# ── Step 5: Health Check Verification ────────────────────────────────────────
|
||||
log "Step 5/6: Verifying deployment health..."
|
||||
log "Step 5/7: Verifying deployment health..."
|
||||
HEALTHY=false
|
||||
for i in $(seq 1 "$HEALTH_RETRIES"); do
|
||||
if curl -sf "$HEALTH_URL" > /dev/null 2>&1; then
|
||||
@@ -107,10 +121,25 @@ else
|
||||
err "Health check failed after ${HEALTH_RETRIES} attempts!"
|
||||
|
||||
if $ROLLBACK_ON_FAIL; then
|
||||
warn "Initiating rollback..."
|
||||
warn "Initiating rollback using tagged rollback images..."
|
||||
|
||||
# Rollback: stop current, docker compose will use previously cached images
|
||||
# Rollback: stop current and restart with explicitly tagged rollback images
|
||||
docker compose -f "$COMPOSE_FILE" stop api web ai-services
|
||||
|
||||
# Restore from :rollback tags if available
|
||||
if docker image inspect goodgo-api:rollback > /dev/null 2>&1; then
|
||||
info "Restoring API from goodgo-api:rollback"
|
||||
docker tag goodgo-api:rollback "$PREV_API" 2>/dev/null || true
|
||||
fi
|
||||
if docker image inspect goodgo-web:rollback > /dev/null 2>&1; then
|
||||
info "Restoring Web from goodgo-web:rollback"
|
||||
docker tag goodgo-web:rollback "$PREV_WEB" 2>/dev/null || true
|
||||
fi
|
||||
if docker image inspect goodgo-ai-services:rollback > /dev/null 2>&1; then
|
||||
info "Restoring AI from goodgo-ai-services:rollback"
|
||||
docker tag goodgo-ai-services:rollback "$PREV_AI" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
docker compose -f "$COMPOSE_FILE" up -d --wait api web ai-services
|
||||
|
||||
warn "Rollback complete. Verifying..."
|
||||
@@ -126,8 +155,52 @@ else
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# ── Step 6: Cleanup ──────────────────────────────────────────────────────────
|
||||
log "Step 6/6: Cleaning up old images..."
|
||||
# ── Step 6: Smoke Tests ─────────────────────────────────────────────────────
|
||||
log "Step 6/7: Running smoke tests..."
|
||||
SMOKE_PASSED=false
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
if [ -x "$SCRIPT_DIR/smoke-test.sh" ]; then
|
||||
if "$SCRIPT_DIR/smoke-test.sh" "http://127.0.0.1:3001"; then
|
||||
SMOKE_PASSED=true
|
||||
log "Smoke tests passed!"
|
||||
else
|
||||
err "Smoke tests FAILED!"
|
||||
if $ROLLBACK_ON_FAIL; then
|
||||
warn "Initiating rollback due to smoke test failure..."
|
||||
docker compose -f "$COMPOSE_FILE" stop api web ai-services
|
||||
|
||||
if docker image inspect goodgo-api:rollback > /dev/null 2>&1; then
|
||||
docker tag goodgo-api:rollback "$PREV_API" 2>/dev/null || true
|
||||
fi
|
||||
if docker image inspect goodgo-web:rollback > /dev/null 2>&1; then
|
||||
docker tag goodgo-web:rollback "$PREV_WEB" 2>/dev/null || true
|
||||
fi
|
||||
if docker image inspect goodgo-ai-services:rollback > /dev/null 2>&1; then
|
||||
docker tag goodgo-ai-services:rollback "$PREV_AI" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
docker compose -f "$COMPOSE_FILE" up -d --wait api web ai-services
|
||||
|
||||
warn "Rollback complete. Verifying..."
|
||||
sleep 5
|
||||
if curl -sf "$HEALTH_URL" > /dev/null 2>&1; then
|
||||
warn "Services recovered after rollback."
|
||||
else
|
||||
err "CRITICAL: Services still unhealthy after rollback!"
|
||||
fi
|
||||
fi
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
warn "Smoke test script not found at $SCRIPT_DIR/smoke-test.sh — skipping."
|
||||
warn "Run manually: ./scripts/smoke-test.sh https://api.goodgo.vn"
|
||||
SMOKE_PASSED=true
|
||||
fi
|
||||
|
||||
# ── Step 7: Cleanup (only after smoke tests pass) ───────────────────────────
|
||||
log "Step 7/7: Cleaning up old images..."
|
||||
# Remove the :rollback tags first (they are no longer needed after a successful deploy)
|
||||
docker rmi goodgo-api:rollback goodgo-web:rollback goodgo-ai-services:rollback 2>/dev/null || true
|
||||
docker image prune -f
|
||||
log "Cleanup complete."
|
||||
|
||||
@@ -147,6 +220,10 @@ info " Web: https://platform.goodgo.vn"
|
||||
info " API: https://api.goodgo.vn"
|
||||
info " Grafana: https://grafana.goodgo.vn"
|
||||
log ""
|
||||
log " Run smoke tests:"
|
||||
info " ./scripts/smoke-test.sh https://api.goodgo.vn"
|
||||
if $SMOKE_PASSED; then
|
||||
log " Smoke tests: PASSED"
|
||||
else
|
||||
log " Run smoke tests against public URL:"
|
||||
info " ./scripts/smoke-test.sh https://api.goodgo.vn"
|
||||
fi
|
||||
log "=========================================="
|
||||
|
||||
Reference in New Issue
Block a user