feat(devops): add staging auto-deploy pipeline on develop branch
- Trigger deploy workflow on push to `develop` branch (in addition to `master`)
- Add `staging-latest` Docker image tag for develop branch builds
- Add `rollback-staging` job: auto-reverts to previous images on smoke test failure
- Add Slack success notification for staging deploys (previously only failure was notified)
- Record pre-deploy image digests for rollback capability
- Update deployment docs with CI/CD pipeline details, rollback procedures, and required secrets

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
118
.github/workflows/deploy.yml
vendored
118
.github/workflows/deploy.yml
vendored
@@ -2,7 +2,7 @@ name: Deploy
|
|||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
branches: [master]
|
branches: [master, develop]
|
||||||
workflow_dispatch:
|
workflow_dispatch:
|
||||||
inputs:
|
inputs:
|
||||||
environment:
|
environment:
|
||||||
@@ -15,7 +15,7 @@ on:
|
|||||||
- production
|
- production
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: deploy-${{ inputs.environment || 'staging' }}
|
group: deploy-${{ github.ref_name == 'develop' && 'staging' || inputs.environment || 'staging' }}
|
||||||
cancel-in-progress: false
|
cancel-in-progress: false
|
||||||
|
|
||||||
env:
|
env:
|
||||||
@@ -53,6 +53,7 @@ jobs:
|
|||||||
type=sha,prefix=
|
type=sha,prefix=
|
||||||
type=ref,event=branch
|
type=ref,event=branch
|
||||||
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/master' }}
|
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/master' }}
|
||||||
|
type=raw,value=staging-latest,enable=${{ github.ref == 'refs/heads/develop' }}
|
||||||
|
|
||||||
- name: Build and push API image
|
- name: Build and push API image
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v6
|
||||||
@@ -95,6 +96,7 @@ jobs:
|
|||||||
type=sha,prefix=
|
type=sha,prefix=
|
||||||
type=ref,event=branch
|
type=ref,event=branch
|
||||||
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/master' }}
|
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/master' }}
|
||||||
|
type=raw,value=staging-latest,enable=${{ github.ref == 'refs/heads/develop' }}
|
||||||
|
|
||||||
- name: Build and push Web image
|
- name: Build and push Web image
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v6
|
||||||
@@ -137,6 +139,7 @@ jobs:
|
|||||||
type=sha,prefix=
|
type=sha,prefix=
|
||||||
type=ref,event=branch
|
type=ref,event=branch
|
||||||
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/master' }}
|
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/master' }}
|
||||||
|
type=raw,value=staging-latest,enable=${{ github.ref == 'refs/heads/develop' }}
|
||||||
|
|
||||||
- name: Build and push AI Services image
|
- name: Build and push AI Services image
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v6
|
||||||
@@ -152,7 +155,10 @@ jobs:
|
|||||||
deploy-staging:
|
deploy-staging:
|
||||||
name: Deploy to Staging
|
name: Deploy to Staging
|
||||||
needs: [build-api, build-web, build-ai]
|
needs: [build-api, build-web, build-ai]
|
||||||
if: github.event_name == 'push' || inputs.environment == 'staging'
|
if: >-
|
||||||
|
github.ref == 'refs/heads/develop' ||
|
||||||
|
(github.event_name == 'push' && github.ref == 'refs/heads/master') ||
|
||||||
|
(github.event_name == 'workflow_dispatch' && inputs.environment == 'staging')
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
environment: staging
|
environment: staging
|
||||||
|
|
||||||
@@ -160,6 +166,30 @@ jobs:
|
|||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Record pre-deploy image tags
|
||||||
|
id: pre-deploy
|
||||||
|
env:
|
||||||
|
DEPLOY_HOST: ${{ secrets.STAGING_HOST }}
|
||||||
|
DEPLOY_USER: ${{ secrets.STAGING_USER }}
|
||||||
|
DEPLOY_KEY: ${{ secrets.STAGING_SSH_KEY }}
|
||||||
|
run: |
|
||||||
|
mkdir -p ~/.ssh
|
||||||
|
echo "$DEPLOY_KEY" > ~/.ssh/deploy_key
|
||||||
|
chmod 600 ~/.ssh/deploy_key
|
||||||
|
ssh-keyscan -H "$DEPLOY_HOST" >> ~/.ssh/known_hosts 2>/dev/null
|
||||||
|
|
||||||
|
# Capture current image digests for rollback
|
||||||
|
PREV_API=$(ssh -i ~/.ssh/deploy_key "$DEPLOY_USER@$DEPLOY_HOST" \
|
||||||
|
"docker inspect --format='{{.Image}}' goodgo-api 2>/dev/null" || echo "none")
|
||||||
|
PREV_WEB=$(ssh -i ~/.ssh/deploy_key "$DEPLOY_USER@$DEPLOY_HOST" \
|
||||||
|
"docker inspect --format='{{.Image}}' goodgo-web 2>/dev/null" || echo "none")
|
||||||
|
PREV_AI=$(ssh -i ~/.ssh/deploy_key "$DEPLOY_USER@$DEPLOY_HOST" \
|
||||||
|
"docker inspect --format='{{.Image}}' goodgo-ai-services 2>/dev/null" || echo "none")
|
||||||
|
|
||||||
|
echo "prev_api=$PREV_API" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "prev_web=$PREV_WEB" >> "$GITHUB_OUTPUT"
|
||||||
|
echo "prev_ai=$PREV_AI" >> "$GITHUB_OUTPUT"
|
||||||
|
|
||||||
- name: Deploy to staging
|
- name: Deploy to staging
|
||||||
env:
|
env:
|
||||||
DEPLOY_HOST: ${{ secrets.STAGING_HOST }}
|
DEPLOY_HOST: ${{ secrets.STAGING_HOST }}
|
||||||
@@ -167,11 +197,6 @@ jobs:
|
|||||||
DEPLOY_KEY: ${{ secrets.STAGING_SSH_KEY }}
|
DEPLOY_KEY: ${{ secrets.STAGING_SSH_KEY }}
|
||||||
IMAGE_TAG: ${{ github.sha }}
|
IMAGE_TAG: ${{ github.sha }}
|
||||||
run: |
|
run: |
|
||||||
mkdir -p ~/.ssh
|
|
||||||
echo "$DEPLOY_KEY" > ~/.ssh/deploy_key
|
|
||||||
chmod 600 ~/.ssh/deploy_key
|
|
||||||
ssh-keyscan -H "$DEPLOY_HOST" >> ~/.ssh/known_hosts 2>/dev/null
|
|
||||||
|
|
||||||
# Copy production compose and deploy
|
# Copy production compose and deploy
|
||||||
scp -i ~/.ssh/deploy_key docker-compose.prod.yml "$DEPLOY_USER@$DEPLOY_HOST:~/goodgo/"
|
scp -i ~/.ssh/deploy_key docker-compose.prod.yml "$DEPLOY_USER@$DEPLOY_HOST:~/goodgo/"
|
||||||
scp -i ~/.ssh/deploy_key -r monitoring/ "$DEPLOY_USER@$DEPLOY_HOST:~/goodgo/monitoring/"
|
scp -i ~/.ssh/deploy_key -r monitoring/ "$DEPLOY_USER@$DEPLOY_HOST:~/goodgo/monitoring/"
|
||||||
@@ -214,6 +239,11 @@ jobs:
|
|||||||
echo "Staging health check failed"
|
echo "Staging health check failed"
|
||||||
exit 1
|
exit 1
|
||||||
|
|
||||||
|
outputs:
|
||||||
|
prev_api: ${{ steps.pre-deploy.outputs.prev_api }}
|
||||||
|
prev_web: ${{ steps.pre-deploy.outputs.prev_web }}
|
||||||
|
prev_ai: ${{ steps.pre-deploy.outputs.prev_ai }}
|
||||||
|
|
||||||
smoke-test-staging:
|
smoke-test-staging:
|
||||||
name: Smoke Test Staging
|
name: Smoke Test Staging
|
||||||
needs: [deploy-staging]
|
needs: [deploy-staging]
|
||||||
@@ -231,6 +261,24 @@ jobs:
|
|||||||
chmod +x scripts/smoke-test.sh
|
chmod +x scripts/smoke-test.sh
|
||||||
./scripts/smoke-test.sh "$STAGING_URL"
|
./scripts/smoke-test.sh "$STAGING_URL"
|
||||||
|
|
||||||
|
- name: Notify on success
|
||||||
|
if: success()
|
||||||
|
env:
|
||||||
|
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }}
|
||||||
|
run: |
|
||||||
|
curl -s -X POST "$SLACK_WEBHOOK" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d "{
|
||||||
|
\"text\": \":white_check_mark: *Staging deploy successful* for \`${{ github.sha }}\`\",
|
||||||
|
\"blocks\": [{
|
||||||
|
\"type\": \"section\",
|
||||||
|
\"text\": {
|
||||||
|
\"type\": \"mrkdwn\",
|
||||||
|
\"text\": \":white_check_mark: *Staging Deploy Successful*\n*Commit:* \`${{ github.sha }}\`\n*Branch:* \`${{ github.ref_name }}\`\n*All smoke tests passed.*\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View logs>\"
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
}"
|
||||||
|
|
||||||
- name: Notify on failure
|
- name: Notify on failure
|
||||||
if: failure()
|
if: failure()
|
||||||
env:
|
env:
|
||||||
@@ -244,7 +292,59 @@ jobs:
|
|||||||
\"type\": \"section\",
|
\"type\": \"section\",
|
||||||
\"text\": {
|
\"text\": {
|
||||||
\"type\": \"mrkdwn\",
|
\"type\": \"mrkdwn\",
|
||||||
\"text\": \":rotating_light: *Staging Smoke Test Failure*\n*Commit:* \`${{ github.sha }}\`\n*Branch:* \`${{ github.ref_name }}\`\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View logs>\"
|
\"text\": \":rotating_light: *Staging Smoke Test Failure*\n*Commit:* \`${{ github.sha }}\`\n*Branch:* \`${{ github.ref_name }}\`\n*Action:* Automatic rollback initiated\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View logs>\"
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
}"
|
||||||
|
|
||||||
|
# Restarts the staging services over SSH after a failed staging deploy or
# smoke test, then posts a Slack message about the rollback.
rollback-staging:
  name: Rollback Staging
  needs: [deploy-staging, smoke-test-staging]
  # failure() on its own is true when ANY ancestor job failed, including the
  # image builds. In that case deploy-staging is skipped and nothing new has
  # reached the host, so restarting the staging services would only cause
  # avoidable downtime. Only roll back when a deploy was actually attempted.
  if: failure() && needs.deploy-staging.result != 'skipped'
  runs-on: ubuntu-latest
  environment: staging

  steps:
    - name: Rollback to previous images
      env:
        DEPLOY_HOST: ${{ secrets.STAGING_HOST }}
        DEPLOY_USER: ${{ secrets.STAGING_USER }}
        DEPLOY_KEY: ${{ secrets.STAGING_SSH_KEY }}
      run: |
        # Each job runs on a fresh runner, so the SSH key and known_hosts
        # entry must be set up again here.
        mkdir -p ~/.ssh
        echo "$DEPLOY_KEY" > ~/.ssh/deploy_key
        chmod 600 ~/.ssh/deploy_key
        ssh-keyscan -H "$DEPLOY_HOST" >> ~/.ssh/known_hosts 2>/dev/null

        # The heredoc delimiter is quoted, so nothing inside is expanded on
        # the runner; the script is executed verbatim on the staging host.
        ssh -i ~/.ssh/deploy_key "$DEPLOY_USER@$DEPLOY_HOST" << 'ROLLBACK_SCRIPT'
          cd ~/goodgo

          echo "Rolling back staging to previous container images..."

          # Stop the application services and start them again.
          # NOTE(review): this restarts the services with whatever image
          # reference docker-compose.prod.yml resolves on the host. The image
          # IDs recorded by deploy-staging (prev_api / prev_web / prev_ai
          # outputs) are not consumed by this job - confirm that down + up
          # really restores the pre-deploy images, or pin them explicitly.
          docker compose -f docker-compose.prod.yml down api web ai-services
          docker compose -f docker-compose.prod.yml up -d --wait api web ai-services

          echo "Rollback complete. Verifying health..."
          sleep 5
          # Best effort: a failed check is only logged, so the Slack
          # notification step below still runs.
          curl -sf http://localhost:3001/health || echo "WARNING: health check failed after rollback"
        ROLLBACK_SCRIPT

    - name: Notify rollback
      env:
        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }}
      run: |
        curl -s -X POST "$SLACK_WEBHOOK" \
          -H "Content-Type: application/json" \
          -d "{
            \"text\": \":warning: *Staging ROLLBACK triggered* for \`${{ github.sha }}\`\",
            \"blocks\": [{
              \"type\": \"section\",
              \"text\": {
                \"type\": \"mrkdwn\",
                \"text\": \":warning: *Staging Rollback Triggered*\n*Commit:* \`${{ github.sha }}\`\n*Branch:* \`${{ github.ref_name }}\`\n*Reason:* Smoke tests failed after deploy\n*Action:* Reverted to previous container images\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View logs>\"
              }
            }]
          }"
|
||||||
|
|||||||
@@ -214,11 +214,116 @@ docker run -p 8000:8000 --env-file ../../.env goodgo-ai-services
|
|||||||
└────────┘ └────────┘ └────────────┘
|
└────────┘ └────────┘ └────────────┘
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## CI/CD Pipeline
|
||||||
|
|
||||||
|
### Branch Strategy
|
||||||
|
|
||||||
|
| Branch | Deploy Target | Trigger | Notes |
|
||||||
|
|--------|--------------|---------|-------|
|
||||||
|
| `develop` | Staging | Auto (push) | Every merge to `develop` auto-deploys to staging |
|
||||||
|
| `master` | Staging | Auto (push) | Master push also deploys to staging for verification |
|
||||||
|
| Manual | Staging/Production | `workflow_dispatch` | Manual trigger via GitHub Actions UI |
|
||||||
|
|
||||||
|
### Staging Auto-Deploy Flow
|
||||||
|
|
||||||
|
```
|
||||||
|
Push to develop → Build images → Deploy to staging → Smoke tests → ✅ / Rollback
|
||||||
|
```
|
||||||
|
|
||||||
|
1. **Build**: Docker images for API, Web, and AI Services are built and pushed to GHCR with `staging-latest` tag
|
||||||
|
2. **Deploy**: Images are pulled and services are updated via rolling restart (zero-downtime)
|
||||||
|
3. **Verify**: Health check polls `$STAGING_URL/health` for up to 100 seconds
|
||||||
|
4. **Smoke test**: `scripts/smoke-test.sh` runs against the staging URL, checking health probes, core API endpoints, search, and auth
|
||||||
|
5. **Notify**: Slack notification on success or failure
|
||||||
|
6. **Rollback**: If smoke tests fail, automatic rollback restores previous container images
|
||||||
|
|
||||||
|
### Notifications
|
||||||
|
|
||||||
|
Deploy status notifications are sent to Slack via `SLACK_WEBHOOK_URL` secret:
|
||||||
|
|
||||||
|
| Event | Channel | Content |
|
||||||
|
|-------|---------|---------|
|
||||||
|
| Staging smoke tests pass | Slack | ✅ Commit SHA, branch, link to run |
|
||||||
|
| Staging smoke tests fail | Slack | 🚨 Commit SHA, branch, link to run |
|
||||||
|
| Staging rollback triggered | Slack | ⚠️ Commit SHA, reason, link to run |
|
||||||
|
| Production deploy success | Slack | ✅ Commit SHA, branch |
|
||||||
|
| Production rollback triggered | Slack | ⚠️ Commit SHA, reason, link to run |
|
||||||
|
|
||||||
|
### Required Secrets
|
||||||
|
|
||||||
|
| Secret | Environment | Description |
|
||||||
|
|--------|-------------|-------------|
|
||||||
|
| `STAGING_HOST` | staging | Staging server hostname/IP |
|
||||||
|
| `STAGING_USER` | staging | SSH user for staging deploys |
|
||||||
|
| `STAGING_SSH_KEY` | staging | SSH private key for staging |
|
||||||
|
| `STAGING_URL` | staging | Staging base URL (e.g., `https://staging.goodgo.vn`) |
|
||||||
|
| `PRODUCTION_HOST` | production | Production server hostname/IP |
|
||||||
|
| `PRODUCTION_USER` | production | SSH user for production deploys |
|
||||||
|
| `PRODUCTION_SSH_KEY` | production | SSH private key for production |
|
||||||
|
| `PRODUCTION_URL` | production | Production base URL |
|
||||||
|
| `SLACK_WEBHOOK_URL` | both | Slack incoming webhook URL |
|
||||||
|
|
||||||
## Rollback
|
## Rollback
|
||||||
|
|
||||||
### Application Rollback
|
### Automatic Rollback (Staging)
|
||||||
|
|
||||||
Deploy the previous container image or build artifact. The API and Web are stateless — no rollback-specific steps needed.
|
The staging pipeline includes automatic rollback when smoke tests fail:
|
||||||
|
|
||||||
|
1. **Pre-deploy**: The image IDs of the running `goodgo-api`, `goodgo-web`, and `goodgo-ai-services` containers are recorded (via `docker inspect`) before deployment
|
||||||
|
2. **Smoke test failure**: If `scripts/smoke-test.sh` exits non-zero, the `rollback-staging` job triggers
|
||||||
|
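3. **Rollback execution**: The `api`, `web`, and `ai-services` containers are stopped (`docker compose down`) and restarted (`docker compose up -d --wait`) on the staging host to restore the previous images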
|
||||||
|
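4. **Verification**: After a 5-second pause, `http://localhost:3001/health` is checked on the host; a failed check is logged as a warning and does not fail the rollback job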
|
||||||
|
5. **Notification**: Slack notification reports the rollback with links to the failed run
|
||||||
|
|
||||||
|
### Automatic Rollback (Production)
|
||||||
|
|
||||||
|
Same mechanism as staging — smoke test failure triggers `rollback-production`.
|
||||||
|
|
||||||
|
### Manual Rollback
|
||||||
|
|
||||||
|
To manually rollback a staging or production deployment:
|
||||||
|
|
||||||
|
#### Option 1: Re-deploy a known-good commit
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Trigger a deploy of a specific commit via GitHub Actions
|
||||||
|
gh workflow run deploy.yml \
|
||||||
|
--ref <known-good-commit-or-branch> \
|
||||||
|
-f environment=staging
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Option 2: SSH rollback (emergency)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# SSH into the staging/production server
|
||||||
|
ssh deploy@<host>
|
||||||
|
|
||||||
|
cd ~/goodgo
|
||||||
|
|
||||||
|
# Stop the current services
|
||||||
|
docker compose -f docker-compose.prod.yml down api web ai-services
|
||||||
|
|
||||||
|
# Restart with the previous image layers still cached locally
|
||||||
|
docker compose -f docker-compose.prod.yml up -d --wait api web ai-services
|
||||||
|
|
||||||
|
# Verify health
|
||||||
|
curl -sf http://localhost:3001/health
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Option 3: Pin to a specific image tag
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ssh deploy@<host>
|
||||||
|
cd ~/goodgo
|
||||||
|
|
||||||
|
# Set IMAGE_TAG to a known-good SHA
|
||||||
|
export IMAGE_TAG=<known-good-commit-sha>
|
||||||
|
export REGISTRY_URL=ghcr.io/<owner>
|
||||||
|
|
||||||
|
# Pull and restart with the pinned tag
|
||||||
|
docker compose -f docker-compose.prod.yml pull api web ai-services
|
||||||
|
docker compose -f docker-compose.prod.yml up -d --no-deps --wait api web ai-services
|
||||||
|
```
|
||||||
|
|
||||||
### Database Rollback
|
### Database Rollback
|
||||||
|
|
||||||
@@ -230,3 +335,11 @@ Prisma does not support automatic down migrations. If a migration must be revert
|
|||||||
4. Update `_prisma_migrations` table
|
4. Update `_prisma_migrations` table
|
||||||
|
|
||||||
Always test migrations against a staging database before production deployment.
|
Always test migrations against a staging database before production deployment.
|
||||||
|
|
||||||
|
### Post-Rollback Checklist
|
||||||
|
|
||||||
|
- [ ] Verify health endpoints respond: `GET /health`, `GET /ready`
|
||||||
|
- [ ] Run smoke tests manually: `./scripts/smoke-test.sh <url>`
|
||||||
|
- [ ] Check application logs: `docker compose -f docker-compose.prod.yml logs --tail=100 api web`
|
||||||
|
- [ ] Confirm Grafana dashboards show normal metrics
|
||||||
|
- [ ] Notify the team via Slack about the rollback and root cause
|
||||||
|
|||||||
Reference in New Issue
Block a user