fix: resolve 4 P0 DevOps blockers — image tags, alertmanager, port conflicts

DEVOPS-C-01: Replace hardcoded :latest with IMAGE_TAG placeholder in all 8
production K8s manifests. Update deploy-production.yml to sed-replace
IMAGE_TAG with commit SHA before kubectl apply (remove now-redundant
kubectl set image step).

DEVOPS-C-02: Configure Alertmanager — create alertmanager.yml with Slack +
email receivers (critical/warning/infra routes, inhibition rules). Add
alertmanager:v0.27.0 service to both docker-compose.observability.yml and
deployments/local/docker-compose.yml. Enable prometheus.yml target
(alertmanager:9093).

DEVOPS-C-03: Remove :latest from docker-build.yml main branch push. Now
only SHA tag is pushed for main; :staging+SHA for develop.

DEVOPS-C-04: Add 4 mkt-* services to deployments/local/docker-compose.yml
with unique host ports (facebook:5021, whatsapp:5022, x:5023, zalo:5024)
to eliminate port 5000 conflicts. Add corresponding Traefik routers and
load-balancer entries in infra/traefik/dynamic/routes.yml
(/api/v1/mkt/{facebook,whatsapp,x,zalo}).

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Ho Ngoc Hai
2026-03-23 09:46:32 +07:00
parent cdc67d768f
commit 6d0ca5bee5
15 changed files with 438 additions and 20 deletions

View File

@@ -319,13 +319,10 @@ jobs:
for svc in "${!DEPLOY_MAP[@]}"; do for svc in "${!DEPLOY_MAP[@]}"; do
if echo "$SERVICES" | grep -q "\"${svc}\""; then if echo "$SERVICES" | grep -q "\"${svc}\""; then
echo "Deploying ${svc}..." echo "Deploying ${svc}..."
kubectl apply -f "deployments/production/kubernetes/${DEPLOY_MAP[$svc]}" # EN: Replace IMAGE_TAG placeholder with commit SHA before applying (never :latest in production)
# VI: Thay the IMAGE_TAG bang commit SHA truoc khi apply (khong bao gio dung :latest trong production)
# EN: Update image to commit SHA (never :latest in production) MANIFEST="deployments/production/kubernetes/${DEPLOY_MAP[$svc]}"
# VI: Cap nhat image bang commit SHA (khong bao gio dung :latest trong production) sed "s|IMAGE_TAG|${{ github.sha }}|g" "$MANIFEST" | kubectl apply -f -
kubectl set image "deployment/${svc}" \
"${svc}=${IMAGE_MAP[$svc]}:${{ github.sha }}" \
-n production
kubectl rollout restart "deployment/${svc}" -n production kubectl rollout restart "deployment/${svc}" -n production
fi fi

View File

@@ -96,8 +96,10 @@ jobs:
SHA="${{ github.sha }}" SHA="${{ github.sha }}"
BRANCH="${{ github.ref_name }}" BRANCH="${{ github.ref_name }}"
# EN: Never push :latest — main gets the commit SHA tag only; any other branch (e.g. develop) gets :staging plus the commit SHA
# VI: Khong bao gio push :latest — chi dung commit SHA cho main, :staging cho develop
if [ "$BRANCH" = "main" ]; then if [ "$BRANCH" = "main" ]; then
echo "tags=${IMAGE}:latest,${IMAGE}:${SHA}" >> $GITHUB_OUTPUT echo "tags=${IMAGE}:${SHA}" >> $GITHUB_OUTPUT
else else
echo "tags=${IMAGE}:staging,${IMAGE}:${SHA}" >> $GITHUB_OUTPUT echo "tags=${IMAGE}:staging,${IMAGE}:${SHA}" >> $GITHUB_OUTPUT
fi fi

View File

@@ -1227,6 +1227,190 @@ services:
- "traefik.http.routers.ads-tracking-admin.service=ads-tracking-service" - "traefik.http.routers.ads-tracking-admin.service=ads-tracking-service"
# Marketing Services - Social Media Integrations
# EN: Every mkt-* service publishes its own host port so none of them compete for port 5000.
# EN: Facebook integration API — host port 5021 maps to container port 8080.
mkt-facebook-service-net:
  image: goodgo/mkt-facebook-service-net:latest
  container_name: mkt-facebook-service-net-local
  build:
    context: ../../services/mkt-facebook-service-net
    dockerfile: Dockerfile
  restart: unless-stopped
  networks:
    - microservices-network
  ports:
    - "5021:8080"
  # EN: IAM must report healthy first; Traefik only has to be started.
  depends_on:
    iam-service-net:
      condition: service_healthy
    traefik:
      condition: service_started
  environment:
    ASPNETCORE_ENVIRONMENT: "Development"
    ASPNETCORE_URLS: "http://+:8080"
    ConnectionStrings__DefaultConnection: "${MKT_FACEBOOK_DATABASE_URL}"
    IamService__BaseUrl: "http://iam-service-net:8080"
    Jwt__Authority: "http://iam-service-net:8080"
    Jwt__Audience: "goodgo-api"
    Jwt__RequireHttpsMetadata: "false"
    Redis__Host: "${REDIS_HOST}"
    Redis__Port: "${REDIS_PORT}"
    Redis__Password: "${REDIS_PASSWORD}"
    RabbitMQ__Host: "rabbitmq"
    RabbitMQ__Port: "5672"
    RabbitMQ__Username: "${RABBITMQ_USERNAME}"
    RabbitMQ__Password: "${RABBITMQ_PASSWORD}"
  # NOTE(review): this probe needs curl inside the image — confirm the Dockerfile installs it.
  healthcheck:
    test:
      - CMD-SHELL
      - "curl -f http://localhost:8080/health/live || exit 1"
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 40s
  labels:
    traefik.enable: "true"
    traefik.http.routers.mkt-facebook.rule: "PathPrefix(`/api/v1/mkt/facebook`)"
    traefik.http.routers.mkt-facebook.entrypoints: "web"
    traefik.http.services.mkt-facebook-service.loadbalancer.server.port: "8080"
    traefik.http.services.mkt-facebook-service.loadbalancer.healthcheck.path: "/health/live"
    traefik.http.services.mkt-facebook-service.loadbalancer.healthcheck.interval: "10s"
# EN: WhatsApp integration API — host port 5022 maps to container port 8080.
mkt-whatsapp-service-net:
  image: goodgo/mkt-whatsapp-service-net:latest
  container_name: mkt-whatsapp-service-net-local
  build:
    context: ../../services/mkt-whatsapp-service-net
    dockerfile: Dockerfile
  restart: unless-stopped
  networks:
    - microservices-network
  ports:
    - "5022:8080"
  # EN: IAM must report healthy first; Traefik only has to be started.
  depends_on:
    iam-service-net:
      condition: service_healthy
    traefik:
      condition: service_started
  environment:
    ASPNETCORE_ENVIRONMENT: "Development"
    ASPNETCORE_URLS: "http://+:8080"
    ConnectionStrings__DefaultConnection: "${MKT_WHATSAPP_DATABASE_URL}"
    IamService__BaseUrl: "http://iam-service-net:8080"
    Jwt__Authority: "http://iam-service-net:8080"
    Jwt__Audience: "goodgo-api"
    Jwt__RequireHttpsMetadata: "false"
    Redis__Host: "${REDIS_HOST}"
    Redis__Port: "${REDIS_PORT}"
    Redis__Password: "${REDIS_PASSWORD}"
    RabbitMQ__Host: "rabbitmq"
    RabbitMQ__Port: "5672"
    RabbitMQ__Username: "${RABBITMQ_USERNAME}"
    RabbitMQ__Password: "${RABBITMQ_PASSWORD}"
  # NOTE(review): this probe needs curl inside the image — confirm the Dockerfile installs it.
  healthcheck:
    test:
      - CMD-SHELL
      - "curl -f http://localhost:8080/health/live || exit 1"
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 40s
  labels:
    traefik.enable: "true"
    traefik.http.routers.mkt-whatsapp.rule: "PathPrefix(`/api/v1/mkt/whatsapp`)"
    traefik.http.routers.mkt-whatsapp.entrypoints: "web"
    traefik.http.services.mkt-whatsapp-service.loadbalancer.server.port: "8080"
    traefik.http.services.mkt-whatsapp-service.loadbalancer.healthcheck.path: "/health/live"
    traefik.http.services.mkt-whatsapp-service.loadbalancer.healthcheck.interval: "10s"
# EN: X (Twitter) integration API — host port 5023 maps to container port 8080.
mkt-x-service-net:
  image: goodgo/mkt-x-service-net:latest
  container_name: mkt-x-service-net-local
  build:
    context: ../../services/mkt-x-service-net
    dockerfile: Dockerfile
  restart: unless-stopped
  networks:
    - microservices-network
  ports:
    - "5023:8080"
  # EN: IAM must report healthy first; Traefik only has to be started.
  depends_on:
    iam-service-net:
      condition: service_healthy
    traefik:
      condition: service_started
  environment:
    ASPNETCORE_ENVIRONMENT: "Development"
    ASPNETCORE_URLS: "http://+:8080"
    ConnectionStrings__DefaultConnection: "${MKT_X_DATABASE_URL}"
    IamService__BaseUrl: "http://iam-service-net:8080"
    Jwt__Authority: "http://iam-service-net:8080"
    Jwt__Audience: "goodgo-api"
    Jwt__RequireHttpsMetadata: "false"
    Redis__Host: "${REDIS_HOST}"
    Redis__Port: "${REDIS_PORT}"
    Redis__Password: "${REDIS_PASSWORD}"
    RabbitMQ__Host: "rabbitmq"
    RabbitMQ__Port: "5672"
    RabbitMQ__Username: "${RABBITMQ_USERNAME}"
    RabbitMQ__Password: "${RABBITMQ_PASSWORD}"
  # NOTE(review): this probe needs curl inside the image — confirm the Dockerfile installs it.
  healthcheck:
    test:
      - CMD-SHELL
      - "curl -f http://localhost:8080/health/live || exit 1"
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 40s
  # NOTE(review): the `/api/v1/mkt/x` prefix presumably also matches any sibling path that
  # merely begins with "x" (e.g. /api/v1/mkt/xyz) — confirm no such route is planned.
  labels:
    traefik.enable: "true"
    traefik.http.routers.mkt-x.rule: "PathPrefix(`/api/v1/mkt/x`)"
    traefik.http.routers.mkt-x.entrypoints: "web"
    traefik.http.services.mkt-x-service.loadbalancer.server.port: "8080"
    traefik.http.services.mkt-x-service.loadbalancer.healthcheck.path: "/health/live"
    traefik.http.services.mkt-x-service.loadbalancer.healthcheck.interval: "10s"
# EN: Zalo integration API — host port 5024 maps to container port 8080.
mkt-zalo-service-net:
  image: goodgo/mkt-zalo-service-net:latest
  container_name: mkt-zalo-service-net-local
  build:
    context: ../../services/mkt-zalo-service-net
    dockerfile: Dockerfile
  restart: unless-stopped
  networks:
    - microservices-network
  ports:
    - "5024:8080"
  # EN: IAM must report healthy first; Traefik only has to be started.
  depends_on:
    iam-service-net:
      condition: service_healthy
    traefik:
      condition: service_started
  environment:
    ASPNETCORE_ENVIRONMENT: "Development"
    ASPNETCORE_URLS: "http://+:8080"
    ConnectionStrings__DefaultConnection: "${MKT_ZALO_DATABASE_URL}"
    IamService__BaseUrl: "http://iam-service-net:8080"
    Jwt__Authority: "http://iam-service-net:8080"
    Jwt__Audience: "goodgo-api"
    Jwt__RequireHttpsMetadata: "false"
    Redis__Host: "${REDIS_HOST}"
    Redis__Port: "${REDIS_PORT}"
    Redis__Password: "${REDIS_PASSWORD}"
    RabbitMQ__Host: "rabbitmq"
    RabbitMQ__Port: "5672"
    RabbitMQ__Username: "${RABBITMQ_USERNAME}"
    RabbitMQ__Password: "${RABBITMQ_PASSWORD}"
  # NOTE(review): this probe needs curl inside the image — confirm the Dockerfile installs it.
  healthcheck:
    test:
      - CMD-SHELL
      - "curl -f http://localhost:8080/health/live || exit 1"
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 40s
  labels:
    traefik.enable: "true"
    traefik.http.routers.mkt-zalo.rule: "PathPrefix(`/api/v1/mkt/zalo`)"
    traefik.http.routers.mkt-zalo.entrypoints: "web"
    traefik.http.services.mkt-zalo-service.loadbalancer.server.port: "8080"
    traefik.http.services.mkt-zalo-service.loadbalancer.healthcheck.path: "/health/live"
    traefik.http.services.mkt-zalo-service.loadbalancer.healthcheck.interval: "10s"
# Jaeger - Distributed Tracing # Jaeger - Distributed Tracing
# jaeger: # jaeger:
# image: jaegertracing/all-in-one:1.47 # image: jaegertracing/all-in-one:1.47
@@ -1240,6 +1424,32 @@ services:
# - microservices-network # - microservices-network
# restart: unless-stopped # restart: unless-stopped
# Alertmanager - Alert Routing & Notification Delivery
# NOTE(review): the ALERTMANAGER_* variables below only reach the container environment.
# Alertmanager is not known to expand ${VAR} references inside its config file — confirm
# how alertmanager.yml is expected to pick these values up.
alertmanager:
  image: prom/alertmanager:v0.27.0
  container_name: alertmanager-local
  restart: unless-stopped
  networks:
    - microservices-network
  ports:
    - "9093:9093"
  command:
    - "--config.file=/etc/alertmanager/alertmanager.yml"
    - "--storage.path=/alertmanager"
    - "--web.external-url=http://localhost:9093"
  volumes:
    # EN: Config is mounted read-only; runtime state goes to the named volume.
    - ../../infra/observability/alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
    - alertmanager_data:/alertmanager
  environment:
    ALERTMANAGER_SLACK_WEBHOOK_URL: "${ALERTMANAGER_SLACK_WEBHOOK_URL:-}"
    ALERTMANAGER_SMTP_USER: "${ALERTMANAGER_SMTP_USER:-}"
    ALERTMANAGER_SMTP_PASSWORD: "${ALERTMANAGER_SMTP_PASSWORD:-}"
  healthcheck:
    test:
      - CMD-SHELL
      - "wget --no-verbose --tries=1 --spider http://localhost:9093/-/healthy || exit 1"
    interval: 30s
    timeout: 10s
    retries: 3
# Prometheus - Metrics Collection # Prometheus - Metrics Collection
prometheus: prometheus:
image: prom/prometheus:v2.51.0 image: prom/prometheus:v2.51.0
@@ -1402,6 +1612,8 @@ volumes:
driver: local driver: local
loki_data: loki_data:
driver: local driver: local
alertmanager_data:
driver: local
# ============================================================================= # =============================================================================
# NETWORKS # NETWORKS
# ============================================================================= # =============================================================================

View File

@@ -40,7 +40,7 @@ spec:
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
containers: containers:
- name: booking-service - name: booking-service
image: goodgo/booking-service-net:latest image: goodgo/booking-service-net:IMAGE_TAG
imagePullPolicy: Always imagePullPolicy: Always
ports: ports:
- containerPort: 8080 - containerPort: 8080

View File

@@ -40,7 +40,7 @@ spec:
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
containers: containers:
- name: catalog-service - name: catalog-service
image: goodgo/catalog-service-net:latest image: goodgo/catalog-service-net:IMAGE_TAG
imagePullPolicy: Always imagePullPolicy: Always
ports: ports:
- containerPort: 8080 - containerPort: 8080

View File

@@ -40,7 +40,7 @@ spec:
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
containers: containers:
- name: fnb-engine - name: fnb-engine
image: goodgo/fnb-engine-net:latest image: goodgo/fnb-engine-net:IMAGE_TAG
imagePullPolicy: Always imagePullPolicy: Always
ports: ports:
- containerPort: 8080 - containerPort: 8080

View File

@@ -42,7 +42,7 @@ spec:
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
containers: containers:
- name: iam-service - name: iam-service
image: goodgo/iam-service-net:latest image: goodgo/iam-service-net:IMAGE_TAG
imagePullPolicy: Always imagePullPolicy: Always
ports: ports:
- containerPort: 8080 - containerPort: 8080

View File

@@ -40,7 +40,7 @@ spec:
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
containers: containers:
- name: inventory-service - name: inventory-service
image: goodgo/inventory-service-net:latest image: goodgo/inventory-service-net:IMAGE_TAG
imagePullPolicy: Always imagePullPolicy: Always
ports: ports:
- containerPort: 8080 - containerPort: 8080

View File

@@ -40,7 +40,7 @@ spec:
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
containers: containers:
- name: merchant-service - name: merchant-service
image: goodgo/merchant-service-net:latest image: goodgo/merchant-service-net:IMAGE_TAG
imagePullPolicy: Always imagePullPolicy: Always
ports: ports:
- containerPort: 8080 - containerPort: 8080

View File

@@ -40,7 +40,7 @@ spec:
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
containers: containers:
- name: order-service - name: order-service
image: goodgo/order-service-net:latest image: goodgo/order-service-net:IMAGE_TAG
imagePullPolicy: Always imagePullPolicy: Always
ports: ports:
- containerPort: 8080 - containerPort: 8080

View File

@@ -40,7 +40,7 @@ spec:
topologyKey: kubernetes.io/hostname topologyKey: kubernetes.io/hostname
containers: containers:
- name: wallet-service - name: wallet-service
image: goodgo/wallet-service-net:latest image: goodgo/wallet-service-net:IMAGE_TAG
imagePullPolicy: Always imagePullPolicy: Always
ports: ports:
- containerPort: 8080 - containerPort: 8080

View File

@@ -0,0 +1,118 @@
# =============================================================================
# GoodGo Platform - Alertmanager Configuration
# =============================================================================
# EN: Alertmanager routes and receivers for all platform alerts.
# VI: Cau hinh Alertmanager — routes va receivers cho tat ca canh bao.
# =============================================================================
global:
  # EN: Default SMTP settings shared by every email receiver in this file.
  smtp_smarthost: 'smtp.gmail.com:587'
  smtp_from: 'alerts@goodgo.vn'
  # EN: Alertmanager does not expand ${VAR} placeholders in this file, so the
  # EN: username has to be a literal value.
  # NOTE(review): assumed to be the sending mailbox — TODO confirm.
  smtp_auth_username: 'alerts@goodgo.vn'
  # EN: The password is read from a file so the secret never lives in this config.
  # EN: Mount the secret at this path inside the Alertmanager container.
  smtp_auth_password_file: '/etc/alertmanager/secrets/smtp_password'
  smtp_require_tls: true
  resolve_timeout: 5m
# EN: Alert routing tree — critical alerts go to critical-receiver, warnings to
# EN: warning-receiver, tier=infra alerts to infra-receiver; everything else
# EN: falls through to default-receiver.
route:
  group_by: ['alertname', 'service', 'environment']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h
  receiver: 'default-receiver'
  routes:
    # EN: Critical severity — shorter group_wait and repeat interval.
    - matchers:
        - 'severity = "critical"'
      receiver: 'critical-receiver'
      group_wait: 10s
      repeat_interval: 1h
      # EN: Keep evaluating the sibling routes, so a critical alert that also
      # EN: carries tier=infra reaches infra-receiver as well.
      continue: true
    # EN: Warning severity — grouped notification.
    # NOTE(review): no `continue` here, so a warning with tier=infra stops at
    # this route and never reaches infra-receiver — confirm that is intended.
    - matchers:
        - 'severity = "warning"'
      receiver: 'warning-receiver'
      repeat_interval: 4h
    # EN: Infrastructure alerts (postgres, redis, rabbitmq).
    - matchers:
        - 'tier = "infra"'
      receiver: 'infra-receiver'
      group_wait: 30s
      repeat_interval: 2h
# EN: Alertmanager does not expand ${VAR} placeholders in this file, and a Slack
# EN: `api_url` must be a literal http(s) URL. Each Slack receiver therefore reads
# EN: the webhook from a file; mount the secret at that path in the container.
receivers:
  # EN: Default receiver — catches every alert no child route matched.
  - name: 'default-receiver'
    email_configs:
      - to: 'devops@goodgo.vn'
        send_resolved: true
        headers:
          Subject: '[GoodGo] {{ .Status | toUpper }} {{ .GroupLabels.alertname }}'
  # EN: Critical alerts — Slack + email.
  - name: 'critical-receiver'
    slack_configs:
      - api_url_file: '/etc/alertmanager/secrets/slack_webhook_url'
        channel: '#alerts-critical'
        send_resolved: true
        title: ':fire: [CRITICAL] {{ .GroupLabels.alertname }}'
        # EN: Literal block so each field is emitted on its own line.
        text: |-
          {{ range .Alerts }}
          *Service:* {{ .Labels.service }}
          *Summary:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          {{ end }}
        color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}'
    email_configs:
      - to: 'oncall@goodgo.vn'
        send_resolved: true
        headers:
          Subject: '[GoodGo CRITICAL] {{ .GroupLabels.alertname }}'
  # EN: Warning alerts — Slack only.
  - name: 'warning-receiver'
    slack_configs:
      - api_url_file: '/etc/alertmanager/secrets/slack_webhook_url'
        channel: '#alerts-warning'
        send_resolved: true
        title: ':warning: [WARNING] {{ .GroupLabels.alertname }}'
        text: |-
          {{ range .Alerts }}
          *Service:* {{ .Labels.service }}
          *Summary:* {{ .Annotations.summary }}
          {{ end }}
        color: 'warning'
  # EN: Infrastructure receiver — DevOps team, Slack only.
  - name: 'infra-receiver'
    slack_configs:
      - api_url_file: '/etc/alertmanager/secrets/slack_webhook_url'
        channel: '#alerts-infra'
        send_resolved: true
        title: ':gear: [INFRA] {{ .GroupLabels.alertname }}'
        text: |-
          {{ range .Alerts }}
          *Component:* {{ .Labels.service }}
          *Summary:* {{ .Annotations.summary }}
          {{ end }}
# EN: Inhibition rules — while a critical alert is firing, mute the warning
# EN: alert that shares the same alertname and service, so one problem does not
# EN: notify at two severities.
inhibit_rules:
  - source_matchers:
      - 'severity = "critical"'
    target_matchers:
      - 'severity = "warning"'
    equal: ['alertname', 'service']

View File

@@ -1,6 +1,28 @@
services: services:
# EN: Alertmanager - Alert routing and notification delivery
# NOTE(review): the ALERTMANAGER_* variables below only reach the container environment.
# Alertmanager is not known to expand ${VAR} references inside its config file — confirm
# how alertmanager.yml is expected to pick these values up.
alertmanager:
  image: prom/alertmanager:v0.27.0
  container_name: alertmanager
  command:
    - '--config.file=/etc/alertmanager/alertmanager.yml'
    - '--storage.path=/alertmanager'
    - '--web.external-url=http://localhost:9093'
  ports:
    - "9093:9093"
  volumes:
    # EN: Config is mounted read-only; runtime state goes to the named volume.
    - ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
    - alertmanager_data:/alertmanager
  environment:
    - ALERTMANAGER_SLACK_WEBHOOK_URL=${ALERTMANAGER_SLACK_WEBHOOK_URL:-}
    - ALERTMANAGER_SMTP_USER=${ALERTMANAGER_SMTP_USER:-}
    - ALERTMANAGER_SMTP_PASSWORD=${ALERTMANAGER_SMTP_PASSWORD:-}
  networks:
    - microservices-network
  restart: unless-stopped
  # EN: Same probe as the alertmanager service in deployments/local/docker-compose.yml,
  # EN: so both stacks report container health the same way.
  healthcheck:
    test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:9093/-/healthy || exit 1"]
    interval: 30s
    timeout: 10s
    retries: 3
prometheus: prometheus:
image: prom/prometheus:latest image: prom/prometheus:latest
container_name: prometheus container_name: prometheus
@@ -64,6 +86,7 @@ volumes:
prometheus_data: prometheus_data:
grafana_data: grafana_data:
loki_data: loki_data:
alertmanager_data:
networks: networks:
microservices-network: microservices-network:

View File

@@ -26,9 +26,7 @@ rule_files:
alerting: alerting:
alertmanagers: alertmanagers:
- static_configs: - static_configs:
- targets: [] - targets: ['alertmanager:9093']
# Uncomment when Alertmanager is deployed:
# - targets: ['alertmanager:9093']
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Scrape Targets # Scrape Targets

View File

@@ -182,6 +182,52 @@ http:
entryPoints: entryPoints:
- web - web
# EN: Marketing Services — Facebook, WhatsApp, X (Twitter), Zalo integrations
# EN: Requests under /api/v1/mkt/facebook go to mkt-facebook-service.
mkt-facebook-router:
  entryPoints: [web]
  rule: 'PathPrefix(`/api/v1/mkt/facebook`)'
  priority: 100
  service: mkt-facebook-service
  middlewares: [api-ratelimit, cors, secure-headers]
# EN: Requests under /api/v1/mkt/whatsapp go to mkt-whatsapp-service.
mkt-whatsapp-router:
  entryPoints: [web]
  rule: 'PathPrefix(`/api/v1/mkt/whatsapp`)'
  priority: 100
  service: mkt-whatsapp-service
  middlewares: [api-ratelimit, cors, secure-headers]
# EN: Requests under /api/v1/mkt/x go to mkt-x-service.
# NOTE(review): this prefix presumably also matches sibling paths that merely begin
# with "x" (e.g. /api/v1/mkt/xyz) — confirm no such route is planned.
mkt-x-router:
  entryPoints: [web]
  rule: 'PathPrefix(`/api/v1/mkt/x`)'
  priority: 100
  service: mkt-x-service
  middlewares: [api-ratelimit, cors, secure-headers]
# EN: Requests under /api/v1/mkt/zalo go to mkt-zalo-service.
mkt-zalo-router:
  entryPoints: [web]
  rule: 'PathPrefix(`/api/v1/mkt/zalo`)'
  priority: 100
  service: mkt-zalo-service
  middlewares: [api-ratelimit, cors, secure-headers]
services: services:
iam-service: iam-service:
loadBalancer: loadBalancer:
@@ -259,4 +305,26 @@ http:
booking-service: booking-service:
loadBalancer: loadBalancer:
servers: servers:
- url: "http://booking-service-net:8080" - url: "http://booking-service-net:8080"
# EN: Marketing Services — Social Media Integrations
# EN: Single backend: the Facebook container on port 8080.
mkt-facebook-service:
  loadBalancer:
    servers:
      - url: 'http://mkt-facebook-service-net:8080'
# EN: Single backend: the WhatsApp container on port 8080.
mkt-whatsapp-service:
  loadBalancer:
    servers:
      - url: 'http://mkt-whatsapp-service-net:8080'
# EN: Single backend: the X (Twitter) container on port 8080.
mkt-x-service:
  loadBalancer:
    servers:
      - url: 'http://mkt-x-service-net:8080'
# EN: Single backend: the Zalo container on port 8080.
mkt-zalo-service:
  loadBalancer:
    servers:
      - url: 'http://mkt-zalo-service-net:8080'