fix: resolve 4 P0 DevOps blockers — image tags, alertmanager, port conflicts
DEVOPS-C-01: Replace hardcoded :latest with IMAGE_TAG placeholder in all 8
production K8s manifests. Update deploy-production.yml to sed-replace
IMAGE_TAG with commit SHA before kubectl apply (remove now-redundant
kubectl set image step).
DEVOPS-C-02: Configure Alertmanager — create alertmanager.yml with Slack +
email receivers (critical/warning/infra routes, inhibition rules). Add
alertmanager:v0.27.0 service to both docker-compose.observability.yml and
deployments/local/docker-compose.yml. Enable prometheus.yml target
(alertmanager:9093).
DEVOPS-C-03: Remove :latest from the docker-build.yml main-branch push. Only
the SHA tag is now pushed for main; develop pushes :staging plus the SHA tag.
DEVOPS-C-04: Add 4 mkt-* services to deployments/local/docker-compose.yml
with unique host ports (facebook:5021, whatsapp:5022, x:5023, zalo:5024)
to eliminate port 5000 conflicts. Add corresponding Traefik routers and
load-balancer entries in infra/traefik/dynamic/routes.yml
(/api/v1/mkt/{facebook,whatsapp,x,zalo}).
Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
118
infra/observability/alertmanager/alertmanager.yml
Normal file
118
infra/observability/alertmanager/alertmanager.yml
Normal file
@@ -0,0 +1,118 @@
|
||||
# =============================================================================
|
||||
# GoodGo Platform - Alertmanager Configuration
|
||||
# =============================================================================
|
||||
# EN: Alertmanager routes and receivers for all platform alerts.
|
||||
# VI: Cau hinh Alertmanager — routes va receivers cho tat ca canh bao.
|
||||
# =============================================================================
|
||||
|
||||
global:
  # Default SMTP settings shared by every email receiver in this file.
  smtp_smarthost: 'smtp.gmail.com:587'
  smtp_from: 'alerts@goodgo.vn'
  # NOTE(review): Alertmanager does not expand ${VAR} references in its
  # configuration file, and smtp_auth_username has no *_file variant, so this
  # placeholder must be substituted when the file is rendered at deploy time.
  smtp_auth_username: '${ALERTMANAGER_SMTP_USER}'
  # Read the password from a mounted file instead of a '${...}' placeholder,
  # which Alertmanager would send to the SMTP server verbatim.
  smtp_auth_password_file: '/etc/alertmanager/secrets/smtp_auth_password'
  smtp_require_tls: true
  resolve_timeout: 5m
|
||||
|
||||
# Alert routing tree: critical alerts go to Slack + email, warnings go to
# Slack, tier=infra alerts additionally go to the infra Slack channel, and
# anything no child route claims falls back to the default receiver.
route:
  group_by: ['alertname', 'service', 'environment']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h
  receiver: 'default-receiver'
  routes:
    # Critical severity: short group_wait and hourly re-notification.
    - matchers:
        - severity = "critical"
      receiver: 'critical-receiver'
      group_wait: 10s
      repeat_interval: 1h
      # Keep evaluating sibling routes so a critical alert that also carries
      # tier=infra reaches the infra receiver below.
      continue: true

    # Warning severity: grouped notification.
    - matchers:
        - severity = "warning"
      receiver: 'warning-receiver'
      repeat_interval: 4h
      # Without this, a warning carrying tier=infra would stop here and never
      # reach the infra route, unlike its critical counterpart.
      continue: true

    # Infrastructure alerts (postgres, redis, rabbitmq), selected by tier.
    - matchers:
        - tier = "infra"
      receiver: 'infra-receiver'
      group_wait: 30s
      repeat_interval: 2h
|
||||
|
||||
receivers:
|
||||
# Default receiver: the root route's fallback; emails the DevOps list.
- name: "default-receiver"
  email_configs:
    - send_resolved: true
      to: "devops@goodgo.vn"
      headers:
        # Subject carries the upper-cased group status and the alert name.
        Subject: "[GoodGo] {{ .Status | toUpper }} {{ .GroupLabels.alertname }}"
|
||||
|
||||
# Critical alerts: Slack (#alerts-critical) plus email to the on-call address.
- name: 'critical-receiver'
  slack_configs:
    # Alertmanager does not expand ${VAR}, and a literal '${...}' placeholder
    # is not a valid webhook URL, so the URL is read from a mounted file.
    - api_url_file: '/etc/alertmanager/secrets/slack_webhook_url'
      channel: '#alerts-critical'
      send_resolved: true
      title: ':fire: [CRITICAL] {{ .GroupLabels.alertname }}'
      # Literal block (|-) keeps each field on its own line; a folded scalar
      # would join them into one line. `-}}` trims the newline after `range`.
      text: |-
        {{ range .Alerts -}}
        *Service:* {{ .Labels.service }}
        *Summary:* {{ .Annotations.summary }}
        *Description:* {{ .Annotations.description }}
        {{ end }}
      # Red while firing, green once resolved.
      color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}'
  email_configs:
    - to: 'oncall@goodgo.vn'
      send_resolved: true
      headers:
        Subject: '[GoodGo CRITICAL] {{ .GroupLabels.alertname }}'
|
||||
|
||||
# Warning alerts: Slack only (#alerts-warning).
- name: 'warning-receiver'
  slack_configs:
    # Alertmanager does not expand ${VAR}, and a literal '${...}' placeholder
    # is not a valid webhook URL, so the URL is read from a mounted file.
    - api_url_file: '/etc/alertmanager/secrets/slack_webhook_url'
      channel: '#alerts-warning'
      send_resolved: true
      title: ':warning: [WARNING] {{ .GroupLabels.alertname }}'
      # Literal block (|-) keeps each field on its own line; a folded scalar
      # would join them into one line. `-}}` trims the newline after `range`.
      text: |-
        {{ range .Alerts -}}
        *Service:* {{ .Labels.service }}
        *Summary:* {{ .Annotations.summary }}
        {{ end }}
      color: 'warning'
|
||||
|
||||
# Infrastructure receiver: Slack channel #alerts-infra for the DevOps team.
- name: 'infra-receiver'
  slack_configs:
    # Alertmanager does not expand ${VAR}, and a literal '${...}' placeholder
    # is not a valid webhook URL, so the URL is read from a mounted file.
    - api_url_file: '/etc/alertmanager/secrets/slack_webhook_url'
      channel: '#alerts-infra'
      send_resolved: true
      title: ':gear: [INFRA] {{ .GroupLabels.alertname }}'
      # Literal block (|-) keeps each field on its own line; a folded scalar
      # would join them into one line. `-}}` trims the newline after `range`.
      text: |-
        {{ range .Alerts -}}
        *Component:* {{ .Labels.service }}
        *Summary:* {{ .Annotations.summary }}
        {{ end }}
|
||||
|
||||
# Inhibition: while a critical alert is firing, mute warning alerts that share
# the same alertname and service labels.
inhibit_rules:
  # source_matchers/target_matchers replace the deprecated
  # source_match/target_match keys.
  - source_matchers:
      - severity = "critical"
    target_matchers:
      - severity = "warning"
    equal: ['alertname', 'service']
|
||||
@@ -1,6 +1,28 @@
|
||||
|
||||
|
||||
services:
  # Alertmanager: alert routing and notification delivery.
  alertmanager:
    image: prom/alertmanager:v0.27.0
    container_name: alertmanager
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
      - '--web.external-url=http://localhost:9093'
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
      - alertmanager_data:/alertmanager
      # Alertmanager does not read credentials from environment variables or
      # expand ${VAR} in its config, so secrets are supplied as files for the
      # *_file config options (e.g. api_url_file, smtp_auth_password_file).
      # The files must be readable by the container's user.
      - ./alertmanager/secrets:/etc/alertmanager/secrets:ro
    networks:
      - microservices-network
    restart: unless-stopped
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
container_name: prometheus
|
||||
@@ -64,6 +86,7 @@ volumes:
|
||||
prometheus_data:
|
||||
grafana_data:
|
||||
loki_data:
|
||||
alertmanager_data:
|
||||
|
||||
networks:
|
||||
microservices-network:
|
||||
|
||||
@@ -26,9 +26,7 @@ rule_files:
|
||||
alerting:
  alertmanagers:
    - static_configs:
        # Send alerts to the Alertmanager instance at alertmanager:9093. The
        # former empty placeholder target list and its "uncomment when
        # deployed" hint are dropped now that the target is active.
        - targets: ['alertmanager:9093']
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scrape Targets
|
||||
|
||||
@@ -182,6 +182,52 @@ http:
|
||||
entryPoints:
|
||||
- web
|
||||
|
||||
# Marketing services: Facebook, WhatsApp, X (Twitter) and Zalo integrations.
# Facebook: requests under /api/v1/mkt/facebook go to mkt-facebook-service.
mkt-facebook-router:
  entryPoints: [web]
  rule: 'PathPrefix(`/api/v1/mkt/facebook`)'
  priority: 100
  middlewares: [api-ratelimit, cors, secure-headers]
  service: mkt-facebook-service
|
||||
|
||||
# WhatsApp: requests under /api/v1/mkt/whatsapp go to mkt-whatsapp-service.
mkt-whatsapp-router:
  entryPoints: [web]
  rule: 'PathPrefix(`/api/v1/mkt/whatsapp`)'
  priority: 100
  middlewares: [api-ratelimit, cors, secure-headers]
  service: mkt-whatsapp-service
|
||||
|
||||
# X (Twitter): requests under /api/v1/mkt/x go to mkt-x-service.
# NOTE(review): the prefix has no trailing slash, so it presumably also
# matches paths such as /api/v1/mkt/xyz — confirm no other mkt route starts
# with "x".
mkt-x-router:
  entryPoints: [web]
  rule: 'PathPrefix(`/api/v1/mkt/x`)'
  priority: 100
  middlewares: [api-ratelimit, cors, secure-headers]
  service: mkt-x-service
|
||||
|
||||
# Zalo: requests under /api/v1/mkt/zalo go to mkt-zalo-service.
mkt-zalo-router:
  entryPoints: [web]
  rule: 'PathPrefix(`/api/v1/mkt/zalo`)'
  priority: 100
  middlewares: [api-ratelimit, cors, secure-headers]
  service: mkt-zalo-service
|
||||
|
||||
services:
|
||||
iam-service:
|
||||
loadBalancer:
|
||||
@@ -259,4 +305,26 @@ http:
|
||||
# Booking backend: one upstream server. The identical URL was listed twice,
# which would register the same server as two load-balancer entries.
booking-service:
  loadBalancer:
    servers:
      - url: "http://booking-service-net:8080"
|
||||
|
||||
# Marketing services: social media integration backends.
# Facebook backend: single upstream at mkt-facebook-service-net:8080.
mkt-facebook-service:
  loadBalancer:
    servers:
      - url: 'http://mkt-facebook-service-net:8080'
|
||||
|
||||
# WhatsApp backend: single upstream at mkt-whatsapp-service-net:8080.
mkt-whatsapp-service:
  loadBalancer:
    servers:
      - url: 'http://mkt-whatsapp-service-net:8080'
|
||||
|
||||
# X (Twitter) backend: single upstream at mkt-x-service-net:8080.
mkt-x-service:
  loadBalancer:
    servers:
      - url: 'http://mkt-x-service-net:8080'
|
||||
|
||||
# Zalo backend: single upstream at mkt-zalo-service-net:8080.
mkt-zalo-service:
  loadBalancer:
    servers:
      - url: 'http://mkt-zalo-service-net:8080'
|
||||
Reference in New Issue
Block a user