Files
pos-system/infra/observability/alertmanager/alertmanager.yml
Ho Ngoc Hai 6d0ca5bee5 fix: resolve 4 P0 DevOps blockers — image tags, alertmanager, port conflicts
DEVOPS-C-01: Replace hardcoded :latest with IMAGE_TAG placeholder in all 8
production K8s manifests. Update deploy-production.yml to sed-replace
IMAGE_TAG with commit SHA before kubectl apply (remove now-redundant
kubectl set image step).

DEVOPS-C-02: Configure Alertmanager — create alertmanager.yml with Slack +
email receivers (critical/warning/infra routes, inhibition rules). Add
alertmanager:v0.27.0 service to both docker-compose.observability.yml and
deployments/local/docker-compose.yml. Enable prometheus.yml target
(alertmanager:9093).

DEVOPS-C-03: Remove :latest from docker-build.yml main branch push. Now
only SHA tag is pushed for main; :staging+SHA for develop.

DEVOPS-C-04: Add 4 mkt-* services to deployments/local/docker-compose.yml
with unique host ports (facebook:5021, whatsapp:5022, x:5023, zalo:5024)
to eliminate port 5000 conflicts. Add corresponding Traefik routers and
load-balancer entries in infra/traefik/dynamic/routes.yml
(/api/v1/mkt/{facebook,whatsapp,x,zalo}).

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-03-23 09:46:32 +07:00

119 lines
4.1 KiB
YAML

# =============================================================================
# GoodGo Platform - Alertmanager Configuration
# =============================================================================
# EN: Alertmanager routes and receivers for all platform alerts.
# VI: Cau hinh Alertmanager — routes va receivers cho tat ca canh bao.
# =============================================================================
global:
  # EN: Fallback settings applied to every receiver that does not override
  # them.
  resolve_timeout: 5m

  # EN: Default SMTP relay used by all email_configs entries in this file.
  smtp_from: "alerts@goodgo.vn"
  smtp_smarthost: "smtp.gmail.com:587"
  smtp_require_tls: true
  # NOTE(review): Alertmanager does not expand ${VAR} placeholders on its own.
  # These values presumably get substituted before startup (e.g. envsubst in
  # the container entrypoint) — TODO confirm; otherwise the literal strings
  # are sent as SMTP credentials.
  smtp_auth_username: "${ALERTMANAGER_SMTP_USER}"
  smtp_auth_password: "${ALERTMANAGER_SMTP_PASSWORD}"
# EN: Alert routing tree — critical alerts go to Slack + email, warnings to Slack, infra alerts to the infra Slack channel
# VI: Cay dinh tuyen canh bao — critical -> Slack + email, warning -> Slack, infra -> kenh Slack infra
route:
  # EN: Root route. Alerts that match none of the child routes below are
  # delivered to 'default-receiver'.
  receiver: 'default-receiver'
  group_by: ['alertname', 'service', 'environment']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h
  routes:
    # EN: Critical severity — immediate notification.
    # `continue: true` keeps evaluating the sibling routes, so a critical
    # alert that is also labelled tier=infra reaches 'infra-receiver' too.
    - matchers:
        - 'severity="critical"'
      receiver: 'critical-receiver'
      group_wait: 10s
      repeat_interval: 1h
      continue: true
    # EN: Warning severity — grouped notification.
    # `continue: true` mirrors the critical route: without it, evaluation
    # stops here and warning alerts labelled tier=infra would never reach
    # the infra route below.
    - matchers:
        - 'severity="warning"'
      receiver: 'warning-receiver'
      repeat_interval: 4h
      continue: true
    # EN: Infrastructure alerts (postgres, redis, rabbitmq).
    # Must stay last: the severity routes above rely on falling through to
    # this entry.
    - matchers:
        - 'tier="infra"'
      receiver: 'infra-receiver'
      group_wait: 30s
      repeat_interval: 2h
receivers:
  # NOTE(review): every Slack receiver below reads its webhook from the
  # ${ALERTMANAGER_SLACK_WEBHOOK_URL} placeholder. Alertmanager does not
  # expand ${VAR} itself, so this presumably relies on the file being
  # rendered before startup — TODO confirm.
  #
  # The Slack `text` templates use a literal block scalar (`|-`) so that each
  # field is rendered on its own line; a folded scalar (`>-`) would join all
  # fields of all alerts into a single line. The `-}}` trim marker drops the
  # newline right after `range` so messages do not start with a blank line.

  # EN: Default receiver — catches all unmatched alerts.
  - name: 'default-receiver'
    email_configs:
      - to: 'devops@goodgo.vn'
        send_resolved: true
        headers:
          Subject: '[GoodGo] {{ .Status | toUpper }} {{ .GroupLabels.alertname }}'

  # EN: Critical alerts — Slack + email.
  - name: 'critical-receiver'
    slack_configs:
      - api_url: '${ALERTMANAGER_SLACK_WEBHOOK_URL}'
        channel: '#alerts-critical'
        send_resolved: true
        title: ':fire: [CRITICAL] {{ .GroupLabels.alertname }}'
        text: |-
          {{ range .Alerts -}}
          *Service:* {{ .Labels.service }}
          *Summary:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          {{ end }}
        # Red while the group is firing, green once it has resolved.
        color: '{{ if eq .Status "firing" }}danger{{ else }}good{{ end }}'
    email_configs:
      - to: 'oncall@goodgo.vn'
        send_resolved: true
        headers:
          Subject: '[GoodGo CRITICAL] {{ .GroupLabels.alertname }}'

  # EN: Warning alerts — Slack only.
  - name: 'warning-receiver'
    slack_configs:
      - api_url: '${ALERTMANAGER_SLACK_WEBHOOK_URL}'
        channel: '#alerts-warning'
        send_resolved: true
        title: ':warning: [WARNING] {{ .GroupLabels.alertname }}'
        text: |-
          {{ range .Alerts -}}
          *Service:* {{ .Labels.service }}
          *Summary:* {{ .Annotations.summary }}
          {{ end }}
        color: 'warning'

  # EN: Infrastructure receiver — DevOps team.
  - name: 'infra-receiver'
    slack_configs:
      - api_url: '${ALERTMANAGER_SLACK_WEBHOOK_URL}'
        channel: '#alerts-infra'
        send_resolved: true
        title: ':gear: [INFRA] {{ .GroupLabels.alertname }}'
        text: |-
          {{ range .Alerts -}}
          *Component:* {{ .Labels.service }}
          *Summary:* {{ .Annotations.summary }}
          {{ end }}
# EN: Inhibition rules — mute warning alerts while a critical alert with the same alertname and service is firing
# VI: Quy tac uc che — tat canh bao warning khi co canh bao critical cung alertname va service dang kich hoat
inhibit_rules:
  # EN: While an alert with severity=critical is firing, mute alerts with
  # severity=warning that carry the same 'alertname' and 'service' label
  # values. Uses source_matchers/target_matchers, the replacement for the
  # deprecated source_match/target_match keys.
  - source_matchers:
      - 'severity="critical"'
    target_matchers:
      - 'severity="warning"'
    equal: ['alertname', 'service']