---
# GoodGo Platform — Alertmanager Configuration
# Routes alerts from Prometheus to notification channels.
#
# Environment variables (set in .env):
#   SLACK_WEBHOOK_URL   — Slack incoming webhook for alert notifications
#   ALERTMANAGER_SMTP_* — SMTP settings for email alerts (optional)
#
# NOTE(review): Alertmanager does not expand ${VAR} placeholders in its
# configuration file by itself; presumably the deployment renders this file
# (e.g. with envsubst) before Alertmanager starts — confirm.

global:
  # Alerts that stop being updated are declared resolved after this long.
  resolve_timeout: 5m
  # Default webhook for every slack_configs entry that sets no api_url.
  slack_api_url: '${SLACK_WEBHOOK_URL}'

# ── Notification Templates ─────────────────────────────────────────────────────
templates:
  - '/etc/alertmanager/templates/*.tmpl'

# ── Inhibition Rules ──────────────────────────────────────────────────────────
# Suppress warning alerts when a critical alert is already firing for the same
# service in the same environment.
inhibit_rules:
  - source_matchers:
      - severity = critical
    target_matchers:
      - severity = warning
    # Both labels must match between source and target. Without 'environment'
    # a critical alert in staging would also mute production warnings for the
    # same service. A label missing on both alerts still counts as equal.
    equal: ['service', 'environment']

# ── Routing Tree ──────────────────────────────────────────────────────────────
route:
  # Fallback receiver for alerts that match none of the child routes.
  receiver: 'slack-sre'
  group_by: ['alertname', 'service']
  # Wait before the first notification of a new group, to batch early alerts.
  group_wait: 30s
  # Minimum wait before notifying about new alerts added to an existing group.
  group_interval: 5m
  # Re-send interval for a group that is still firing.
  repeat_interval: 4h

  # Child routes are tried top to bottom; with `continue: false` the first
  # match wins, so the order below is significant.
  routes:
    # Staging SLO soak — burn-rate alerts to Slack only, no pager.
    # Listed first so critical staging SLO alerts never reach 'slack-critical'.
    - matchers:
        - environment = staging
        - slo_type =~ "availability|latency"
      receiver: 'slack-sre-staging-soak'
      group_by: ['alertname', 'route', 'burn_window']
      group_wait: 15s
      group_interval: 5m
      repeat_interval: 30m
      continue: false

    # Critical alerts — immediate notification, shorter repeat
    - matchers:
        - severity = critical
      receiver: 'slack-critical'
      group_wait: 10s
      repeat_interval: 1h
      continue: false

    # Backup alerts — route to infrastructure channel.
    # Critical backup alerts are already consumed by the route above.
    - matchers:
        - alertname =~ "Backup.*"
      receiver: 'slack-infrastructure'
      group_wait: 1m
      repeat_interval: 6h

# ── Receivers ─────────────────────────────────────────────────────────────────
# NOTE(review): every `text` below uses a folded scalar (`>-`), which joins the
# lines with single spaces, so Slack receives each message body as one
# paragraph. Switch to `|-` if one field per line is wanted.
receivers:
  # Default receiver: everything not claimed by a child route.
  - name: 'slack-sre'
    slack_configs:
      - channel: '#sre-oncall'
        # Also post a message when the alert group resolves.
        send_resolved: true
        title: '{{ if eq .Status "firing" }}🔥{{ else }}✅{{ end }} [{{ .Status | toUpper }}] {{ .CommonLabels.alertname }}'
        text: >-
          *Service:* {{ .CommonLabels.service }}
          *Severity:* {{ .CommonLabels.severity }}
          {{ range .Alerts }}
          *Summary:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          {{ if .Annotations.runbook_url }}*Runbook:* {{ .Annotations.runbook_url }}{{ end }}
          {{ if .Annotations.dashboard }}*Dashboard:* {{ .Annotations.dashboard }}{{ end }}
          {{ end }}

  # Critical alerts: same channel as the default receiver, louder wording.
  - name: 'slack-critical'
    slack_configs:
      - channel: '#sre-oncall'
        send_resolved: true
        title: '{{ if eq .Status "firing" }}🚨 CRITICAL{{ else }}✅ RESOLVED{{ end }} {{ .CommonLabels.alertname }}'
        text: >-
          *Service:* {{ .CommonLabels.service }}
          *Severity:* CRITICAL — Immediate action required
          {{ range .Alerts }}
          *Summary:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          {{ if .Annotations.runbook_url }}*Runbook:* {{ .Annotations.runbook_url }}{{ end }}
          {{ end }}

  # Staging SLO soak: informational only.
  # NOTE(review): `method` and `severity` are not part of this route's
  # group_by, so .CommonLabels.method / .CommonLabels.severity render empty
  # whenever the alerts in a group disagree on them.
  - name: 'slack-sre-staging-soak'
    slack_configs:
      - channel: '#sre-staging-soak'
        send_resolved: true
        title: 'SOAK {{ .CommonLabels.alertname }}'
        text: >-
          Route: {{ .CommonLabels.method }} {{ .CommonLabels.route }}
          Burn: {{ .CommonLabels.burn_window }} | {{ .CommonLabels.severity }}
          {{ range .Alerts }}{{ .Annotations.summary }}{{ end }}
          Staging soak — NOT paging.

  # Backup alerts for the infrastructure team.
  - name: 'slack-infrastructure'
    slack_configs:
      - channel: '#infrastructure'
        send_resolved: true
        title: '{{ if eq .Status "firing" }}⚠️{{ else }}✅{{ end }} [{{ .Status | toUpper }}] {{ .CommonLabels.alertname }}'
        text: >-
          *Service:* {{ .CommonLabels.service }}
          {{ range .Alerts }}
          *Summary:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          {{ end }}