# goodgo-platform/monitoring/alertmanager/alertmanager.yml

# GoodGo Platform — Alertmanager Configuration
# Routes alerts from Prometheus to notification channels.
#
# Environment variables (set in .env):
#   SLACK_WEBHOOK_URL      — Slack incoming webhook for alert notifications
#   ALERTMANAGER_SMTP_*    — SMTP settings for email alerts (optional)

# Defaults inherited by every receiver and route below.
global:
  # Slack incoming-webhook URL shared by all slack_configs in this file.
  # NOTE(review): Alertmanager does not expand ${VAR} placeholders itself;
  # this presumably relies on a pre-processing step (e.g. envsubst) before
  # the file is loaded — confirm against the deployment.
  slack_api_url: '${SLACK_WEBHOOK_URL}'
  # How long an alert that stops being reported is kept before it is
  # declared resolved.
  resolve_timeout: 5m

# ── Notification Templates ─────────────────────────────────────────────────────
# Glob of custom notification template files made available to receivers.
templates:
  - '/etc/alertmanager/templates/*.tmpl'

# ── Inhibition Rules ──────────────────────────────────────────────────────────
# While a critical alert is firing for a service, mute that service's
# warning-level alerts so only the more severe signal is delivered.
# NOTE(review): per the Alertmanager docs a missing label compares equal to an
# empty one, so a critical alert without a `service` label would also mute
# warnings that lack the label — confirm every alert carries `service`.
inhibit_rules:
  - equal:
      - service
    source_matchers:
      - 'severity = critical'
    target_matchers:
      - 'severity = warning'

# ── Routing Tree ──────────────────────────────────────────────────────────────
# The top-level route is the catch-all: anything not claimed by a child route
# is grouped by alert name and service and delivered to slack-sre.
# Child routes are tried in the order listed; none of them continues to later
# siblings after a match, so ordering decides which receiver gets an alert.
route:
  receiver: slack-sre
  group_by:
    - alertname
    - service
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h

  routes:
    # Staging SLO soak: burn-rate alerts are posted to a dedicated Slack
    # channel and never page. Listed first so staging SLO alerts are claimed
    # here before the critical route below can match them.
    - receiver: slack-sre-staging-soak
      matchers:
        - 'environment = staging'
        - 'slo_type =~ "availability|latency"'
      group_by:
        - alertname
        - route
        - burn_window
      group_wait: 15s
      group_interval: 5m
      repeat_interval: 30m
      continue: false

    # Critical severity: notify quickly and re-notify more often than the
    # default. Group settings not set here are inherited from the parent.
    - receiver: slack-critical
      matchers:
        - 'severity = critical'
      group_wait: 10s
      repeat_interval: 1h
      continue: false

    # Backup alerts go to the infrastructure channel. Only non-critical ones
    # reach this route, because critical alerts are claimed by the route above.
    - receiver: slack-infrastructure
      matchers:
        - 'alertname =~ "Backup.*"'
      group_wait: 1m
      repeat_interval: 6h

# ── Receivers ─────────────────────────────────────────────────────────────────
receivers:
  # Default receiver: posts firing and resolved notifications to #sre-oncall.
  - name: 'slack-sre'
    slack_configs:
      - channel: '#sre-oncall'
        send_resolved: true
        title: '{{ if eq .Status "firing" }}🔥{{ else }}✅{{ end }} [{{ .Status | toUpper }}] {{ .CommonLabels.alertname }}'
        # Literal block scalar (|-) keeps the line breaks; a folded scalar (>-)
        # would join every line with a space and render the message as one
        # run-on line. The `{{-` markers trim the newline before each template
        # action so optional fields and loop boundaries leave no empty lines;
        # the single blank line inside the range separates alerts.
        text: |-
          *Service:* {{ .CommonLabels.service }}
          *Severity:* {{ .CommonLabels.severity }}
          {{- range .Alerts }}

          *Summary:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          {{- if .Annotations.runbook_url }}
          *Runbook:* {{ .Annotations.runbook_url }}
          {{- end }}
          {{- if .Annotations.dashboard }}
          *Dashboard:* {{ .Annotations.dashboard }}
          {{- end }}
          {{- end }}

  # Receiver for critical-severity alerts: same channel as the default
  # receiver, with a louder title and an explicit call to action.
  - name: 'slack-critical'
    slack_configs:
      - channel: '#sre-oncall'
        send_resolved: true
        title: '{{ if eq .Status "firing" }}🚨 CRITICAL{{ else }}✅ RESOLVED{{ end }} {{ .CommonLabels.alertname }}'
        # Literal block scalar (|-) keeps the line breaks; a folded scalar (>-)
        # would join every line with a space and render the message as one
        # run-on line. The `{{-` markers trim the newline before each template
        # action so the optional runbook line and loop boundaries leave no
        # empty lines; the blank line inside the range separates alerts.
        text: |-
          *Service:* {{ .CommonLabels.service }}
          *Severity:* CRITICAL — Immediate action required
          {{- range .Alerts }}

          *Summary:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          {{- if .Annotations.runbook_url }}
          *Runbook:* {{ .Annotations.runbook_url }}
          {{- end }}
          {{- end }}

  # Receiver for staging SLO soak alerts: informational only, posted to a
  # dedicated channel.
  - name: 'slack-sre-staging-soak'
    slack_configs:
      - channel: '#sre-staging-soak'
        send_resolved: true
        title: 'SOAK {{ .CommonLabels.alertname }}'
        # Literal block scalar (|-) keeps the line breaks; a folded scalar (>-)
        # would join every line with a space. Each alert's summary is emitted
        # on its own line (the `{{-` markers trim the surrounding newlines so
        # no empty lines appear) instead of being concatenated back to back.
        # NOTE(review): `method` and `severity` are not in the soak route's
        # group_by, so these CommonLabels may render empty when the grouped
        # alerts differ in those labels — confirm this is acceptable.
        text: |-
          Route: {{ .CommonLabels.method }} {{ .CommonLabels.route }}
          Burn: {{ .CommonLabels.burn_window }} | {{ .CommonLabels.severity }}
          {{- range .Alerts }}
          {{ .Annotations.summary }}
          {{- end }}
          Staging soak — NOT paging.

  # Receiver for infrastructure alerts (used by the backup route): posts
  # firing and resolved notifications to #infrastructure.
  - name: 'slack-infrastructure'
    slack_configs:
      - channel: '#infrastructure'
        send_resolved: true
        title: '{{ if eq .Status "firing" }}⚠️{{ else }}✅{{ end }} [{{ .Status | toUpper }}] {{ .CommonLabels.alertname }}'
        # Literal block scalar (|-) keeps the line breaks; a folded scalar (>-)
        # would join every line with a space and render the message as one
        # run-on line. The `{{-` markers trim the newline before each loop
        # action; the blank line inside the range separates alerts.
        text: |-
          *Service:* {{ .CommonLabels.service }}
          {{- range .Alerts }}

          *Summary:* {{ .Annotations.summary }}
          *Description:* {{ .Annotations.description }}
          {{- end }}