Files
vm-cloudflare/observatory/alertmanager/alertmanager.yml
Vault Sovereign 37a867c485 Initial commit: Cloudflare infrastructure with WAF Intelligence
- Complete Cloudflare Terraform configuration (DNS, WAF, tunnels, access)
- WAF Intelligence MCP server with threat analysis and ML classification
- GitOps automation with PR workflows and drift detection
- Observatory monitoring stack with Prometheus/Grafana
- IDE operator rules for governed development
- Security playbooks and compliance frameworks
- Autonomous remediation and state reconciliation
2025-12-16 18:31:53 +00:00

366 lines
11 KiB
YAML

# Alertmanager Configuration for Cloudflare Mesh Observatory
# Phase 5B - Alerts & Escalation
# Defaults shared by every receiver that does not override them.
# NOTE(review): the `${...}` values look like placeholders rendered by a deploy
# step (e.g. envsubst) before Alertmanager loads this file; Alertmanager's
# configuration reference does not describe environment expansion - confirm.
global:
  # Default SMTP settings (override in receivers)
  smtp_smarthost: 'smtp.example.com:587'
  smtp_from: 'cloudflare-alerts@yourdomain.com'
  smtp_auth_username: '${SMTP_USERNAME}'
  smtp_auth_password: '${SMTP_PASSWORD}'
  smtp_require_tls: true
  # Slack API URL (set via environment)
  slack_api_url: '${SLACK_WEBHOOK_URL}'
  # PagerDuty Events API endpoint (the integration key itself is set on the
  # PagerDuty receiver, not here)
  pagerduty_url: 'https://events.pagerduty.com/v2/enqueue'
  # Resolve timeout
  resolve_timeout: 5m
# Templates for notifications
# Glob of template files; the receivers reference named templates by
# `{{ template "..." . }}`.
templates:
- '/etc/alertmanager/templates/*.tmpl'
# Routing tree
# Sibling routes are evaluated top to bottom. An alert stops at the first
# sibling it matches unless that route sets `continue: true`.
route:
  # Default receiver
  receiver: 'slack-default'
  # Group alerts by these labels
  group_by: ['alertname', 'severity', 'component']
  # Wait before sending first notification
  group_wait: 30s
  # Wait before sending notification about new alerts in group
  group_interval: 5m
  # Wait before re-sending notification
  repeat_interval: 4h
  # Child routes for different severities and components
  routes:
  # ============================================
  # PHASE 6 - GITOPS DRIFT REMEDIATION
  # Route drift alerts to GitOps webhook for auto-PR
  # ============================================
  # Listed first on purpose: the severity and component routes below end
  # matching (no `continue`), so a drift alert that also matches one of them
  # would never reach a route placed after them.
  # One anchored regex covers DNSDriftDetected, TunnelConfigChanged and any
  # other *Drift*/*Mismatch*/*Changed* alert plus the two explicit names, so
  # each alert reaches the webhook through exactly one route.
  - match_re:
      alertname: '.*(Drift|Mismatch|Changed).*|WAFRuleMissing|FirewallRuleMissing'
    receiver: 'gitops-drift-pr'
    continue: true  # Also deliver via the severity/component routes below
  # ============================================
  # CRITICAL ALERTS - Immediate PagerDuty
  # ============================================
  - match:
      severity: critical
    receiver: 'pagerduty-critical'
    group_wait: 10s
    repeat_interval: 1h
    continue: true  # Also send to Slack
  - match:
      severity: critical
    receiver: 'slack-critical'
    group_wait: 10s
  # NOTE(review): the route above has no `continue`, so critical alerts stop
  # there. The nested `severity: critical` children under the component routes
  # below are therefore never reached while the two routes above exist.
  # ============================================
  # TUNNEL ALERTS
  # ============================================
  - match:
      component: tunnel
    receiver: 'slack-tunnels'
    routes:
    - match:
        severity: critical
      receiver: 'pagerduty-critical'
      continue: true
    - match:
        severity: critical
      receiver: 'slack-critical'
  # ============================================
  # DNS ALERTS
  # ============================================
  - match:
      component: dns
    receiver: 'slack-dns'
    routes:
    - match:
        severity: critical
      receiver: 'pagerduty-critical'
      continue: true
    - match:
        alertname: DNSHijackDetected
      receiver: 'pagerduty-critical'
  # ============================================
  # WAF ALERTS
  # ============================================
  - match:
      component: waf
    receiver: 'slack-waf'
    routes:
    - match:
        severity: critical
      receiver: 'pagerduty-critical'
      continue: true
    - match:
        alertname: WAFMassiveAttack
      receiver: 'pagerduty-critical'
  # ============================================
  # INVARIANT ALERTS (Security Policy Violations)
  # ============================================
  - match:
      component: invariant
    receiver: 'slack-security'
    routes:
    - match:
        severity: critical
      receiver: 'pagerduty-critical'
      continue: true
  # ============================================
  # PROOFCHAIN ALERTS
  # ============================================
  - match:
      component: proofchain
    receiver: 'slack-proofchain'
    routes:
    - match:
        severity: critical
      receiver: 'pagerduty-critical'
  # ============================================
  # WARNING ALERTS - Slack only
  # ============================================
  # Only reached by warnings that matched no component route above.
  - match:
      severity: warning
    receiver: 'slack-warnings'
    repeat_interval: 8h
  # ============================================
  # INFO ALERTS - Daily digest
  # ============================================
  - match:
      severity: info
    receiver: 'email-daily'
    group_wait: 1h
    repeat_interval: 24h
# Inhibition rules - suppress lower severity when higher fires
# Each rule mutes alerts matching `target_match` while an alert matching
# `source_match` is firing and both carry equal values for the `equal` labels.
inhibit_rules:
# If critical fires, suppress warning for same alert
- source_match:
    severity: 'critical'
  target_match:
    severity: 'warning'
  equal: ['alertname', 'component']
# If warning fires, suppress info for same alert
- source_match:
    severity: 'warning'
  target_match:
    severity: 'info'
  equal: ['alertname', 'component']
# Suppress all tunnel alerts if Cloudflare API is down
# (empty `equal`: no labels have to agree between source and target)
- source_match:
    alertname: 'CloudflareAPIDown'
  target_match:
    component: 'tunnel'
  equal: []
# Suppress DNS alerts during planned maintenance
- source_match:
    alertname: 'PlannedMaintenance'
  target_match:
    component: 'dns'
  equal: []
# Receivers definition
receivers:
# ============================================
# SLACK RECEIVERS
# ============================================
# One receiver per channel. Title/text/color come from named templates.
# NOTE(review): the Grafana/dashboard buttons link to http://localhost:3000 -
# confirm those URLs resolve for whoever clicks them from Slack.
- name: 'slack-default'
  slack_configs:
  - channel: '#cloudflare-alerts'
    send_resolved: true
    title: '{{ template "slack.cloudflare.title" . }}'
    text: '{{ template "slack.cloudflare.text" . }}'
    color: '{{ template "slack.cloudflare.color" . }}'
    actions:
    - type: button
      text: 'Runbook'
      url: '{{ template "slack.cloudflare.runbook" . }}'
    - type: button
      text: 'Grafana'
      url: 'http://localhost:3000/d/cloudflare-overview'
# Critical channel: color is fixed to 'danger' instead of templated.
- name: 'slack-critical'
  slack_configs:
  - channel: '#cloudflare-critical'
    send_resolved: true
    title: '{{ template "slack.cloudflare.title" . }}'
    text: '{{ template "slack.cloudflare.text" . }}'
    color: 'danger'
    actions:
    - type: button
      text: 'Runbook'
      url: '{{ template "slack.cloudflare.runbook" . }}'
    - type: button
      text: 'Grafana'
      url: 'http://localhost:3000/d/cloudflare-overview'
# Warnings share the default channel; color is fixed to 'warning'.
- name: 'slack-warnings'
  slack_configs:
  - channel: '#cloudflare-alerts'
    send_resolved: true
    title: '{{ template "slack.cloudflare.title" . }}'
    text: '{{ template "slack.cloudflare.text" . }}'
    color: 'warning'
- name: 'slack-tunnels'
  slack_configs:
  - channel: '#cloudflare-tunnels'
    send_resolved: true
    title: '{{ template "slack.cloudflare.title" . }}'
    text: '{{ template "slack.cloudflare.text" . }}'
    color: '{{ template "slack.cloudflare.color" . }}'
    actions:
    - type: button
      text: 'Tunnel Playbook'
      url: 'https://wiki.internal/playbooks/tunnel-rotation'
    - type: button
      text: 'Tunnel Dashboard'
      url: 'http://localhost:3000/d/tunnel-status'
- name: 'slack-dns'
  slack_configs:
  - channel: '#cloudflare-dns'
    send_resolved: true
    title: '{{ template "slack.cloudflare.title" . }}'
    text: '{{ template "slack.cloudflare.text" . }}'
    color: '{{ template "slack.cloudflare.color" . }}'
    actions:
    - type: button
      text: 'DNS Playbook'
      url: 'https://wiki.internal/playbooks/dns-compromise'
    - type: button
      text: 'DNS Dashboard'
      url: 'http://localhost:3000/d/dns-health'
- name: 'slack-waf'
  slack_configs:
  - channel: '#cloudflare-waf'
    send_resolved: true
    title: '{{ template "slack.cloudflare.title" . }}'
    text: '{{ template "slack.cloudflare.text" . }}'
    color: '{{ template "slack.cloudflare.color" . }}'
    actions:
    - type: button
      text: 'WAF Playbook'
      url: 'https://wiki.internal/playbooks/waf-incident'
    - type: button
      text: 'WAF Dashboard'
      url: 'http://localhost:3000/d/security-settings'
- name: 'slack-security'
  slack_configs:
  - channel: '#cloudflare-security'
    send_resolved: true
    title: '{{ template "slack.cloudflare.title" . }}'
    text: '{{ template "slack.cloudflare.text" . }}'
    color: '{{ template "slack.cloudflare.color" . }}'
    actions:
    - type: button
      text: 'Invariants Dashboard'
      url: 'http://localhost:3000/d/invariants'
- name: 'slack-proofchain'
  slack_configs:
  - channel: '#cloudflare-proofchain'
    send_resolved: true
    title: '{{ template "slack.cloudflare.title" . }}'
    text: '{{ template "slack.cloudflare.text" . }}'
    color: '{{ template "slack.cloudflare.color" . }}'
    actions:
    - type: button
      text: 'Proofchain Dashboard'
      url: 'http://localhost:3000/d/proofchain'
# ============================================
# PAGERDUTY RECEIVERS
# ============================================
# Pages on-call; incident details are filled from the alert group's labels
# and common annotations.
# NOTE(review): Alertmanager documents `service_key` for the "Prometheus"
# integration type and `routing_key` for Events API v2, while `severity` and
# the global v2 `pagerduty_url` belong to the v2 path. Confirm which key type
# PAGERDUTY_SERVICE_KEY holds and switch to `routing_key` if it is a v2 key.
- name: 'pagerduty-critical'
  pagerduty_configs:
  - service_key: '${PAGERDUTY_SERVICE_KEY}'
    send_resolved: true
    description: '{{ template "pagerduty.cloudflare.description" . }}'
    severity: 'critical'
    client: 'Cloudflare Mesh Observatory'
    client_url: 'http://localhost:3000'
    details:
      alertname: '{{ .GroupLabels.alertname }}'
      component: '{{ .GroupLabels.component }}'
      severity: '{{ .GroupLabels.severity }}'
      summary: '{{ .CommonAnnotations.summary }}'
      runbook: '{{ .CommonAnnotations.runbook_url }}'
# ============================================
# EMAIL RECEIVERS
# ============================================
# Digest mail for info-level alerts; SMTP settings come from `global`.
- name: 'email-daily'
  email_configs:
  - to: 'cloudflare-team@yourdomain.com'
    send_resolved: true
    html: '{{ template "email.cloudflare.html" . }}'
    headers:
      # Subject carries the group status (firing/resolved) in upper case
      Subject: '[Cloudflare] Daily Alert Digest - {{ .Status | toUpper }}'
# ============================================
# WEBHOOK RECEIVERS (for custom integrations)
# ============================================
# Posts alert groups to the remediator service, at most 10 alerts per call.
# NOTE(review): no route in the visible routing tree references this
# receiver - confirm whether it should be wired in.
- name: 'webhook-remediation'
  webhook_configs:
  - url: 'http://autonomous-remediator:8080/webhook/alert'
    send_resolved: true
    max_alerts: 10
# ============================================
# PHASE 6 - GITOPS WEBHOOK RECEIVER
# ============================================
# Posts drift alerts to the GitOps webhook, at most 5 alerts per call.
# NOTE(review): the URL uses a shell-style `${VAR:-default}` placeholder.
# Confirm that whatever renders this file supports that form (plain envsubst
# does not) - otherwise the literal string is used as the URL.
- name: 'gitops-drift-pr'
  webhook_configs:
  - url: '${GITOPS_WEBHOOK_URL:-http://gitops-webhook:8080/webhook/alert}'
    send_resolved: false  # Only fire on new alerts, not resolved
    max_alerts: 5
    # Optional: add bearer token or basic auth. Uncomment `http_config`
    # together with its children; on its own the key would be a null value.
    # http_config:
    #   authorization:
    #     type: Bearer
    #     credentials: '${GITOPS_WEBHOOK_TOKEN}'