# Listing header (page-extraction residue, kept as comments):
#   - Complete Cloudflare Terraform configuration (DNS, WAF, tunnels, access)
#   - WAF Intelligence MCP server with threat analysis and ML classification
#   - GitOps automation with PR workflows and drift detection
#   - Observatory monitoring stack with Prometheus/Grafana
#   - IDE operator rules for governed development
#   - Security playbooks and compliance frameworks
#   - Autonomous remediation and state reconciliation
# Original listing: 124 lines, 3.6 KiB, YAML
# Cloudflare Mesh Observatory Docker Stack
# Prometheus + Grafana + Alertmanager + Custom Metrics Exporter
# Phase 5B - Full Observability + Alerting

# Every container below joins the `observatory` network declared at the
# bottom of this file.
services:
# Prometheus - Metrics Collection
  prometheus:
    image: prom/prometheus:v2.48.0
    container_name: cf-prometheus
    restart: unless-stopped
    ports:
      - "9090:9090"
    volumes:
      # Scrape config and alert rules are mounted read-only from the host.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./prometheus/alerts:/etc/prometheus/alerts:ro
      # Named volume backing the TSDB path passed on the command line below.
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      # NOTE(review): the lifecycle flag exposes HTTP reload/quit endpoints,
      # and 9090 is published on the host above - confirm this is intended.
      - '--web.enable-lifecycle'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    networks:
      - observatory
    # Short-form depends_on: orders container start only; it does not wait
    # for the alertmanager healthcheck to pass.
    depends_on:
      - alertmanager
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9090/-/healthy"]
      interval: 30s
      timeout: 10s
      retries: 3

# Alertmanager - Alert Routing & Notifications
  alertmanager:
    image: prom/alertmanager:v0.26.0
    container_name: cf-alertmanager
    restart: unless-stopped
    ports:
      - "9093:9093"
    volumes:
      # Routing config and notification templates are mounted read-only.
      - ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
      - ./alertmanager/templates:/etc/alertmanager/templates:ro
      # Named volume backing --storage.path below.
      - alertmanager_data:/alertmanager
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
      - '--web.listen-address=:9093'
      # Empty value is deliberate; per the Alertmanager docs this disables
      # HA clustering (single-instance deployment).
      - '--cluster.listen-address='
    environment:
      # Values are interpolated by Compose from the host shell / .env file.
      # NOTE(review): verify how alertmanager.yml consumes these - confirm the
      # config is actually templated from the container environment.
      - SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL}
      - PAGERDUTY_SERVICE_KEY=${PAGERDUTY_SERVICE_KEY}
      - SMTP_USERNAME=${SMTP_USERNAME}
      - SMTP_PASSWORD=${SMTP_PASSWORD}
    networks:
      - observatory
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9093/-/healthy"]
      interval: 30s
      timeout: 10s
      retries: 3

# Grafana - Visualization
  grafana:
    image: grafana/grafana:10.2.2
    container_name: cf-grafana
    restart: unless-stopped
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      # Falls back to the literal `changeme` when GRAFANA_PASSWORD is unset
      # or empty. NOTE(review): set GRAFANA_PASSWORD for any non-local
      # deployment; the fallback is a well-known weak credential.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-changeme}
      - GF_USERS_ALLOW_SIGN_UP=false
      # The %(...)s placeholders are passed through to Grafana verbatim;
      # Compose only interpolates ${...} expressions.
      - GF_SERVER_ROOT_URL=%(protocol)s://%(domain)s:%(http_port)s/
      - GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-piechart-panel
    volumes:
      # Named volume for Grafana's data directory.
      - grafana_data:/var/lib/grafana
      # Provisioning directories are mounted read-only from the host.
      - ./dashboards:/etc/grafana/provisioning/dashboards:ro
      - ./datasources:/etc/grafana/provisioning/datasources:ro
    networks:
      - observatory
    # Short-form depends_on: start ordering only, no health gating.
    depends_on:
      - prometheus
    healthcheck:
      test: ["CMD-SHELL", "wget -q --spider http://localhost:3000/api/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3

# Cloudflare Metrics Exporter
  metrics-exporter:
    # Built locally from Dockerfile.exporter in this directory.
    build:
      context: .
      dockerfile: Dockerfile.exporter
    container_name: cf-metrics-exporter
    restart: unless-stopped
    ports:
      - "9100:9100"
    environment:
      # Cloudflare credentials/identifiers are interpolated by Compose from
      # the host shell / .env file; none are stored in this file.
      - CLOUDFLARE_API_TOKEN=${CLOUDFLARE_API_TOKEN}
      - CLOUDFLARE_ZONE_ID=${CLOUDFLARE_ZONE_ID}
      - CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID}
      # These two paths match the container-side mount targets below.
      - SNAPSHOT_DIR=/data/snapshots
      - ANOMALY_DIR=/data/anomalies
    volumes:
      # Sibling directories of this stack's folder, mounted read-only.
      - ../snapshots:/data/snapshots:ro
      - ../anomalies:/data/anomalies:ro
    networks:
      - observatory
    healthcheck:
      # NOTE(review): requires wget inside the image built from
      # Dockerfile.exporter - confirm the base image ships it.
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9100/health"]
      interval: 30s
      timeout: 10s
      retries: 3

networks:
  # Single bridge network that every service in this file attaches to.
  observatory:
    driver: bridge

volumes:
  # Named volumes with default settings; `{}` makes the empty definition
  # explicit instead of relying on a bare (null) value.
  prometheus_data: {}
  grafana_data: {}
  alertmanager_data: {}