Files
vm-cloudflare/observatory/docker-compose.yml
Vault Sovereign 37a867c485 Initial commit: Cloudflare infrastructure with WAF Intelligence
- Complete Cloudflare Terraform configuration (DNS, WAF, tunnels, access)
- WAF Intelligence MCP server with threat analysis and ML classification
- GitOps automation with PR workflows and drift detection
- Observatory monitoring stack with Prometheus/Grafana
- IDE operator rules for governed development
- Security playbooks and compliance frameworks
- Autonomous remediation and state reconciliation
2025-12-16 18:31:53 +00:00

124 lines
3.6 KiB
YAML

# Cloudflare Mesh Observatory Docker Stack
# Prometheus + Grafana + Alertmanager + Custom Metrics Exporter
# Phase 5B - Full Observability + Alerting
services:
  # Prometheus - metrics collection.
  # Config and alert files are bind-mounted read-only from this directory;
  # the TSDB lives in the named volume `prometheus_data`.
  prometheus:
    image: prom/prometheus:v2.48.0
    container_name: cf-prometheus
    restart: unless-stopped
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./prometheus/alerts:/etc/prometheus/alerts:ro
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      # Keep 30 days of samples on disk.
      - '--storage.tsdb.retention.time=30d'
      # NOTE(review): this flag enables the HTTP reload/quit endpoints, and
      # port 9090 is published on the host - confirm the host is not
      # reachable from untrusted networks.
      - '--web.enable-lifecycle'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    networks:
      - observatory
    # Short form: only orders container start; it does not wait for the
    # Alertmanager healthcheck to pass.
    depends_on:
      - alertmanager
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9090/-/healthy"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Alertmanager - alert routing and notifications.
  alertmanager:
    image: prom/alertmanager:v0.26.0
    container_name: cf-alertmanager
    restart: unless-stopped
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
      - ./alertmanager/templates:/etc/alertmanager/templates:ro
      - alertmanager_data:/alertmanager
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
      - '--web.listen-address=:9093'
      # Empty cluster address: run as a single instance (no HA clustering).
      - '--cluster.listen-address='
    # Notification credentials are taken from the invoking shell or a .env
    # file; an unset variable is passed through as an empty string.
    # NOTE(review): Alertmanager does not appear to substitute environment
    # variables inside alertmanager.yml - confirm how these are consumed.
    environment:
      - SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL}
      - PAGERDUTY_SERVICE_KEY=${PAGERDUTY_SERVICE_KEY}
      - SMTP_USERNAME=${SMTP_USERNAME}
      - SMTP_PASSWORD=${SMTP_PASSWORD}
    networks:
      - observatory
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9093/-/healthy"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Grafana - visualization.
  # Dashboards and datasources are provisioned from read-only bind mounts;
  # Grafana's own state lives in the named volume `grafana_data`.
  grafana:
    image: grafana/grafana:10.2.2
    container_name: cf-grafana
    restart: unless-stopped
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      # NOTE(review): falls back to the well-known password "changeme" when
      # GRAFANA_PASSWORD is unset or empty, while port 3000 is published on
      # the host. Set GRAFANA_PASSWORD for any non-local deployment.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-changeme}
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_SERVER_ROOT_URL=%(protocol)s://%(domain)s:%(http_port)s/
      - GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-piechart-panel
    volumes:
      - grafana_data:/var/lib/grafana
      - ./dashboards:/etc/grafana/provisioning/dashboards:ro
      - ./datasources:/etc/grafana/provisioning/datasources:ro
    networks:
      - observatory
    # Short form: start-order only, not a readiness gate on Prometheus.
    depends_on:
      - prometheus
    healthcheck:
      test: ["CMD-SHELL", "wget -q --spider http://localhost:3000/api/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Cloudflare metrics exporter.
  # Built locally from Dockerfile.exporter in this directory.
  metrics-exporter:
    build:
      context: .
      dockerfile: Dockerfile.exporter
    container_name: cf-metrics-exporter
    restart: unless-stopped
    ports:
      - "9100:9100"
    environment:
      # Cloudflare credentials come from the invoking shell or a .env file;
      # an unset variable is passed through as an empty string.
      - CLOUDFLARE_API_TOKEN=${CLOUDFLARE_API_TOKEN}
      - CLOUDFLARE_ZONE_ID=${CLOUDFLARE_ZONE_ID}
      - CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID}
      # In-container paths; they match the bind-mount targets below.
      - SNAPSHOT_DIR=/data/snapshots
      - ANOMALY_DIR=/data/anomalies
    volumes:
      # Sibling directories of this compose directory, mounted read-only.
      - ../snapshots:/data/snapshots:ro
      - ../anomalies:/data/anomalies:ro
    networks:
      - observatory
    # NOTE(review): requires `wget` inside the image built from
    # Dockerfile.exporter - confirm it is installed there.
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9100/health"]
      interval: 30s
      timeout: 10s
      retries: 3
# User-defined bridge network named `observatory`.
networks:
  observatory:
    driver: bridge
# Named volumes with default settings; `{}` makes the empty definition
# explicit instead of relying on a bare (null) value.
volumes:
  prometheus_data: {}
  grafana_data: {}
  alertmanager_data: {}