---
# Cloudflare Mesh Observatory Docker Stack
# Prometheus + Grafana + Alertmanager + Custom Metrics Exporter
# Phase 5B - Full Observability + Alerting
#
# All four services share the `observatory` bridge network and reach each
# other by service name. Values written as ${VAR} are interpolated by Docker
# Compose from the invoking shell environment or an adjacent .env file.

services:
  # Prometheus - Metrics Collection
  prometheus:
    image: prom/prometheus:v2.48.0
    container_name: cf-prometheus
    restart: unless-stopped
    ports:
      - "9090:9090"
    volumes:
      # Scrape configuration and alert rules are mounted read-only; the TSDB
      # lives in a named volume so it survives container re-creation.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - ./prometheus/alerts:/etc/prometheus/alerts:ro
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      # NOTE(review): the lifecycle flag enables the HTTP reload/quit
      # endpoints, and port 9090 is published on the host above. Confirm the
      # host is not reachable from untrusted networks.
      - '--web.enable-lifecycle'
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
    networks:
      - observatory
    # Short-form depends_on: controls start order only; it does not wait for
    # the Alertmanager healthcheck to pass.
    depends_on:
      - alertmanager
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9090/-/healthy"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Alertmanager - Alert Routing & Notifications
  alertmanager:
    image: prom/alertmanager:v0.26.0
    container_name: cf-alertmanager
    restart: unless-stopped
    ports:
      - "9093:9093"
    volumes:
      - ./alertmanager/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
      - ./alertmanager/templates:/etc/alertmanager/templates:ro
      - alertmanager_data:/alertmanager
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
      - '--web.listen-address=:9093'
      # Empty cluster address: this stack runs a single Alertmanager
      # instance, so no peer/gossip listener is configured.
      - '--cluster.listen-address='
    environment:
      # NOTE(review): these are injected into the container environment, but
      # Alertmanager is not known to expand environment variables inside
      # alertmanager.yml. Confirm how the config consumes them (e.g. a
      # templating step or *_file settings) before relying on them.
      - SLACK_WEBHOOK_URL=${SLACK_WEBHOOK_URL}
      - PAGERDUTY_SERVICE_KEY=${PAGERDUTY_SERVICE_KEY}
      - SMTP_USERNAME=${SMTP_USERNAME}
      - SMTP_PASSWORD=${SMTP_PASSWORD}
    networks:
      - observatory
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9093/-/healthy"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Grafana - Visualization
  grafana:
    image: grafana/grafana:10.2.2
    container_name: cf-grafana
    restart: unless-stopped
    ports:
      - "3000:3000"
    environment:
      - GF_SECURITY_ADMIN_USER=admin
      # Falls back to the literal password "changeme" when GRAFANA_PASSWORD
      # is unset. Always set GRAFANA_PASSWORD outside of local experiments.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-changeme}
      - GF_USERS_ALLOW_SIGN_UP=false
      # The %(...)s placeholders are passed through to Grafana unchanged;
      # Compose only interpolates ${...} expressions.
      - 'GF_SERVER_ROOT_URL=%(protocol)s://%(domain)s:%(http_port)s/'
      - GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-piechart-panel
    volumes:
      - grafana_data:/var/lib/grafana
      # Provisioning inputs are mounted read-only from the compose directory.
      - ./dashboards:/etc/grafana/provisioning/dashboards:ro
      - ./datasources:/etc/grafana/provisioning/datasources:ro
    networks:
      - observatory
    # Start-order dependency only (short form); see the note on prometheus.
    depends_on:
      - prometheus
    healthcheck:
      test: ["CMD-SHELL", "wget -q --spider http://localhost:3000/api/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3

  # Cloudflare Metrics Exporter
  metrics-exporter:
    # Built locally from Dockerfile.exporter in this directory.
    build:
      context: .
      dockerfile: Dockerfile.exporter
    container_name: cf-metrics-exporter
    restart: unless-stopped
    ports:
      - "9100:9100"
    environment:
      - CLOUDFLARE_API_TOKEN=${CLOUDFLARE_API_TOKEN}
      - CLOUDFLARE_ZONE_ID=${CLOUDFLARE_ZONE_ID}
      - CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID}
      # These two paths match the container-side targets of the mounts below.
      - SNAPSHOT_DIR=/data/snapshots
      - ANOMALY_DIR=/data/anomalies
    volumes:
      # Sibling directories of the compose directory, mounted read-only.
      - ../snapshots:/data/snapshots:ro
      - ../anomalies:/data/anomalies:ro
    networks:
      - observatory
    # NOTE(review): this check requires `wget` inside the image built from
    # Dockerfile.exporter and a /health route on port 9100 - confirm both.
    healthcheck:
      test: ["CMD", "wget", "-q", "--spider", "http://localhost:9100/health"]
      interval: 30s
      timeout: 10s
      retries: 3

networks:
  observatory:
    driver: bridge

# Named volumes holding persistent state for each stateful service.
volumes:
  prometheus_data:
  grafana_data:
  alertmanager_data: