#!/usr/bin/env python3
"""
Alertmanager Webhook Receiver for Cloudflare GitOps
Phase 6 - PR Workflows

Receives alerts from Alertmanager and triggers GitOps actions:
- Drift remediation PRs
- Pipeline triggers
- Slack notifications
"""

import hashlib
import hmac
import json
import os
import queue
import re
import subprocess
import sys
import threading
import time
from dataclasses import dataclass
from datetime import datetime, timezone
from http.server import HTTPServer, BaseHTTPRequestHandler
from pathlib import Path
from typing import Any, Dict, List, Optional

try:
    import requests
    import yaml
except ImportError:
    print("ERROR: pip install requests pyyaml", file=sys.stderr)
    sys.exit(1)

HERE = Path(__file__).resolve().parent
CONFIG_PATH = HERE / "config.yml"

# Largest request body (in bytes) read from a webhook client; the body is
# untrusted input, so it must not be allowed to grow without bound.
MAX_BODY_BYTES = 10 * 1024 * 1024

# Wall-clock limit (seconds) for one local drift_pr_bot.py run when the
# config does not set webhook.local_bot_timeout.
DEFAULT_LOCAL_BOT_TIMEOUT = 900

# Seconds main() waits for the worker to finish in-flight work on shutdown.
WORKER_JOIN_TIMEOUT = 30

# Matches one "${VAR}" or "${VAR:-default}" reference inside a config string.
_ENV_REF = re.compile(r"\$\{([^}]*)\}")

# Job queue for background processing
job_queue: queue.Queue = queue.Queue()


def _lookup_env(reference: str) -> Optional[str]:
    """Resolve the inside of a ``${...}`` reference against os.environ.

    ``reference`` is either ``VAR`` or ``VAR:-default``. Returns the variable's
    value, else the default when one was given, else None.
    """
    var, sep, default = reference.partition(":-")
    return os.environ.get(var, default if sep else None)


def _expand_env_string(text: str) -> Optional[str]:
    """Expand ``${VAR}`` / ``${VAR:-default}`` references in one string.

    A string that is exactly one reference resolves to the variable's value,
    which is None when the variable is unset and no default was given.
    References embedded in a longer string are substituted in place; an
    embedded reference that cannot be resolved is left as written so the
    surrounding text is never lost.
    """
    whole = _ENV_REF.fullmatch(text)
    if whole:
        return _lookup_env(whole.group(1))

    def substitute(match):
        value = _lookup_env(match.group(1))
        return match.group(0) if value is None else value

    return _ENV_REF.sub(substitute, text)


def _expand_env(obj: Any) -> Any:
    """Recursively expand environment references in strings, dicts and lists."""
    if isinstance(obj, str):
        return _expand_env_string(obj)
    if isinstance(obj, dict):
        return {key: _expand_env(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [_expand_env(item) for item in obj]
    return obj


def _section(cfg: Any, name: str) -> Dict[str, Any]:
    """Return the mapping stored under ``name``, or {} if absent or not a mapping.

    A YAML key with no value loads as None, so a plain ``cfg.get(name, {})``
    is not enough to guarantee a dict.
    """
    value = cfg.get(name) if isinstance(cfg, dict) else None
    return value if isinstance(value, dict) else {}


def load_config() -> Dict[str, Any]:
    """Load gitops configuration

    Reads CONFIG_PATH as YAML and expands ``${VAR}`` / ``${VAR:-default}``
    references in every string value. An empty file yields an empty dict.

    Raises:
        OSError: if the config file cannot be opened.
        yaml.YAMLError: if the file is not valid YAML.
    """
    with open(CONFIG_PATH, encoding="utf-8") as f:
        config = yaml.safe_load(f)
    return _expand_env(config) or {}


@dataclass
class AlertPayload:
    """Parsed Alertmanager webhook payload"""

    receiver: str
    status: str  # "firing" or "resolved"
    alerts: List[Dict]
    group_labels: Dict[str, str]
    common_labels: Dict[str, str]
    common_annotations: Dict[str, str]
    external_url: str
    version: str
    group_key: str

    @classmethod
    def from_json(cls, data: Dict) -> "AlertPayload":
        """Build a payload from the decoded webhook JSON object.

        Keys that are missing or explicitly null fall back to an empty value
        ("4" for the version), so the label properties below stay usable.
        """
        return cls(
            receiver=data.get("receiver") or "",
            status=data.get("status") or "",
            alerts=data.get("alerts") or [],
            group_labels=data.get("groupLabels") or {},
            common_labels=data.get("commonLabels") or {},
            common_annotations=data.get("commonAnnotations") or {},
            external_url=data.get("externalURL") or "",
            version=data.get("version") or "4",
            group_key=data.get("groupKey") or "",
        )

    @property
    def alert_name(self) -> str:
        """The ``alertname`` common label, or "unknown"."""
        return self.common_labels.get("alertname", "unknown")

    @property
    def severity(self) -> str:
        """The ``severity`` common label, or "unknown"."""
        return self.common_labels.get("severity", "unknown")

    @property
    def component(self) -> str:
        """The ``component`` common label, or "unknown"."""
        return self.common_labels.get("component", "unknown")


def should_trigger_pr(cfg: Dict[str, Any], payload: AlertPayload) -> bool:
    """Determine if this alert should trigger a PR

    True only for a non-resolved alert whose name is listed in
    ``webhook.trigger_alerts``. Everything else, including names listed in
    ``webhook.notify_only_alerts``, does not trigger.
    """
    # Never auto-PR for resolved alerts
    if payload.status == "resolved":
        return False
    trigger_alerts = _section(cfg, "webhook").get("trigger_alerts") or []
    return payload.alert_name in trigger_alerts


def trigger_gitlab_pipeline(cfg: Dict[str, Any], payload: AlertPayload) -> Optional[str]:
    """Trigger GitLab pipeline for drift remediation

    Returns the pipeline's ``web_url`` from the GitLab response, or None when
    the trigger is disabled, not configured, or the request failed.
    """
    gitlab_cfg = _section(cfg, "gitlab")
    trigger_cfg = _section(_section(cfg, "webhook"), "gitlab_trigger")

    if not trigger_cfg.get("enabled", False):
        return None

    # "or" rather than a .get() default: an unset ${VAR} expands to None.
    base_url = str(gitlab_cfg.get("base_url") or "https://gitlab.com").rstrip("/")
    project_id = gitlab_cfg.get("project_id")
    trigger_token = trigger_cfg.get("trigger_token") or os.environ.get("GITLAB_TRIGGER_TOKEN")
    ref = trigger_cfg.get("ref") or "main"

    if not project_id or not trigger_token:
        print("GitLab trigger not configured", file=sys.stderr)
        return None

    url = f"{base_url}/api/v4/projects/{project_id}/trigger/pipeline"
    data = {
        "ref": ref,
        "token": trigger_token,
        "variables[GITOPS_TRIGGER_SOURCE]": "alert",
        "variables[GITOPS_ALERT_NAME]": payload.alert_name,
        "variables[GITOPS_ALERT_SEVERITY]": payload.severity,
        "variables[GITOPS_ALERT_COMPONENT]": payload.component,
    }

    try:
        resp = requests.post(url, data=data, timeout=30)
        resp.raise_for_status()
        result = resp.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        print(f"Failed to trigger pipeline: {e}", file=sys.stderr)
        return None
    return result.get("web_url") if isinstance(result, dict) else None


def run_drift_bot_locally(cfg: Dict[str, Any], payload: AlertPayload) -> bool:
    """Run drift_pr_bot.py directly (for local webhook receiver)

    The run is limited to ``webhook.local_bot_timeout`` seconds
    (DEFAULT_LOCAL_BOT_TIMEOUT when unset or invalid) so that one hung run
    cannot block the worker indefinitely.

    Returns:
        True if the bot ran and exited with status 0, False otherwise.
    """
    alert_name = str(payload.alert_name)
    env = os.environ.copy()
    env["GITOPS_TRIGGER_SOURCE"] = "alert"
    env["GITOPS_ALERT_NAME"] = alert_name

    raw_timeout = _section(cfg, "webhook").get("local_bot_timeout")
    try:
        timeout = float(raw_timeout) if raw_timeout is not None else DEFAULT_LOCAL_BOT_TIMEOUT
    except (TypeError, ValueError):
        timeout = DEFAULT_LOCAL_BOT_TIMEOUT

    # Run the bot with the interpreter running this receiver rather than
    # whichever "python3" happens to come first on PATH.
    command = [
        sys.executable or "python3",
        "drift_pr_bot.py",
        "--trigger-source", "alert",
        "--alert-name", alert_name,
    ]
    try:
        completed = subprocess.run(command, cwd=HERE, env=env, timeout=timeout)
    except (OSError, subprocess.TimeoutExpired) as e:
        print(f"Local drift bot failed to run: {e}", file=sys.stderr)
        return False

    if completed.returncode != 0:
        print(
            f"Local drift bot exited with status {completed.returncode}",
            file=sys.stderr,
        )
        return False
    return True


def notify_slack(cfg: Dict[str, Any], message: str, alert: AlertPayload):
    """Send Slack notification

    Does nothing when ``slack.webhook_url`` is not configured. Delivery is
    best-effort: failures are printed to stderr and never raised.
    """
    slack_cfg = _section(cfg, "slack")
    webhook_url = slack_cfg.get("webhook_url")

    if not webhook_url:
        return

    color = {
        "critical": "danger",
        "warning": "warning",
        "info": "#439FE0",
    }.get(alert.severity, "#808080")

    payload = {
        "channel": slack_cfg.get("channel") or "#cloudflare-gitops",
        "attachments": [
            {
                "color": color,
                "title": f"GitOps Alert: {alert.alert_name}",
                "text": message,
                "fields": [
                    {"title": "Status", "value": alert.status, "short": True},
                    {"title": "Severity", "value": alert.severity, "short": True},
                    {"title": "Component", "value": alert.component, "short": True},
                ],
                "footer": "Cloudflare GitOps Webhook",
                # time.time() is the real epoch; a naive utcnow().timestamp()
                # would be shifted by the host's UTC offset.
                "ts": int(time.time()),
            }
        ],
    }

    try:
        resp = requests.post(webhook_url, json=payload, timeout=10)
        resp.raise_for_status()
    except requests.RequestException as e:
        print(f"Slack notification failed: {e}", file=sys.stderr)


def process_alert(cfg: Dict[str, Any], payload: AlertPayload):
    """Process a single alert payload

    Alerts that should trigger a PR start a GitLab pipeline, falling back to a
    local drift_pr_bot.py run, and the outcome is posted to Slack. Alerts
    listed in ``webhook.notify_only_alerts`` only produce a Slack message.
    Any other alert is logged and otherwise ignored.
    """
    print(f"Processing alert: {payload.alert_name} ({payload.status})")

    # Check if we should trigger a PR
    if should_trigger_pr(cfg, payload):
        print(f"Alert {payload.alert_name} triggers drift remediation")

        # Try GitLab pipeline trigger first
        pipeline_url = trigger_gitlab_pipeline(cfg, payload)

        if pipeline_url:
            message = f"Triggered drift remediation pipeline: {pipeline_url}"
        else:
            # Fall back to local execution
            print("Falling back to local drift_pr_bot execution")
            if run_drift_bot_locally(cfg, payload):
                message = "Triggered local drift remediation"
            else:
                message = "Local drift remediation failed; see webhook receiver logs"

        notify_slack(cfg, message, payload)
        return

    # Just notify
    notify_only = _section(cfg, "webhook").get("notify_only_alerts") or []
    if payload.alert_name in notify_only:
        message = f"Alert {payload.alert_name} received (notify-only, no auto-PR)"
        notify_slack(cfg, message, payload)


def job_worker(cfg: Optional[Dict[str, Any]] = None):
    """Background worker to process jobs

    Args:
        cfg: Configuration to use; loaded with load_config() when omitted.

    Takes payloads from job_queue until it receives None (the shutdown
    signal). An error while processing one payload is printed to stderr and
    does not stop the worker.
    """
    if cfg is None:
        cfg = load_config()

    while True:
        payload = job_queue.get()
        if payload is None:
            # Shutdown signal
            break
        try:
            process_alert(cfg, payload)
        except Exception as e:
            # Boundary of the worker loop: one bad alert must not kill it.
            print(f"Job processing error: {e}", file=sys.stderr)


def _signature_is_valid(secret: str, body: bytes, signature: Optional[str]) -> bool:
    """Check a hex HMAC-SHA256 signature of ``body`` in constant time.

    Both sides are compared as bytes because hmac.compare_digest rejects
    str arguments containing non-ASCII characters, and the signature comes
    from a client-supplied header.
    """
    expected = hmac.new(secret.encode(), body, hashlib.sha256).hexdigest()
    supplied = (signature or "").encode("utf-8", "replace")
    return hmac.compare_digest(supplied, expected.encode("ascii"))


class WebhookHandler(BaseHTTPRequestHandler):
    """HTTP handler for Alertmanager webhooks"""

    # Per-instance cache behind the ``cfg`` property.
    _cfg: Optional[Dict[str, Any]] = None

    @property
    def cfg(self) -> Dict[str, Any]:
        """Configuration, loaded on first access.

        A handler is instantiated for every request, so loading eagerly would
        re-read and re-parse the config file on each one, health checks
        included.
        """
        if self._cfg is None:
            self._cfg = load_config()
        return self._cfg

    @cfg.setter
    def cfg(self, value: Dict[str, Any]) -> None:
        self._cfg = value

    def log_message(self, format, *args):
        """Print the request log line prefixed with a naive-UTC ISO timestamp."""
        stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        print(f"[{stamp}] {format % args}")

    def _send_json(self, status: int, body: Dict[str, Any]) -> None:
        """Send ``body`` as a JSON response with the given status code."""
        encoded = json.dumps(body).encode()
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)

    def do_GET(self):
        """Health check endpoint"""
        if self.path == "/health":
            self._send_json(200, {"status": "ok"})
        else:
            self.send_response(404)
            self.end_headers()

    def do_POST(self):
        """Handle webhook POST

        Responds 404 for any path other than /webhook/alert, 400 for a bad
        Content-Length or an unparseable payload, 413 for a body larger than
        MAX_BODY_BYTES, 403 for a bad signature when WEBHOOK_SECRET is set,
        and 202 once the payload has been queued.
        """
        if self.path != "/webhook/alert":
            self.send_response(404)
            self.end_headers()
            return

        # Read body
        raw_length = self.headers.get("Content-Length")
        try:
            content_length = int(raw_length) if raw_length is not None else 0
        except ValueError:
            content_length = -1
        if content_length < 0:
            # A negative length would make rfile.read() block until EOF.
            self._send_json(400, {"error": "invalid Content-Length"})
            return
        if content_length > MAX_BODY_BYTES:
            self._send_json(413, {"error": "payload too large"})
            return
        body = self.rfile.read(content_length)

        # Verify signature if configured
        secret = os.environ.get("WEBHOOK_SECRET")
        if secret and not _signature_is_valid(
            secret, body, self.headers.get("X-Webhook-Signature")
        ):
            self._send_json(403, {"error": "invalid signature"})
            return

        # Parse payload
        try:
            data = json.loads(body)
            if not isinstance(data, dict):
                raise ValueError("payload must be a JSON object")
            payload = AlertPayload.from_json(data)
            # Resolve the name here so a malformed label block is rejected
            # with a 400 before anything is queued.
            alert_name = payload.alert_name
        except (ValueError, TypeError, AttributeError) as e:
            self._send_json(400, {"error": str(e)})
            return

        # Queue for processing
        job_queue.put(payload)

        # Respond immediately
        self._send_json(202, {"status": "accepted", "alert": alert_name})


def main():
    """Main entry point

    Parses --host/--port (defaults from WEBHOOK_HOST / WEBHOOK_PORT), loads the
    configuration once, starts the background worker and serves until
    interrupted. Exits with status 1 if the configuration cannot be loaded.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Alertmanager webhook receiver for GitOps"
    )
    parser.add_argument(
        "--host",
        default=os.environ.get("WEBHOOK_HOST", "0.0.0.0"),
        help="Host to bind to",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=int(os.environ.get("WEBHOOK_PORT", "8080")),
        help="Port to listen on",
    )
    args = parser.parse_args()

    # Fail fast: a worker that dies on a bad config would leave the server
    # accepting alerts that nobody processes.
    try:
        cfg = load_config()
    except (OSError, yaml.YAMLError) as e:
        print(f"ERROR: cannot load {CONFIG_PATH}: {e}", file=sys.stderr)
        sys.exit(1)

    if not os.environ.get("WEBHOOK_SECRET"):
        print(
            "WARNING: WEBHOOK_SECRET is not set; webhook requests are not authenticated",
            file=sys.stderr,
        )

    # Bind first so a port conflict exits before the worker is started.
    server = HTTPServer((args.host, args.port), WebhookHandler)

    # Start worker thread
    worker = threading.Thread(target=job_worker, args=(cfg,), daemon=True)
    worker.start()

    print(f"GitOps webhook receiver listening on {args.host}:{args.port}")
    print(" POST /webhook/alert - Alertmanager webhook")
    print(" GET /health - Health check")

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nShutting down...")
    finally:
        server.server_close()
        job_queue.put(None)  # Signal worker to stop
        # Give queued and in-flight alerts a chance to finish before exit.
        worker.join(timeout=WORKER_JOIN_TIMEOUT)


if __name__ == "__main__":
    main()