- Complete Cloudflare Terraform configuration (DNS, WAF, tunnels, access)
- WAF Intelligence MCP server with threat analysis and ML classification
- GitOps automation with PR workflows and drift detection
- Observatory monitoring stack with Prometheus/Grafana
- IDE operator rules for governed development
- Security playbooks and compliance frameworks
- Autonomous remediation and state reconciliation
374 lines
11 KiB
Python
374 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Alertmanager Webhook Receiver for Cloudflare GitOps
|
|
Phase 6 - PR Workflows
|
|
|
|
Receives alerts from Alertmanager and triggers GitOps actions:
|
|
- Drift remediation PRs
|
|
- Pipeline triggers
|
|
- Slack notifications
|
|
"""
|
|
|
|
import hashlib
|
|
import hmac
|
|
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
from dataclasses import dataclass
|
|
from datetime import datetime
|
|
from http.server import HTTPServer, BaseHTTPRequestHandler
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional
|
|
import threading
|
|
import queue
|
|
|
|
try:
|
|
import requests
|
|
import yaml
|
|
except ImportError:
|
|
print("ERROR: pip install requests pyyaml", file=sys.stderr)
|
|
sys.exit(1)
|
|
|
|
# Directory that contains this script; the config file and the drift bot
# subprocess are both resolved relative to it.
HERE = Path(__file__).resolve().parent
CONFIG_PATH = HERE.joinpath("config.yml")

# Job queue for background processing: the HTTP handler enqueues parsed
# payloads, the worker thread dequeues them; ``None`` is the shutdown signal.
job_queue: queue.Queue = queue.Queue()
|
|
|
|
|
|
def load_config() -> Dict[str, Any]:
    """Load the GitOps configuration from ``CONFIG_PATH``.

    The file is parsed with ``yaml.safe_load``. Afterwards every string value
    (recursively through dicts and lists) that starts with a ``${VAR}`` or
    ``${VAR:-default}`` reference is expanded from the process environment.

    Returns:
        The expanded configuration. An empty YAML document yields ``{}`` so
        callers can always use ``cfg.get(...)``.

    Raises:
        OSError: if the config file cannot be opened.
        yaml.YAMLError: if the file is not valid YAML.
    """
    with open(CONFIG_PATH, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # safe_load() returns None for an empty document; callers expect a mapping.
    if config is None:
        return {}

    def expand_env(obj):
        if isinstance(obj, str):
            if obj.startswith("${") and "}" in obj:
                end = obj.index("}")
                var, sep, default = obj[2:end].partition(":-")
                value = os.environ.get(var, default if sep else None)
                rest = obj[end + 1:]
                if not rest:
                    # Pure reference: may legitimately resolve to None when
                    # the variable is unset and no default was given.
                    return value
                # Keep any literal text that follows the reference instead of
                # silently discarding it (e.g. "${HOST}/api").
                return ("" if value is None else value) + rest
            return obj
        if isinstance(obj, dict):
            return {k: expand_env(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [expand_env(i) for i in obj]
        return obj

    return expand_env(config)
|
|
|
|
|
|
@dataclass
class AlertPayload:
    """Parsed Alertmanager webhook payload.

    Build instances with :meth:`from_json`; the ``alert_name``, ``severity``
    and ``component`` properties read the corresponding keys of
    ``common_labels`` and fall back to ``"unknown"``.
    """

    receiver: str
    status: str  # "firing" or "resolved"
    alerts: List[Dict]
    group_labels: Dict[str, str]
    common_labels: Dict[str, str]
    common_annotations: Dict[str, str]
    external_url: str
    version: str
    group_key: str

    @classmethod
    def from_json(cls, data: Dict) -> "AlertPayload":
        """Create a payload from the decoded JSON body of a webhook request.

        Missing keys and explicit ``null`` values both fall back to the
        field's default so the properties below never operate on ``None``.

        Raises:
            ValueError: if ``data`` is not a JSON object, or a label /
                annotation / alerts field has the wrong container type.
        """
        if not isinstance(data, dict):
            raise ValueError("alert payload must be a JSON object")

        def pick(key, default):
            value = data.get(key)
            if value is None:
                return default
            # Container fields must keep their container type; the properties
            # call .get() on the label mappings.
            if isinstance(default, (dict, list)) and not isinstance(value, type(default)):
                raise ValueError(
                    f"field {key!r} must be a JSON {type(default).__name__}"
                )
            return value

        return cls(
            receiver=pick("receiver", ""),
            status=pick("status", ""),
            alerts=pick("alerts", []),
            group_labels=pick("groupLabels", {}),
            common_labels=pick("commonLabels", {}),
            common_annotations=pick("commonAnnotations", {}),
            external_url=pick("externalURL", ""),
            version=pick("version", "4"),
            group_key=pick("groupKey", ""),
        )

    @property
    def alert_name(self) -> str:
        """``alertname`` common label, or ``"unknown"`` when absent."""
        return self.common_labels.get("alertname", "unknown")

    @property
    def severity(self) -> str:
        """``severity`` common label, or ``"unknown"`` when absent."""
        return self.common_labels.get("severity", "unknown")

    @property
    def component(self) -> str:
        """``component`` common label, or ``"unknown"`` when absent."""
        return self.common_labels.get("component", "unknown")
|
|
|
|
|
|
def should_trigger_pr(cfg: Dict[str, Any], payload: "AlertPayload") -> bool:
    """Determine if this alert should trigger a PR.

    Args:
        cfg: Loaded configuration; ``cfg["webhook"]["trigger_alerts"]`` lists
            the alert names that start remediation.
        payload: Alert whose ``status`` and ``alert_name`` are inspected.

    Returns:
        ``True`` only for a non-resolved alert whose name is in
        ``trigger_alerts``. Everything else (including names listed only in
        ``notify_only_alerts``) returns ``False``.
    """
    # Never auto-PR for resolved alerts
    if payload.status == "resolved":
        return False

    # ``or`` guards against keys that are present but null in the YAML
    # (e.g. an emptied ``trigger_alerts:`` entry).
    webhook_cfg = cfg.get("webhook") or {}
    trigger_alerts = webhook_cfg.get("trigger_alerts") or []

    # Default: don't trigger
    return payload.alert_name in trigger_alerts
|
|
|
|
|
|
def trigger_gitlab_pipeline(cfg: Dict[str, Any], payload: "AlertPayload") -> Optional[str]:
    """Trigger GitLab pipeline for drift remediation.

    Args:
        cfg: Loaded configuration. Reads ``gitlab.base_url``,
            ``gitlab.project_id`` and ``webhook.gitlab_trigger``
            (``enabled``, ``trigger_token``, ``ref``).
        payload: Alert whose name, severity and component are forwarded as
            ``GITOPS_ALERT_*`` pipeline variables.

    Returns:
        The ``web_url`` field of the trigger response, or ``None`` when the
        trigger is disabled, not configured, or the request failed.
    """
    from urllib.parse import quote, unquote

    gitlab_cfg = cfg.get("gitlab") or {}
    trigger_cfg = (cfg.get("webhook") or {}).get("gitlab_trigger") or {}

    if not trigger_cfg.get("enabled", False):
        return None

    base_url = (gitlab_cfg.get("base_url") or "https://gitlab.com").rstrip("/")
    project_id = gitlab_cfg.get("project_id")
    trigger_token = trigger_cfg.get("trigger_token") or os.environ.get("GITLAB_TRIGGER_TOKEN")
    ref = trigger_cfg.get("ref") or "main"

    if not project_id or not trigger_token:
        print("GitLab trigger not configured", file=sys.stderr)
        return None

    # A project may be given as "group/project"; the API needs that path
    # URL-encoded. unquote() first so an already-encoded value is not
    # double-encoded; numeric IDs pass through unchanged.
    project = quote(unquote(str(project_id)), safe="")
    url = f"{base_url}/api/v4/projects/{project}/trigger/pipeline"

    data = {
        "ref": ref,
        "token": trigger_token,
        "variables[GITOPS_TRIGGER_SOURCE]": "alert",
        "variables[GITOPS_ALERT_NAME]": payload.alert_name,
        "variables[GITOPS_ALERT_SEVERITY]": payload.severity,
        "variables[GITOPS_ALERT_COMPONENT]": payload.component,
    }

    # Deliberately best-effort: any failure is logged and reported as None so
    # the caller can fall back to another remediation path.
    try:
        resp = requests.post(url, data=data, timeout=30)
        resp.raise_for_status()
        result = resp.json()
    except Exception as e:
        print(f"Failed to trigger pipeline: {e}", file=sys.stderr)
        return None

    if not isinstance(result, dict):
        print("Failed to trigger pipeline: unexpected response body", file=sys.stderr)
        return None
    return result.get("web_url")
|
|
|
|
|
|
def run_drift_bot_locally(cfg: Dict[str, Any], payload: "AlertPayload"):
    """Run drift_pr_bot.py directly (for local webhook receiver).

    The bot is started as a child process in ``HERE`` with the alert name
    passed both on the command line and through ``GITOPS_*`` environment
    variables. This call blocks until the child exits.

    Args:
        cfg: Loaded configuration (not used by this function).
        payload: Alert whose ``alert_name`` is forwarded to the bot.
    """
    alert_name = str(payload.alert_name)

    env = os.environ.copy()
    env["GITOPS_TRIGGER_SOURCE"] = "alert"
    env["GITOPS_ALERT_NAME"] = alert_name

    # Prefer the interpreter running this receiver so the bot sees the same
    # installed packages; sys.executable can be empty in embedded setups.
    interpreter = sys.executable or "python3"

    result = subprocess.run(
        [interpreter, "drift_pr_bot.py", "--trigger-source", "alert", "--alert-name", alert_name],
        cwd=HERE,
        env=env,
    )

    # Surface failures instead of discarding the exit status.
    if result.returncode != 0:
        print(
            f"drift_pr_bot.py exited with status {result.returncode}",
            file=sys.stderr,
        )
|
|
|
|
|
|
def notify_slack(cfg: Dict[str, Any], message: str, alert: "AlertPayload"):
    """Send Slack notification.

    Does nothing when ``slack.webhook_url`` is not configured. Delivery is
    best-effort: transport errors and non-2xx responses are logged to stderr
    and never raised.

    Args:
        cfg: Loaded configuration; reads ``slack.webhook_url`` and
            ``slack.channel``.
        message: Text shown in the attachment body.
        alert: Alert whose name, status, severity and component are shown.
    """
    import time

    slack_cfg = cfg.get("slack") or {}
    webhook_url = slack_cfg.get("webhook_url")

    if not webhook_url:
        return

    color = {
        "critical": "danger",
        "warning": "warning",
        "info": "#439FE0",
    }.get(alert.severity, "#808080")

    payload = {
        "channel": slack_cfg.get("channel", "#cloudflare-gitops"),
        "attachments": [
            {
                "color": color,
                "title": f"GitOps Alert: {alert.alert_name}",
                "text": message,
                "fields": [
                    {"title": "Status", "value": alert.status, "short": True},
                    {"title": "Severity", "value": alert.severity, "short": True},
                    {"title": "Component", "value": alert.component, "short": True},
                ],
                "footer": "Cloudflare GitOps Webhook",
                # Epoch seconds. A naive utcnow().timestamp() would be
                # interpreted as local time and be off by the UTC offset.
                "ts": int(time.time()),
            }
        ],
    }

    try:
        resp = requests.post(webhook_url, json=payload, timeout=10)
        # Treat HTTP error statuses as failures too, so they get logged.
        resp.raise_for_status()
    except Exception as e:
        print(f"Slack notification failed: {e}", file=sys.stderr)
|
|
|
|
|
|
def process_alert(cfg: Dict[str, Any], payload: "AlertPayload"):
    """Process a single alert payload.

    If the alert qualifies for remediation, a GitLab pipeline is triggered;
    when that yields no pipeline URL the drift bot is run locally instead.
    Either way a Slack message is sent. Alerts that do not qualify only
    produce a Slack message when listed in ``webhook.notify_only_alerts``;
    all other alerts are ignored after the initial log line.

    Args:
        cfg: Loaded configuration.
        payload: The alert to act on.
    """
    print(f"Processing alert: {payload.alert_name} ({payload.status})")

    # Check if we should trigger a PR
    if should_trigger_pr(cfg, payload):
        print(f"Alert {payload.alert_name} triggers drift remediation")

        # Try GitLab pipeline trigger first
        pipeline_url = trigger_gitlab_pipeline(cfg, payload)

        if pipeline_url:
            message = f"Triggered drift remediation pipeline: {pipeline_url}"
        else:
            # Fall back to local execution
            print("Falling back to local drift_pr_bot execution")
            run_drift_bot_locally(cfg, payload)
            message = "Triggered local drift remediation"

        notify_slack(cfg, message, payload)
        return

    # Just notify. ``or`` guards against keys that are present but null in
    # the YAML, which would otherwise raise on ``.get`` / ``in``.
    webhook_cfg = cfg.get("webhook") or {}
    notify_only = webhook_cfg.get("notify_only_alerts") or []

    if payload.alert_name in notify_only:
        message = f"Alert {payload.alert_name} received (notify-only, no auto-PR)"
        notify_slack(cfg, message, payload)
|
|
|
|
|
|
def job_worker():
    """Background worker to process jobs.

    Loads the configuration once, then drains ``job_queue`` until it receives
    ``None``. Errors raised while handling a job are printed to stderr and
    the loop keeps running.
    """
    cfg = load_config()
    while True:
        # Poll with a short timeout; an empty queue just means "try again".
        try:
            job = job_queue.get(timeout=1)
        except queue.Empty:
            continue

        if job is None:  # Shutdown signal
            return

        try:
            process_alert(cfg, job)
        except Exception as exc:
            print(f"Job processing error: {exc}", file=sys.stderr)
|
|
|
|
|
|
class WebhookHandler(BaseHTTPRequestHandler):
    """HTTP handler for Alertmanager webhooks.

    Routes:
        GET  /health         - returns ``{"status": "ok"}``
        POST /webhook/alert  - validates and enqueues an alert payload

    When the ``WEBHOOK_SECRET`` environment variable is set, POST bodies must
    carry a matching hex HMAC-SHA256 in the ``X-Webhook-Signature`` header.
    """

    # Upper bound on accepted request bodies; larger requests get a 413.
    MAX_BODY_BYTES = 1024 * 1024

    # Configuration shared by all handler instances. A handler object is
    # created per request, so loading here would re-read the file every time.
    _cfg_cache = None

    def __init__(self, *args, **kwargs):
        if WebhookHandler._cfg_cache is None:
            WebhookHandler._cfg_cache = load_config()
        self.cfg = WebhookHandler._cfg_cache
        super().__init__(*args, **kwargs)

    def log_message(self, format, *args):
        print(f"[{datetime.utcnow().isoformat()}] {format % args}")

    def _send_json(self, status, body):
        """Write ``body`` as a JSON response with the given status code."""
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(json.dumps(body).encode())

    def do_GET(self):
        """Health check endpoint"""
        if self.path == "/health":
            self._send_json(200, {"status": "ok"})
        else:
            self.send_response(404)
            self.end_headers()

    def do_POST(self):
        """Handle webhook POST"""
        if self.path != "/webhook/alert":
            self.send_response(404)
            self.end_headers()
            return

        # Read body. The length comes from the client, so validate it: a
        # non-numeric value would raise, and a negative one would make
        # read() consume the stream until EOF.
        try:
            content_length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            content_length = -1
        if content_length < 0:
            self._send_json(400, {"error": "invalid Content-Length"})
            return
        if content_length > self.MAX_BODY_BYTES:
            self._send_json(413, {"error": "payload too large"})
            return
        body = self.rfile.read(content_length)

        # Verify signature if configured
        secret = os.environ.get("WEBHOOK_SECRET")
        if secret:
            signature = self.headers.get("X-Webhook-Signature") or ""
            expected = hmac.new(
                secret.encode(),
                body,
                hashlib.sha256,
            ).hexdigest()

            # Compare as bytes: compare_digest() rejects str arguments that
            # contain non-ASCII characters, which a client could send.
            if not hmac.compare_digest(signature.encode(), expected.encode()):
                self._send_json(403, {"error": "invalid signature"})
                return

        # Parse payload. The alert name is resolved here as well so that a
        # malformed payload is rejected before anything is queued.
        try:
            data = json.loads(body)
            payload = AlertPayload.from_json(data)
            alert_name = payload.alert_name
        except Exception as e:
            self._send_json(400, {"error": str(e)})
            return

        # Queue for processing
        job_queue.put(payload)

        # Respond immediately
        self._send_json(202, {
            "status": "accepted",
            "alert": alert_name,
        })
|
|
|
|
|
|
def main():
    """Main entry point.

    Parses ``--host`` / ``--port`` (defaults come from ``WEBHOOK_HOST`` and
    ``WEBHOOK_PORT``), starts the background worker thread and serves HTTP
    requests until interrupted.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Alertmanager webhook receiver for GitOps"
    )
    parser.add_argument(
        "--host",
        default=os.environ.get("WEBHOOK_HOST", "0.0.0.0"),
        help="Host to bind to",
    )
    parser.add_argument(
        "--port",
        type=int,
        # Left as a string on purpose: argparse runs string defaults through
        # ``type``, so a bad WEBHOOK_PORT yields a usage error, not a traceback.
        default=os.environ.get("WEBHOOK_PORT", "8080"),
        help="Port to listen on",
    )

    args = parser.parse_args()

    # Bind first so a failure (e.g. port in use) happens before any thread
    # is started.
    server = HTTPServer((args.host, args.port), WebhookHandler)

    # Start worker thread
    worker = threading.Thread(target=job_worker, daemon=True)
    worker.start()

    print(f"GitOps webhook receiver listening on {args.host}:{args.port}")
    print("  POST /webhook/alert - Alertmanager webhook")
    print("  GET  /health        - Health check")

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nShutting down...")
    finally:
        job_queue.put(None)  # Signal worker to stop
        # serve_forever() has already returned in this thread, so there is no
        # loop left to shut down; release the listening socket instead.
        server.server_close()
        # Give the worker a moment to finish the job it is currently handling.
        worker.join(timeout=5)


if __name__ == "__main__":
    main()
|