Initial commit: Cloudflare infrastructure with WAF Intelligence
- Complete Cloudflare Terraform configuration (DNS, WAF, tunnels, access) - WAF Intelligence MCP server with threat analysis and ML classification - GitOps automation with PR workflows and drift detection - Observatory monitoring stack with Prometheus/Grafana - IDE operator rules for governed development - Security playbooks and compliance frameworks - Autonomous remediation and state reconciliation
This commit is contained in:
373
gitops/webhook_receiver.py
Normal file
373
gitops/webhook_receiver.py
Normal file
@@ -0,0 +1,373 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Alertmanager Webhook Receiver for Cloudflare GitOps
|
||||
Phase 6 - PR Workflows
|
||||
|
||||
Receives alerts from Alertmanager and triggers GitOps actions:
|
||||
- Drift remediation PRs
|
||||
- Pipeline triggers
|
||||
- Slack notifications
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
import threading
|
||||
import queue
|
||||
|
||||
try:
|
||||
import requests
|
||||
import yaml
|
||||
except ImportError:
|
||||
print("ERROR: pip install requests pyyaml", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
# Directory that holds this script; the GitOps config file sits next to it.
HERE = Path(__file__).resolve().parent
CONFIG_PATH = HERE.joinpath("config.yml")

# Alert payloads wait here until the background worker picks them up.
job_queue: queue.Queue = queue.Queue()
|
||||
|
||||
|
||||
def load_config() -> Dict[str, Any]:
    """Load the GitOps configuration from ``CONFIG_PATH``.

    The file is parsed with ``yaml.safe_load``. Every string value is then
    scanned for ``${VAR}`` / ``${VAR:-default}`` placeholders, which are
    resolved from the process environment. Dicts and lists are expanded
    recursively; all other values are returned untouched.

    Placeholder rules:
      * A value that is exactly one placeholder becomes the variable's value,
        the default if the variable is unset, or ``None`` if there is no
        default either.
      * Placeholders embedded in longer text are substituted in place and the
        surrounding text is kept. An embedded placeholder that cannot be
        resolved is left verbatim.

    Returns:
        The expanded configuration; ``{}`` when the config file is empty.

    Raises:
        OSError: if the config file cannot be opened.
        yaml.YAMLError: if the file is not valid YAML.
    """
    import re  # function-scope import: the module header does not import re

    # Explicit encoding so parsing does not depend on the host locale.
    with open(CONFIG_PATH, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # safe_load() returns None for an empty document; callers expect a dict.
    if config is None:
        return {}

    placeholder = re.compile(r"\$\{([^}]*)\}")

    def resolve(inner):
        """Resolve the text between ``${`` and ``}`` to a value or None."""
        var, sep, default = inner.partition(":-")
        return os.environ.get(var, default if sep else None)

    def expand_env(obj):
        if isinstance(obj, str):
            whole = placeholder.fullmatch(obj)
            if whole:
                # Pure placeholder: may legitimately resolve to None.
                return resolve(whole.group(1))

            def substitute(match):
                value = resolve(match.group(1))
                # Keep unresolved placeholders instead of deleting text.
                return match.group(0) if value is None else value

            return placeholder.sub(substitute, obj)
        if isinstance(obj, dict):
            return {k: expand_env(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [expand_env(i) for i in obj]
        return obj

    return expand_env(config)
|
||||
|
||||
|
||||
@dataclass
class AlertPayload:
    """Parsed Alertmanager webhook payload.

    Fields mirror the top-level keys of the webhook JSON body
    (``groupLabels`` -> ``group_labels`` and so on). Convenience properties
    expose the ``alertname``, ``severity`` and ``component`` common labels,
    each falling back to ``"unknown"``.
    """

    receiver: str
    status: str  # "firing" or "resolved"
    alerts: List[Dict]
    group_labels: Dict[str, str]
    common_labels: Dict[str, str]
    common_annotations: Dict[str, str]
    external_url: str
    version: str
    group_key: str

    @classmethod
    def from_json(cls, data: Dict) -> "AlertPayload":
        """Build a payload from the decoded webhook JSON object.

        Keys that are missing *or* explicitly ``null`` fall back to an empty
        string / list / dict (``"4"`` for ``version``), so the label
        properties never have to deal with ``None``.
        """

        def pick(key, default):
            # dict.get() only applies its default for absent keys; a JSON
            # null would otherwise leak through as None.
            value = data.get(key)
            return default if value is None else value

        return cls(
            receiver=pick("receiver", ""),
            status=pick("status", ""),
            alerts=pick("alerts", []),
            group_labels=pick("groupLabels", {}),
            common_labels=pick("commonLabels", {}),
            common_annotations=pick("commonAnnotations", {}),
            external_url=pick("externalURL", ""),
            version=pick("version", "4"),
            group_key=pick("groupKey", ""),
        )

    @property
    def alert_name(self) -> str:
        """The ``alertname`` common label, or ``"unknown"``."""
        return self.common_labels.get("alertname", "unknown")

    @property
    def severity(self) -> str:
        """The ``severity`` common label, or ``"unknown"``."""
        return self.common_labels.get("severity", "unknown")

    @property
    def component(self) -> str:
        """The ``component`` common label, or ``"unknown"``."""
        return self.common_labels.get("component", "unknown")
|
||||
|
||||
|
||||
def should_trigger_pr(cfg: Dict[str, Any], payload: AlertPayload) -> bool:
    """Decide whether an alert should start drift remediation.

    Returns True only for a non-resolved alert whose name is listed under
    ``webhook.trigger_alerts``. Resolved alerts, alerts listed under
    ``webhook.notify_only_alerts`` and unlisted alerts all yield False.
    """
    settings = cfg.get("webhook", {})
    pr_alert_names = settings.get("trigger_alerts", [])
    quiet_alert_names = settings.get("notify_only_alerts", [])

    if payload.status != "resolved":
        name = payload.alert_name
        if name in pr_alert_names:
            return True
        if name in quiet_alert_names:
            # Explicitly marked as notify-only.
            return False

    # Resolved alerts and anything unlisted never open a PR.
    return False
|
||||
|
||||
|
||||
def trigger_gitlab_pipeline(cfg: Dict[str, Any], payload: AlertPayload) -> Optional[str]:
    """Trigger the GitLab pipeline used for drift remediation.

    Reads ``gitlab.base_url`` / ``gitlab.project_id`` and the
    ``webhook.gitlab_trigger`` section (``enabled``, ``trigger_token``,
    ``ref``) from ``cfg``. The trigger token falls back to the
    ``GITLAB_TRIGGER_TOKEN`` environment variable. The alert name, severity
    and component are passed to the pipeline as ``GITOPS_*`` variables.

    Returns:
        The ``web_url`` from GitLab's response, or ``None`` when the trigger
        is disabled, not fully configured, or the request failed.
    """
    from urllib.parse import quote, unquote

    gitlab_cfg = cfg.get("gitlab", {})
    webhook_cfg = cfg.get("webhook", {}).get("gitlab_trigger", {})

    if not webhook_cfg.get("enabled", False):
        return None

    # `or` rather than a .get() default: the key may be present with a None
    # value (e.g. an environment placeholder that resolved to nothing).
    base_url = gitlab_cfg.get("base_url") or "https://gitlab.com"
    project_id = gitlab_cfg.get("project_id")
    trigger_token = webhook_cfg.get("trigger_token") or os.environ.get("GITLAB_TRIGGER_TOKEN")
    ref = webhook_cfg.get("ref") or "main"

    if not project_id or not trigger_token:
        print("GitLab trigger not configured", file=sys.stderr)
        return None

    # The API wants a numeric ID or a URL-encoded path ("group%2Fproject").
    # Decoding first keeps already-encoded values from being double-encoded.
    project_ref = quote(unquote(str(project_id)), safe="")
    url = f"{base_url.rstrip('/')}/api/v4/projects/{project_ref}/trigger/pipeline"

    data = {
        "ref": ref,
        "token": trigger_token,
        "variables[GITOPS_TRIGGER_SOURCE]": "alert",
        "variables[GITOPS_ALERT_NAME]": payload.alert_name,
        "variables[GITOPS_ALERT_SEVERITY]": payload.severity,
        "variables[GITOPS_ALERT_COMPONENT]": payload.component,
    }

    try:
        resp = requests.post(url, data=data, timeout=30)
        resp.raise_for_status()
        result = resp.json()
        return result.get("web_url")
    except Exception as e:
        # Deliberately broad: any failure is logged and reported as None so
        # the caller can choose another remediation path.
        print(f"Failed to trigger pipeline: {e}", file=sys.stderr)
        return None
|
||||
|
||||
|
||||
def run_drift_bot_locally(cfg: Dict[str, Any], payload: AlertPayload):
    """Run ``drift_pr_bot.py`` directly (for a local webhook receiver).

    The bot is started in this script's directory with a copy of the current
    environment plus ``GITOPS_TRIGGER_SOURCE`` and ``GITOPS_ALERT_NAME``.
    The call blocks until the bot exits; a non-zero exit status is logged to
    stderr. ``cfg`` is accepted for signature parity and is not used.
    """
    env = os.environ.copy()
    env["GITOPS_TRIGGER_SOURCE"] = "alert"
    env["GITOPS_ALERT_NAME"] = payload.alert_name

    # Use the interpreter running this receiver so the bot sees the same
    # installed packages; fall back to PATH lookup if it is unknown.
    interpreter = sys.executable or "python3"

    result = subprocess.run(
        [interpreter, "drift_pr_bot.py", "--trigger-source", "alert", "--alert-name", payload.alert_name],
        cwd=HERE,
        env=env,
    )

    if result.returncode != 0:
        print(f"drift_pr_bot.py exited with status {result.returncode}", file=sys.stderr)
|
||||
|
||||
|
||||
def notify_slack(cfg: Dict[str, Any], message: str, alert: AlertPayload):
    """Send a Slack notification about ``alert``.

    Posts one attachment to ``slack.webhook_url`` (a no-op when that setting
    is empty). The attachment colour follows the alert severity
    (critical/warning/info, grey otherwise) and carries the alert status,
    severity and component as fields. Delivery is best-effort: any failure,
    including an HTTP error status, is logged to stderr and never raised.
    """
    import time

    slack_cfg = cfg.get("slack", {})
    webhook_url = slack_cfg.get("webhook_url")

    if not webhook_url:
        return

    color = {
        "critical": "danger",
        "warning": "warning",
        "info": "#439FE0",
    }.get(alert.severity, "#808080")

    payload = {
        "channel": slack_cfg.get("channel", "#cloudflare-gitops"),
        "attachments": [
            {
                "color": color,
                "title": f"GitOps Alert: {alert.alert_name}",
                "text": message,
                "fields": [
                    {"title": "Status", "value": alert.status, "short": True},
                    {"title": "Severity", "value": alert.severity, "short": True},
                    {"title": "Component", "value": alert.component, "short": True},
                ],
                "footer": "Cloudflare GitOps Webhook",
                # Epoch seconds. datetime.utcnow().timestamp() would treat
                # the naive UTC value as local time and skew the result on
                # hosts whose timezone is not UTC.
                "ts": int(time.time()),
            }
        ],
    }

    try:
        resp = requests.post(webhook_url, json=payload, timeout=10)
        # Surface 4xx/5xx replies in the log instead of dropping them.
        resp.raise_for_status()
    except Exception as e:
        print(f"Slack notification failed: {e}", file=sys.stderr)
|
||||
|
||||
|
||||
def process_alert(cfg: Dict[str, Any], payload: AlertPayload):
    """Handle one alert payload end to end.

    Alerts that qualify for remediation first try the GitLab pipeline
    trigger and fall back to running the drift bot locally; either outcome
    is reported to Slack. Other alerts only produce a Slack message when
    they are listed under ``webhook.notify_only_alerts``.
    """
    print(f"Processing alert: {payload.alert_name} ({payload.status})")

    if not should_trigger_pr(cfg, payload):
        # No remediation: notify only for explicitly listed alerts.
        quiet_names = cfg.get("webhook", {}).get("notify_only_alerts", [])
        if payload.alert_name in quiet_names:
            message = f"Alert {payload.alert_name} received (notify-only, no auto-PR)"
            notify_slack(cfg, message, payload)
        return

    print(f"Alert {payload.alert_name} triggers drift remediation")

    # Preferred path: remote pipeline. No URL means it did not start.
    pipeline_url = trigger_gitlab_pipeline(cfg, payload)
    if not pipeline_url:
        print("Falling back to local drift_pr_bot execution")
        run_drift_bot_locally(cfg, payload)
        message = "Triggered local drift remediation"
    else:
        message = f"Triggered drift remediation pipeline: {pipeline_url}"

    notify_slack(cfg, message, payload)
|
||||
|
||||
|
||||
def job_worker():
    """Drain ``job_queue`` in the background until a ``None`` sentinel arrives.

    The configuration is loaded once when the worker starts. The queue is
    polled with a one-second timeout; errors raised while handling a job are
    logged to stderr and the loop carries on.
    """
    cfg = load_config()
    running = True
    while running:
        try:
            job = job_queue.get(timeout=1)
            if job is not None:
                process_alert(cfg, job)
            else:
                # Shutdown signal
                running = False
        except queue.Empty:
            # Nothing queued within the timeout; poll again.
            pass
        except Exception as err:
            print(f"Job processing error: {err}", file=sys.stderr)
|
||||
|
||||
|
||||
class WebhookHandler(BaseHTTPRequestHandler):
    """HTTP handler for Alertmanager webhooks.

    Routes:
        GET  /health         -> 200 ``{"status": "ok"}``
        POST /webhook/alert  -> validates the body, queues the parsed alert
                                on ``job_queue`` and answers 202

    Any other path answers 404. When the ``WEBHOOK_SECRET`` environment
    variable is set, POST bodies must carry a matching hex HMAC-SHA256 in the
    ``X-Webhook-Signature`` header.
    """

    # Largest request body accepted. The body is read before the signature
    # can be checked, so this bounds what an unauthenticated client can make
    # the server buffer.
    MAX_BODY_BYTES = 10 * 1024 * 1024

    def __init__(self, *args, **kwargs):
        # Set before calling the base constructor, which already processes
        # the request.
        self.cfg = load_config()
        super().__init__(*args, **kwargs)

    def log_message(self, format, *args):
        """Print request log lines prefixed with a naive-UTC ISO timestamp."""
        from datetime import timezone

        # Same output as the deprecated datetime.utcnow().isoformat().
        stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        print(f"[{stamp}] {format % args}")

    def _send_json(self, status, body):
        """Send ``body`` as a JSON response with the given status code."""
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(json.dumps(body).encode())

    def do_GET(self):
        """Health check endpoint"""
        if self.path == "/health":
            self._send_json(200, {"status": "ok"})
        else:
            self.send_response(404)
            self.end_headers()

    def do_POST(self):
        """Handle webhook POST"""
        if self.path != "/webhook/alert":
            self.send_response(404)
            self.end_headers()
            return

        # Validate Content-Length before reading: a non-numeric value would
        # raise, and a negative one would make read() block until EOF.
        try:
            content_length = int(self.headers.get("Content-Length", 0))
        except (TypeError, ValueError):
            content_length = -1
        if content_length < 0:
            self._send_json(400, {"error": "invalid Content-Length"})
            return
        if content_length > self.MAX_BODY_BYTES:
            self._send_json(413, {"error": "payload too large"})
            return

        body = self.rfile.read(content_length)

        # Verify signature if configured
        secret = os.environ.get("WEBHOOK_SECRET")
        if secret:
            signature = self.headers.get("X-Webhook-Signature") or ""
            expected = hmac.new(secret.encode(), body, hashlib.sha256).hexdigest()

            # Compare bytes: compare_digest() raises TypeError for str
            # arguments containing non-ASCII characters.
            if not hmac.compare_digest(signature.encode("utf-8"), expected.encode("utf-8")):
                self._send_json(403, {"error": "invalid signature"})
                return

        # Parse payload. Everything that can fail on malformed input happens
        # here, before any success headers are written.
        try:
            data = json.loads(body)
            if not isinstance(data, dict):
                raise ValueError("payload must be a JSON object")
            payload = AlertPayload.from_json(data)
            alert_name = payload.alert_name
        except Exception as e:
            self._send_json(400, {"error": str(e)})
            return

        # Queue for processing
        job_queue.put(payload)

        # Respond immediately
        self._send_json(202, {
            "status": "accepted",
            "alert": alert_name,
        })
|
||||
|
||||
|
||||
def main():
    """Main entry point.

    Parses ``--host`` / ``--port`` (defaults come from ``WEBHOOK_HOST`` /
    ``WEBHOOK_PORT``, else ``0.0.0.0:8080``), starts the background job
    worker and serves HTTP until interrupted. On exit the worker is told to
    stop, the listening socket is closed and the worker gets a short grace
    period to finish the job it is working on.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Alertmanager webhook receiver for GitOps"
    )
    parser.add_argument(
        "--host",
        default=os.environ.get("WEBHOOK_HOST", "0.0.0.0"),
        help="Host to bind to",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=int(os.environ.get("WEBHOOK_PORT", "8080")),
        help="Port to listen on",
    )

    args = parser.parse_args()

    # Start worker thread
    worker = threading.Thread(target=job_worker, daemon=True)
    worker.start()

    # Start server
    server = HTTPServer((args.host, args.port), WebhookHandler)
    print(f"GitOps webhook receiver listening on {args.host}:{args.port}")
    print(" POST /webhook/alert - Alertmanager webhook")
    print(" GET /health - Health check")

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\nShutting down...")
    finally:
        job_queue.put(None)  # Signal worker to stop
        # serve_forever() has already returned here, so there is no loop
        # left for shutdown() to stop; release the listening socket instead.
        server.server_close()
        # The worker is a daemon thread; give it a moment to finish the
        # current job before the interpreter exits.
        worker.join(timeout=5)
|
||||
|
||||
|
||||
# Start the receiver only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user