Initial commit: Cloudflare infrastructure with WAF Intelligence

- Complete Cloudflare Terraform configuration (DNS, WAF, tunnels, access)
- WAF Intelligence MCP server with threat analysis and ML classification
- GitOps automation with PR workflows and drift detection
- Observatory monitoring stack with Prometheus/Grafana
- IDE operator rules for governed development
- Security playbooks and compliance frameworks
- Autonomous remediation and state reconciliation
Vault Sovereign
2025-12-16 18:31:53 +00:00
commit 37a867c485
123 changed files with 25407 additions and 0 deletions


@@ -0,0 +1,370 @@
from __future__ import annotations
import os
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
from mcp.waf_intelligence.analyzer import AnalysisResult, RuleViolation, WAFRuleAnalyzer
from mcp.waf_intelligence.compliance import ComplianceMapper, FrameworkMapping
from mcp.waf_intelligence.generator import GeneratedRule, WAFRuleGenerator
# Optional advanced modules (Phase 7)
try:
from mcp.waf_intelligence.threat_intel import (
ThreatIntelCollector,
ThreatIntelReport,
ThreatIndicator,
)
_HAS_THREAT_INTEL = True
except ImportError:
    _HAS_THREAT_INTEL = False
    ThreatIntelCollector = None
    ThreatIntelReport = None
    ThreatIndicator = None
try:
from mcp.waf_intelligence.classifier import (
ThreatClassifier,
ClassificationResult,
)
_HAS_CLASSIFIER = True
except ImportError:
    _HAS_CLASSIFIER = False
    ThreatClassifier = None
    ClassificationResult = None
@dataclass
class WAFInsight:
"""Single high-quality insight across analysis + generation + compliance."""
violation: RuleViolation | None
suggested_rule: GeneratedRule | None
mappings: List[FrameworkMapping]
@dataclass
class ThreatAssessment:
"""Phase 7: Comprehensive threat assessment result."""
analysis_result: Optional[AnalysisResult] = None
threat_report: Optional[Any] = None # ThreatIntelReport when available
classification_summary: Dict[str, int] = field(default_factory=dict)
risk_score: float = 0.0
recommended_actions: List[str] = field(default_factory=list)
generated_at: datetime = field(default_factory=datetime.utcnow)
@property
def risk_level(self) -> str:
if self.risk_score >= 0.8:
return "critical"
elif self.risk_score >= 0.6:
return "high"
elif self.risk_score >= 0.4:
return "medium"
else:
return "low"
class WAFIntelligence:
"""
Quality-first orchestration layer:
- analyze WAF config
- propose a few rules
- attach compliance mappings
- Phase 7: integrate threat intel and ML classification
"""
def __init__(
self,
workspace_path: Optional[str] = None,
enable_threat_intel: bool = True,
enable_ml_classifier: bool = True,
) -> None:
self.workspace = Path(workspace_path) if workspace_path else Path.cwd()
# Core components
self.analyzer = WAFRuleAnalyzer()
self.generator = WAFRuleGenerator()
self.mapper = ComplianceMapper()
# Phase 7 components (optional)
self.threat_intel: Optional[Any] = None
self.classifier: Optional[Any] = None
if enable_threat_intel and _HAS_THREAT_INTEL:
try:
self.threat_intel = ThreatIntelCollector()
except Exception:
pass
if enable_ml_classifier and _HAS_CLASSIFIER:
try:
self.classifier = ThreatClassifier()
except Exception:
pass
def analyze_and_recommend(
self,
path: str,
*,
limit: int = 3,
min_severity: str = "warning",
) -> List[WAFInsight]:
analysis: AnalysisResult = self.analyzer.analyze_file(
path,
min_severity=min_severity,
)
top_violations = analysis.top_violations(
min_severity=min_severity,
limit=limit,
)
insights: List[WAFInsight] = []
for violation in top_violations:
mappings = self.mapper.best_effort_from_violation(violation.message)
scenario = violation.message
rules = self.generator.generate_from_scenario(scenario, limit=1)
suggested = rules[0] if rules else None
insights.append(
WAFInsight(
violation=violation,
suggested_rule=suggested,
mappings=mappings,
)
)
return insights
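    # Illustrative call pattern (sketch; "terraform/waf.tf" is a placeholder path):
    #
    #   intel = WAFIntelligence()
    #   for insight in intel.analyze_and_recommend("terraform/waf.tf", limit=3):
    #       if insight.violation:
    #           print(insight.violation.message)
    #       if insight.suggested_rule:
    #           print(insight.suggested_rule)
    #       print(insight.mappings)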
# ─────────────────────────────────────────────────────────────────────────
# Phase 7: Advanced threat intelligence methods
# ─────────────────────────────────────────────────────────────────────────
def collect_threat_intel(
self,
log_paths: Optional[List[str]] = None,
max_indicators: int = 100,
) -> Optional[Any]:
"""
Collect threat intelligence from logs and external feeds.
Args:
log_paths: Paths to Cloudflare log files
max_indicators: Maximum indicators to collect
Returns:
ThreatIntelReport or None if unavailable
"""
if not self.threat_intel:
return None
# Default log paths
if log_paths is None:
log_paths = [
str(self.workspace / "logs"),
"/var/log/cloudflare",
]
return self.threat_intel.collect(
log_paths=log_paths,
max_indicators=max_indicators,
)
def classify_threat(self, payload: str) -> Optional[Any]:
"""
Classify a payload using ML classifier.
Args:
payload: Request payload to classify
Returns:
ClassificationResult or None
"""
if not self.classifier:
return None
return self.classifier.classify(payload)
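    # Illustrative classification call (sketch; the payload is a made-up example):
    #
    #   result = intel.classify_threat("' OR 1=1 --")
    #   if result is not None:
    #       print(result.label)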
def full_assessment(
self,
waf_config_path: Optional[str] = None,
log_paths: Optional[List[str]] = None,
include_threat_intel: bool = True,
) -> ThreatAssessment:
"""
Phase 7: Perform comprehensive threat assessment.
Combines:
- WAF configuration analysis
- Threat intelligence collection
- ML classification summary
- Risk scoring
Args:
waf_config_path: Path to WAF Terraform file
log_paths: Paths to log files
include_threat_intel: Whether to collect threat intel
Returns:
ThreatAssessment with full analysis results
"""
assessment = ThreatAssessment()
risk_factors: List[float] = []
recommendations: List[str] = []
# 1. Analyze WAF configuration
if waf_config_path is None:
waf_config_path = str(self.workspace / "terraform" / "waf.tf")
if Path(waf_config_path).exists():
assessment.analysis_result = self.analyzer.analyze_file(
waf_config_path,
min_severity="info",
)
# Calculate risk from violations
severity_weights = {"error": 0.8, "warning": 0.5, "info": 0.2}
for violation in assessment.analysis_result.violations:
weight = severity_weights.get(violation.severity, 0.3)
risk_factors.append(weight)
# Generate recommendations
critical_count = sum(
1 for v in assessment.analysis_result.violations
if v.severity == "error"
)
if critical_count > 0:
recommendations.append(
f"🔴 Fix {critical_count} critical WAF configuration issues"
)
# 2. Collect threat intelligence
if include_threat_intel and self.threat_intel:
try:
assessment.threat_report = self.collect_threat_intel(
log_paths=log_paths,
max_indicators=50,
)
if assessment.threat_report:
indicators = assessment.threat_report.indicators
# Count by severity
severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0}
for ind in indicators:
sev = getattr(ind, "severity", "low")
severity_counts[sev] = severity_counts.get(sev, 0) + 1
# Add to classification summary
assessment.classification_summary["threat_indicators"] = len(indicators)
assessment.classification_summary.update(severity_counts)
# Calculate threat intel risk
if indicators:
critical_ratio = severity_counts["critical"] / len(indicators)
high_ratio = severity_counts["high"] / len(indicators)
risk_factors.append(critical_ratio * 0.9 + high_ratio * 0.7)
if severity_counts["critical"] > 0:
recommendations.append(
f"🚨 Block {severity_counts['critical']} critical threat IPs immediately"
)
except Exception:
pass
# 3. ML classification summary (from any collected data)
if self.classifier and assessment.threat_report:
try:
attack_types = {"sqli": 0, "xss": 0, "rce": 0, "clean": 0, "unknown": 0}
indicators = assessment.threat_report.indicators
pattern_indicators = [
i for i in indicators
if getattr(i, "indicator_type", "") == "pattern"
]
for ind in pattern_indicators[:20]: # Sample first 20
result = self.classifier.classify(ind.value)
if result:
label = result.label
attack_types[label] = attack_types.get(label, 0) + 1
assessment.classification_summary["ml_classifications"] = attack_types
# Add ML risk factor
dangerous = attack_types.get("sqli", 0) + attack_types.get("rce", 0)
if dangerous > 5:
risk_factors.append(0.8)
recommendations.append(
f"⚠️ ML detected {dangerous} dangerous attack patterns"
)
except Exception:
pass
# 4. Calculate final risk score
if risk_factors:
assessment.risk_score = min(1.0, sum(risk_factors) / max(len(risk_factors), 1))
else:
assessment.risk_score = 0.3 # Baseline risk
assessment.recommended_actions = recommendations
return assessment
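    # Example of consuming an assessment (illustrative sketch; `notify` below is a
    # hypothetical hook, not part of this module):
    #
    #   assessment = intel.full_assessment(log_paths=["/var/log/cloudflare"])
    #   if assessment.risk_level in ("high", "critical"):
    #       for action in assessment.recommended_actions:
    #           notify(action)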
def generate_gitops_proposals(
self,
threat_report: Optional[Any] = None,
max_proposals: int = 5,
) -> List[Dict[str, Any]]:
"""
Generate GitOps-ready rule proposals.
Args:
threat_report: ThreatIntelReport to use
max_proposals: Maximum proposals to generate
Returns:
List of proposal dicts ready for MR creation
"""
proposals: List[Dict[str, Any]] = []
if not threat_report:
return proposals
try:
# Import proposer dynamically
from gitops.waf_rule_proposer import WAFRuleProposer
proposer = WAFRuleProposer(workspace_path=str(self.workspace))
batch = proposer.generate_proposals(
threat_report=threat_report,
max_proposals=max_proposals,
)
for proposal in batch.proposals:
proposals.append({
"name": proposal.rule_name,
"type": proposal.rule_type,
"severity": proposal.severity,
"confidence": proposal.confidence,
"terraform": proposal.terraform_code,
"justification": proposal.justification,
"auto_deploy": proposal.auto_deploy_eligible,
})
except ImportError:
pass
return proposals
@property
def capabilities(self) -> Dict[str, bool]:
"""Report available capabilities."""
return {
"core_analysis": True,
"rule_generation": True,
"compliance_mapping": True,
"threat_intel": self.threat_intel is not None,
"ml_classification": self.classifier is not None,
}
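# Minimal usage sketch (illustrative): assumes the workspace may contain
# terraform/waf.tf and Cloudflare logs; the Phase 7 extras degrade gracefully
# when unavailable.
if __name__ == "__main__":
    intel = WAFIntelligence(workspace_path=".")
    print("capabilities:", intel.capabilities)

    # End-to-end assessment; falls back to baseline risk when inputs are missing.
    assessment = intel.full_assessment()
    print(f"risk: {assessment.risk_level} ({assessment.risk_score:.2f})")
    for action in assessment.recommended_actions:
        print(" -", action)

    # Per-violation insights with suggested rules and compliance mappings.
    waf_path = intel.workspace / "terraform" / "waf.tf"
    if waf_path.exists():
        for insight in intel.analyze_and_recommend(str(waf_path), limit=3):
            if insight.violation:
                print("violation:", insight.violation.message)
            if insight.suggested_rule:
                print("suggested rule:", insight.suggested_rule)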