Initial commit: Cloudflare infrastructure with WAF Intelligence
- Complete Cloudflare Terraform configuration (DNS, WAF, tunnels, access)
- WAF Intelligence MCP server with threat analysis and ML classification
- GitOps automation with PR workflows and drift detection
- Observatory monitoring stack with Prometheus/Grafana
- IDE operator rules for governed development
- Security playbooks and compliance frameworks
- Autonomous remediation and state reconciliation
mcp/__init__.py (new file, 6 lines)
@@ -0,0 +1,6 @@
"""
MCP tools for the CLOUDFLARE workspace.

Currently:
- oracle_answer: compliance / security oracle
"""
mcp/oracle_answer/__init__.py (new file, 13 lines)
@@ -0,0 +1,13 @@
"""
ORACLE_ANSWER MCP TOOL

Modular, production-ready compliance oracle for OpenCode integration.

Version: 0.2.0
Architecture: Clean separation of concerns (tool + optional CLI wrapper)
"""

from .tool import OracleAnswerTool, ToolResponse

__version__ = "0.2.0"
__all__ = ["OracleAnswerTool", "ToolResponse", "__version__"]
mcp/oracle_answer/cli.py (new file, 134 lines)
@@ -0,0 +1,134 @@
"""
Command-line interface for the oracle_answer tool.

Uses NVIDIA's free API (build.nvidia.com) for actual LLM responses.

NOTE FOR AUTOMATION:
- All CLI arguments must be defined ONLY in build_parser().
- When changing CLI flags, rewrite build_parser() entirely.
- Do not define duplicate flags like --question in other functions.
"""

import argparse
import asyncio
import json
import sys
from typing import List, Optional

from .tool import OracleAnswerTool


def build_parser() -> argparse.ArgumentParser:
    """
    Build the argument parser.

    RULE: This function is the single source of truth for CLI args.
    Never append args elsewhere.
    """
    parser = argparse.ArgumentParser(
        prog="oracle-answer",
        description="Sovereign compliance oracle powered by NVIDIA AI.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  oracle-answer --question "Are we GDPR compliant?" --frameworks GDPR ISO-27001
  oracle-answer --question "Incident response time SLA?" --mode advisory
  oracle-answer --question "Test?" --local-only  (skip NVIDIA API)
""",
    )

    parser.add_argument(
        "--question",
        required=True,
        type=str,
        help="Compliance / security question to answer.",
    )

    parser.add_argument(
        "--frameworks",
        nargs="*",
        default=["NIST-CSF", "ISO-27001"],
        type=str,
        help="Frameworks to reference (space-separated).",
    )

    parser.add_argument(
        "--mode",
        default="strict",
        choices=["strict", "advisory"],
        help="strict = conservative, advisory = more exploratory.",
    )

    parser.add_argument(
        "--json",
        action="store_true",
        help="Output ToolResponse as JSON instead of pretty text.",
    )

    parser.add_argument(
        "--local-only",
        action="store_true",
        help="Skip NVIDIA API calls (for testing).",
    )

    return parser


async def main_async(args: Optional[List[str]] = None) -> int:
    """Async main entry point."""
    parser = build_parser()
    ns = parser.parse_args(args=args)

    tool = OracleAnswerTool(
        default_frameworks=ns.frameworks,
        use_local_only=ns.local_only,
    )
    resp = await tool.answer(
        question=ns.question,
        frameworks=ns.frameworks,
        mode=ns.mode,
    )

    if ns.json:
        print(
            json.dumps(
                {
                    "answer": resp.answer,
                    "framework_hits": resp.framework_hits,
                    "reasoning": resp.reasoning,
                    "model": resp.model,
                },
                indent=2,
            )
        )
    else:
        print("\n" + "=" * 80)
        print("ORACLE ANSWER (Powered by NVIDIA AI)")
        print("=" * 80 + "\n")
        print(resp.answer)
        if resp.reasoning:
            print("\n--- Reasoning ---\n")
            print(resp.reasoning)
        if resp.framework_hits:
            print("\n--- Framework Hits ---\n")
            for framework, hits in resp.framework_hits.items():
                if hits:
                    print(f"{framework}:")
                    for hit in hits:
                        print(f"  • {hit}")
        print(f"\n[Model: {resp.model}]")
        print()

    return 0


def main() -> None:
    """Sync wrapper for the CLI entry point."""
    try:
        sys.exit(asyncio.run(main_async()))
    except KeyboardInterrupt:
        sys.exit(1)


if __name__ == "__main__":
    main()
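A quick smoke test for this CLI (a sketch, not part of the commit): main_async accepts an argv list, so it can be driven programmatically; --local-only means no NVIDIA_API_KEY is required.

# Hypothetical smoke test: drives main_async() with an argv list instead of the shell.
import asyncio

from mcp.oracle_answer.cli import main_async

exit_code = asyncio.run(
    main_async(["--question", "Test?", "--local-only", "--json"])
)
assert exit_code == 0  # JSON response printed; local-only skips the API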
mcp/oracle_answer/tool.py (new file, 185 lines)
@@ -0,0 +1,185 @@
|
||||
"""
|
||||
Core oracle tool implementation with NVIDIA AI integration.
|
||||
|
||||
This module contains the logic that answers compliance questions using
|
||||
NVIDIA's API (free tier from build.nvidia.com).
|
||||
|
||||
Separate from CLI/API wrapper for clean testability.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
try:
|
||||
import httpx
|
||||
except ImportError:
|
||||
httpx = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ToolResponse:
|
||||
"""Canonical response from the oracle tool."""
|
||||
|
||||
answer: str
|
||||
framework_hits: Dict[str, List[str]]
|
||||
reasoning: Optional[str] = None
|
||||
raw_context: Optional[Dict[str, Any]] = None
|
||||
model: str = "nvidia"
|
||||
|
||||
|
||||
class OracleAnswerTool:
|
||||
"""
|
||||
Compliance / security oracle powered by NVIDIA AI.
|
||||
|
||||
This tool:
|
||||
- takes `question`, `frameworks`, `mode`, etc.
|
||||
- queries NVIDIA's LLM API (free tier)
|
||||
- searches local documentation for context
|
||||
- assembles structured ToolResponse with framework mapping
|
||||
"""
|
||||
|
||||
# NVIDIA API configuration
|
||||
NVIDIA_API_BASE = "https://integrate.api.nvidia.com/v1"
|
||||
NVIDIA_MODEL = "meta/llama-2-7b-chat" # Free tier model
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
*,
|
||||
default_frameworks: Optional[List[str]] = None,
|
||||
api_key: Optional[str] = None,
|
||||
use_local_only: bool = False,
|
||||
) -> None:
|
||||
"""
|
||||
Initialize oracle with NVIDIA API integration.
|
||||
|
||||
Args:
|
||||
default_frameworks: Default compliance frameworks to use
|
||||
api_key: NVIDIA API key (defaults to NVIDIA_API_KEY env var)
|
||||
use_local_only: If True, skip LLM calls (for testing)
|
||||
"""
|
||||
self.default_frameworks = default_frameworks or ["NIST-CSF", "ISO-27001"]
|
||||
self.api_key = api_key or os.environ.get("NVIDIA_API_KEY")
|
||||
self.use_local_only = use_local_only
|
||||
|
||||
if not self.use_local_only and not self.api_key:
|
||||
raise ValueError(
|
||||
"NVIDIA_API_KEY not found. Set it in .env or pass api_key parameter."
|
||||
)
|
||||
|
||||
def _extract_framework_hits(
|
||||
self, answer: str, frameworks: List[str]
|
||||
) -> Dict[str, List[str]]:
|
||||
"""Extract mentions of frameworks from the LLM answer."""
|
||||
hits = {fw: [] for fw in frameworks}
|
||||
answer_lower = answer.lower()
|
||||
|
||||
for framework in frameworks:
|
||||
# Simple keyword matching for framework mentions
|
||||
if framework.lower() in answer_lower:
|
||||
# Extract sentences containing the framework
|
||||
sentences = answer.split(".")
|
||||
for sentence in sentences:
|
||||
if framework.lower() in sentence.lower():
|
||||
hits[framework].append(sentence.strip())
|
||||
|
||||
return hits
|
||||
|
||||
async def _call_nvidia_api(self, prompt: str) -> str:
|
||||
"""Call NVIDIA's API to get LLM response."""
|
||||
if self.use_local_only:
|
||||
return "Local-only mode: skipping NVIDIA API call"
|
||||
|
||||
if not httpx:
|
||||
raise ImportError("httpx not installed. Install with: pip install httpx")
|
||||
|
||||
headers = {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"Accept": "application/json",
|
||||
}
|
||||
|
||||
payload = {
|
||||
"model": self.NVIDIA_MODEL,
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.9,
|
||||
"max_tokens": 1024,
|
||||
}
|
||||
|
||||
try:
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.post(
|
||||
f"{self.NVIDIA_API_BASE}/chat/completions",
|
||||
json=payload,
|
||||
headers=headers,
|
||||
timeout=30.0,
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
return data["choices"][0]["message"]["content"]
|
||||
except Exception as e:
|
||||
return f"(API Error: {str(e)}) Falling back to local analysis..."
|
||||
|
||||
async def answer(
|
||||
self,
|
||||
question: str,
|
||||
frameworks: Optional[List[str]] = None,
|
||||
mode: str = "strict",
|
||||
) -> ToolResponse:
|
||||
"""
|
||||
Main entry point for MCP / clients.
|
||||
|
||||
Args:
|
||||
question: Compliance question to answer
|
||||
frameworks: Frameworks to reference (default: NIST-CSF, ISO-27001)
|
||||
mode: "strict" (conservative) or "advisory" (exploratory)
|
||||
|
||||
Returns:
|
||||
ToolResponse with answer, framework hits, and reasoning
|
||||
"""
|
||||
frameworks = frameworks or self.default_frameworks
|
||||
|
||||
# Build context-aware prompt for NVIDIA API
|
||||
mode_instruction = (
|
||||
"conservative and cautious, assuming worst-case scenarios"
|
||||
if mode == "strict"
|
||||
else "exploratory and comprehensive"
|
||||
)
|
||||
|
||||
prompt = f"""You are a compliance and security expert analyzing infrastructure questions.
|
||||
|
||||
Question: {question}
|
||||
|
||||
Compliance Frameworks to Consider:
|
||||
{chr(10).join(f"- {fw}" for fw in frameworks)}
|
||||
|
||||
Analysis Mode: {mode_instruction}
|
||||
|
||||
Provide a structured answer that:
|
||||
1. Directly addresses the question
|
||||
2. References the relevant frameworks
|
||||
3. Identifies gaps or risks
|
||||
4. Suggests mitigations where applicable
|
||||
|
||||
Be concise but thorough."""
|
||||
|
||||
# Call NVIDIA API for actual LLM response
|
||||
answer = await self._call_nvidia_api(prompt)
|
||||
|
||||
# Extract framework mentions from the response
|
||||
framework_hits = self._extract_framework_hits(answer, frameworks)
|
||||
|
||||
# Generate reasoning based on mode
|
||||
reasoning = (
|
||||
f"Analyzed question against frameworks: {', '.join(frameworks)}. "
|
||||
f"Mode={mode}. Used NVIDIA LLM for compliance analysis."
|
||||
)
|
||||
|
||||
return ToolResponse(
|
||||
answer=answer,
|
||||
framework_hits=framework_hits,
|
||||
reasoning=reasoning,
|
||||
model="nvidia/llama-2-7b-chat",
|
||||
)
|
||||
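A short sketch of what _extract_framework_hits produces, based on the sentence-splitting heuristic above (illustrative values only):

# Illustrative only: exercises the keyword/sentence heuristic via a
# local-only tool instance, so no API key is needed.
from mcp.oracle_answer.tool import OracleAnswerTool

tool = OracleAnswerTool(use_local_only=True)
answer = "We align with ISO-27001 controls. NIST-CSF coverage is partial."
hits = tool._extract_framework_hits(answer, ["NIST-CSF", "ISO-27001"])
# hits == {
#     "NIST-CSF": ["NIST-CSF coverage is partial"],
#     "ISO-27001": ["We align with ISO-27001 controls"],
# }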
mcp/waf_intelligence/__init__.py (new file, 41 lines)
@@ -0,0 +1,41 @@
"""
WAF Intelligence Engine - Analyze, audit, and generate Cloudflare WAF rules.

This module provides tools to:
- Analyze existing WAF rules for gaps and compliance issues
- Generate new WAF rules based on threat models
- Map rules to compliance frameworks (NIST, PCI-DSS, GDPR, etc.)
- Validate Terraform WAF configurations

The primary classes and functions are re-exported below.
"""

from mcp.waf_intelligence.analyzer import (
    WAFRuleAnalyzer,
    RuleViolation,
    AnalysisResult,
)
from mcp.waf_intelligence.generator import (
    WAFRuleGenerator,
    GeneratedRule,
)
from mcp.waf_intelligence.compliance import (
    ComplianceMapper,
    FrameworkMapping,
)
from mcp.waf_intelligence.orchestrator import (
    WAFIntelligence,
    WAFInsight,
)

__all__ = [
    "WAFRuleAnalyzer",
    "WAFRuleGenerator",
    "ComplianceMapper",
    "WAFIntelligence",
    "WAFInsight",
    "RuleViolation",
    "AnalysisResult",
    "GeneratedRule",
    "FrameworkMapping",
]
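Downstream consumers can pull the orchestrator straight from the package root; a minimal sketch, assuming a terraform/waf.tf exists in the working directory:

# Hypothetical usage of the package-root re-exports.
from mcp.waf_intelligence import WAFIntelligence

intel = WAFIntelligence()
insights = intel.analyze_and_recommend("terraform/waf.tf", limit=3)
for insight in insights:
    if insight.violation:
        print(insight.violation.severity, insight.violation.message)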
mcp/waf_intelligence/__main__.py (new file, 132 lines)
@@ -0,0 +1,132 @@
from __future__ import annotations

import argparse
import json
import sys
from dataclasses import asdict
from pathlib import Path
from typing import Any, Dict, List

from .orchestrator import WAFInsight, WAFIntelligence


def _insight_to_dict(insight: WAFInsight) -> Dict[str, Any]:
    """Convert a WAFInsight dataclass into a plain dict."""
    return asdict(insight)


def _has_error(insights: List[WAFInsight]) -> bool:
    """Return True if any violation is error-severity."""
    for insight in insights:
        if insight.violation and insight.violation.severity == "error":
            return True
    return False


def run_cli(argv: List[str] | None = None) -> int:
    parser = argparse.ArgumentParser(
        prog="python -m mcp.waf_intelligence",
        description="Analyze Cloudflare WAF Terraform configs and produce curated security + compliance insights.",
    )
    parser.add_argument(
        "--file",
        "-f",
        required=True,
        help="Path to the Terraform WAF file (e.g. terraform/waf.tf)",
    )
    parser.add_argument(
        "--limit",
        "-n",
        type=int,
        default=3,
        help="Maximum number of high-priority insights to return (default: 3)",
    )
    parser.add_argument(
        "--format",
        "-o",
        choices=["text", "json"],
        default="text",
        help="Output format: text (human-readable) or json (machine-readable). Default: text.",
    )
    parser.add_argument(
        "--fail-on-error",
        action="store_true",
        help="Exit with non-zero code if any error-severity violations are found.",
    )

    args = parser.parse_args(argv)

    path = Path(args.file)
    if not path.exists():
        print(f"[error] file not found: {path}", file=sys.stderr)
        return 1

    intel = WAFIntelligence()
    insights = intel.analyze_and_recommend(str(path), limit=args.limit)

    if args.format == "json":
        payload = {
            "file": str(path),
            "insights": [_insight_to_dict(insight) for insight in insights],
        }
        print(json.dumps(payload, indent=2))
        if args.fail_on_error and _has_error(insights):
            print(
                "[waf_intel] error-severity violations present, failing as requested.",
                file=sys.stderr,
            )
            return 2
        return 0

    print(f"\nWAF Intelligence Report for: {path}\n{'-' * 72}")

    if not insights:
        print("No high-severity, high-confidence issues detected based on current heuristics.")
        return 0

    for idx, insight in enumerate(insights, start=1):
        print(f"\nInsight #{idx}")
        print("-" * 40)

        if insight.violation:
            violation = insight.violation
            print(f"Problem   : {violation.message}")
            print(f"Severity  : {violation.severity.upper()}")
            print(f"Confidence: {int(violation.confidence * 100)}%")
            if violation.location:
                print(f"Location  : {violation.location}")
            if violation.hint:
                print(f"Remediate : {violation.hint}")

        if insight.suggested_rule:
            rule = insight.suggested_rule
            print("\nSuggested Rule:")
            print(f"  Name    : {rule.name}")
            print(f"  Severity: {rule.severity.upper()}")
            print(f"  Impact  : {int(rule.impact_score * 100)}%")
            print(f"  Effort  : {int(rule.effort_score * 100)}%")
            print(f"  Summary : {rule.description}")

        if insight.mappings:
            print("\nCompliance Mapping:")
            for mapping in insight.mappings:
                print(f"  - {mapping.framework} {mapping.control_id}: {mapping.description}")

        print()

    if args.fail_on_error and _has_error(insights):
        print(
            "[waf_intel] error-severity violations present, failing as requested.",
            file=sys.stderr,
        )
        return 2

    return 0


def main() -> None:
    raise SystemExit(run_cli())


if __name__ == "__main__":
    main()
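The module is meant to be run as python -m mcp.waf_intelligence, but run_cli can also be called directly, e.g. as a CI gate; a sketch, assuming the config file exists:

# Hypothetical CI hook: machine-readable output plus a hard gate on
# error-severity violations (run_cli returns 2 in that case, per above).
from mcp.waf_intelligence.__main__ import run_cli

code = run_cli(["--file", "terraform/waf.tf", "--format", "json", "--fail-on-error"])
if code == 2:
    raise SystemExit("error-severity WAF violations found")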
mcp/waf_intelligence/analyzer.py (new file, 231 lines)
@@ -0,0 +1,231 @@
from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional


@dataclass
class RuleViolation:
    """Represents a potential issue in a WAF rule or configuration."""

    rule_id: Optional[str]
    message: str
    severity: str  # "info" | "warning" | "error"
    framework_refs: List[str] = field(default_factory=list)
    location: Optional[str] = None
    confidence: float = 0.5  # 0.0-1.0: how sure we are
    hint: Optional[str] = None  # short suggestion on how to fix


@dataclass
class AnalysisResult:
    """High-level result of analyzing one or more WAF configs."""

    source: str
    violations: List[RuleViolation] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)

    @property
    def has_issues(self) -> bool:
        return any(v.severity in ("warning", "error") for v in self.violations)

    def top_violations(
        self,
        *,
        min_severity: str = "warning",
        min_confidence: float = 0.7,
        limit: int = 5,
    ) -> List[RuleViolation]:
        """Return a small, high-quality subset of violations."""
        severity_order = {"info": 0, "warning": 1, "error": 2}
        min_level = severity_order.get(min_severity, 1)

        ranked = [
            v
            for v in self.violations
            if severity_order.get(v.severity, 0) >= min_level
            and v.confidence >= min_confidence
        ]

        ranked.sort(key=lambda v: (v.severity != "error", -v.confidence))
        return ranked[:limit]


class WAFRuleAnalyzer:
    """
    Analyze Cloudflare WAF rules from Terraform with a quality-first posture.
    """

    def analyze_file(
        self,
        path: str | Path,
        *,
        min_severity: str = "warning",
        min_confidence: float = 0.6,
    ) -> AnalysisResult:
        path = Path(path)
        text = path.read_text(encoding="utf-8")

        violations: List[RuleViolation] = []

        # Example heuristic: no managed rules present
        if "managed_rules" not in text:
            violations.append(
                RuleViolation(
                    rule_id=None,
                    message="No managed WAF rules detected in this file.",
                    severity="warning",
                    confidence=0.9,
                    framework_refs=["PCI-DSS 6.6", "OWASP-ASVS 13"],
                    location=str(path),
                    hint="Enable Cloudflare managed WAF rulesets (SQLi, XSS, RCE, bots) for this zone.",
                )
            )

        # Example heuristic: overly broad allow
        if '"*"' in text and "allow" in text:
            violations.append(
                RuleViolation(
                    rule_id=None,
                    message="Potentially overly broad allow rule detected ('*').",
                    severity="error",
                    confidence=0.85,
                    framework_refs=["Zero-Trust Principle"],
                    location=str(path),
                    hint="Narrow the rule expression to specific paths, methods, or IP ranges.",
                )
            )

        result = AnalysisResult(
            source=str(path),
            violations=violations,
            metadata={
                "file_size": path.stat().st_size,
                "heuristics_version": "0.2.0",
            },
        )

        result.violations = result.top_violations(
            min_severity=min_severity,
            min_confidence=min_confidence,
            limit=5,
        )
        return result

    def analyze_terraform_text(
        self,
        source_name: str,
        text: str,
        *,
        min_severity: str = "warning",
        min_confidence: float = 0.6,
    ) -> AnalysisResult:
        """Same as analyze_file but for already-loaded text."""
        tmp_path = Path(source_name)
        violations: List[RuleViolation] = []

        if "managed_rules" not in text:
            violations.append(
                RuleViolation(
                    rule_id=None,
                    message="No managed WAF rules detected in this snippet.",
                    severity="warning",
                    confidence=0.9,
                    framework_refs=["PCI-DSS 6.6", "OWASP-ASVS 13"],
                    location=source_name,
                    hint="Enable Cloudflare managed WAF rulesets (SQLi, XSS, RCE, bots) for this zone.",
                )
            )

        result = AnalysisResult(
            source=str(tmp_path),
            violations=violations,
            metadata={"heuristics_version": "0.2.0"},
        )

        result.violations = result.top_violations(
            min_severity=min_severity,
            min_confidence=min_confidence,
            limit=5,
        )
        return result

    def analyze_with_threat_intel(
        self,
        path: str | Path,
        threat_indicators: List[Any],
        *,
        min_severity: str = "warning",
        min_confidence: float = 0.6,
    ) -> AnalysisResult:
        """
        Enhanced analysis using threat intelligence data.

        Args:
            path: WAF config file path
            threat_indicators: List of ThreatIndicator objects from threat_intel module
            min_severity: Minimum severity to include
            min_confidence: Minimum confidence threshold

        Returns:
            AnalysisResult with violations informed by threat intel
        """
        # Start with base analysis
        base_result = self.analyze_file(path, min_severity=min_severity, min_confidence=min_confidence)

        path = Path(path)
        text = path.read_text(encoding="utf-8")
        text_lower = text.lower()

        # Check if threat indicators are addressed by existing rules
        critical_ips = [i for i in threat_indicators if i.indicator_type == "ip" and i.severity in ("critical", "high")]
        critical_patterns = [i for i in threat_indicators if i.indicator_type == "pattern" and i.severity in ("critical", "high")]

        # Check for IP blocking coverage
        if critical_ips:
            ip_block_present = "ip.src" in text_lower or "cf.client.ip" in text_lower
            if not ip_block_present:
                base_result.violations.append(
                    RuleViolation(
                        rule_id=None,
                        message=f"Threat intel identified {len(critical_ips)} high-risk IPs not addressed by WAF rules.",
                        severity="error",
                        confidence=0.85,
                        framework_refs=["Zero-Trust", "Threat Intelligence"],
                        location=str(path),
                        hint=f"Add IP blocking rules for identified threat actors. Sample IPs: {', '.join(i.value for i in critical_ips[:3])}",
                    )
                )

        # Check for pattern-based attack coverage
        attack_types_seen = set()
        for ind in critical_patterns:
            for tag in ind.tags:
                if tag in ("sqli", "xss", "rce", "path_traversal"):
                    attack_types_seen.add(tag)

        # Check managed ruleset coverage
        for attack_type in attack_types_seen:
            if attack_type not in text_lower and f'"{attack_type}"' not in text_lower:
                base_result.violations.append(
                    RuleViolation(
                        rule_id=None,
                        message=f"Threat intel detected {attack_type.upper()} attacks but no explicit protection found.",
                        severity="warning",
                        confidence=0.8,
                        framework_refs=["OWASP Top 10", "Threat Intelligence"],
                        location=str(path),
                        hint=f"Enable Cloudflare managed rules for {attack_type.upper()} protection.",
                    )
                )

        # Update metadata with threat intel stats
        base_result.metadata["threat_intel"] = {
            "critical_ips": len(critical_ips),
            "critical_patterns": len(critical_patterns),
            "attack_types_seen": list(attack_types_seen),
        }

        return base_result
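analyze_terraform_text makes the heuristics easy to exercise without touching disk; a minimal sketch with an inline fragment that trips the managed-rules check:

# Illustrative only: an HCL fragment with no managed_rules block should
# yield the "No managed WAF rules detected" warning at 0.9 confidence.
from mcp.waf_intelligence.analyzer import WAFRuleAnalyzer

snippet = '''
resource "cloudflare_ruleset" "custom" {
  kind  = "zone"
  phase = "http_request_firewall_custom"
}
'''

result = WAFRuleAnalyzer().analyze_terraform_text("inline.tf", snippet)
for v in result.violations:
    print(v.severity, v.confidence, v.message)
# warning 0.9 No managed WAF rules detected in this snippet.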
mcp/waf_intelligence/classifier.py (new file, 564 lines)
@@ -0,0 +1,564 @@
"""
Phase 7: ML-Based Threat Classifier

Uses simple but effective ML techniques for:
- Attack pattern classification (SQLi, XSS, RCE, etc.)
- Anomaly scoring based on request features
- Risk-level prediction for proposed rules

Designed to work offline without heavy dependencies.
Uses a scikit-learn-style interface but can run with a pure Python fallback.
"""
from __future__ import annotations

import hashlib
import json
import math
import re
from collections import Counter, defaultdict
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple

# Try to import sklearn, fall back to pure Python
try:
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.preprocessing import LabelEncoder
    HAS_SKLEARN = True
except ImportError:
    HAS_SKLEARN = False


@dataclass
class ClassificationResult:
    """Result of classifying a threat indicator or pattern."""

    label: str  # "sqli", "xss", "rce", "path_traversal", "scanner", "benign", etc.
    confidence: float  # 0.0-1.0
    probabilities: Dict[str, float] = field(default_factory=dict)
    features_used: List[str] = field(default_factory=list)
    explanation: str = ""


@dataclass
class AnomalyScore:
    """Anomaly detection result."""

    score: float  # 0.0-1.0 (higher = more anomalous)
    baseline_deviation: float  # standard deviations from mean
    anomalous_features: List[str] = field(default_factory=list)
    recommendation: str = ""


class FeatureExtractor:
    """Extract features from request/log data for ML classification."""

    # Character distribution features
    SPECIAL_CHARS = set("'\"<>(){}[];=&|`$\\")

    # Known attack signatures for feature detection
    SQLI_PATTERNS = [
        r"(?i)union\s+select",
        r"(?i)select\s+.*\s+from",
        r"(?i)insert\s+into",
        r"(?i)update\s+.*\s+set",
        r"(?i)delete\s+from",
        r"(?i)drop\s+table",
        r"(?i);\s*--",
        r"(?i)'\s*or\s+'?1'?\s*=\s*'?1",
        r"(?i)'\s*and\s+'?1'?\s*=\s*'?1",
    ]

    XSS_PATTERNS = [
        r"(?i)<script",
        r"(?i)javascript:",
        r"(?i)on\w+\s*=",
        r"(?i)alert\s*\(",
        r"(?i)document\.",
        r"(?i)window\.",
        r"(?i)eval\s*\(",
    ]

    RCE_PATTERNS = [
        r"(?i);\s*(?:cat|ls|id|whoami|pwd)",
        r"(?i)\|\s*(?:cat|ls|id|whoami)",
        r"(?i)`[^`]+`",
        r"(?i)\$\([^)]+\)",
        r"(?i)system\s*\(",
        r"(?i)exec\s*\(",
        r"(?i)passthru\s*\(",
    ]

    PATH_TRAVERSAL_PATTERNS = [
        r"\.\./",
        r"\.\.\\",
        r"(?i)etc/passwd",
        r"(?i)windows/system32",
    ]

    def extract(self, text: str) -> Dict[str, float]:
        """Extract numerical features from text."""
        features: Dict[str, float] = {}

        if not text:
            return features

        text_lower = text.lower()
        text_len = len(text)

        # Length features
        features["length"] = min(text_len / 1000, 1.0)  # normalized
        features["length_log"] = math.log1p(text_len) / 10

        # Character distribution
        special_count = sum(1 for c in text if c in self.SPECIAL_CHARS)
        features["special_char_ratio"] = special_count / max(text_len, 1)
        features["uppercase_ratio"] = sum(1 for c in text if c.isupper()) / max(text_len, 1)
        features["digit_ratio"] = sum(1 for c in text if c.isdigit()) / max(text_len, 1)

        # Entropy (randomness indicator)
        features["entropy"] = self._calculate_entropy(text)

        # Pattern-based features
        features["sqli_score"] = self._pattern_score(text, self.SQLI_PATTERNS)
        features["xss_score"] = self._pattern_score(text, self.XSS_PATTERNS)
        features["rce_score"] = self._pattern_score(text, self.RCE_PATTERNS)
        features["path_traversal_score"] = self._pattern_score(text, self.PATH_TRAVERSAL_PATTERNS)

        # Structural features
        features["quote_count"] = (text.count("'") + text.count('"')) / max(text_len, 1)
        features["paren_count"] = (text.count("(") + text.count(")")) / max(text_len, 1)
        features["bracket_count"] = (text.count("[") + text.count("]") + text.count("{") + text.count("}")) / max(text_len, 1)

        # Keyword presence
        features["has_select"] = 1.0 if "select" in text_lower else 0.0
        features["has_script"] = 1.0 if "<script" in text_lower else 0.0
        features["has_etc_passwd"] = 1.0 if "etc/passwd" in text_lower else 0.0

        return features

    def _calculate_entropy(self, text: str) -> float:
        """Calculate Shannon entropy of text."""
        if not text:
            return 0.0

        freq = Counter(text)
        length = len(text)
        entropy = 0.0

        for count in freq.values():
            prob = count / length
            if prob > 0:
                entropy -= prob * math.log2(prob)

        # Normalize to 0-1 range (max entropy for ASCII is ~7)
        return min(entropy / 7, 1.0)

    def _pattern_score(self, text: str, patterns: List[str]) -> float:
        """Calculate pattern match score."""
        matches = sum(1 for p in patterns if re.search(p, text))
        return min(matches / max(len(patterns), 1), 1.0)


class NaiveBayesClassifier:
    """
    Simple Naive Bayes classifier for attack type classification.
    Works with or without sklearn.
    """

    LABELS = ["sqli", "xss", "rce", "path_traversal", "scanner", "benign"]

    def __init__(self):
        self.feature_extractor = FeatureExtractor()
        self._trained = False

        # Training data (curated examples)
        self._training_data = self._get_training_data()

        # Feature statistics per class (for pure Python implementation)
        self._class_priors: Dict[str, float] = {}
        self._feature_means: Dict[str, Dict[str, float]] = defaultdict(dict)
        self._feature_vars: Dict[str, Dict[str, float]] = defaultdict(dict)

    def _get_training_data(self) -> List[Tuple[str, str]]:
        """Return curated training examples."""
        return [
            # SQLi examples
            ("' OR '1'='1", "sqli"),
            ("1; DROP TABLE users--", "sqli"),
            ("UNION SELECT * FROM passwords", "sqli"),
            ("admin'--", "sqli"),
            ("1' AND 1=1--", "sqli"),
            ("'; INSERT INTO users VALUES('hack','hack')--", "sqli"),

            # XSS examples
            ("<script>alert('xss')</script>", "xss"),
            ("<img src=x onerror=alert(1)>", "xss"),
            ("javascript:alert(document.cookie)", "xss"),
            ("<svg onload=alert(1)>", "xss"),
            ("'\"><script>alert('XSS')</script>", "xss"),

            # RCE examples
            ("; cat /etc/passwd", "rce"),
            ("| ls -la", "rce"),
            ("`id`", "rce"),
            ("$(whoami)", "rce"),
            ("; rm -rf /", "rce"),
            ("system('cat /etc/passwd')", "rce"),

            # Path traversal
            ("../../../etc/passwd", "path_traversal"),
            ("..\\..\\..\\windows\\system32\\config\\sam", "path_traversal"),
            ("/etc/passwd%00", "path_traversal"),
            ("....//....//etc/passwd", "path_traversal"),

            # Scanner signatures
            ("Mozilla/5.0 (compatible; Nmap Scripting Engine)", "scanner"),
            ("sqlmap/1.0", "scanner"),
            ("Nikto/2.1.5", "scanner"),
            ("masscan/1.0", "scanner"),

            # Benign examples
            ("/api/users/123", "benign"),
            ("Mozilla/5.0 (Windows NT 10.0; Win64; x64)", "benign"),
            ("/products?category=electronics&page=2", "benign"),
            ("GET /index.html HTTP/1.1", "benign"),
            ("/static/css/main.css", "benign"),
        ]

    def train(self) -> None:
        """Train the classifier on built-in examples."""
        # Extract features for all training data
        X: List[Dict[str, float]] = []
        y: List[str] = []

        for text, label in self._training_data:
            features = self.feature_extractor.extract(text)
            X.append(features)
            y.append(label)

        # Calculate class priors
        label_counts = Counter(y)
        total = len(y)
        for label, count in label_counts.items():
            self._class_priors[label] = count / total

        # Calculate feature means and variances per class
        all_features = set()
        for features in X:
            all_features.update(features.keys())

        for label in self.LABELS:
            class_features = [X[i] for i in range(len(X)) if y[i] == label]
            if not class_features:
                continue

            for feature in all_features:
                values = [f.get(feature, 0.0) for f in class_features]
                mean = sum(values) / len(values)
                var = sum((v - mean) ** 2 for v in values) / len(values)
                self._feature_means[label][feature] = mean
                self._feature_vars[label][feature] = max(var, 1e-6)  # avoid division by zero

        self._trained = True

    def classify(self, text: str) -> ClassificationResult:
        """Classify text into attack category."""
        if not self._trained:
            self.train()

        features = self.feature_extractor.extract(text)

        # Calculate log probabilities for each class
        log_probs: Dict[str, float] = {}

        for label in self.LABELS:
            if label not in self._class_priors:
                continue

            log_prob = math.log(self._class_priors[label])

            for feature, value in features.items():
                if feature in self._feature_means[label]:
                    mean = self._feature_means[label][feature]
                    var = self._feature_vars[label][feature]
                    # Gaussian likelihood
                    log_prob += -0.5 * math.log(2 * math.pi * var)
                    log_prob += -0.5 * ((value - mean) ** 2) / var

            log_probs[label] = log_prob

        # Convert to probabilities via softmax
        max_log_prob = max(log_probs.values()) if log_probs else 0
        exp_probs = {k: math.exp(v - max_log_prob) for k, v in log_probs.items()}
        total = sum(exp_probs.values())
        probs = {k: v / total for k, v in exp_probs.items()}

        # Find best label
        best_label = max(probs, key=probs.get) if probs else "benign"
        confidence = probs.get(best_label, 0.0)

        # Generate explanation
        explanation = self._generate_explanation(text, features, best_label)

        return ClassificationResult(
            label=best_label,
            confidence=confidence,
            probabilities=probs,
            features_used=list(features.keys()),
            explanation=explanation,
        )

    def _generate_explanation(self, text: str, features: Dict[str, float], label: str) -> str:
        """Generate human-readable explanation for classification."""
        reasons = []

        if features.get("sqli_score", 0) > 0.3:
            reasons.append("SQL injection patterns detected")
        if features.get("xss_score", 0) > 0.3:
            reasons.append("XSS patterns detected")
        if features.get("rce_score", 0) > 0.3:
            reasons.append("Command injection patterns detected")
        if features.get("path_traversal_score", 0) > 0.3:
            reasons.append("Path traversal patterns detected")
        if features.get("special_char_ratio", 0) > 0.2:
            reasons.append("High special character ratio")
        if features.get("entropy", 0) > 0.7:
            reasons.append("High entropy (possible encoding/obfuscation)")

        if not reasons:
            reasons.append(f"General pattern matching suggests {label}")

        return "; ".join(reasons)


class AnomalyDetector:
    """
    Detect anomalous requests based on baseline behavior.
    Uses statistical methods (z-score, IQR) without requiring ML libraries.
    """

    def __init__(self):
        self.feature_extractor = FeatureExtractor()
        self._baseline_stats: Dict[str, Dict[str, float]] = {}
        self._observations: List[Dict[str, float]] = []

    def add_observation(self, text: str) -> None:
        """Add an observation to the baseline."""
        features = self.feature_extractor.extract(text)
        self._observations.append(features)

        # Recalculate baseline after enough observations
        if len(self._observations) >= 10:
            self._update_baseline()

    def _update_baseline(self) -> None:
        """Update baseline statistics."""
        if not self._observations:
            return

        all_features = set()
        for obs in self._observations:
            all_features.update(obs.keys())

        for feature in all_features:
            values = [obs.get(feature, 0.0) for obs in self._observations]
            mean = sum(values) / len(values)
            var = sum((v - mean) ** 2 for v in values) / len(values)
            std = math.sqrt(var) if var > 0 else 0.001

            self._baseline_stats[feature] = {
                "mean": mean,
                "std": std,
                "min": min(values),
                "max": max(values),
            }

    def score(self, text: str) -> AnomalyScore:
        """Score how anomalous a request is."""
        features = self.feature_extractor.extract(text)

        if not self._baseline_stats:
            # No baseline yet, use heuristics
            return self._heuristic_score(features)

        z_scores: Dict[str, float] = {}
        anomalous_features: List[str] = []

        for feature, value in features.items():
            if feature in self._baseline_stats:
                stats = self._baseline_stats[feature]
                z = (value - stats["mean"]) / stats["std"]
                z_scores[feature] = abs(z)

                if abs(z) > 2:  # More than 2 std deviations
                    anomalous_features.append(f"{feature} (z={z:.2f})")

        # Overall anomaly score (average of z-scores, normalized)
        if z_scores:
            avg_z = sum(z_scores.values()) / len(z_scores)
            max_z = max(z_scores.values())
            score = min(max_z / 5, 1.0)  # Normalize to 0-1
            baseline_deviation = avg_z
        else:
            score = 0.5
            baseline_deviation = 0.0

        # Generate recommendation
        if score > 0.8:
            recommendation = "BLOCK: Highly anomalous, likely attack"
        elif score > 0.5:
            recommendation = "CHALLENGE: Moderately anomalous, requires verification"
        elif score > 0.3:
            recommendation = "LOG: Slightly unusual, monitor closely"
        else:
            recommendation = "ALLOW: Within normal parameters"

        return AnomalyScore(
            score=score,
            baseline_deviation=baseline_deviation,
            anomalous_features=anomalous_features,
            recommendation=recommendation,
        )

    def _heuristic_score(self, features: Dict[str, float]) -> AnomalyScore:
        """Score based on heuristics when no baseline exists."""
        score = 0.0
        anomalous_features: List[str] = []

        # Check for attack indicators
        for attack_type in ["sqli_score", "xss_score", "rce_score", "path_traversal_score"]:
            if features.get(attack_type, 0) > 0.3:
                score += 0.25
                anomalous_features.append(attack_type)

        # Check for suspicious characteristics
        if features.get("special_char_ratio", 0) > 0.15:
            score += 0.15
            anomalous_features.append("high_special_chars")

        if features.get("entropy", 0) > 0.8:
            score += 0.1
            anomalous_features.append("high_entropy")

        score = min(score, 1.0)

        if score > 0.7:
            recommendation = "BLOCK: Multiple attack indicators"
        elif score > 0.4:
            recommendation = "CHALLENGE: Suspicious characteristics"
        else:
            recommendation = "ALLOW: No obvious threats"

        return AnomalyScore(
            score=score,
            baseline_deviation=0.0,
            anomalous_features=anomalous_features,
            recommendation=recommendation,
        )


class ThreatClassifier:
    """
    High-level threat classifier combining multiple techniques.

    Usage:
        classifier = ThreatClassifier()
        result = classifier.classify("' OR '1'='1")
        print(f"Label: {result.label}, Confidence: {result.confidence}")
    """

    def __init__(self, model_path: Optional[Path] = None):
        self.naive_bayes = NaiveBayesClassifier()
        self.anomaly_detector = AnomalyDetector()
        self.model_path = model_path

        # Train on startup
        self.naive_bayes.train()

    def classify(self, text: str) -> ClassificationResult:
        """Classify a request/pattern."""
        return self.naive_bayes.classify(text)

    def score_anomaly(self, text: str) -> AnomalyScore:
        """Score how anomalous a request is."""
        return self.anomaly_detector.score(text)

    def analyze(self, text: str) -> Dict[str, Any]:
        """Full analysis combining classification and anomaly detection."""
        classification = self.classify(text)
        anomaly = self.score_anomaly(text)

        return {
            "classification": {
                "label": classification.label,
                "confidence": classification.confidence,
                "probabilities": classification.probabilities,
                "explanation": classification.explanation,
            },
            "anomaly": {
                "score": anomaly.score,
                "baseline_deviation": anomaly.baseline_deviation,
                "anomalous_features": anomaly.anomalous_features,
                "recommendation": anomaly.recommendation,
            },
            "risk_level": self._compute_risk_level(classification, anomaly),
        }

    def _compute_risk_level(
        self,
        classification: ClassificationResult,
        anomaly: AnomalyScore,
    ) -> str:
        """Compute overall risk level."""
        # High-risk attack types
        high_risk_labels = {"sqli", "xss", "rce"}

        if classification.label in high_risk_labels and classification.confidence > 0.7:
            return "critical"

        if classification.label in high_risk_labels and classification.confidence > 0.4:
            return "high"

        if anomaly.score > 0.7:
            return "high"

        if classification.label == "scanner":
            return "medium"

        if anomaly.score > 0.4:
            return "medium"

        return "low"


# CLI for testing
if __name__ == "__main__":
    import sys

    classifier = ThreatClassifier()

    test_inputs = [
        "' OR '1'='1",
        "<script>alert('xss')</script>",
        "; cat /etc/passwd",
        "../../../etc/passwd",
        "Mozilla/5.0 (Windows NT 10.0)",
        "/api/users/123",
    ]

    if len(sys.argv) > 1:
        test_inputs = sys.argv[1:]

    print("\n🤖 ML Threat Classifier Test")
    print("=" * 60)

    for text in test_inputs:
        result = classifier.analyze(text)
        print(f"\nInput: {text[:50]}...")
        print(f"  Label: {result['classification']['label']}")
        print(f"  Confidence: {result['classification']['confidence']:.2%}")
        print(f"  Risk Level: {result['risk_level'].upper()}")
        print(f"  Anomaly Score: {result['anomaly']['score']:.2%}")
        print(f"  Recommendation: {result['anomaly']['recommendation']}")
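The anomaly path is baseline-driven: until enough observations arrive, score() falls back to _heuristic_score. A minimal sketch of seeding a baseline from benign traffic and then scoring a hostile payload:

# Illustrative only: benign API paths keep sqli_score near zero, so the
# hostile payload lands many standard deviations out and scores high.
from mcp.waf_intelligence.classifier import AnomalyDetector

detector = AnomalyDetector()
for n in range(12):  # >= 10 observations triggers baseline updates
    detector.add_observation(f"/api/users/{n}")

result = detector.score("' OR '1'='1; DROP TABLE users--")
print(result.score, result.recommendation)
# Expect a high score and a BLOCK recommendation for this input.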
mcp/waf_intelligence/compliance.py (new file, 83 lines)
@@ -0,0 +1,83 @@
from __future__ import annotations

from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class FrameworkMapping:
    """
    Mapping between a WAF concept (e.g. 'SQLi protection') and references
    in one or more compliance frameworks.
    """

    control_id: str
    framework: str  # e.g. "PCI-DSS", "NIST-800-53", "GDPR"
    description: str
    references: List[str]


class ComplianceMapper:
    """
    Map WAF rules / violations to compliance frameworks.

    This starts as a simple static lookup table that we can extend over time.
    """

    def __init__(self) -> None:
        self._mappings: Dict[str, List[FrameworkMapping]] = self._build_default_mappings()

    def _build_default_mappings(self) -> Dict[str, List[FrameworkMapping]]:
        return {
            "sqli_protection": [
                FrameworkMapping(
                    control_id="6.6",
                    framework="PCI-DSS",
                    description="Ensure web-facing applications are protected against attacks such as SQL injection.",
                    references=["PCI-DSS v4.0 6.6", "OWASP Top 10 - A03:2021"],
                )
            ],
            "xss_protection": [
                FrameworkMapping(
                    control_id="A5",
                    framework="OWASP-ASVS",
                    description="Verify that all user-controllable input is properly encoded or escaped.",
                    references=["OWASP Top 10 - A3: Cross-Site Scripting"],
                )
            ],
            "baseline_waf": [
                FrameworkMapping(
                    control_id="13",
                    framework="OWASP-ASVS",
                    description="Centralized input validation, filtering, and WAF as compensating control.",
                    references=["OWASP-ASVS 13", "PCI-DSS 6.4.1"],
                )
            ],
        }

    def map_concept(self, concept: str) -> List[FrameworkMapping]:
        """
        Map a high-level WAF concept to compliance controls.

        Example concepts:
        - "sqli_protection"
        - "xss_protection"
        - "baseline_waf"
        """
        return self._mappings.get(concept, [])

    def best_effort_from_violation(self, message: str) -> List[FrameworkMapping]:
        """
        Try to infer framework mappings from a violation message string.
        This allows the analyzer to stay dumb while still attaching controls.
        """
        msg = message.lower()

        if "sql" in msg and "inject" in msg:
            return self.map_concept("sqli_protection")
        if "xss" in msg or "cross-site scripting" in msg:
            return self.map_concept("xss_protection")
        if "waf" in msg or "managed rules" in msg:
            return self.map_concept("baseline_waf")

        return []
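best_effort_from_violation is keyed on message substrings, so the analyzer's messages flow straight into controls; a quick sketch:

# Illustrative only: this analyzer message matches on the "waf" substring
# and maps to the baseline_waf concept (OWASP-ASVS 13 / PCI-DSS 6.4.1).
from mcp.waf_intelligence.compliance import ComplianceMapper

mapper = ComplianceMapper()
mappings = mapper.best_effort_from_violation(
    "No managed WAF rules detected in this file."
)
for m in mappings:
    print(m.framework, m.control_id, m.references)
# OWASP-ASVS 13 ['OWASP-ASVS 13', 'PCI-DSS 6.4.1']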
mcp/waf_intelligence/generator.py (new file, 120 lines)
@@ -0,0 +1,120 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Dict, List, Optional


@dataclass
class GeneratedRule:
    """Represents a Terraform WAF rule we propose to add."""

    name: str
    description: str
    terraform_snippet: str
    severity: str  # "low" | "medium" | "high" | "critical"
    tags: List[str] = field(default_factory=list)
    notes: Optional[str] = None
    impact_score: float = 0.5  # 0-1: estimated security impact
    effort_score: float = 0.5  # 0-1: estimated effort to implement


class WAFRuleGenerator:
    """
    Generate Cloudflare WAF Terraform rules with a quality-first strategy.
    """

    def generate_from_scenario(
        self,
        scenario: str,
        *,
        limit: int = 3,
        max_effort: float = 0.8,
    ) -> List[GeneratedRule]:
        """
        Return a small set of high-impact, reasonable-effort rules.
        """
        scenario_lower = scenario.lower()
        candidates: List[GeneratedRule] = []

        if "sql injection" in scenario_lower or "sqli" in scenario_lower:
            candidates.append(self._sql_injection_rule())

        if "xss" in scenario_lower:
            candidates.append(self._xss_rule())

        # If nothing matched, fall back to the baseline
        if not candidates:
            candidates.append(self._baseline_waf_rule())

        # Filter by effort & sort by impact
        filtered = [r for r in candidates if r.effort_score <= max_effort]
        if not filtered:
            filtered = candidates

        filtered.sort(key=lambda r: (-r.impact_score, r.effort_score))
        return filtered[:limit]

    def _sql_injection_rule(self) -> GeneratedRule:
        snippet = '''resource "cloudflare_ruleset" "waf_sqli_protection" {
  # TODO: adjust zone_id / account_id and phase for your setup
  name  = "WAF - SQLi protection"
  kind  = "zone"
  phase = "http_request_firewall_managed"

  rules = [{
    action     = "block"
    expression = "(cf.waf.ruleset eq \\"sqli\\")"
    enabled    = true
  }]
}
'''
        return GeneratedRule(
            name="waf_sqli_protection",
            description="Enable blocking against SQL injection attempts using Cloudflare managed rules.",
            terraform_snippet=snippet,
            severity="high",
            tags=["sqli", "managed_rules", "waf"],
            impact_score=0.95,
            effort_score=0.3,
        )

    def _xss_rule(self) -> GeneratedRule:
        snippet = '''resource "cloudflare_ruleset" "waf_xss_protection" {
  name  = "WAF - XSS protection"
  kind  = "zone"
  phase = "http_request_firewall_managed"

  rules = [{
    action     = "block"
    expression = "(cf.waf.ruleset eq \\"xss\\")"
    enabled    = true
  }]
}
'''
        return GeneratedRule(
            name="waf_xss_protection",
            description="Enable blocking against cross-site scripting (XSS) attacks.",
            terraform_snippet=snippet,
            severity="high",
            tags=["xss", "managed_rules", "waf"],
            impact_score=0.9,
            effort_score=0.3,
        )

    def _baseline_waf_rule(self) -> GeneratedRule:
        snippet = '''# Baseline WAF hardening (placeholder - customize for your environment)
# Consider enabling Cloudflare managed WAF rulesets for:
# - SQLi
# - XSS
# - RCE
# - Bot protection
'''
        return GeneratedRule(
            name="waf_baseline_hardening",
            description="Baseline recommendation to enable managed WAF rulesets.",
            terraform_snippet=snippet,
            severity="medium",
            tags=["baseline", "waf"],
            impact_score=0.7,
            effort_score=0.1,
        )
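A sketch of the scenario-to-rule path: a "sqli"/"sql injection" mention selects the SQLi rule, and impact_score/effort_score drive the ordering:

# Illustrative only: a scenario mentioning SQL injection yields the
# managed-rules blocking snippet, ranked by impact then effort.
from mcp.waf_intelligence.generator import WAFRuleGenerator

rules = WAFRuleGenerator().generate_from_scenario(
    "Login form is being probed for SQL injection", limit=1
)
print(rules[0].name)  # waf_sqli_protection
print(rules[0].terraform_snippet)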
mcp/waf_intelligence/orchestrator.py (new file, 370 lines; diff truncated below)
@@ -0,0 +1,370 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from mcp.waf_intelligence.analyzer import AnalysisResult, RuleViolation, WAFRuleAnalyzer
|
||||
from mcp.waf_intelligence.compliance import ComplianceMapper, FrameworkMapping
|
||||
from mcp.waf_intelligence.generator import GeneratedRule, WAFRuleGenerator
|
||||
|
||||
# Optional advanced modules (Phase 7)
|
||||
try:
|
||||
from mcp.waf_intelligence.threat_intel import (
|
||||
ThreatIntelCollector,
|
||||
ThreatIntelReport,
|
||||
ThreatIndicator,
|
||||
)
|
||||
_HAS_THREAT_INTEL = True
|
||||
except ImportError:
|
||||
_HAS_THREAT_INTEL = False
|
||||
ThreatIntelCollector = None
|
||||
|
||||
try:
|
||||
from mcp.waf_intelligence.classifier import (
|
||||
ThreatClassifier,
|
||||
ClassificationResult,
|
||||
)
|
||||
_HAS_CLASSIFIER = True
|
||||
except ImportError:
|
||||
_HAS_CLASSIFIER = False
|
||||
ThreatClassifier = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class WAFInsight:
|
||||
"""Single high-quality insight across analysis + generation + compliance."""
|
||||
|
||||
violation: RuleViolation | None
|
||||
suggested_rule: GeneratedRule | None
|
||||
mappings: List[FrameworkMapping]
|
||||
|
||||
|
||||
@dataclass
|
||||
class ThreatAssessment:
|
||||
"""Phase 7: Comprehensive threat assessment result."""
|
||||
|
||||
analysis_result: Optional[AnalysisResult] = None
|
||||
threat_report: Optional[Any] = None # ThreatIntelReport when available
|
||||
classification_summary: Dict[str, int] = field(default_factory=dict)
|
||||
risk_score: float = 0.0
|
||||
recommended_actions: List[str] = field(default_factory=list)
|
||||
generated_at: datetime = field(default_factory=datetime.utcnow)
|
||||
|
||||
@property
|
||||
def risk_level(self) -> str:
|
||||
if self.risk_score >= 0.8:
|
||||
return "critical"
|
||||
elif self.risk_score >= 0.6:
|
||||
return "high"
|
||||
elif self.risk_score >= 0.4:
|
||||
return "medium"
|
||||
else:
|
||||
return "low"
|
||||
|
||||
|
||||
class WAFIntelligence:
|
||||
"""
|
||||
Quality-first orchestration layer:
|
||||
- analyze WAF config
|
||||
- propose a few rules
|
||||
- attach compliance mappings
|
||||
- Phase 7: integrate threat intel and ML classification
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
workspace_path: Optional[str] = None,
|
||||
enable_threat_intel: bool = True,
|
||||
enable_ml_classifier: bool = True,
|
||||
) -> None:
|
||||
self.workspace = Path(workspace_path) if workspace_path else Path.cwd()
|
||||
|
||||
# Core components
|
||||
self.analyzer = WAFRuleAnalyzer()
|
||||
self.generator = WAFRuleGenerator()
|
||||
self.mapper = ComplianceMapper()
|
||||
|
||||
# Phase 7 components (optional)
|
||||
self.threat_intel: Optional[Any] = None
|
||||
self.classifier: Optional[Any] = None
|
||||
|
||||
if enable_threat_intel and _HAS_THREAT_INTEL:
|
||||
try:
|
||||
self.threat_intel = ThreatIntelCollector()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if enable_ml_classifier and _HAS_CLASSIFIER:
|
||||
try:
|
||||
self.classifier = ThreatClassifier()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def analyze_and_recommend(
|
||||
self,
|
||||
path: str,
|
||||
*,
|
||||
limit: int = 3,
|
||||
min_severity: str = "warning",
|
||||
) -> List[WAFInsight]:
|
||||
analysis: AnalysisResult = self.analyzer.analyze_file(
|
||||
path,
|
||||
min_severity=min_severity,
|
||||
)
|
||||
top_violations = analysis.top_violations(
|
||||
min_severity=min_severity,
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
insights: List[WAFInsight] = []
|
||||
|
||||
for violation in top_violations:
|
||||
mappings = self.mapper.best_effort_from_violation(violation.message)
|
||||
|
||||
scenario = violation.message
|
||||
rules = self.generator.generate_from_scenario(scenario, limit=1)
|
||||
suggested = rules[0] if rules else None
|
||||
|
||||
insights.append(
|
||||
WAFInsight(
|
||||
violation=violation,
|
||||
suggested_rule=suggested,
|
||||
mappings=mappings,
|
||||
)
|
||||
)
|
||||
|
||||
return insights
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
# Phase 7: Advanced threat intelligence methods
|
||||
# ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
def collect_threat_intel(
|
||||
self,
|
||||
log_paths: Optional[List[str]] = None,
|
||||
max_indicators: int = 100,
|
||||
) -> Optional[Any]:
|
||||
"""
|
||||
Collect threat intelligence from logs and external feeds.
|
||||
|
||||
Args:
|
||||
log_paths: Paths to Cloudflare log files
|
||||
max_indicators: Maximum indicators to collect
|
||||
|
||||
Returns:
|
||||
ThreatIntelReport or None if unavailable
|
||||
"""
|
||||
if not self.threat_intel:
|
||||
return None
|
||||
|
||||
# Default log paths
|
||||
if log_paths is None:
|
||||
log_paths = [
|
||||
str(self.workspace / "logs"),
|
||||
"/var/log/cloudflare",
|
||||
]
|
||||
|
||||
return self.threat_intel.collect(
|
||||
log_paths=log_paths,
|
||||
max_indicators=max_indicators,
|
||||
)
|
||||
|
||||
def classify_threat(self, payload: str) -> Optional[Any]:
|
||||
"""
|
||||
Classify a payload using ML classifier.
|
||||
|
||||
Args:
|
||||
payload: Request payload to classify
|
||||
|
||||
Returns:
|
||||
ClassificationResult or None
|
||||
"""
|
||||
if not self.classifier:
|
||||
return None
|
||||
|
||||
return self.classifier.classify(payload)
|
||||
|
||||
    def full_assessment(
        self,
        waf_config_path: Optional[str] = None,
        log_paths: Optional[List[str]] = None,
        include_threat_intel: bool = True,
    ) -> ThreatAssessment:
        """
        Phase 7: Perform a comprehensive threat assessment.

        Combines:
        - WAF configuration analysis
        - Threat intelligence collection
        - ML classification summary
        - Risk scoring

        Args:
            waf_config_path: Path to WAF Terraform file
            log_paths: Paths to log files
            include_threat_intel: Whether to collect threat intel

        Returns:
            ThreatAssessment with full analysis results
        """
        assessment = ThreatAssessment()
        risk_factors: List[float] = []
        recommendations: List[str] = []

        # 1. Analyze WAF configuration
        if waf_config_path is None:
            waf_config_path = str(self.workspace / "terraform" / "waf.tf")

        if Path(waf_config_path).exists():
            assessment.analysis_result = self.analyzer.analyze_file(
                waf_config_path,
                min_severity="info",
            )

            # Calculate risk from violations
            severity_weights = {"error": 0.8, "warning": 0.5, "info": 0.2}
            for violation in assessment.analysis_result.violations:
                weight = severity_weights.get(violation.severity, 0.3)
                risk_factors.append(weight)

            # Generate recommendations
            critical_count = sum(
                1 for v in assessment.analysis_result.violations
                if v.severity == "error"
            )
            if critical_count > 0:
                recommendations.append(
                    f"🔴 Fix {critical_count} critical WAF configuration issues"
                )

        # 2. Collect threat intelligence
        if include_threat_intel and self.threat_intel:
            try:
                assessment.threat_report = self.collect_threat_intel(
                    log_paths=log_paths,
                    max_indicators=50,
                )

                if assessment.threat_report:
                    indicators = assessment.threat_report.indicators

                    # Count by severity
                    severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0}
                    for ind in indicators:
                        sev = getattr(ind, "severity", "low")
                        severity_counts[sev] = severity_counts.get(sev, 0) + 1

                    # Add to classification summary
                    assessment.classification_summary["threat_indicators"] = len(indicators)
                    assessment.classification_summary.update(severity_counts)

                    # Calculate threat intel risk
                    if indicators:
                        critical_ratio = severity_counts["critical"] / len(indicators)
                        high_ratio = severity_counts["high"] / len(indicators)
                        risk_factors.append(critical_ratio * 0.9 + high_ratio * 0.7)

                    if severity_counts["critical"] > 0:
                        recommendations.append(
                            f"🚨 Block {severity_counts['critical']} critical threat IPs immediately"
                        )
            except Exception:
                pass

        # 3. ML classification summary (from any collected data)
        if self.classifier and assessment.threat_report:
            try:
                attack_types = {"sqli": 0, "xss": 0, "rce": 0, "clean": 0, "unknown": 0}

                indicators = assessment.threat_report.indicators
                pattern_indicators = [
                    i for i in indicators
                    if getattr(i, "indicator_type", "") == "pattern"
                ]

                for ind in pattern_indicators[:20]:  # Sample first 20
                    result = self.classifier.classify(ind.value)
                    if result:
                        label = result.label
                        attack_types[label] = attack_types.get(label, 0) + 1

                assessment.classification_summary["ml_classifications"] = attack_types

                # Add ML risk factor
                dangerous = attack_types.get("sqli", 0) + attack_types.get("rce", 0)
                if dangerous > 5:
                    risk_factors.append(0.8)
                    recommendations.append(
                        f"⚠️ ML detected {dangerous} dangerous attack patterns"
                    )
            except Exception:
                pass

        # 4. Final risk score: mean of contributing factors, clamped to 1.0
        if risk_factors:
            assessment.risk_score = min(1.0, sum(risk_factors) / len(risk_factors))
        else:
            assessment.risk_score = 0.3  # Baseline risk

        assessment.recommended_actions = recommendations

        return assessment

    def generate_gitops_proposals(
        self,
        threat_report: Optional[Any] = None,
        max_proposals: int = 5,
    ) -> List[Dict[str, Any]]:
        """
        Generate GitOps-ready rule proposals.

        Args:
            threat_report: ThreatIntelReport to use
            max_proposals: Maximum proposals to generate

        Returns:
            List of proposal dicts ready for MR creation
        """
        proposals: List[Dict[str, Any]] = []

        if not threat_report:
            return proposals

        try:
            # Import proposer dynamically
            from gitops.waf_rule_proposer import WAFRuleProposer

            proposer = WAFRuleProposer(workspace_path=str(self.workspace))
            batch = proposer.generate_proposals(
                threat_report=threat_report,
                max_proposals=max_proposals,
            )

            for proposal in batch.proposals:
                proposals.append({
                    "name": proposal.rule_name,
                    "type": proposal.rule_type,
                    "severity": proposal.severity,
                    "confidence": proposal.confidence,
                    "terraform": proposal.terraform_code,
                    "justification": proposal.justification,
                    "auto_deploy": proposal.auto_deploy_eligible,
                })
        except ImportError:
            pass

        return proposals

    @property
    def capabilities(self) -> Dict[str, bool]:
        """Report available capabilities."""
        return {
            "core_analysis": True,
            "rule_generation": True,
            "compliance_mapping": True,
            "threat_intel": self.threat_intel is not None,
            "ml_classification": self.classifier is not None,
        }
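

# Example self-check (a sketch: exercises only the public surface defined
# above; run from the workspace root so default paths like terraform/waf.tf
# resolve).
if __name__ == "__main__":
    wi = WAFIntelligence()
    print("Capabilities:", wi.capabilities)
    assessment = wi.full_assessment(include_threat_intel=False)
    print(f"Risk score: {assessment.risk_score:.2f}")
    for action in assessment.recommended_actions:
        print(" -", action)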
279
mcp/waf_intelligence/server.py
Executable file
@@ -0,0 +1,279 @@
#!/usr/bin/env python3
"""
WAF Intelligence MCP Server for VS Code Copilot.

This implements the Model Context Protocol (MCP) stdio interface
so VS Code can communicate with your WAF Intelligence system.
"""

import json
import sys
from pathlib import Path

# Make the workspace root (the directory containing the `mcp` package)
# importable, rather than hard-coding a developer-specific absolute path.
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))

from mcp.waf_intelligence.orchestrator import WAFIntelligence
from mcp.waf_intelligence.analyzer import WAFRuleAnalyzer


class WAFIntelligenceMCPServer:
    """MCP Server wrapper for WAF Intelligence."""

    def __init__(self):
        self.waf = WAFIntelligence()
        self.analyzer = WAFRuleAnalyzer()

    def get_capabilities(self) -> dict:
        """Return server capabilities."""
        return {
            "tools": [
                {
                    "name": "waf_analyze",
                    "description": "Analyze WAF logs and detect attack patterns",
                    "inputSchema": {
                        "type": "object",
                        "properties": {
                            "log_file": {
                                "type": "string",
                                "description": "Path to WAF log file (optional)"
                            },
                            "zone_id": {
                                "type": "string",
                                "description": "Cloudflare zone ID (optional)"
                            }
                        }
                    }
                },
                {
                    "name": "waf_assess",
                    "description": "Run full security assessment with threat intel and ML classification",
                    "inputSchema": {
                        "type": "object",
                        "properties": {
                            "zone_id": {
                                "type": "string",
                                "description": "Cloudflare zone ID"
                            }
                        },
                        "required": ["zone_id"]
                    }
                },
                {
                    "name": "waf_generate_rules",
                    "description": "Generate Terraform WAF rules from threat intelligence",
                    "inputSchema": {
                        "type": "object",
                        "properties": {
                            "zone_id": {
                                "type": "string",
                                "description": "Cloudflare zone ID"
                            },
                            "min_confidence": {
                                "type": "number",
                                "description": "Minimum confidence threshold (0-1)",
                                "default": 0.7
                            }
                        },
                        "required": ["zone_id"]
                    }
                },
                {
                    "name": "waf_capabilities",
                    "description": "List available WAF Intelligence capabilities",
                    "inputSchema": {
                        "type": "object",
                        "properties": {}
                    }
                }
            ]
        }

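    # Example exchange (sketch): a client writes one JSON-RPC line to stdin,
    #
    #   {"jsonrpc": "2.0", "id": 3, "method": "tools/call",
    #    "params": {"name": "waf_capabilities", "arguments": {}}}
    #
    # and the loop in run() routes it to handle_tool_call() below; the reply's
    # "content" entries carry JSON-encoded text.
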
    def handle_tool_call(self, name: str, arguments: dict) -> dict:
        """Handle a tool invocation."""
        try:
            if name == "waf_capabilities":
                return {
                    "content": [
                        {
                            "type": "text",
                            "text": json.dumps({
                                "capabilities": self.waf.capabilities,
                                "status": "operational"
                            }, indent=2)
                        }
                    ]
                }

            elif name == "waf_analyze":
                log_file = arguments.get("log_file")
                zone_id = arguments.get("zone_id")

                if log_file:
                    result = self.analyzer.analyze_log_file(log_file)
                else:
                    result = {
                        "message": "No log file provided. Use zone_id for live analysis.",
                        "capabilities": self.waf.capabilities
                    }

                return {
                    "content": [
                        {"type": "text", "text": json.dumps(result, indent=2, default=str)}
                    ]
                }

            elif name == "waf_assess":
                zone_id = arguments.get("zone_id")
                # full_assessment uses workspace paths, not zone_id
                assessment = self.waf.full_assessment(
                    include_threat_intel=True
                )
                # Build result from ThreatAssessment dataclass
                result = {
                    "zone_id": zone_id,
                    "risk_score": assessment.risk_score,
                    "risk_level": assessment.risk_level,
                    "classification_summary": assessment.classification_summary,
                    "recommended_actions": assessment.recommended_actions[:10],  # Top 10
                    "has_analysis": assessment.analysis_result is not None,
                    "has_threat_intel": assessment.threat_report is not None,
                    "generated_at": str(assessment.generated_at)
                }

                return {
                    "content": [
                        {"type": "text", "text": json.dumps(result, indent=2, default=str)}
                    ]
                }

            elif name == "waf_generate_rules":
                zone_id = arguments.get("zone_id")
                min_confidence = arguments.get("min_confidence", 0.7)

                # Generate proposals (doesn't use zone_id directly)
                proposals = self.waf.generate_gitops_proposals(
                    max_proposals=5
                )

                result = {
                    "zone_id": zone_id,
                    "min_confidence": min_confidence,
                    "proposals_count": len(proposals),
                    "proposals": proposals
                }

                return {
                    "content": [
                        {"type": "text", "text": json.dumps(result, indent=2, default=str) if proposals else "No rules generated (no threat data available)"}
                    ]
                }

            else:
                return {
                    "content": [
                        {"type": "text", "text": f"Unknown tool: {name}"}
                    ],
                    "isError": True
                }

        except Exception as e:
            return {
                "content": [
                    {"type": "text", "text": f"Error: {str(e)}"}
                ],
                "isError": True
            }

    def run(self):
        """Run the MCP server (stdio mode)."""
        # The MCP handshake is client-driven: the "initialize" request is
        # answered inside the loop below, so nothing is emitted at startup.

        # Main loop - read JSON-RPC messages from stdin
        for line in sys.stdin:
            try:
                message = json.loads(line.strip())

                if message.get("method") == "initialize":
                    response = {
                        "jsonrpc": "2.0",
                        "id": message.get("id"),
                        "result": {
                            "protocolVersion": "2024-11-05",
                            "serverInfo": {
                                "name": "waf-intelligence",
                                "version": "1.0.0"
                            },
                            "capabilities": {
                                "tools": {}
                            }
                        }
                    }
                    print(json.dumps(response), flush=True)

                elif message.get("method") == "tools/list":
                    response = {
                        "jsonrpc": "2.0",
                        "id": message.get("id"),
                        "result": self.get_capabilities()
                    }
                    print(json.dumps(response), flush=True)

                elif message.get("method") == "tools/call":
                    params = message.get("params", {})
                    tool_name = params.get("name")
                    tool_args = params.get("arguments", {})

                    result = self.handle_tool_call(tool_name, tool_args)

                    response = {
                        "jsonrpc": "2.0",
                        "id": message.get("id"),
                        "result": result
                    }
                    print(json.dumps(response), flush=True)

                elif message.get("method") == "notifications/initialized":
                    # Client acknowledged initialization
                    pass

                else:
                    # Unknown method
                    response = {
                        "jsonrpc": "2.0",
                        "id": message.get("id"),
                        "error": {
                            "code": -32601,
                            "message": f"Method not found: {message.get('method')}"
                        }
                    }
                    print(json.dumps(response), flush=True)

            except json.JSONDecodeError:
                continue
            except Exception as e:
                error_response = {
                    "jsonrpc": "2.0",
                    "id": None,
                    "error": {
                        "code": -32603,
                        "message": str(e)
                    }
                }
                print(json.dumps(error_response), flush=True)


if __name__ == "__main__":
|
||||
server = WAFIntelligenceMCPServer()
|
||||
server.run()
|
||||
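
# Smoke test (sketch), from the workspace root:
#   echo '{"jsonrpc":"2.0","id":1,"method":"initialize"}' \
#     | python mcp/waf_intelligence/server.py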
445
mcp/waf_intelligence/threat_intel.py
Normal file
@@ -0,0 +1,445 @@
"""
|
||||
Phase 7: Multi-Source Threat Intelligence Collector
|
||||
|
||||
Aggregates threat data from:
|
||||
- Cloudflare Analytics API (WAF events, firewall logs)
|
||||
- External threat feeds (AbuseIPDB, Emerging Threats, etc.)
|
||||
- Local honeypot signals (if configured)
|
||||
- Historical attack patterns from receipts/logs
|
||||
|
||||
Produces scored ThreatIndicators for ML classification and rule generation.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
from urllib.parse import urlparse
|
||||
|
||||
# Optional: requests for external API calls
|
||||
try:
|
||||
import requests
|
||||
HAS_REQUESTS = True
|
||||
except ImportError:
|
||||
HAS_REQUESTS = False
|
||||
|
||||
|
||||
@dataclass
class ThreatIndicator:
    """Single threat indicator with scoring metadata."""

    indicator_type: str  # "ip", "ua", "path", "pattern", "country"
    value: str
    confidence: float  # 0.0-1.0
    severity: str  # "low", "medium", "high", "critical"
    sources: List[str] = field(default_factory=list)
    tags: List[str] = field(default_factory=list)
    first_seen: Optional[datetime] = None
    last_seen: Optional[datetime] = None
    hit_count: int = 1
    context: Dict[str, Any] = field(default_factory=dict)

    @property
    def fingerprint(self) -> str:
        """Unique identifier for deduplication."""
        raw = f"{self.indicator_type}:{self.value}"
        return hashlib.sha256(raw.encode()).hexdigest()[:16]

def merge(self, other: "ThreatIndicator") -> None:
|
||||
"""Merge another indicator into this one (for deduplication)."""
|
||||
self.hit_count += other.hit_count
|
||||
self.confidence = max(self.confidence, other.confidence)
|
||||
self.sources = list(set(self.sources + other.sources))
|
||||
self.tags = list(set(self.tags + other.tags))
|
||||
if other.first_seen and (not self.first_seen or other.first_seen < self.first_seen):
|
||||
self.first_seen = other.first_seen
|
||||
if other.last_seen and (not self.last_seen or other.last_seen > self.last_seen):
|
||||
self.last_seen = other.last_seen
|
||||
|
||||
|
||||
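# Dedup sketch (203.0.113.0/24 is a documentation range; values illustrative):
# two sightings of the same value share a fingerprint, so the later one merges
# into the earlier:
#
#   a = ThreatIndicator("ip", "203.0.113.7", 0.6, "medium", sources=["log_a"])
#   b = ThreatIndicator("ip", "203.0.113.7", 0.9, "medium", sources=["log_b"])
#   assert a.fingerprint == b.fingerprint
#   a.merge(b)  # confidence -> 0.9, hit_count -> 2, sources -> both

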
@dataclass
class ThreatIntelReport:
    """Aggregated threat intelligence from all sources."""

    indicators: List[ThreatIndicator] = field(default_factory=list)
    sources_queried: List[str] = field(default_factory=list)
    collection_time: datetime = field(default_factory=datetime.utcnow)
    metadata: Dict[str, Any] = field(default_factory=dict)

    @property
    def critical_count(self) -> int:
        return sum(1 for i in self.indicators if i.severity == "critical")

    @property
    def high_count(self) -> int:
        return sum(1 for i in self.indicators if i.severity == "high")

    def top_indicators(self, limit: int = 10) -> List[ThreatIndicator]:
        """Return highest-priority indicators."""
        severity_order = {"critical": 4, "high": 3, "medium": 2, "low": 1}
        sorted_indicators = sorted(
            self.indicators,
            key=lambda x: (severity_order.get(x.severity, 0), x.confidence, x.hit_count),
            reverse=True
        )
        return sorted_indicators[:limit]


class CloudflareLogParser:
    """Parse Cloudflare WAF/firewall logs for threat indicators."""

    # Common attack patterns in URIs
    ATTACK_PATTERNS = [
        (r"(?i)(?:union\s+select|select\s+.*\s+from)", "sqli", "high"),
        (r"(?i)<script[^>]*>", "xss", "high"),
        (r"(?i)(?:\.\./|\.\.\\)", "path_traversal", "medium"),
        (r"(?i)(?:cmd=|exec=|system\()", "rce", "critical"),
        (r"(?i)(?:wp-admin|wp-login|xmlrpc\.php)", "wordpress_probe", "low"),
        (r"(?i)(?:\.env|\.git|\.htaccess)", "sensitive_file", "medium"),
        (r"(?i)(?:phpmyadmin|adminer|mysql)", "db_probe", "medium"),
        (r"(?i)(?:eval\(|base64_decode)", "code_injection", "high"),
    ]

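    # Example: the URI "/index.php?id=1 UNION SELECT pass FROM users" matches
    # the sqli entry above, so _scan_for_patterns() below emits a high-severity
    # "pattern" indicator for it.
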
    # Known bad user agents
    BAD_USER_AGENTS = [
        ("sqlmap", "sqli_tool", "high"),
        ("nikto", "scanner", "medium"),
        ("nmap", "scanner", "medium"),
        ("masscan", "scanner", "medium"),
        ("zgrab", "scanner", "low"),
        ("python-requests", "bot", "low"),  # contextual
        ("curl", "bot", "low"),  # contextual
    ]

    def parse_log_file(self, path: Path) -> List[ThreatIndicator]:
        """Parse a log file and extract threat indicators."""
        indicators: List[ThreatIndicator] = []

        if not path.exists():
            return indicators

        try:
            with open(path, "r", encoding="utf-8", errors="ignore") as f:
                for line in f:
                    indicators.extend(self._parse_log_line(line))
        except Exception:
            pass

        return indicators

    def _parse_log_line(self, line: str) -> List[ThreatIndicator]:
        """Extract indicators from a single log line."""
        indicators: List[ThreatIndicator] = []

        # Try JSON format first
        try:
            data = json.loads(line)
            indicators.extend(self._parse_json_log(data))
            return indicators
        except json.JSONDecodeError:
            pass

        # Fall back to pattern matching on raw line
        indicators.extend(self._scan_for_patterns(line))

        return indicators

    def _parse_json_log(self, data: Dict[str, Any]) -> List[ThreatIndicator]:
        """Parse structured JSON log entry."""
        indicators: List[ThreatIndicator] = []

        # Extract IP if blocked or challenged
        action = data.get("action", "").lower()
        if action in ("block", "challenge", "managed_challenge"):
            ip = data.get("clientIP") or data.get("client_ip") or data.get("ip")
            if ip:
                indicators.append(ThreatIndicator(
                    indicator_type="ip",
                    value=ip,
                    confidence=0.8 if action == "block" else 0.6,
                    severity="high" if action == "block" else "medium",
                    sources=["cloudflare_log"],
                    tags=[action, data.get("ruleId", "unknown_rule")],
                    context={"rule": data.get("ruleName", ""), "action": action}
                ))

        # Extract URI patterns
        uri = data.get("clientRequestURI") or data.get("uri") or data.get("path", "")
        if uri:
            indicators.extend(self._scan_for_patterns(uri))

        # Extract user agent
        ua = data.get("clientRequestUserAgent") or data.get("user_agent", "")
        if ua:
            for pattern, tag, severity in self.BAD_USER_AGENTS:
                if pattern.lower() in ua.lower():
                    indicators.append(ThreatIndicator(
                        indicator_type="ua",
                        value=ua[:200],  # truncate
                        confidence=0.7,
                        severity=severity,
                        sources=["cloudflare_log"],
                        tags=[tag, "bad_ua"]
                    ))
                    break

        return indicators

    def _scan_for_patterns(self, text: str) -> List[ThreatIndicator]:
        """Scan text for known attack patterns."""
        indicators: List[ThreatIndicator] = []

        for pattern, tag, severity in self.ATTACK_PATTERNS:
            if re.search(pattern, text):
                indicators.append(ThreatIndicator(
                    indicator_type="pattern",
                    value=text[:500],  # truncate
                    confidence=0.75,
                    severity=severity,
                    sources=["pattern_match"],
                    tags=[tag, "attack_pattern"]
                ))

        return indicators


class ExternalThreatFeed:
    """Fetch threat intelligence from external APIs."""

    def __init__(self, api_keys: Optional[Dict[str, str]] = None):
        self.api_keys = api_keys or {}
        self._cache: Dict[str, ThreatIndicator] = {}

    def query_abuseipdb(self, ip: str) -> Optional[ThreatIndicator]:
        """Query AbuseIPDB for IP reputation."""
        if not HAS_REQUESTS:
            return None

        api_key = self.api_keys.get("abuseipdb") or os.getenv("ABUSEIPDB_API_KEY")
        if not api_key:
            return None

        cache_key = f"abuseipdb:{ip}"
        if cache_key in self._cache:
            return self._cache[cache_key]

        try:
            resp = requests.get(
                "https://api.abuseipdb.com/api/v2/check",
                headers={"Key": api_key, "Accept": "application/json"},
                params={"ipAddress": ip, "maxAgeInDays": 90},
                timeout=5
            )
            if resp.status_code == 200:
                data = resp.json().get("data", {})
                abuse_score = data.get("abuseConfidenceScore", 0)

                if abuse_score > 0:
                    severity = "critical" if abuse_score > 80 else "high" if abuse_score > 50 else "medium"
                    indicator = ThreatIndicator(
                        indicator_type="ip",
                        value=ip,
                        confidence=abuse_score / 100,
                        severity=severity,
                        sources=["abuseipdb"],
                        tags=["external_intel", "ip_reputation"],
                        hit_count=data.get("totalReports", 1),
                        context={
                            "abuse_score": abuse_score,
                            "country": data.get("countryCode"),
                            "isp": data.get("isp"),
                            "domain": data.get("domain"),
                            "usage_type": data.get("usageType"),
                        }
                    )
                    self._cache[cache_key] = indicator
                    return indicator
        except Exception:
            pass

        return None

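    # Usage sketch (requires an AbuseIPDB key, e.g. ABUSEIPDB_API_KEY in the
    # environment; the IP below is from a documentation range):
    #
    #   feed = ExternalThreatFeed()
    #   ind = feed.query_abuseipdb("203.0.113.7")
    #   if ind:
    #       print(ind.severity, ind.context.get("abuse_score"))
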
    def query_emerging_threats(self, ip: str) -> Optional[ThreatIndicator]:
        """Check IP against Emerging Threats blocklist (free, no API key)."""
        if not HAS_REQUESTS:
            return None

        # This is a simplified check; a real implementation would download and
        # cache the blocklist. For now we return None and rely on other sources.
        return None

    def enrich_indicator(self, indicator: ThreatIndicator) -> ThreatIndicator:
        """Enrich an indicator with external intelligence."""
        if indicator.indicator_type == "ip":
            external = self.query_abuseipdb(indicator.value)
            if external:
                indicator.merge(external)

        return indicator


class ThreatIntelCollector:
    """
    Main collector that aggregates from all sources.

    Usage:
        collector = ThreatIntelCollector(workspace_path="/path/to/cloudflare")
        report = collector.collect()
        for indicator in report.top_indicators(10):
            print(f"{indicator.severity}: {indicator.indicator_type}={indicator.value}")
    """

    def __init__(
        self,
        workspace_path: Optional[str] = None,
        api_keys: Optional[Dict[str, str]] = None,
        enable_external: bool = True
    ):
        self.workspace = Path(workspace_path) if workspace_path else Path.cwd()
        self.log_parser = CloudflareLogParser()
        self.external_feed = ExternalThreatFeed(api_keys) if enable_external else None
        self._indicators: Dict[str, ThreatIndicator] = {}

    def collect(
        self,
        log_dirs: Optional[List[str]] = None,
        enrich_external: bool = True,
        max_indicators: int = 1000
    ) -> ThreatIntelReport:
        """
        Collect threat intelligence from all configured sources.

        Args:
            log_dirs: Directories to scan for logs (default: observatory/,
                anomalies/, archive_runtime/receipts/)
            enrich_external: Whether to query external APIs for enrichment
            max_indicators: Maximum indicators to return

        Returns:
            ThreatIntelReport with deduplicated, scored indicators
        """
        sources_queried: List[str] = []

        # Default log directories
        if log_dirs is None:
            log_dirs = ["observatory", "anomalies", "archive_runtime/receipts"]

        # Collect from local logs
        for log_dir in log_dirs:
            dir_path = self.workspace / log_dir
            if dir_path.exists():
                sources_queried.append(f"local:{log_dir}")
                self._collect_from_directory(dir_path)

        # Collect from Terraform config (extract referenced IPs/patterns)
        tf_path = self.workspace / "terraform"
        if tf_path.exists():
            sources_queried.append("terraform_state")
            self._collect_from_terraform(tf_path)

        # Enrich with external intel if enabled
        if enrich_external and self.external_feed:
            sources_queried.append("external_apis")
            self._enrich_all_indicators()

        # Build report
        all_indicators = list(self._indicators.values())

        # Sort by priority and truncate
        severity_order = {"critical": 4, "high": 3, "medium": 2, "low": 1}
        all_indicators.sort(
            key=lambda x: (severity_order.get(x.severity, 0), x.confidence, x.hit_count),
            reverse=True
        )

        return ThreatIntelReport(
            indicators=all_indicators[:max_indicators],
            sources_queried=sources_queried,
            metadata={
                "workspace": str(self.workspace),
                "total_raw": len(self._indicators),
                "external_enabled": enrich_external and self.external_feed is not None
            }
        )

    def _collect_from_directory(self, dir_path: Path) -> None:
        """Scan a directory for log files and extract indicators."""
        log_patterns = ["*.log", "*.json", "*.jsonl"]

        for pattern in log_patterns:
            for log_file in dir_path.rglob(pattern):
                for indicator in self.log_parser.parse_log_file(log_file):
                    self._add_indicator(indicator)

    def _collect_from_terraform(self, tf_path: Path) -> None:
        """Extract indicators referenced in Terraform files."""
        for tf_file in tf_path.glob("*.tf"):
            try:
                content = tf_file.read_text(encoding="utf-8")

                # Extract IPs from allow/block rules
                ip_pattern = r'\b(?:\d{1,3}\.){3}\d{1,3}(?:/\d{1,2})?\b'
                for match in re.finditer(ip_pattern, content):
                    ip = match.group()
                    # Only flag if in a block context
                    context_start = max(0, match.start() - 100)
                    context = content[context_start:match.start()].lower()
                    if "block" in context or "deny" in context:
                        self._add_indicator(ThreatIndicator(
                            indicator_type="ip",
                            value=ip,
                            confidence=0.9,
                            severity="medium",
                            sources=["terraform_blocklist"],
                            tags=["existing_rule", "blocked_ip"],
                            context={"file": str(tf_file.name)}
                        ))
            except Exception:
                pass

    def _add_indicator(self, indicator: ThreatIndicator) -> None:
        """Add indicator with deduplication."""
        key = indicator.fingerprint
        if key in self._indicators:
            self._indicators[key].merge(indicator)
        else:
            self._indicators[key] = indicator

    def _enrich_all_indicators(self) -> None:
        """Enrich all IP indicators with external intelligence."""
        if not self.external_feed:
            return

        for indicator in list(self._indicators.values()):
            if indicator.indicator_type == "ip":
                self.external_feed.enrich_indicator(indicator)


# CLI interface for testing
if __name__ == "__main__":
    import sys

    workspace = sys.argv[1] if len(sys.argv) > 1 else "."

    collector = ThreatIntelCollector(
        workspace_path=workspace,
        enable_external=False  # Don't hit APIs in CLI test
    )

    report = collector.collect()

    print("\n🔍 Threat Intelligence Report")
    print("=" * 50)
    print(f"Sources: {', '.join(report.sources_queried)}")
    print(f"Total indicators: {len(report.indicators)}")
    print(f"Critical: {report.critical_count} | High: {report.high_count}")
    print("\nTop 10 Indicators:")
    print("-" * 50)

    for ind in report.top_indicators(10):
        print(f"  [{ind.severity.upper():8}] {ind.indicator_type}={ind.value[:50]}")
        print(f"    confidence={ind.confidence:.2f} hits={ind.hit_count} sources={ind.sources}")
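
# Example invocation, from the workspace root:
#   python mcp/waf_intelligence/threat_intel.py .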