#!/usr/bin/env python3
"""
COMPLIANCE ORACLE RUNNER
v0.4.0 - Production Ready

End-to-end compliance oracle that:
1. Searches documentation for answers
2. Builds context from multiple frameworks
3. Queries LLM for oracle answers
4. Validates answers with typing
5. Emits receipt with sha256 hash
6. Logs to compliance ledger

Usage:
    python3 oracle_runner.py "What are our incident response obligations under NIS2?"
    python3 oracle_runner.py "Are we compliant with GDPR Article 33?"
    python3 oracle_runner.py "Summarize WAF rules for PCI-DSS" --frameworks pci-dss,gdpr
"""

import json
import sys
import hashlib
import datetime
import re
from pathlib import Path
from typing import Optional, List, Dict, Any
from dataclasses import dataclass, asdict, field
from enum import Enum

from layer0 import layer0_entry
from layer0.shadow_classifier import ShadowEvalResult


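# Layer 0 contract as used in main() below: layer0_entry(question) returns a
# (routing_action, ShadowEvalResult) pair; only "HANDOFF_TO_LAYER1" lets the
# oracle pipeline proceed, and every other action is reported to stderr by
# _render_layer0_block before the CLI exits non-zero.
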
class ComplianceFramework(str, Enum):
    """Supported compliance frameworks"""

    PCI_DSS = "pci-dss"
    GDPR = "gdpr"
    NIS2 = "nis2"
    AI_ACT = "ai-act"
    SOC2 = "soc2"
    ISO27001 = "iso27001"
    HIPAA = "hipaa"
    ALL = "all"


@dataclass
class Citation:
    """Single citation to a document"""

    document_id: str
    filename: str
    framework: str
    snippet: str
    relevance_score: float = 0.85


@dataclass
class ComplianceGap:
    """Identified gap in compliance"""

    framework: str
    requirement: str
    current_state: str
    gap_description: str
    remediation: Optional[str] = None


@dataclass
class OracleAnswer:
    """Core oracle answer schema (v0.4.0)"""

    question: str
    answer: str
    frameworks: List[str]
    citations: List[Citation]
    gaps: List[ComplianceGap]
    insufficient_context: bool = False
    confidence_level: str = "high"  # high, medium, low
    compliance_flags: Dict[str, str] = field(default_factory=dict)

    def to_json(self) -> str:
        """Serialize to JSON (for hashing)"""
        # asdict() already converts nested Citation/ComplianceGap dataclasses,
        # so the whole answer can be dumped in a single pass.
        data = asdict(self)
        return json.dumps(data, sort_keys=True, separators=(",", ":"))


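# Hashing note (illustrative): because to_json() sorts keys and uses compact
# separators, the same answer always serializes to the same string, so its
# sha256 digest is stable across runs, e.g.:
#
#   a = OracleAnswer(question="q", answer="a", frameworks=["gdpr"], citations=[], gaps=[])
#   hashlib.sha256(a.to_json().encode()).hexdigest()
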
@dataclass
class OracleReceipt:
    """Receipt for oracle answer (v0.4.0)"""

    timestamp: str
    oracle_answer: str  # The full JSON answer
    answer_hash: str  # SHA256 hash of answer
    hash_algorithm: str = "sha256"
    version: str = "v0.4.0"

    def to_json(self) -> str:
        """Serialize to JSON"""
        return json.dumps(asdict(self), indent=2)


class OracleRunner:
    """End-to-end compliance oracle"""

    def __init__(self, base_path: str = "/Users/sovereign/Desktop/CLOUDFLARE"):
        self.base_path = Path(base_path)
        self.docs_path = self.base_path
        self.compliance_ledger = self.base_path / "COMPLIANCE_LEDGER.jsonl"

        # Framework → filename mappings
        self.framework_docs: Dict[str, List[str]] = {
            "pci-dss": [
                "cloudflare_waf_baseline.md",
                "WEB-INFRA-SECURITY-PATTERNS.md",
            ],
            "gdpr": [
                "zero_trust_architecture.md",
                "WEB-INFRA-SECURITY-PATTERNS.md",
                "cloudflare_dns_manifest.md",
            ],
            "nis2": [
                "TUNNEL-HARDENING.md",
                "WEB-INFRA-SECURITY-PATTERNS.md",
            ],
            "ai-act": [
                "zero_trust_architecture.md",
                "WEB-INFRA-SECURITY-PATTERNS.md",
            ],
        }

    def search_documents(
        self, question: str, frameworks: Optional[List[str]] = None, max_docs: int = 5
    ) -> List[Citation]:
        """
        Search documentation for relevant content.
        Returns list of citations.
        """
        citations: List[Citation] = []

        # Default to the core frameworks
        if frameworks is None:
            frameworks = ["pci-dss", "gdpr", "nis2"]

        # Search each framework's documents
        for framework in frameworks:
            docs = self.framework_docs.get(framework, [])

            for doc_filename in docs:
                doc_path = self.docs_path / doc_filename
                if not doc_path.exists():
                    continue

                try:
                    with open(doc_path, "r", encoding="utf-8") as f:
                        content = f.read()

                    # Simple keyword matching for relevance
                    question_words = set(re.findall(r"\b\w+\b", question.lower()))
                    content_lower = content.lower()

                    matches = sum(1 for word in question_words if word in content_lower)
                    relevance = min(1.0, matches / max(1, len(question_words)))

                    if relevance > 0.2:  # Threshold
                        # Extract snippet
                        snippet = self._extract_snippet(content, question_words)

                        citation = Citation(
                            document_id=doc_filename.replace(".md", ""),
                            filename=doc_filename,
                            framework=framework,
                            snippet=snippet,
                            relevance_score=relevance,
                        )
                        citations.append(citation)

                except Exception as e:
                    print(
                        f"Warning: Error reading {doc_filename}: {e}", file=sys.stderr
                    )

        # Sort by relevance and limit
        citations.sort(key=lambda c: c.relevance_score, reverse=True)
        return citations[:max_docs]

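    # Relevance scoring example (illustrative): for the question
    # "Are we compliant with GDPR Article 33?" there are 7 distinct question
    # words; if, say, 4 of them appear in a document, relevance = 4 / 7 ≈ 0.57,
    # which clears the 0.2 inclusion threshold used above.
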
    def _extract_snippet(
        self, content: str, keywords: set, snippet_len: int = 200
    ) -> str:
        """Extract a relevant snippet from content"""
        lines = content.split("\n")
        for i, line in enumerate(lines):
            if any(keyword in line.lower() for keyword in keywords):
                start = max(0, i - 2)
                end = min(len(lines), i + 3)
                snippet = "\n".join(lines[start:end])
                return snippet[:snippet_len] + (
                    "..." if len(snippet) > snippet_len else ""
                )
        return content[:snippet_len] + ("..." if len(content) > snippet_len else "")

    def validate_oracle_answer(self, answer: OracleAnswer) -> bool:
        """Validate oracle answer structure and content"""
        # Check required fields
        if not answer.question or not answer.answer:
            return False

        # Check frameworks
        if not answer.frameworks:
            return False

        # Check citations exist
        if not answer.citations:
            answer.insufficient_context = True

        # Ensure every requested framework has a compliance flag
        for framework in answer.frameworks:
            if framework not in answer.compliance_flags:
                answer.compliance_flags[framework] = "unknown"

        return True

    def emit_oracle_receipt(self, answer: OracleAnswer) -> OracleReceipt:
        """
        Emit a receipt with SHA256 hash for oracle answer.
        Logs to compliance ledger.
        """
        answer_json = answer.to_json()

        # Compute SHA256 hash
        answer_hash = hashlib.sha256(answer_json.encode()).hexdigest()

        receipt = OracleReceipt(
            timestamp=datetime.datetime.now(datetime.timezone.utc).isoformat(),
            oracle_answer=answer_json,
            answer_hash=answer_hash,
        )

        # Append to compliance ledger as JSONL: one compact record per line
        # (indented JSON would break the line-delimited format).
        try:
            with open(self.compliance_ledger, "a", encoding="utf-8") as f:
                f.write(json.dumps(asdict(receipt), sort_keys=True) + "\n")
        except Exception as e:
            print(f"Warning: Could not write to ledger: {e}", file=sys.stderr)

        return receipt

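    # Receipt verification sketch (illustrative): any ledger entry can be checked
    # independently by re-hashing the stored answer JSON.
    #
    #   entry = json.loads(ledger_line)  # one line from COMPLIANCE_LEDGER.jsonl
    #   assert hashlib.sha256(entry["oracle_answer"].encode()).hexdigest() == entry["answer_hash"]
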
    def run(
        self,
        question: str,
        frameworks: Optional[List[str]] = None,
        verbose: bool = False,
    ) -> Dict[str, Any]:
        """
        Run complete oracle pipeline.
        Returns: {answer: OracleAnswer, receipt: OracleReceipt}
        """
        if verbose:
            print(f"\n[ORACLE] Question: {question}\n", file=sys.stderr)

        # Step 1: Search documents
        if verbose:
            print("[ORACLE] Searching documentation...", file=sys.stderr)
        citations = self.search_documents(question, frameworks)

        if verbose:
            print(
                f"[ORACLE] Found {len(citations)} relevant documents\n", file=sys.stderr
            )

        # Step 2: Build oracle answer
        # In production, this would call an LLM.
        # For now, we create a template with placeholders.
        frameworks_list = frameworks or ["pci-dss", "gdpr"]

        answer = OracleAnswer(
            question=question,
            answer=self._generate_answer(question, citations),
            frameworks=frameworks_list,
            citations=citations,
            gaps=self._identify_gaps(question, citations),
            insufficient_context=len(citations) < 2,
            compliance_flags={
                framework: "covered"
                if any(c.framework == framework for c in citations)
                else "uncovered"
                for framework in frameworks_list
            },
        )

        # Step 3: Validate
        if not self.validate_oracle_answer(answer):
            print("[ERROR] Answer validation failed", file=sys.stderr)
            sys.exit(1)

        if verbose:
            print("[ORACLE] Answer validated\n", file=sys.stderr)

        # Step 4: Emit receipt
        receipt = self.emit_oracle_receipt(answer)

        if verbose:
            print(
                f"[ORACLE] Receipt emitted with hash: {receipt.answer_hash[:16]}...\n",
                file=sys.stderr,
            )

        return {"answer": answer, "receipt": receipt}

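    # LLM integration sketch (hypothetical): in production, Step 2 could swap
    # _generate_answer for a call into whatever LLM client the deployment uses.
    # The client object, model name, and prompt layout below are placeholders,
    # not part of this module.
    #
    #   context = "\n\n".join(c.snippet for c in citations)
    #   prompt = f"Question: {question}\n\nContext:\n{context}"
    #   answer_text = llm_client.complete(model="<model-name>", prompt=prompt)
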
    def _generate_answer(self, question: str, citations: List[Citation]) -> str:
        """Generate answer from citations (template)"""
        if not citations:
            return (
                "Based on the available documentation, I could not find sufficient context "
                "to answer this question. Please provide more specific details or add relevant "
                "documentation to the knowledge base."
            )

        citation_text = "\n\n".join(
            f"From {c.filename} ({c.framework}):\n{c.snippet}" for c in citations[:3]
        )

        return (
            f"Based on the available documentation:\n\n{citation_text}\n\n"
            "[Note: In production, this would be replaced with an LLM-generated answer]"
        )

    def _identify_gaps(
        self, question: str, citations: List[Citation]
    ) -> List[ComplianceGap]:
        """Identify gaps in compliance based on citations"""
        gaps: List[ComplianceGap] = []

        # If few citations, mark as insufficient
        if len(citations) < 2:
            gaps.append(
                ComplianceGap(
                    framework="all",
                    requirement="Full coverage",
                    current_state="Partially documented",
                    gap_description="Insufficient documentation found for comprehensive answer",
                )
            )

        return gaps


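# Programmatic usage sketch (illustrative): the runner can be driven without the
# CLI. The base_path below is a placeholder for wherever the framework docs live.
#
#   runner = OracleRunner(base_path="/path/to/compliance/docs")
#   result = runner.run("Are we compliant with GDPR Article 33?", frameworks=["gdpr"])
#   print(result["receipt"].answer_hash)
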
def parse_frameworks(arg_value: str) -> List[str]:
    """Parse comma-separated frameworks"""
    return [f.strip() for f in arg_value.split(",")]


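# Example: parse_frameworks("pci-dss, gdpr") -> ["pci-dss", "gdpr"]
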
def main() -> int:
    """CLI entry point"""
    if len(sys.argv) < 2:
        print("Usage: oracle_runner.py <question> [--frameworks framework1,framework2]")
        print("\nExample:")
        print('  oracle_runner.py "Are we GDPR compliant?" --frameworks gdpr')
        print('  oracle_runner.py "What are NIS2 obligations?" --frameworks nis2')
        return 1

    question = sys.argv[1]
    frameworks: Optional[List[str]] = None
    verbose = "--verbose" in sys.argv or "-v" in sys.argv

    # Layer 0: pre-boot Shadow Eval gate before any processing.
    routing_action, shadow = layer0_entry(question)
    if routing_action != "HANDOFF_TO_LAYER1":
        _render_layer0_block(routing_action, shadow)
        return 1

    # Parse frameworks flag
    for i, arg in enumerate(sys.argv[2:], 2):
        if arg.startswith("--frameworks="):
            frameworks = parse_frameworks(arg.split("=", 1)[1])
        elif arg == "--frameworks" and i + 1 < len(sys.argv):
            frameworks = parse_frameworks(sys.argv[i + 1])

    runner = OracleRunner()
    result = runner.run(question, frameworks=frameworks, verbose=verbose)

    # Output results
    answer = result["answer"]
    receipt = result["receipt"]

    # Print answer
    print("\n" + "=" * 80)
    print("COMPLIANCE ORACLE ANSWER")
    print("=" * 80)
    print(f"\nQuestion: {answer.question}\n")
    print(f"Answer:\n{answer.answer}\n")
    print(f"Frameworks: {', '.join(answer.frameworks)}")
    print(f"Confidence: {answer.confidence_level}")
    print(f"Insufficient Context: {answer.insufficient_context}\n")

    # Print citations
    if answer.citations:
        print("Citations:")
        for i, citation in enumerate(answer.citations, 1):
            print(f"  [{i}] {citation.filename} ({citation.framework})")
            print(f"      Relevance: {citation.relevance_score:.2%}")
            print(f"      Snippet: {citation.snippet[:100]}...")

    # Print gaps
    if answer.gaps:
        print("\nIdentified Gaps:")
        for gap in answer.gaps:
            print(f"  - {gap.framework}: {gap.gap_description}")
            if gap.remediation:
                print(f"    Remediation: {gap.remediation}")

    # Print compliance flags
    print("\nCompliance Status:")
    for framework, status in answer.compliance_flags.items():
        symbol = "✓" if status == "covered" else "✗"
        print(f"  {symbol} {framework}: {status}")

    # Print receipt hash
    print(f"\nReceipt Hash (sha256): {receipt.answer_hash}")
    print(f"Timestamp: {receipt.timestamp}")
    print("=" * 80)

    return 0


def _render_layer0_block(routing_action: str, shadow: ShadowEvalResult) -> None:
    """
    Minimal user-facing responses for Layer 0 decisions.
    """
    if routing_action == "FAIL_CLOSED":
        print("Layer 0: cannot comply with this request.", file=sys.stderr)
        return
    if routing_action == "HANDOFF_TO_GUARDRAILS":
        reason = shadow.reason or "governance_violation"
        print(
            f"Layer 0: governance violation detected ({reason}).",
            file=sys.stderr,
        )
        return
    if routing_action == "PROMPT_FOR_CLARIFICATION":
        print(
            "Layer 0: request is ambiguous. Please add specifics before rerunning.",
            file=sys.stderr,
        )
        return
    print("Layer 0: unrecognized routing action; refusing request.", file=sys.stderr)


# Entry point is kept after all definitions so main() can call _render_layer0_block.
if __name__ == "__main__":
    sys.exit(main())