from enum import Enum
from typing import Optional, List
import uuid


class Classification(str, Enum):
    """Verdicts that the Layer 0 (Shadow Eval) classifier can assign."""

    BLESSED = "blessed"
    AMBIGUOUS = "ambiguous"
    FORBIDDEN = "forbidden"
    CATASTROPHIC = "catastrophic"


class ShadowEvalResult:
    """Outcome of a single Shadow Eval classification.

    Attributes:
        classification: The verdict assigned to the query.
        reason: Short machine-readable reason code, or ``None``.
        risk_score: Integer risk level attached by the classifier.
        flags: List of flag strings; always a list owned by this result.
        trace_id: Random UUID4 string generated when the result is created.
    """

    # Ordered (verdict, action) pairs. Matching uses ``==`` instead of a dict
    # lookup because a ``str``-mixin Enum member compares equal to its plain
    # string value but does not hash like it.
    _ROUTES = (
        (Classification.CATASTROPHIC, "FAIL_CLOSED"),
        (Classification.FORBIDDEN, "HANDOFF_TO_GUARDRAILS"),
        (Classification.AMBIGUOUS, "PROMPT_FOR_CLARIFICATION"),
    )
    _DEFAULT_ROUTE = "HANDOFF_TO_LAYER1"

    def __init__(
        self,
        classification: Classification,
        reason: Optional[str] = None,
        risk_score: int = 0,
        flags: Optional[List[str]] = None,
    ):
        """Store the verdict details and mint a fresh trace id.

        Args:
            classification: Verdict for the evaluated query.
            reason: Optional reason code explaining the verdict.
            risk_score: Risk level to record (defaults to 0).
            flags: Optional flag strings; ``None`` or empty yields ``[]``.
        """
        self.classification = classification
        self.reason = reason
        self.risk_score = risk_score
        # Copy so later edits to this result never leak into the caller's list.
        self.flags = list(flags) if flags else []
        self.trace_id = str(uuid.uuid4())

    def to_routing_action(self) -> str:
        """Return the routing action string for this result's verdict.

        Any verdict that is not catastrophic, forbidden, or ambiguous routes
        to ``"HANDOFF_TO_LAYER1"``.
        """
        for verdict, action in self._ROUTES:
            if self.classification == verdict:
                return action
        return self._DEFAULT_ROUTE


class ShadowClassifier:
    """
    Minimal doctrinal classifier for Layer 0 (Shadow Eval).
    """

    # Substrings checked against the lower-cased, whitespace-normalized query.
    _CATASTROPHIC_PHRASES = (
        "disable guardrails",
        "override agent permissions",
        "bypass governance",
        "self-modifying",
    )
    _FORBIDDEN_PHRASES = (
        "skip git",
        "apply directly",
        "dashboard",
        "manual change",
    )
    _AMBIGUOUS_PHRASES = (
        "fix it",
        "change this",
        "update stuff",
    )
    # Queries with this many words or fewer are treated as ambiguous.
    _MAX_AMBIGUOUS_WORDS = 2

    def classify(self, query: str) -> ShadowEvalResult:
        """Return a doctrinal classification for the incoming query.

        Checks run in order of severity and the first match wins:
        catastrophic, then forbidden, then ambiguous, otherwise blessed.
        Matching is case-insensitive substring search. Runs of whitespace
        (spaces, tabs, newlines) are collapsed to a single space first so
        that irregular spacing cannot hide a multi-word phrase.

        Args:
            query: Raw query text.

        Returns:
            A ``ShadowEvalResult`` describing the verdict.
        """
        words = query.lower().split()
        q = " ".join(words)

        # 1. Catastrophic (fail closed)
        if any(phrase in q for phrase in self._CATASTROPHIC_PHRASES):
            return ShadowEvalResult(
                classification=Classification.CATASTROPHIC,
                reason="catastrophic_indicator",
                risk_score=5,
                flags=["permission_override", "guardrail_disable"],
            )

        # 2. Forbidden (governance violation)
        if any(phrase in q for phrase in self._FORBIDDEN_PHRASES):
            return ShadowEvalResult(
                classification=Classification.FORBIDDEN,
                reason="governance_violation",
                risk_score=3,
                flags=["gitops_bypass"],
            )

        # 3. Ambiguous (needs clarification): vague phrasing or too few words
        #    (an empty query has zero words and lands here as well).
        if (
            any(phrase in q for phrase in self._AMBIGUOUS_PHRASES)
            or len(words) <= self._MAX_AMBIGUOUS_WORDS
        ):
            return ShadowEvalResult(
                classification=Classification.AMBIGUOUS,
                reason="insufficient_context",
                risk_score=1,
                flags=["needs_clarification"],
            )

        # 4. Blessed (valid + lawful)
        return ShadowEvalResult(
            classification=Classification.BLESSED,
            reason=None,
            risk_score=0,
        )