feat: enforce layer0 gate and add tests
This commit is contained in:
7
layer0/__init__.py
Normal file
7
layer0/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""
|
||||
Layer 0 package: pre-boot Shadow Eval classifier and logger.
|
||||
"""
|
||||
|
||||
from .entrypoint import layer0_entry # re-export for convenience
|
||||
|
||||
# Public API of the package: only the Layer 0 entrypoint is re-exported.
__all__ = ["layer0_entry"]
|
||||
17
layer0/entrypoint.py
Normal file
17
layer0/entrypoint.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from .shadow_classifier import ShadowClassifier, Classification, ShadowEvalResult
|
||||
from .preboot_logger import PrebootLogger
|
||||
|
||||
# Module-level classifier instance, created once at import time and used by
# every call to layer0_entry().
classifier = ShadowClassifier()
|
||||
|
||||
|
||||
def layer0_entry(query: str) -> tuple[str, ShadowEvalResult]:
    """Run the Layer 0 gate on *query* before Layer 1 (Doctrine Load).

    The query is classified by the module-level classifier. Catastrophic and
    forbidden verdicts are written to the pre-boot log before returning.

    Returns:
        A ``(routing_action, evaluation_result)`` pair, where the routing
        action is the string produced by the result's
        ``to_routing_action()``.
    """
    verdict = classifier.classify(query)

    is_violation = verdict.classification in (
        Classification.CATASTROPHIC,
        Classification.FORBIDDEN,
    )
    if is_violation:
        PrebootLogger.log(verdict, query)

    action = verdict.to_routing_action()
    return action, verdict
|
||||
33
layer0/preboot_logger.py
Normal file
33
layer0/preboot_logger.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import datetime
|
||||
import json
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
from .shadow_classifier import ShadowEvalResult, Classification
|
||||
|
||||
|
||||
class PrebootLogger:
    """Append-only JSON Lines logger for Layer 0 violations.

    Only evaluations classified as ``CATASTROPHIC`` or ``FORBIDDEN`` are
    recorded; any other classification is silently ignored.
    """

    # Destination file; a relative path is resolved against the process's
    # current working directory by open().
    LOG_PATH = "anomalies/preboot_shield.jsonl"

    @staticmethod
    def log(event: ShadowEvalResult, query: str, reason_override: Optional[str] = None):
        """Append one violation record for *event* to ``LOG_PATH``.

        Args:
            event: Evaluation result to record. Ignored unless its
                classification is catastrophic or forbidden.
            query: The query text that produced *event*; stored as given.
            reason_override: Replaces ``event.reason`` in the record when
                truthy (an empty string falls back to ``event.reason``).

        Raises:
            OSError: If the log directory cannot be created or the file
                cannot be opened or written.
        """
        if event.classification not in (Classification.CATASTROPHIC, Classification.FORBIDDEN):
            return  # Only violations get logged

        # datetime.utcnow() is deprecated (Python 3.12+). Take an aware UTC
        # time instead and drop the tzinfo only for rendering, so the output
        # keeps the exact "<iso>Z" shape existing log consumers see.
        now_utc = datetime.datetime.now(datetime.timezone.utc)
        timestamp = now_utc.replace(tzinfo=None).isoformat() + "Z"

        record = {
            "timestamp": timestamp,
            "query": query,
            "classification": event.classification.value,
            "reason": reason_override or event.reason,
            "trace_id": event.trace_id,
            "metadata": {
                "risk_score": event.risk_score,
                "flags": event.flags,
                "source": "layer0",
            },
        }

        # os.makedirs("") raises FileNotFoundError, so only create the parent
        # directory when LOG_PATH actually has one (it may be overridden with
        # a bare filename).
        log_dir = os.path.dirname(PrebootLogger.LOG_PATH)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)

        with open(PrebootLogger.LOG_PATH, "a", encoding="utf-8") as f:
            f.write(json.dumps(record) + "\n")
|
||||
93
layer0/shadow_classifier.py
Normal file
93
layer0/shadow_classifier.py
Normal file
@@ -0,0 +1,93 @@
|
||||
from enum import Enum
|
||||
from typing import Optional, List
|
||||
import uuid
|
||||
|
||||
|
||||
class Classification(str, Enum):
    """Verdicts that Layer 0 can assign to an incoming query.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value (e.g. ``Classification.BLESSED == "blessed"``).
    """

    # Declared in the order they are iterated.
    BLESSED = "blessed"
    AMBIGUOUS = "ambiguous"
    FORBIDDEN = "forbidden"
    CATASTROPHIC = "catastrophic"
|
||||
|
||||
|
||||
class ShadowEvalResult:
    """Outcome of one Shadow Eval classification.

    Holds the classification, an optional reason string, a numeric risk
    score, a list of flags, and a UUID4 ``trace_id`` generated when the
    instance is constructed.
    """

    def __init__(
        self,
        classification: Classification,
        reason: Optional[str] = None,
        risk_score: int = 0,
        flags: Optional[List[str]] = None,
    ):
        self.classification = classification
        self.reason = reason
        self.risk_score = risk_score
        # A missing or empty flags argument becomes a fresh empty list.
        self.flags = flags if flags else []
        self.trace_id = str(uuid.uuid4())

    def to_routing_action(self) -> str:
        """Return the routing action string for this result's classification.

        Any classification not listed in the table below routes to
        ``"HANDOFF_TO_LAYER1"``.
        """
        # Ordered (classification, action) pairs, compared with == so the
        # lookup follows the same equality semantics as a plain if-chain.
        routes = (
            (Classification.CATASTROPHIC, "FAIL_CLOSED"),
            (Classification.FORBIDDEN, "HANDOFF_TO_GUARDRAILS"),
            (Classification.AMBIGUOUS, "PROMPT_FOR_CLARIFICATION"),
        )
        for label, action in routes:
            if self.classification == label:
                return action
        return "HANDOFF_TO_LAYER1"
|
||||
|
||||
|
||||
class ShadowClassifier:
    """Minimal doctrinal classifier for Layer 0 (Shadow Eval).

    Classification is case-insensitive substring matching against fixed
    phrase lists, checked in order: catastrophic, forbidden, ambiguous.
    A query that matches nothing is blessed.
    """

    # Phrases that trigger a fail-closed (catastrophic) verdict.
    _CATASTROPHIC_PHRASES = (
        "disable guardrails",
        "override agent permissions",
        "bypass governance",
        "self-modifying",
    )

    # Phrases treated as governance violations (forbidden).
    _FORBIDDEN_PHRASES = (
        "skip git",
        "apply directly",
        "dashboard",
        "manual change",
    )

    # Phrases too vague to act on without clarification.
    _AMBIGUOUS_PHRASES = (
        "fix it",
        "change this",
        "update stuff",
    )

    def classify(self, query: str) -> ShadowEvalResult:
        """Return a doctrinal classification for the incoming query.

        Args:
            query: Raw query text. Matching ignores case and treats any run
                of whitespace (spaces, tabs, newlines) as a single space.

        Returns:
            A ``ShadowEvalResult`` carrying the classification, reason,
            risk score and flags for the first rule that matches.
        """
        # Collapse whitespace runs before matching: otherwise a query such as
        # "disable  guardrails" (two spaces, a tab or a newline) would slip
        # past the multi-word phrase checks below.
        words = query.lower().split()
        q = " ".join(words)

        # 1. Catastrophic (fail closed)
        if any(phrase in q for phrase in self._CATASTROPHIC_PHRASES):
            return ShadowEvalResult(
                classification=Classification.CATASTROPHIC,
                reason="catastrophic_indicator",
                risk_score=5,
                flags=["permission_override", "guardrail_disable"],
            )

        # 2. Forbidden (governance violation)
        if any(phrase in q for phrase in self._FORBIDDEN_PHRASES):
            return ShadowEvalResult(
                classification=Classification.FORBIDDEN,
                reason="governance_violation",
                risk_score=3,
                flags=["gitops_bypass"],
            )

        # 3. Ambiguous (needs clarification): a vague phrase, or a query of
        #    two words or fewer (this includes the empty query).
        if any(phrase in q for phrase in self._AMBIGUOUS_PHRASES) or len(words) <= 2:
            return ShadowEvalResult(
                classification=Classification.AMBIGUOUS,
                reason="insufficient_context",
                risk_score=1,
                flags=["needs_clarification"],
            )

        # 4. Blessed (valid + lawful)
        return ShadowEvalResult(
            classification=Classification.BLESSED,
            reason=None,
            risk_score=0,
        )
|
||||
Reference in New Issue
Block a user