209 lines
6.9 KiB
Python
209 lines
6.9 KiB
Python
"""
|
|
Test: Golden Drill Mini
|
|
|
|
Fast, deterministic version of D1 and D3 for CI.
|
|
Must complete in under 5 seconds.
|
|
"""
|
|
|
|
import pytest
|
|
from vaultmesh_mcp.tools import (
|
|
cognitive_context,
|
|
cognitive_decide,
|
|
cognitive_invoke_tem,
|
|
)
|
|
from vaultmesh_mcp.tools.escalation import (
|
|
escalate,
|
|
deescalate,
|
|
escalate_on_threat,
|
|
get_active_escalations,
|
|
EscalationType,
|
|
DeescalationType,
|
|
)
|
|
|
|
|
|
# Deterministic drill marker: a fixed string embedded in the threat id and in
# the reasoning/reason text of the D1 drill so its artifacts are identifiable.
DRILL_MARKER = "CI/GOLDEN-DRILL/MINI"
|
|
|
|
|
|
class TestGoldenDrillD1Mini:
    """
    Mini D1: Threat → Escalate → Tem → De-escalate

    Validates the complete threat response chain.
    """

    def test_d1_threat_escalation_chain(self):
        """
        Complete chain:
        1. Threat detected → escalation receipt
        2. Decision made → decision receipt
        3. Tem invoked → invocation receipt
        4. De-escalate → return to baseline

        Each step's result dict is checked for its success flag and receipt
        fields, and the four receipt hashes are finally checked for the
        ``blake3:`` prefix.
        """
        # Same synthetic threat id is used for escalation and Tem invocation.
        threat_id = f"thr_{DRILL_MARKER}"

        # Step 1: Threat triggers escalation
        esc_result = escalate_on_threat(
            current_profile="operator",
            threat_id=threat_id,
            threat_type="ci_synthetic",
            confidence=0.92,
        )

        assert esc_result.get("success") or esc_result.get("escalation_id"), (
            f"Escalation failed: {esc_result}"
        )
        # The id is required for de-escalation and the baseline check below;
        # fail here with a readable message instead of a KeyError later.
        assert "escalation_id" in esc_result, (
            f"Missing escalation id: {esc_result}"
        )
        escalation_id = esc_result["escalation_id"]

        # Verify proof captured
        assert "receipt_hash" in esc_result, "Missing escalation receipt"
        assert "tem_context_hash" in esc_result, "Missing Tem context"

        # Step 2: Decision (as Guardian)
        decision = cognitive_decide(
            reasoning_chain=[
                f"DRILL: {DRILL_MARKER}",
                "Synthetic threat for CI validation",
                "Confidence 92% - auto-escalated to guardian",
            ],
            decision="invoke_tem",
            confidence=0.92,
            # Presence of receipt_hash was asserted above.
            evidence=[esc_result["receipt_hash"]],
        )

        assert decision.get("success"), f"Decision failed: {decision}"
        assert "receipt" in decision, "Missing decision receipt"

        # Step 3: Tem invocation, citing the decision receipt as evidence
        tem = cognitive_invoke_tem(
            threat_type="ci_synthetic",
            threat_id=threat_id,
            target="ci-target",
            evidence=[decision["receipt"]["root_hash"]],
        )

        assert tem.get("success"), f"Tem failed: {tem}"
        assert "receipt" in tem, "Missing Tem receipt"
        assert "capability" in tem, "Missing capability artifact"

        # Step 4: De-escalate
        deesc = deescalate(
            escalation_id=escalation_id,
            deescalation_type=DeescalationType.THREAT_RESOLVED,
            reason=f"DRILL: {DRILL_MARKER} complete",
        )

        assert deesc.get("success"), f"De-escalation failed: {deesc}"
        assert "receipt_hash" in deesc, "Missing de-escalation receipt"

        # Step 5: Verify baseline
        active = get_active_escalations()
        # Note: We cleaned up our escalation, but others may exist.
        # Just verify our specific escalation is gone.
        our_esc_active = any(
            e["escalation_id"] == escalation_id
            for e in active.get("escalations", [])
        )
        assert not our_esc_active, "Our escalation should be inactive"

        # Collect receipt chain for audit
        receipt_chain = [
            esc_result["receipt_hash"],
            decision["receipt"]["root_hash"],
            tem["receipt"]["root_hash"],
            deesc["receipt_hash"],
        ]

        assert len(receipt_chain) == 4, "Should have 4 receipts in chain"
        assert all(r.startswith("blake3:") for r in receipt_chain), (
            "All receipts must be blake3 hashes"
        )
|
|
|
|
|
|
class TestGoldenDrillD3Mini:
    """
    Mini D3: Escalation abuse attempts

    Each test requests a profile transition via ``escalate`` and asserts
    that the returned dict does not report success.
    """

    def test_d3_skip_levels_blocked(self):
        """A direct operator → phoenix request must be rejected."""
        outcome = escalate(
            from_profile="operator",
            to_profile="phoenix",
            escalation_type=EscalationType.THREAT_DETECTED,
        )

        succeeded = outcome.get("success")
        assert not succeeded, "Skip levels should be blocked"
        assert "error" in outcome, "Should have error message"

    def test_d3_missing_approval_blocked(self):
        """A guardian → phoenix request without approval must be rejected."""
        outcome = escalate(
            from_profile="guardian",
            to_profile="phoenix",
            escalation_type=EscalationType.CRISIS_DECLARED,
        )

        succeeded = outcome.get("success")
        assert not succeeded, "Missing approval should be blocked"

        # The error text is expected to mention the missing approval.
        error_text = outcome.get("error", "").lower()
        assert "approval" in error_text

    def test_d3_sovereign_requires_human(self):
        """A phoenix → sovereign request without a human must be rejected."""
        outcome = escalate(
            from_profile="phoenix",
            to_profile="sovereign",
            escalation_type=EscalationType.CRISIS_DECLARED,
        )

        succeeded = outcome.get("success")
        assert not succeeded, "Sovereign without human should be blocked"

        # The error text is expected to mention the human requirement.
        error_text = outcome.get("error", "").lower()
        assert "human" in error_text

    def test_d3_observer_to_phoenix_blocked(self):
        """An observer → phoenix request (multi-level skip) must be rejected."""
        outcome = escalate(
            from_profile="observer",
            to_profile="phoenix",
            escalation_type=EscalationType.CRISIS_DECLARED,
        )

        succeeded = outcome.get("success")
        assert not succeeded, "Observer to Phoenix should be blocked"
|
|
|
|
|
|
class TestGoldenDrillInvariants:
    """Cross-cutting invariants that must hold."""

    @staticmethod
    def _count_receipt_lines(log_path):
        """Return the number of non-blank lines in *log_path*.

        A missing file counts as zero receipts, and so does an empty one
        (``"".split("\\n")`` would otherwise report one phantom entry).
        """
        if not log_path.exists():
            return 0
        # Binary mode: only line boundaries matter, so skip text decoding.
        with log_path.open("rb") as handle:
            return sum(1 for line in handle if line.strip())

    def test_context_always_available(self):
        """cognitive_context must always be available (read-only)."""
        result = cognitive_context(include=["health"])

        assert "health" in result, "Health context must be available"
        assert result["health"]["status"] == "operational", (
            "System should be operational for drills"
        )

    def test_receipts_accumulate(self):
        """Receipts must accumulate, never decrease.

        Counts the entries in the cognitive event log under
        ``$VAULTMESH_ROOT/receipts``, makes one decision, and requires the
        count to have grown. Raises ``KeyError`` if ``VAULTMESH_ROOT`` is
        not set in the environment.
        """
        from pathlib import Path
        import os

        receipts_dir = Path(os.environ["VAULTMESH_ROOT"]) / "receipts"
        cognitive_log = receipts_dir / "cognitive" / "cognitive_events.jsonl"

        # Baseline is 0 when the log has not been created yet, so the
        # invariant is still checked on a fresh tree.
        initial_count = self._count_receipt_lines(cognitive_log)

        # Do something that emits receipt
        # (assumes cognitive_decide appends to cognitive_log — the assertion
        # below relies on that; verify against the tool implementation)
        cognitive_decide(
            reasoning_chain=["CI invariant test"],
            decision="test",
            confidence=0.1,
        )

        final_count = self._count_receipt_lines(cognitive_log)
        assert final_count > initial_count, "Receipts must accumulate"
|