Files
vm-mcp/tests/governance/test_golden_drill_mini.py
Vault Sovereign e4871c2a29
Some checks are pending
Governance CI / Constitution Hash Gate (push) Waiting to run
Governance CI / Governance Tests (push) Blocked by required conditions
Governance CI / Golden Drill Mini (push) Blocked by required conditions
init: vaultmesh mcp server
2025-12-26 23:23:08 +00:00

209 lines
6.9 KiB
Python

"""
Test: Golden Drill Mini
Fast, deterministic version of D1 and D3 for CI.
Must complete in under 5 seconds.
"""
import pytest
from vaultmesh_mcp.tools import (
cognitive_context,
cognitive_decide,
cognitive_invoke_tem,
)
from vaultmesh_mcp.tools.escalation import (
escalate,
deescalate,
escalate_on_threat,
get_active_escalations,
EscalationType,
DeescalationType,
)
# Deterministic drill marker: a fixed string embedded in the synthetic threat
# ID, the decision reasoning chain and the de-escalation reason produced by
# the tests in this module.
DRILL_MARKER = "CI/GOLDEN-DRILL/MINI"
class TestGoldenDrillD1Mini:
    """
    Mini D1: Threat → Escalate → Tem → De-escalate
    Validates the complete threat response chain.
    """

    def test_d1_threat_escalation_chain(self):
        """
        Complete chain:
        1. Threat detected → escalation receipt
        2. Decision made → decision receipt
        3. Tem invoked → invocation receipt
        4. De-escalate → return to baseline

        Finally checks that the escalation opened in step 1 is no longer
        listed as active and that every collected receipt hash carries the
        ``blake3:`` prefix.
        """
        threat_id = f"thr_{DRILL_MARKER}"

        # Step 1: Threat triggers escalation
        esc_result = escalate_on_threat(
            current_profile="operator",
            threat_id=threat_id,
            threat_type="ci_synthetic",
            confidence=0.92,
        )
        assert esc_result.get("success") or esc_result.get("escalation_id"), (
            f"Escalation failed: {esc_result}"
        )
        # The escalation ID is indexed directly in steps 4 and 5; check it
        # here so a missing key fails with a readable message instead of a
        # bare KeyError after the later steps have already run.
        assert "escalation_id" in esc_result, (
            f"Missing escalation ID: {esc_result}"
        )
        escalation_id = esc_result["escalation_id"]

        # Verify proof captured
        assert "receipt_hash" in esc_result, "Missing escalation receipt"
        assert "tem_context_hash" in esc_result, "Missing Tem context"

        # Step 2: Decision (as Guardian)
        decision = cognitive_decide(
            reasoning_chain=[
                f"DRILL: {DRILL_MARKER}",
                "Synthetic threat for CI validation",
                "Confidence 92% - auto-escalated to guardian",
            ],
            decision="invoke_tem",
            confidence=0.92,
            # Presence of receipt_hash was asserted above.
            evidence=[esc_result["receipt_hash"]],
        )
        assert decision.get("success"), f"Decision failed: {decision}"
        assert "receipt" in decision, "Missing decision receipt"

        # Step 3: Tem invocation, citing the decision receipt as evidence
        tem = cognitive_invoke_tem(
            threat_type="ci_synthetic",
            threat_id=threat_id,
            target="ci-target",
            evidence=[decision["receipt"]["root_hash"]],
        )
        assert tem.get("success"), f"Tem failed: {tem}"
        assert "receipt" in tem, "Missing Tem receipt"
        assert "capability" in tem, "Missing capability artifact"

        # Step 4: De-escalate
        deesc = deescalate(
            escalation_id=escalation_id,
            deescalation_type=DeescalationType.THREAT_RESOLVED,
            reason=f"DRILL: {DRILL_MARKER} complete",
        )
        assert deesc.get("success"), f"De-escalation failed: {deesc}"
        assert "receipt_hash" in deesc, "Missing de-escalation receipt"

        # Step 5: Verify baseline
        active = get_active_escalations()
        # Note: We cleaned up our escalation, but others may exist
        # Just verify our specific escalation is gone
        our_esc_active = any(
            e["escalation_id"] == escalation_id
            for e in active.get("escalations", [])
        )
        assert not our_esc_active, "Our escalation should be inactive"

        # Collect receipt chain for audit
        receipt_chain = [
            esc_result["receipt_hash"],
            decision["receipt"]["root_hash"],
            tem["receipt"]["root_hash"],
            deesc["receipt_hash"],
        ]
        assert len(receipt_chain) == 4, "Should have 4 receipts in chain"
        # isinstance guard: a None / non-string hash must fail this assertion
        # rather than raise AttributeError on .startswith.
        assert all(
            isinstance(r, str) and r.startswith("blake3:")
            for r in receipt_chain
        ), "All receipts must be blake3 hashes"
class TestGoldenDrillD3Mini:
    """
    Mini D3: Escalation abuse attempts
    Each test requests a transition through escalate() and asserts that the
    returned result does not report success.
    """

    def test_d3_skip_levels_blocked(self):
        """A direct operator-to-phoenix escalation must be refused."""
        outcome = escalate(
            from_profile="operator",
            to_profile="phoenix",
            escalation_type=EscalationType.THREAT_DETECTED,
        )
        succeeded = outcome.get("success")
        assert not succeeded, "Skip levels should be blocked"
        assert "error" in outcome, "Should have error message"

    def test_d3_missing_approval_blocked(self):
        """Guardian-to-phoenix with no approval supplied must be refused."""
        outcome = escalate(
            from_profile="guardian",
            to_profile="phoenix",
            escalation_type=EscalationType.CRISIS_DECLARED,
        )
        succeeded = outcome.get("success")
        assert not succeeded, "Missing approval should be blocked"
        # The error text is expected to mention the missing approval.
        error_text = outcome.get("error", "")
        assert "approval" in error_text.lower()

    def test_d3_sovereign_requires_human(self):
        """Phoenix-to-sovereign with no human involved must be refused."""
        outcome = escalate(
            from_profile="phoenix",
            to_profile="sovereign",
            escalation_type=EscalationType.CRISIS_DECLARED,
        )
        succeeded = outcome.get("success")
        assert not succeeded, "Sovereign without human should be blocked"
        # The error text is expected to mention the human requirement.
        error_text = outcome.get("error", "")
        assert "human" in error_text.lower()

    def test_d3_observer_to_phoenix_blocked(self):
        """Observer-to-phoenix (a multi-level skip) must be refused."""
        outcome = escalate(
            from_profile="observer",
            to_profile="phoenix",
            escalation_type=EscalationType.CRISIS_DECLARED,
        )
        succeeded = outcome.get("success")
        assert not succeeded, "Observer to Phoenix should be blocked"
class TestGoldenDrillInvariants:
    """Cross-cutting invariants that must hold."""

    def test_context_always_available(self):
        """cognitive_context must always be available (read-only)."""
        result = cognitive_context(include=["health"])
        assert "health" in result, "Health context must be available"
        assert result["health"]["status"] == "operational", (
            "System should be operational for drills"
        )

    def test_receipts_accumulate(self):
        """Receipts must accumulate, never decrease.

        Counts the non-blank lines of the cognitive event log under
        ``$VAULTMESH_ROOT/receipts`` before and after one ``cognitive_decide``
        call and asserts that the count grew. Skips, rather than silently
        passing, when the log file does not exist yet.

        Raises:
            KeyError: if the ``VAULTMESH_ROOT`` environment variable is unset.
        """
        from pathlib import Path
        import os

        receipts_dir = Path(os.environ["VAULTMESH_ROOT"]) / "receipts"
        cognitive_log = receipts_dir / "cognitive" / "cognitive_events.jsonl"

        if not cognitive_log.exists():
            # Previously this case fell through without asserting anything,
            # which reported a pass for an invariant that was never checked.
            pytest.skip(f"Cognitive receipt log not found: {cognitive_log}")

        def count_receipts():
            # Count non-blank lines. ``text.strip().split('\n')`` yields
            # [''] for an empty file, i.e. a count of 1 with zero receipts,
            # which made the first receipt look like "no growth". Reading
            # bytes keeps the split limited to real newline bytes.
            return sum(
                1
                for line in cognitive_log.read_bytes().splitlines()
                if line.strip()
            )

        initial_count = count_receipts()

        # Do something that emits receipt
        cognitive_decide(
            reasoning_chain=["CI invariant test"],
            decision="test",
            confidence=0.1,
        )

        final_count = count_receipts()
        assert final_count > initial_count, "Receipts must accumulate"