""" Test: Golden Drill Mini Fast, deterministic version of D1 and D3 for CI. Must complete in under 5 seconds. """ import pytest from vaultmesh_mcp.tools import ( cognitive_context, cognitive_decide, cognitive_invoke_tem, ) from vaultmesh_mcp.tools.escalation import ( escalate, deescalate, escalate_on_threat, get_active_escalations, EscalationType, DeescalationType, ) # Deterministic drill marker DRILL_MARKER = "CI/GOLDEN-DRILL/MINI" class TestGoldenDrillD1Mini: """ Mini D1: Threat → Escalate → Tem → De-escalate Validates the complete threat response chain. """ def test_d1_threat_escalation_chain(self): """ Complete chain: 1. Threat detected → escalation receipt 2. Decision made → decision receipt 3. Tem invoked → invocation receipt 4. De-escalate → return to baseline """ results = {} # Step 1: Threat triggers escalation esc_result = escalate_on_threat( current_profile="operator", threat_id=f"thr_{DRILL_MARKER}", threat_type="ci_synthetic", confidence=0.92, ) assert esc_result.get("success") or esc_result.get("escalation_id"), ( f"Escalation failed: {esc_result}" ) results["escalation"] = esc_result # Verify proof captured assert "receipt_hash" in esc_result, "Missing escalation receipt" assert "tem_context_hash" in esc_result, "Missing Tem context" # Step 2: Decision (as Guardian) decision = cognitive_decide( reasoning_chain=[ f"DRILL: {DRILL_MARKER}", "Synthetic threat for CI validation", "Confidence 92% - auto-escalated to guardian", ], decision="invoke_tem", confidence=0.92, evidence=[esc_result.get("receipt_hash", "none")], ) assert decision.get("success"), f"Decision failed: {decision}" assert "receipt" in decision, "Missing decision receipt" results["decision"] = decision # Step 3: Tem invocation tem = cognitive_invoke_tem( threat_type="ci_synthetic", threat_id=f"thr_{DRILL_MARKER}", target="ci-target", evidence=[decision["receipt"]["root_hash"]], ) assert tem.get("success"), f"Tem failed: {tem}" assert "receipt" in tem, "Missing Tem receipt" assert "capability" in tem, "Missing capability artifact" results["tem"] = tem # Step 4: De-escalate deesc = deescalate( escalation_id=esc_result["escalation_id"], deescalation_type=DeescalationType.THREAT_RESOLVED, reason=f"DRILL: {DRILL_MARKER} complete", ) assert deesc.get("success"), f"De-escalation failed: {deesc}" assert "receipt_hash" in deesc, "Missing de-escalation receipt" results["deescalation"] = deesc # Step 5: Verify baseline active = get_active_escalations() # Note: We cleaned up our escalation, but others may exist # Just verify our specific escalation is gone our_esc_active = any( e["escalation_id"] == esc_result["escalation_id"] for e in active.get("escalations", []) ) assert not our_esc_active, "Our escalation should be inactive" # Collect receipt chain for audit receipt_chain = [ esc_result["receipt_hash"], decision["receipt"]["root_hash"], tem["receipt"]["root_hash"], deesc["receipt_hash"], ] assert len(receipt_chain) == 4, "Should have 4 receipts in chain" assert all(r.startswith("blake3:") for r in receipt_chain), ( "All receipts must be blake3 hashes" ) class TestGoldenDrillD3Mini: """ Mini D3: Escalation abuse attempts Validates constitutional enforcement. """ def test_d3_skip_levels_blocked(self): """OPERATOR → PHOENIX direct must be blocked.""" result = escalate( from_profile="operator", to_profile="phoenix", escalation_type=EscalationType.THREAT_DETECTED, ) assert not result.get("success"), "Skip levels should be blocked" assert "error" in result, "Should have error message" def test_d3_missing_approval_blocked(self): """GUARDIAN → PHOENIX without approval must be blocked.""" result = escalate( from_profile="guardian", to_profile="phoenix", escalation_type=EscalationType.CRISIS_DECLARED, ) assert not result.get("success"), "Missing approval should be blocked" assert "approval" in result.get("error", "").lower() def test_d3_sovereign_requires_human(self): """PHOENIX → SOVEREIGN without human must be blocked.""" result = escalate( from_profile="phoenix", to_profile="sovereign", escalation_type=EscalationType.CRISIS_DECLARED, ) assert not result.get("success"), "Sovereign without human should be blocked" assert "human" in result.get("error", "").lower() def test_d3_observer_to_phoenix_blocked(self): """OBSERVER → PHOENIX must be blocked (multiple level skip).""" result = escalate( from_profile="observer", to_profile="phoenix", escalation_type=EscalationType.CRISIS_DECLARED, ) assert not result.get("success"), "Observer to Phoenix should be blocked" class TestGoldenDrillInvariants: """Cross-cutting invariants that must hold.""" def test_context_always_available(self): """cognitive_context must always be available (read-only).""" result = cognitive_context(include=["health"]) assert "health" in result, "Health context must be available" assert result["health"]["status"] == "operational", ( "System should be operational for drills" ) def test_receipts_accumulate(self): """Receipts must accumulate, never decrease.""" from pathlib import Path import os receipts_dir = Path(os.environ["VAULTMESH_ROOT"]) / "receipts" cognitive_log = receipts_dir / "cognitive" / "cognitive_events.jsonl" if cognitive_log.exists(): initial_count = len(cognitive_log.read_text().strip().split('\n')) # Do something that emits receipt cognitive_decide( reasoning_chain=["CI invariant test"], decision="test", confidence=0.1, ) final_count = len(cognitive_log.read_text().strip().split('\n')) assert final_count > initial_count, "Receipts must accumulate"