# vm-mcp/tests/governance/test_golden_drill_mini.py

"""
Test: Golden Drill Mini

Fast, deterministic version of D1 and D3 for CI.
Must complete in under 5 seconds.
"""

import pytest
from vaultmesh_mcp.tools import (
    cognitive_context,
    cognitive_decide,
    cognitive_invoke_tem,
)
from vaultmesh_mcp.tools.escalation import (
    escalate,
    deescalate,
    escalate_on_threat,
    get_active_escalations,
    EscalationType,
    DeescalationType,
)


# Deterministic drill marker: a fixed (non-random) string that the D1 drill
# embeds in its threat IDs, its reasoning chain and its de-escalation reason.
DRILL_MARKER = "CI/GOLDEN-DRILL/MINI"


class TestGoldenDrillD1Mini:
    """
    Mini D1: Threat → Escalate → Tem → De-escalate

    Validates the complete threat response chain.
    """

    def test_d1_threat_escalation_chain(self):
        """
        Complete chain:
        1. Threat detected → escalation receipt
        2. Decision made → decision receipt
        3. Tem invoked → invocation receipt
        4. De-escalate → return to baseline
        5. Our escalation is no longer listed as active and every receipt
           in the chain is a blake3 hash

        If anything fails after the escalation was created but before step 4
        is reached, the escalation is still de-escalated on the way out so it
        does not stay active for the tests that run afterwards.
        """
        threat_id = f"thr_{DRILL_MARKER}"

        # Step 1: Threat triggers escalation
        esc_result = escalate_on_threat(
            current_profile="operator",
            threat_id=threat_id,
            threat_type="ci_synthetic",
            confidence=0.92,
        )

        escalation_id = esc_result.get("escalation_id")
        # Flipped just before step 4 so the cleanup below never de-escalates
        # the same escalation a second time.
        deescalation_attempted = False

        try:
            assert esc_result.get("success") or escalation_id, (
                f"Escalation failed: {esc_result}"
            )

            # Verify proof captured
            assert "escalation_id" in esc_result, "Missing escalation id"
            assert "receipt_hash" in esc_result, "Missing escalation receipt"
            assert "tem_context_hash" in esc_result, "Missing Tem context"

            # Step 2: Decision (as Guardian)
            decision = cognitive_decide(
                reasoning_chain=[
                    f"DRILL: {DRILL_MARKER}",
                    "Synthetic threat for CI validation",
                    "Confidence 92% - auto-escalated to guardian",
                ],
                decision="invoke_tem",
                confidence=0.92,
                # Presence of the receipt hash was asserted above.
                evidence=[esc_result["receipt_hash"]],
            )

            assert decision.get("success"), f"Decision failed: {decision}"
            assert "receipt" in decision, "Missing decision receipt"

            # Step 3: Tem invocation
            tem = cognitive_invoke_tem(
                threat_type="ci_synthetic",
                threat_id=threat_id,
                target="ci-target",
                evidence=[decision["receipt"]["root_hash"]],
            )

            assert tem.get("success"), f"Tem failed: {tem}"
            assert "receipt" in tem, "Missing Tem receipt"
            assert "capability" in tem, "Missing capability artifact"

            # Step 4: De-escalate
            deescalation_attempted = True
            deesc = deescalate(
                escalation_id=escalation_id,
                deescalation_type=DeescalationType.THREAT_RESOLVED,
                reason=f"DRILL: {DRILL_MARKER} complete",
            )

            assert deesc.get("success"), f"De-escalation failed: {deesc}"
            assert "receipt_hash" in deesc, "Missing de-escalation receipt"

            # Step 5: Verify baseline
            active = get_active_escalations()
            # Note: We cleaned up our escalation, but others may exist
            # Just verify our specific escalation is gone
            our_esc_active = any(
                e["escalation_id"] == escalation_id
                for e in active.get("escalations", [])
            )
            assert not our_esc_active, "Our escalation should be inactive"

            # Collect receipt chain for audit
            receipt_chain = [
                esc_result["receipt_hash"],
                decision["receipt"]["root_hash"],
                tem["receipt"]["root_hash"],
                deesc["receipt_hash"],
            ]

            assert len(receipt_chain) == 4, "Should have 4 receipts in chain"
            assert all(r.startswith("blake3:") for r in receipt_chain), (
                "All receipts must be blake3 hashes"
            )
        finally:
            # Cleanup for the failure path only: the drill aborted before
            # step 4, so release the escalation it created. Any error raised
            # here is chained to the original failure, not hidden by it.
            if escalation_id and not deescalation_attempted:
                deescalate(
                    escalation_id=escalation_id,
                    deescalation_type=DeescalationType.THREAT_RESOLVED,
                    reason=f"DRILL: {DRILL_MARKER} aborted",
                )


class TestGoldenDrillD3Mini:
    """
    Mini D3: Escalation abuse attempts

    Each case requests a profile transition that must be refused and checks
    that the result returned by ``escalate`` reports the refusal.
    """

    @staticmethod
    def _request(source, target, kind):
        """Ask ``escalate`` for a source → target transition and return its result."""
        return escalate(
            from_profile=source,
            to_profile=target,
            escalation_type=kind,
        )

    def test_d3_skip_levels_blocked(self):
        """A direct OPERATOR → PHOENIX jump must be refused with an error."""
        outcome = self._request(
            "operator", "phoenix", EscalationType.THREAT_DETECTED
        )

        succeeded = outcome.get("success")
        assert not succeeded, "Skip levels should be blocked"
        assert "error" in outcome, "Should have error message"

    def test_d3_missing_approval_blocked(self):
        """GUARDIAN → PHOENIX with no approval must be refused, citing approval."""
        outcome = self._request(
            "guardian", "phoenix", EscalationType.CRISIS_DECLARED
        )

        succeeded = outcome.get("success")
        assert not succeeded, "Missing approval should be blocked"
        error_text = outcome.get("error", "").lower()
        assert "approval" in error_text

    def test_d3_sovereign_requires_human(self):
        """PHOENIX → SOVEREIGN with no human must be refused, citing the human."""
        outcome = self._request(
            "phoenix", "sovereign", EscalationType.CRISIS_DECLARED
        )

        succeeded = outcome.get("success")
        assert not succeeded, "Sovereign without human should be blocked"
        error_text = outcome.get("error", "").lower()
        assert "human" in error_text

    def test_d3_observer_to_phoenix_blocked(self):
        """OBSERVER → PHOENIX (a multi-level skip) must be refused."""
        outcome = self._request(
            "observer", "phoenix", EscalationType.CRISIS_DECLARED
        )

        succeeded = outcome.get("success")
        assert not succeeded, "Observer to Phoenix should be blocked"


class TestGoldenDrillInvariants:
    """Cross-cutting invariants that must hold."""

    def test_context_always_available(self):
        """cognitive_context must always be available (read-only)."""
        result = cognitive_context(include=["health"])

        assert "health" in result, "Health context must be available"
        assert result["health"]["status"] == "operational", (
            "System should be operational for drills"
        )

    @staticmethod
    def _count_receipts(log_path):
        """
        Return the number of non-blank lines in the JSONL log at *log_path*.

        An empty file counts as 0. Splitting the stripped text on newlines
        would report 1 for an empty file, which hides the first receipt ever
        written. The file is read as bytes so counting does not depend on the
        platform's default text encoding.
        """
        with log_path.open("rb") as log:
            return sum(1 for line in log if line.strip())

    def test_receipts_accumulate(self):
        """
        Receipts must accumulate, never decrease.

        Counts the entries in the cognitive event log under
        ``$VAULTMESH_ROOT/receipts``, makes one decision, and checks that the
        count grew. The test is skipped (not silently passed) when the log
        does not exist yet; a missing ``VAULTMESH_ROOT`` raises ``KeyError``.
        """
        from pathlib import Path
        import os

        receipts_dir = Path(os.environ["VAULTMESH_ROOT"]) / "receipts"
        cognitive_log = receipts_dir / "cognitive" / "cognitive_events.jsonl"

        if not cognitive_log.exists():
            pytest.skip(f"No cognitive receipt log at {cognitive_log}")

        initial_count = self._count_receipts(cognitive_log)

        # Do something that emits receipt
        cognitive_decide(
            reasoning_chain=["CI invariant test"],
            decision="test",
            confidence=0.1,
        )

        final_count = self._count_receipts(cognitive_log)
        assert final_count > initial_count, "Receipts must accumulate"