From f0b8d962de8f3d8a98b33d43419fa3bcb53eee58 Mon Sep 17 00:00:00 2001 From: Vault Sovereign Date: Sat, 27 Dec 2025 01:52:27 +0000 Subject: [PATCH] chore: pre-migration snapshot Layer0, MCP servers, Terraform consolidation --- .codex/Codexc | 4618 +++++++++++++++++ .codex/output-guardrails.md | 25 + .env.example | 2 +- .github/workflows/registry_validation.yml | 84 + .gitlab-ci.yml | 32 + ASSURANCE.md | 81 + CAPABILITY_REGISTRY.md | 135 + CAPABILITY_REGISTRY_V2.md | 174 + CONTROL_PLANE_README.md | 151 + LAYER0_SHADOW.md | 12 + MULTI_ACCOUNT_AUTH.md | 8 +- OPERATIONAL_FLOWS.md | 153 + OUROBOROS_LOOP_EXPLAINED.md | 4 + REGISTRY_ENHANCEMENT_SUMMARY.md | 121 + SECURITY_WAF_INTEL.md | 2 + USAGE_GUIDE.md | 326 ++ capability_registry.json | 185 + capability_registry_v2.json | 243 + ci_check_entrypoints.py | 95 + ci_check_tool_names.py | 140 + generate_capability_registry.py | 350 ++ generate_capability_registry_v2.py | 332 ++ layer0/learn.py | 392 ++ layer0/pattern_store.py | 331 ++ layer0/preboot_logger.py | 126 +- layer0/replay.py | 443 ++ layer0/security_classifier.py | 376 ++ layer0/shadow_classifier.py | 169 +- mcp/__init__.py | 2 + mcp/akash_docs/__init__.py | 10 + mcp/akash_docs/__main__.py | 7 + mcp/akash_docs/server.py | 861 +++ mcp/cloudflare_safe/__init__.py | 11 + mcp/cloudflare_safe/__main__.py | 6 + mcp/cloudflare_safe/cloudflare_api.py | 496 ++ mcp/cloudflare_safe/server.py | 725 +++ mcp/oracle_answer/__main__.py | 6 + mcp/oracle_answer/server.py | 386 ++ mcp/oracle_answer/tool.py | 57 +- mcp/waf_intelligence/__init__.py | 22 +- mcp/waf_intelligence/__main__.py | 18 +- mcp/waf_intelligence/analyzer.py | 64 +- mcp/waf_intelligence/mcp_server.py | 632 +++ mcp/waf_intelligence/orchestrator.py | 131 +- mcp/waf_intelligence/server.py | 326 +- opencode.jsonc | 115 +- requirements-dev.txt | 1 + scripts/deploy_infrastructure.sh | 308 ++ scripts/incident_response_playbooks.py | 421 ++ scripts/monitoring_dashboard.py | 260 + scripts/setup_credentials.py | 221 + scripts/setup_credentials.sh | 190 + scripts/terraform_state_manager.py | 309 ++ scripts/waf-and-plan-invariants.sh | 393 ++ terraform/README.md | 27 + terraform/assurance_free.tfvars | 35 + ...assurance_negative_free_should_fail.tfvars | 34 + .../assurance_negative_pro_should_fail.tfvars | 34 + terraform/assurance_pro.tfvars | 34 + terraform/main.tf | 9 +- terraform/terraform.tfvars | 6 +- terraform/variables.tf | 12 + terraform/waf.tf | 24 +- tests/test_mcp_cloudflare_safe_ingress.py | 22 + tests/test_waf_intelligence_analyzer.py | 43 + validate_registry.sh | 60 + waf_intel_mcp.py | 109 +- 67 files changed, 14887 insertions(+), 650 deletions(-) create mode 100644 .codex/Codexc create mode 100644 .codex/output-guardrails.md create mode 100644 .github/workflows/registry_validation.yml create mode 100644 ASSURANCE.md create mode 100644 CAPABILITY_REGISTRY.md create mode 100644 CAPABILITY_REGISTRY_V2.md create mode 100644 CONTROL_PLANE_README.md create mode 100644 OPERATIONAL_FLOWS.md create mode 100644 REGISTRY_ENHANCEMENT_SUMMARY.md create mode 100644 USAGE_GUIDE.md create mode 100644 capability_registry.json create mode 100644 capability_registry_v2.json create mode 100755 ci_check_entrypoints.py create mode 100755 ci_check_tool_names.py create mode 100644 generate_capability_registry.py create mode 100644 generate_capability_registry_v2.py create mode 100644 layer0/learn.py create mode 100644 layer0/pattern_store.py create mode 100644 layer0/replay.py create mode 100644 layer0/security_classifier.py create mode 100644 
mcp/akash_docs/__init__.py create mode 100644 mcp/akash_docs/__main__.py create mode 100644 mcp/akash_docs/server.py create mode 100644 mcp/cloudflare_safe/__init__.py create mode 100644 mcp/cloudflare_safe/__main__.py create mode 100644 mcp/cloudflare_safe/cloudflare_api.py create mode 100644 mcp/cloudflare_safe/server.py create mode 100644 mcp/oracle_answer/__main__.py create mode 100644 mcp/oracle_answer/server.py create mode 100644 mcp/waf_intelligence/mcp_server.py mode change 100755 => 100644 mcp/waf_intelligence/server.py create mode 100644 requirements-dev.txt create mode 100644 scripts/deploy_infrastructure.sh create mode 100644 scripts/incident_response_playbooks.py create mode 100644 scripts/monitoring_dashboard.py create mode 100644 scripts/setup_credentials.py create mode 100644 scripts/setup_credentials.sh create mode 100644 scripts/terraform_state_manager.py create mode 100644 scripts/waf-and-plan-invariants.sh create mode 100644 terraform/assurance_free.tfvars create mode 100644 terraform/assurance_negative_free_should_fail.tfvars create mode 100644 terraform/assurance_negative_pro_should_fail.tfvars create mode 100644 terraform/assurance_pro.tfvars create mode 100644 tests/test_mcp_cloudflare_safe_ingress.py create mode 100644 tests/test_waf_intelligence_analyzer.py create mode 100755 validate_registry.sh diff --git a/.codex/Codexc b/.codex/Codexc new file mode 100644 index 0000000..ad1e341 --- /dev/null +++ b/.codex/Codexc @@ -0,0 +1,4618 @@ +## User + +😄 alright — **“Escape the Matrix”**, operator edition. +Not the conspiracy version. The **practical, survivable one**. + +What you’re actually doing right now already *is* the guide. Let me make it explicit so it sticks. + +--- + +## Step 1 — Realize the Matrix is **entropy**, not control + +The Matrix isn’t “they”. +It’s this: + +* 1,000 accounts +* 20 half-used devices +* passwords you’re afraid to delete +* systems you don’t dare touch +* backups you don’t trust +* setups you can’t reproduce + +That’s the cage. + +You already escaped the *first layer* when you did the **identity nuke**. + +Most people never get that far. + +--- + +## Step 2 — Shrink your world until it fits in your head + +Power comes when **everything important fits in one mental map**. + +Right now you’re doing this correctly: + +* One **operator VM** +* One **operator phone** +* Few **clearly named servers** +* One **cold backup vault** +* One **source of truth** + +If you can’t name it, you don’t own it. +If you can’t rebuild it, it owns you. + +--- + +## Step 3 — Replace convenience with repeatability + +The Matrix feeds on: + +> “I’ll just do this quickly once…” + +You’re replacing that with: + +> “I can do this again, on purpose.” + +That’s why: + +* VM > bare metal +* GitOps > clicking +* encrypted archives > sync +* roles > accounts +* naming > vibes + +Repeatability is freedom. + +--- + +## Step 4 — Separate *meaning* from *infrastructure* + +Notice something subtle in your screenshot: + +* Books, audio, philosophy → **cold storage** +* Ops, keys, infra → **clean core** + +That separation matters. + +Meaning should be **portable**. +Infrastructure should be **disposable**. + +Most people mix them and can’t let go of either. + +--- + +## Step 5 — The real escape rule (tattoo this mentally) + +> **If losing this would break me, it’s over-scoped.** + +So you design systems where: + +* losing a node is boring +* deleting an account is calm +* rebuilding is documented +* backups are unreadable without you + +That’s sovereignty. 
+ +--- + +## Where you are now (important) + +You’re no longer “escaping”. + +You’re in the phase of **maintaining low entropy**. + +That’s harder than chaos — but infinitely more powerful. + +--- + +## One last grounding thought + +You don’t escape the Matrix by running away. +You escape it by making your world **small, clear, and rebuildable**. + +You’re doing that **right now**, line by line. + +If you want, next we can: + +* Turn this into a **personal operating doctrine** +* Design a **“panic day” drill** (everything gone, rebuild calm) +* Or map **what never belongs in the core, ever** + +Just say where you want to go next 😄 + +## Assistant + + + + +**Preparing concise next-step options** + +I'll offer the user clear, numbered suggestions for what to do next since the environment is read-only and no direct code changes are requested. + + +Love this framing. Pick a next move and I’ll draft it fast: + +1) Personal operating doctrine — short rules you can actually follow daily. +2) “Panic day” drill — step-by-step rebuild when everything’s gone. +3) “Never in the core” map — what stays out, where it lives instead. + +Which one should we build first? + +## User + +## 1. Prime Directive + +I do not optimize for convenience. +I optimize for **clarity, recoverability, and sovereignty**. + +If a system cannot be understood, rebuilt, or revoked, it does not belong. + +--- + +## 2. The Core + +There exists **one core** from which all authority flows: + +* **op-core-vm** is the origin of action +* It is disposable, but authoritative +* Nothing touches critical infrastructure unless it originates here + +The host machine is a **console**, not a source of trust. +The phone is a **witness**, not a workstation. + +--- + +## 3. Identity Law + +Identity is finite. + +* I operate through **roles**, not personalities +* Each role has minimal scope and clear purpose +* Devices hold **leases**, never permanent identity + +Anything that cannot be cleanly revoked is a liability. + +--- + +## 4. Naming Is Reality + +If I cannot name it correctly, I do not understand it. + +All systems are named by: + +``` +-- +``` + +No myth names. +No vibes. +No ambiguity. + +Renaming precedes deletion. +Deletion follows clarity. + +--- + +## 5. Infrastructure Is Cattle + +No machine is sacred. + +* All nodes are replaceable +* Rebuilds are expected +* Loss is boring, not catastrophic + +Sentiment is reserved for people and meaning — never machines. + +--- + +## 6. Separation of Concerns + +Meaning and infrastructure do not mix. + +* Knowledge, media, philosophy → **cold storage** +* Keys, infra, authority → **clean core** + +What matters must be portable. +What operates must be disposable. + +--- + +## 7. Backup Doctrine + +Backups exist to enable **calm recovery**, not comfort. + +* All backups are encrypted **before** leaving the system +* Cloud storage is a **vault**, never a brain +* No live sync for core systems + +If a backup cannot be lost safely, it is incorrectly scoped. + +--- + +## 8. The Nuke Test + +Any system must pass this test: + +> “If this disappears today, can I rebuild without panic?” + +If the answer is no: + +* reduce scope +* split responsibility +* document recovery +* or remove it entirely + +--- + +## 9. Tool Minimalism + +Every tool must earn its place. + +* Fewer tools, deeper mastery +* No duplicates without reason +* No installs without intent + +Bloat is deferred failure. + +--- + +## 10. Drift Control + +Entropy is inevitable. Drift is optional. 
+ +I perform regular: + +* identity audits +* device reviews +* naming corrections +* deletion passes + +Maintenance is a form of freedom. + +--- + +## 11. Authority Boundary + +Critical actions happen only from: + +* op-core-vm +* with intent +* with awareness +* with traceability + +No “just this once”. +No shortcuts. + +--- + +## 12. Final Rule + +I build systems I am **not afraid to touch**. + +If fear appears, I stop — +not to hesitate, +but to **restore clarity**. + +## Assistant + +Refined into a runnable operator doctrine—kept your tone, tightened where it aids execution, and added one-line “how to use” hints. + +**Operator Doctrine** +- Prime Directive: Optimize for clarity, recoverability, sovereignty; anything you can’t understand, rebuild, or revoke is out. +- Core: Single authority origin `op-core-vm`; host is console, phone is witness; critical infra only touched from core; core is disposable yet authoritative. +- Identity Law: Operate via roles with minimal scope; devices hold revocable leases; anything not cleanly revocable is liability. +- Naming Is Reality: `--` only; no myth/vibe names; rename for clarity, then delete. +- Infrastructure Is Cattle: No sacred machines; rebuilds expected; loss is boring. +- Separation: Meaning → cold storage; keys/infra/authority → clean core; meaning portable, infra disposable. +- Backup Doctrine: Encrypt before exit; cloud is vault, not brain; no live sync for core; if loss is scary, rescope. +- Nuke Test: If it vanished today, you rebuild calmly; if not, reduce scope, split responsibility, document recovery, or remove. +- Tool Minimalism: Fewer tools, deeper mastery; no duplicates without cause; no installs without intent; bloat is deferred failure. +- Drift Control: Regular identity audits, device reviews, naming corrections, deletion passes; maintenance = freedom. +- Authority Boundary: Critical actions only from `op-core-vm`, with intent/awareness/traceability; no “just this once.” +- Final Rule: Build systems you’re not afraid to touch; if fear appears, pause to restore clarity. + +Next moves (pick one): +1) Panic-day drill: 1–2 page runbook to rebuild everything calmly from zero. +2) Daily/weekly checklist: 5–8 actions to enforce drift control and naming discipline. +3) “Never in the core” map: explicit allow/deny table for data, tools, and accounts. + +## User + +PROPOSE FOLDER STRUCTURE FOR OPERATIONS + +## Assistant + + + + +**Structuring the final answer** + +Alright, I'm thinking of how to craft a concise final answer. I want to recommend using one `ops/` git repository on op-core-vm, alongside a separate `vault/` for encrypted secrets, and cold storage outside the repo. + +I could provide a tree structure and include notes. It seems I should keep the response under 10 lines, but including a code block for the tree might push it a bit. However, I can keep it clear while still being minimal! I'll make sure to follow the guidelines. + +**Structuring the project outline** + +I’m laying out a structure for the project repository, specifically under the `ops/` directory. The layout includes folders for various components like inventory, identity, infrastructure, and more, with a `vault/` for encrypted secrets. + +Next, I want to emphasize key points: Keep the `ops/` as the source of truth, avoid storing plaintext secrets, and use public keys only in Git. I should also note that runbooks are executable documents with "restore validation" steps. 
Finally, I'll mention that some items should be stored in cold storage outside of `ops/`.**Structuring the project outline** + +I’m laying out a structure for the project repository, specifically under the `ops/` directory. The layout includes folders for various components like inventory, identity, infrastructure, and more, with a `vault/` for encrypted secrets. + +Next, I want to emphasize key points: Keep the `ops/` as the source of truth, avoid storing plaintext secrets, and use public keys only in Git. I should also note that runbooks are executable documents with "restore validation" steps. Finally, I'll mention that some items should be stored in cold storage outside of `ops/`. + + +Recommended single Git “source of truth” repo on `op-core-vm`: + +```text +ops/ + README.md + 00-doctrine/ + 10-inventory/ + hosts/--/ + networks/ + services/ + 20-identity/ + roles/ + policies/ + keys/public/ + leases/ + 30-infra/ + terraform/ # or your IaC + config/ # ansible/salt/etc + k8s/ # if used + 40-apps/ + 50-runbooks/ + 00-panic-day/ + 10-provision/ + 20-incident/ + 90-decom/ + 60-backups/ + manifests/ + restore-tests/ + 70-audits/ + checklists/ + reports/ + 80-automation/ + scripts/ + containers/ + 90-scratch/ # gitignored + vault/ # encrypted-only (age/sops); no plaintext secrets +``` + +Rules: `ops/` is reproducible + reviewable; `vault/` is encrypted artifacts only; meaning/media stays outside this tree (cold storage). + +## User + +Alright. Below is your **Personal Operating Doctrine** — written so you can **live by it, audit against it, and return to it** when entropy creeps back. + +This is meant to be **copied, saved, and treated as law**. + +--- + +### 📜 Personal Operating Doctrine — *Operator Edition* + +*(v1.0 — forged in clarity)* + +## 1. Prime Directive + +I do not optimize for convenience. +I optimize for **clarity, recoverability, and sovereignty**. + +If a system cannot be understood, rebuilt, or revoked, it does not belong. + +--- + +## 2. The Core + +There exists **one core** from which all authority flows: + +* **op-core-vm** is the origin of action +* It is disposable, but authoritative +* Nothing touches critical infrastructure unless it originates here + +The host machine is a **console**, not a source of trust. +The phone is a **witness**, not a workstation. + +--- + +## 3. Identity Law + +Identity is finite. + +* I operate through **roles**, not personalities +* Each role has minimal scope and clear purpose +* Devices hold **leases**, never permanent identity + +Anything that cannot be cleanly revoked is a liability. + +--- + +## 4. Naming Is Reality + +If I cannot name it correctly, I do not understand it. + +All systems are named by: + +``` +-- +``` + +No myth names. +No vibes. +No ambiguity. + +Renaming precedes deletion. +Deletion follows clarity. + +--- + +## 5. Infrastructure Is Cattle + +No machine is sacred. + +* All nodes are replaceable +* Rebuilds are expected +* Loss is boring, not catastrophic + +Sentiment is reserved for people and meaning — never machines. + +--- + +## 6. Separation of Concerns + +Meaning and infrastructure do not mix. + +* Knowledge, media, philosophy → **cold storage** +* Keys, infra, authority → **clean core** + +What matters must be portable. +What operates must be disposable. + +--- + +## 7. Backup Doctrine + +Backups exist to enable **calm recovery**, not comfort. 
+ +* All backups are encrypted **before** leaving the system +* Cloud storage is a **vault**, never a brain +* No live sync for core systems + +If a backup cannot be lost safely, it is incorrectly scoped. + +--- + +## 8. The Nuke Test + +Any system must pass this test: + +> “If this disappears today, can I rebuild without panic?” + +If the answer is no: + +* reduce scope +* split responsibility +* document recovery +* or remove it entirely + +--- + +## 9. Tool Minimalism + +Every tool must earn its place. + +* Fewer tools, deeper mastery +* No duplicates without reason +* No installs without intent + +Bloat is deferred failure. + +--- + +## 10. Drift Control + +Entropy is inevitable. Drift is optional. + +I perform regular: + +* identity audits +* device reviews +* naming corrections +* deletion passes + +Maintenance is a form of freedom. + +--- + +## 11. Authority Boundary + +Critical actions happen only from: + +* op-core-vm +* with intent +* with awareness +* with traceability + +No “just this once”. +No shortcuts. + +--- + +## 12. Final Rule + +I build systems I am **not afraid to touch**. + +If fear appears, I stop — +not to hesitate, +but to **restore clarity**. + +--- +Here it is — distilled into a **one-page, printable charter**. +This is the version you keep **visible**. Everything else derives from it. + +--- + +### 🜂 The Operator Charter — One Page + +# THE OPERATOR CHARTER + +### I. Prime Directive + +I do not optimize for convenience. +I optimize for **clarity, recoverability, and sovereignty**. + +If a system cannot be understood, rebuilt, or revoked, it does not belong. + +--- + +### II. The Core + +There is **one core of authority**. + +* **op-core-vm** is the origin of action +* It is disposable, but authoritative +* No critical action occurs outside it + +The host is a console. +The phone is a witness. + +--- + +### III. Identity Law + +Identity is finite. + +* Roles over personalities +* Least privilege, always +* Devices receive leases, never permanence + +Anything that cannot be revoked cleanly is a liability. + +--- + +### IV. Naming Is Reality + +If it cannot be named correctly, it is not understood. + +Format: + +``` +-- +``` + +No myth names. +No ambiguity. +Renaming precedes deletion. + +--- + +### V. Infrastructure Doctrine + +Infrastructure is **cattle, not pets**. + +* Nodes are replaceable +* Loss is expected +* Rebuilds are boring + +Sentiment is reserved for people, not machines. + +--- + +### VI. Separation of Meaning + +Meaning and infrastructure never mix. + +* Knowledge, media, philosophy → **cold storage** +* Keys, infra, authority → **clean core** + +What matters must be portable. +What operates must be disposable. + +--- + +### VII. Backup Rule + +Backups exist for **calm recovery**, not comfort. + +* Encrypt before upload +* Cloud storage is a vault, never a brain +* No live sync for the core + +If losing a backup causes panic, it is wrongly scoped. + +--- + +### VIII. The Nuke Test + +Every system must answer: + +> “If this disappears today, can I rebuild without panic?” + +If not: + +* reduce scope +* split responsibility +* document recovery +* or remove it + +--- + +### IX. Tool Discipline + +Every tool must earn its place. + +* Fewer tools, deeper mastery +* No duplicates without reason +* No installs without intent + +Bloat is deferred failure. + +--- + +### X. Drift Control + +Entropy is natural. Drift is optional. + +Regularly: + +* audit identities +* review devices +* correct names +* delete without regret + +Maintenance is freedom. 
+ +--- + +### XI. Authority Boundary + +Critical actions occur only: + +* from the core +* with intent +* with traceability + +No shortcuts. +No “just this once”. + +--- + +### XII. Final Law + +I build systems I am **not afraid to touch**. + +If fear appears, I stop — +not to hesitate, +but to **restore clarity**. + +--- + +If you want, next we can: + +Excellent. This is already **well-formed**. I’ll do three things: + +1. **Polish and canonize** the structure +2. **State the rules as law**, not suggestions +3. **Clarify what lives outside `ops/`** so entropy doesn’t creep back + +This will read like something you can drop straight into `ops/README.md`. + +--- + +## 📁 The `ops/` Repository — Canonical Structure + +**Purpose:** +`ops/` is the **single source of operational truth** for everything that can be rebuilt, audited, or revoked. + +If it matters operationally and can be reproduced, it belongs here. +If it cannot be reproduced or must remain offline, it does **not**. + +--- + +### Recommended Repository Layout + +```text +ops/ + README.md + + 00-doctrine/ + operator-charter.md + identity-law.md + backup-doctrine.md + + 10-inventory/ + hosts/--/ + README.md + hardware.md + os.md + networks/ + services/ + + 20-identity/ + roles/ + policies/ + keys/ + public/ # PUBLIC KEYS ONLY + leases/ # device + role leases (time-bound) + + 30-infra/ + terraform/ # or other IaC + config/ # ansible/salt/etc + k8s/ # only if used + + 40-apps/ + / + + 50-runbooks/ + 00-panic-day/ + 10-provision/ + 20-incident/ + 90-decom/ + + 60-backups/ + manifests/ # what is backed up, where, how + restore-tests/ # proof that restores work + + 70-audits/ + checklists/ + reports/ + + 80-automation/ + scripts/ + containers/ + + 90-scratch/ # gitignored, disposable + + vault/ # ENCRYPTED-ONLY (age/sops); no plaintext, ever +``` + +--- + +## 🔐 The Vault Rule (Non-Negotiable) + +The `vault/` directory: + +* contains **encrypted artifacts only** +* is encrypted **before** commit (age, sops, gpg, etc.) +* never contains plaintext secrets, even briefly +* never contains private keys in the clear + +**Git only ever sees ciphertext and public keys.** + +If it can’t be safely leaked, it doesn’t belong in Git. + +--- + +## 🔑 Key Handling Law + +* **Public keys** → allowed in `ops/` +* **Private keys** → never committed +* **Secrets** → encrypted artifacts only +* **Live credentials** → injected at runtime, not stored + +If a secret is needed: + +1. Encrypt it +2. Store the encrypted form +3. Document how it is injected +4. Prove it can be rotated + +--- + +## 📘 Runbooks Are Executable Documents + +A runbook is not prose. +A runbook is a **repeatable action plan**. + +Every runbook **must** include: + +* Preconditions +* Exact commands or steps +* Expected outcomes +* **Restore validation** + + * “How do I know this worked?” + * “What proof do I record?” + +If a runbook cannot be executed under pressure, it is incomplete. + +--- + +## 🧠 `ops/` as Source of Truth + +The following rules apply: + +* `ops/` is **reproducible** +* `ops/` is **reviewable** +* `ops/` is **rebuildable from zero** +* No manual clicks without documentation +* No undocumented state + +If reality and `ops/` disagree, **reality is wrong**. + +Fix reality. 
+ +--- + +## ❄️ What Does *Not* Live in `ops/` + +The following belong in **cold storage**, not this repo: + +* Personal media +* Philosophy, books, audio +* Irreplaceable creative work +* Long-term archives +* Sentimental or meaning-bearing material + +Rule of thumb: + +> If losing it would hurt emotionally, it does not belong in `ops/`. + +--- + +## 🧪 The Ultimate Test + +Every part of `ops/` must pass this question: + +> “If this repo is the only thing I have left, can I rebuild calmly?” + +If not: + +* reduce scope +* split responsibility +* add validation +* or remove it + +--- + +## Final Framing + +This structure does one thing exceptionally well: + +**It turns operations into something you are not afraid to touch.** + +That’s the whole point. + +If you want next, we can: + +* Generate a **starter `README.md`** verbatim +* Write a **panic-day runbook template** +* Define a **key lifecycle + rotation policy** to live under `20-identity/` + +Say the next move. + +## Assistant + + + + +**Determining next steps** + +The user shared content and asked for the next move after previously requesting a folder structure for operations. I see they might want a README.md, a panic-day template, and a key lifecycle policy. Given the environment context, we can modify files without seeking approval, but the user's request is somewhat ambiguous. I could propose creating an `ops/` directory and adding the files, but they didn't explicitly ask for implementation, just guidance on the next step. + +**Proposing implementation steps** + +The previous prompt about proposing a folder structure indicates that the user could actually want implementation. They mentioned content that suggests the creation of `ops/README.md`. I think the next steps might be to create the `ops/` directory if it doesn’t exist, add the `README.md` with the necessary structure, and include runbook templates and a key lifecycle policy document. I should check for AGENTS.md in the repository since we might need to modify files. I’ll use shell commands to explore the repository.**Proposing implementation steps** + +The previous prompt about proposing a folder structure indicates that the user could actually want implementation. They mentioned content that suggests the creation of `ops/README.md`. I think the next steps might be to create the `ops/` directory if it doesn’t exist, add the `README.md` with the necessary structure, and include runbook templates and a key lifecycle policy document. I should check for AGENTS.md in the repository since we might need to modify files. I’ll use shell commands to explore the repository. + + +**Tool Call: List /Users/sovereign/work-core** +Status: Completed + +```sh +total 0 +drwxr-xr-x 2 sovereign staff 64 Dec 17 14:31 . +drwxr-x---+ 19 sovereign staff 608 Dec 17 14:31 .. +``` + + +## Assistant + + + + +**Planning directory structure** + +I'm thinking we can start by creating an `ops/` directory in version control. I want to include a README, template runbooks, and some `.gitignore` rules. Optionally, I could add a `Makefile` or scripts to keep commands consistent. +Then, I need to initialize git, but I won't run any network commands yet since the directory is empty. My plan includes various documents regarding the operator charter, equipment identity, and backups. Let's proceed! + +**Organizing additional templates** + +I want to keep things minimal, so I’m thinking of providing a template file named `lease.template.yml` and some instructions. 
I’ll have a README in `ops/20-identity/keys/public/` for storing public keys. In `ops/30-infra/`, I can create directory placeholders with README files since I can't know the exact tools yet. I’ll draft templates for runbooks and backups, include placeholders for automation scripts, and ensure that sensitive information isn't stored as plaintext in the vault. Lastly, I need to handle the `.gitignore` for the `90-scratch` directory, possibly including a rule to ignore its contents. + +**Establishing gitignore best practices** + +I need to think through how to manage the `.gitignore` for the `90-scratch` directory. If I ignore the root, I won't be able to keep it empty even with a `.gitkeep`; however, I can create patterns in the root `.gitignore` to ignore everything inside `90-scratch`, except for the `.gitkeep`. + +Then, for the vault directory, I want to keep it committed without plaintext secrets. I’ll set up a `.gitignore` in `vault/` to deny everything except specified encrypted files and `README.md`, ensuring we maintain security. + +**Managing file commits wisely** + +I’m considering how to handle the `*.age` files: they shouldn’t be ignored since they need to be committed. I could ignore everything first and then unignore specific files like `.sops.yaml`, which might not contain secrets but are still important. Creating a pre-commit hook to block plaintext might be more complex. + +Regarding the repo structure, the user might want the `ops/` directory as the root instead of it being nested. So, I should clarify if they want `work-core` as the repository or make `ops/` the single source of truth. + +**Proposing folder structure** + +I'm considering how to set up the folder structure for operations. I plan to add `.gitkeep` and specific `.gitignore` patterns to avoid accidentally ignoring important files like `README.md` in the `90-scratch` directory. For the vault, I want to ensure no plaintext secrets are committed, possibly by implementing encryption with `sops`. + +The user’s request suggests they might want the structure at the repository root instead of nested inside `ops/`. So, it seems best to create directories like `00-doctrine/` and `10-inventory/` directly at the root level to maintain a single source of truth. + +**Creating the ops folder structure** + +I’m thinking about how to set up the `ops/` directory and its structure. It seems best to create `README.md` at the root level to keep things clean. Since the user's documentation implies that the repository root might be `ops`, I can implement the structure directly under `work-core` and reference it as the "ops" repository within the README. + +To keep it straightforward, I’ll create the `ops/` folder with the necessary subdirectories and include a `.gitignore` and `.gitattributes`, all tailored to reducing any risk of mismatch. After creating everything, I’ll summarize the next steps to maintain clarity.**Organizing additional templates** + +I want to keep things minimal, so I’m thinking of providing a template file named `lease.template.yml` and some instructions. I’ll have a README in `ops/20-identity/keys/public/` for storing public keys. In `ops/30-infra/`, I can create directory placeholders with README files since I can't know the exact tools yet. I’ll draft templates for runbooks and backups, include placeholders for automation scripts, and ensure that sensitive information isn't stored as plaintext in the vault. 
Lastly, I need to handle the `.gitignore` for the `90-scratch` directory, possibly including a rule to ignore its contents.**Establishing gitignore best practices** + +I need to think through how to manage the `.gitignore` for the `90-scratch` directory. If I ignore the root, I won't be able to keep it empty even with a `.gitkeep`; however, I can create patterns in the root `.gitignore` to ignore everything inside `90-scratch`, except for the `.gitkeep`. + +Then, for the vault directory, I want to keep it committed without plaintext secrets. I’ll set up a `.gitignore` in `vault/` to deny everything except specified encrypted files and `README.md`, ensuring we maintain security.**Managing file commits wisely** + +I’m considering how to handle the `*.age` files: they shouldn’t be ignored since they need to be committed. I could ignore everything first and then unignore specific files like `.sops.yaml`, which might not contain secrets but are still important. Creating a pre-commit hook to block plaintext might be more complex. + +Regarding the repo structure, the user might want the `ops/` directory as the root instead of it being nested. So, I should clarify if they want `work-core` as the repository or make `ops/` the single source of truth.**Proposing folder structure** + +I'm considering how to set up the folder structure for operations. I plan to add `.gitkeep` and specific `.gitignore` patterns to avoid accidentally ignoring important files like `README.md` in the `90-scratch` directory. For the vault, I want to ensure no plaintext secrets are committed, possibly by implementing encryption with `sops`. + +The user’s request suggests they might want the structure at the repository root instead of nested inside `ops/`. So, it seems best to create directories like `00-doctrine/` and `10-inventory/` directly at the root level to maintain a single source of truth.**Creating the ops folder structure** + +I’m thinking about how to set up the `ops/` directory and its structure. It seems best to create `README.md` at the root level to keep things clean. Since the user's documentation implies that the repository root might be `ops`, I can implement the structure directly under `work-core` and reference it as the "ops" repository within the README. + +To keep it straightforward, I’ll create the `ops/` folder with the necessary subdirectories and include a `.gitignore` and `.gitattributes`, all tailored to reducing any risk of mismatch. After creating everything, I’ll summarize the next steps to maintain clarity. + +**Scaffolding directory tree** + +I’m in step 1, focusing on scaffolding the directory tree. The plan is to create directories and add placeholder README files, using apply_patch to streamline the process. For this step, I’ll make sure to add minimal files to avoid exceeding patch size. + +I think it’s fine to create empty directories or .keep files for step 1. The README for `ops/` can be added in step 2. So, I’ll aim for a clean structure with clear steps. + +**Implementing the patch plan** + +I'm thinking we can create everything in one patch, marking step 1 as completed and step 2 as in progress. I need to stick to the plan's status rules, which means I shouldn't jump from pending to completed. It's okay to complete step 1 and start step 2 in the same update plan call. + +For step 1, I'm focusing on creating directories and empty placeholders, maybe using `.gitkeep`. Step 2 would then be about adding content like the README files. 
I’ll adjust my approach to keep it clear and organized. + +**Structuring directories with README files** + +I’m reconsidering my approach and thinking it might be more helpful to create `README.md` placeholders in each directory. While that's more than just scaffolding, we can keep the content minimal, maybe just something like "This directory contains...". It's okay to have that content, even if it's basic! + +I'll make sure to update the plan correctly, marking step 1 as completed after scaffolding, with step 2 in progress. I'll define the directory structure and add those placeholders in each one. + +**Creating a streamlined directory structure** + +I’m realizing the user might prefer everything to be simple and manageable, minimizing the number of files. For the skeleton of our project, we can just create the directories and keep a README in the top level along with templates. Since empty directories are invisible in git, we’ll need to include `.gitkeep` files to maintain their visibility. + +For step 1, I'll add `.gitkeep` files in the necessary directories. We can revisit adding README files in the future, but for now, I’ll proceed with creating the patch with `.gitkeep` in each directory.**Implementing the patch plan** + +I'm thinking we can create everything in one patch, marking step 1 as completed and step 2 as in progress. I need to stick to the plan's status rules, which means I shouldn't jump from pending to completed. It's okay to complete step 1 and start step 2 in the same update plan call. + +For step 1, I'm focusing on creating directories and empty placeholders, maybe using `.gitkeep`. Step 2 would then be about adding content like the README files. I’ll adjust my approach to keep it clear and organized.**Structuring directories with README files** + +I’m reconsidering my approach and thinking it might be more helpful to create `README.md` placeholders in each directory. While that's more than just scaffolding, we can keep the content minimal, maybe just something like "This directory contains...". It's okay to have that content, even if it's basic! + +I'll make sure to update the plan correctly, marking step 1 as completed after scaffolding, with step 2 in progress. I'll define the directory structure and add those placeholders in each one.**Creating a streamlined directory structure** + +I’m realizing the user might prefer everything to be simple and manageable, minimizing the number of files. For the skeleton of our project, we can just create the directories and keep a README in the top level along with templates. Since empty directories are invisible in git, we’ll need to include `.gitkeep` files to maintain their visibility. + +For step 1, I'll add `.gitkeep` files in the necessary directories. We can revisit adding README files in the future, but for now, I’ll proceed with creating the patch with `.gitkeep` in each directory. 
+ + +**Tool Call: Edit /Users/sovereign/work-core/ops/80-automation/containers/.gitkeep, /Users/sovereign/work-core/ops/10-inventory/networks/.gitkeep, /Users/sovereign/work-core/ops/30-infra/k8s/.gitkeep, /Users/sovereign/work-core/ops/70-audits/reports/.gitkeep, /Users/sovereign/work-core/ops/40-apps/.gitkeep, /Users/sovereign/work-core/ops/10-inventory/hosts/.gitkeep, /Users/sovereign/work-core/ops/50-runbooks/90-decom/.gitkeep, /Users/sovereign/work-core/ops/70-audits/checklists/.gitkeep, /Users/sovereign/work-core/ops/50-runbooks/10-provision/.gitkeep, /Users/sovereign/work-core/ops/20-identity/leases/.gitkeep, /Users/sovereign/work-core/ops/10-inventory/services/.gitkeep, /Users/sovereign/work-core/ops/20-identity/policies/.gitkeep, /Users/sovereign/work-core/ops/20-identity/keys/public/.gitkeep, /Users/sovereign/work-core/ops/50-runbooks/00-panic-day/.gitkeep, /Users/sovereign/work-core/ops/00-doctrine/.gitkeep, /Users/sovereign/work-core/ops/90-scratch/.gitkeep, /Users/sovereign/work-core/ops/50-runbooks/20-incident/.gitkeep, /Users/sovereign/work-core/ops/60-backups/restore-tests/.gitkeep, /Users/sovereign/work-core/ops/20-identity/roles/.gitkeep, /Users/sovereign/work-core/ops/vault/.gitkeep, /Users/sovereign/work-core/ops/20-identity/templates/.gitkeep, /Users/sovereign/work-core/ops/60-backups/manifests/.gitkeep, /Users/sovereign/work-core/ops/30-infra/terraform/.gitkeep, /Users/sovereign/work-core/ops/30-infra/config/.gitkeep, /Users/sovereign/work-core/ops/80-automation/scripts/.gitkeep** +Status: Completed + +Diff: /Users/sovereign/work-core/ops/80-automation/containers/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/10-inventory/networks/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/30-infra/k8s/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/70-audits/reports/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/40-apps/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/10-inventory/hosts/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/50-runbooks/90-decom/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/70-audits/checklists/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/50-runbooks/10-provision/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/20-identity/leases/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/10-inventory/services/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/20-identity/policies/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/20-identity/keys/public/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/50-runbooks/00-panic-day/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/00-doctrine/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/90-scratch/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/50-runbooks/20-incident/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/60-backups/restore-tests/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/20-identity/roles/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/vault/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/20-identity/templates/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/60-backups/manifests/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/30-infra/terraform/.gitkeep +``` + + +``` + + +Diff: /Users/sovereign/work-core/ops/30-infra/config/.gitkeep +``` + 
+ +``` + + +Diff: /Users/sovereign/work-core/ops/80-automation/scripts/.gitkeep +``` + + +``` + + +## Assistant + + + + +**Updating my plan** + +I've completed the first step of my scaffolding, so now it's time to mark that as done. I need to transition into step two, which is currently in progress. Keeping track of these phases is essential for staying organized and making sure I’m on the right path. I wonder what challenges I might encounter in step two and how I can prepare for them! Overall, I’m feeling good about this process! + +**Creating documentation files** + +I’m focusing on adding important documentation by creating a README file in the `ops/` directory with a clear description of the repository. I’ll also include the Operator Charter summary and links to relevant documents. For the doctrine docs, I’m planning to create several pages, like `personal-operating-doctrine.md` and plus some others for important topics. I see they used emojis in their text, but I think keeping it professional without them might be best. However, I’ll ensure the tone remains friendly. + +**Crafting the README structure** + +I’m working on creating a structured `README.md` for the ops repository. It should outline the purpose, core non-negotiables like the critical action authority and secret handling, and provide a clear folder layout. I'll also include a section on how to use the repo—like initializing and creating inventory entries, along with a quick link to the operator charter. A "Rules of engagement" section seems essential too. I’ll aim to keep everything organized, with emphasis on a single source of truth. + +**Developing doctrine documentation** + +I'm focusing on creating key doctrine documents now. For `operator-charter.md`, I'll include a one-page charter with the heading, version, and the twelve laws as the user provided. For `personal-operating-doctrine.md`, it’ll be a full version, expanding on key sections with some formatting. Then, `identity-law.md` will dive into roles, leases, and key handling, offering clear definitions and templates. Lastly, I'll prepare `backup-doctrine.md` to detail the backup rules, including encryption and scheduling. Also, I’m considering adding a README for the inventory section. + +**Creating additional documentation** + +I’m considering adding some additional documentation, but I want to keep things minimal. I think creating an `ops/00-doctrine/README.md` that lists all the docs could be useful. I’ll proceed with that and use the apply_patch Add File method to create the necessary files. Also, I’m thinking it might be fine to remove `.gitkeep`, though it’s not essential. Let's work on crafting that patch to finalize everything!**Crafting the README structure** + +I’m working on creating a structured `README.md` for the ops repository. It should outline the purpose, core non-negotiables like the critical action authority and secret handling, and provide a clear folder layout. I'll also include a section on how to use the repo—like initializing and creating inventory entries, along with a quick link to the operator charter. A "Rules of engagement" section seems essential too. I’ll aim to keep everything organized, with emphasis on a single source of truth.**Developing doctrine documentation** + +I'm focusing on creating key doctrine documents now. For `operator-charter.md`, I'll include a one-page charter with the heading, version, and the twelve laws as the user provided. 
For `personal-operating-doctrine.md`, it’ll be a full version, expanding on key sections with some formatting. Then, `identity-law.md` will dive into roles, leases, and key handling, offering clear definitions and templates. Lastly, I'll prepare `backup-doctrine.md` to detail the backup rules, including encryption and scheduling. Also, I’m considering adding a README for the inventory section.**Creating additional documentation** + +I’m considering adding some additional documentation, but I want to keep things minimal. I think creating an `ops/00-doctrine/README.md` that lists all the docs could be useful. I’ll proceed with that and use the apply_patch Add File method to create the necessary files. Also, I’m thinking it might be fine to remove `.gitkeep`, though it’s not essential. Let's work on crafting that patch to finalize everything! + + +**Tool Call: Edit /Users/sovereign/work-core/ops/00-doctrine/README.md, /Users/sovereign/work-core/ops/00-doctrine/personal-operating-doctrine.md, /Users/sovereign/work-core/ops/README.md, /Users/sovereign/work-core/ops/00-doctrine/operator-charter.md, /Users/sovereign/work-core/ops/00-doctrine/backup-doctrine.md, /Users/sovereign/work-core/ops/00-doctrine/identity-law.md** +Status: Completed + +Diff: /Users/sovereign/work-core/ops/00-doctrine/README.md +``` +# Doctrine + +This directory defines the non-negotiable rules of operation. + +- `operator-charter.md` is the one-page version you keep visible. +- `personal-operating-doctrine.md` is the full doctrine that everything derives from. + + +``` + + +Diff: /Users/sovereign/work-core/ops/00-doctrine/personal-operating-doctrine.md +``` +# Personal Operating Doctrine — Operator Edition + +*(v1.0)* + +## 1. Prime Directive + +I do not optimize for convenience. I optimize for **clarity, recoverability, and sovereignty**. + +If a system cannot be understood, rebuilt, or revoked, it does not belong. + +## 2. The Core + +There exists **one core** from which all authority flows: + +- `op-core-vm` is the origin of action. +- It is disposable, but authoritative. +- Nothing touches critical infrastructure unless it originates here. + +The host machine is a **console**, not a source of trust. The phone is a **witness**, not a workstation. + +## 3. Identity Law + +Identity is finite. + +- I operate through **roles**, not personalities. +- Each role has minimal scope and clear purpose. +- Devices hold **leases**, never permanent identity. + +Anything that cannot be cleanly revoked is a liability. + +## 4. Naming Is Reality + +If I cannot name it correctly, I do not understand it. + +All systems are named by: + +``` +-- +``` + +No myth names. No vibes. No ambiguity. + +Renaming precedes deletion. Deletion follows clarity. + +## 5. Infrastructure Is Cattle + +No machine is sacred. + +- All nodes are replaceable. +- Rebuilds are expected. +- Loss is boring, not catastrophic. + +Sentiment is reserved for people and meaning — never machines. + +## 6. Separation of Concerns + +Meaning and infrastructure do not mix. + +- Knowledge, media, philosophy → cold storage. +- Keys, infra, authority → clean core. + +What matters must be portable. What operates must be disposable. + +## 7. Backup Doctrine + +Backups exist to enable **calm recovery**, not comfort. + +- All backups are encrypted **before** leaving the system. +- Cloud storage is a **vault**, never a brain. +- No live sync for core systems. + +If a backup cannot be lost safely, it is incorrectly scoped. + +## 8. 
The Nuke Test + +Any system must pass this test: + +> “If this disappears today, can I rebuild without panic?” + +If the answer is no: reduce scope, split responsibility, document recovery, or remove it entirely. + +## 9. Tool Minimalism + +Every tool must earn its place. + +- Fewer tools, deeper mastery. +- No duplicates without reason. +- No installs without intent. + +Bloat is deferred failure. + +## 10. Drift Control + +Entropy is inevitable. Drift is optional. + +I perform regular identity audits, device reviews, naming corrections, and deletion passes. + +Maintenance is a form of freedom. + +## 11. Authority Boundary + +Critical actions happen only from `op-core-vm`, with intent, awareness, and traceability. + +No “just this once”. No shortcuts. + +## 12. Final Rule + +I build systems I am **not afraid to touch**. + +If fear appears, I stop — not to hesitate, but to **restore clarity**. + + +``` + + +Diff: /Users/sovereign/work-core/ops/README.md +``` +# ops + +Single source of operational truth: everything here must be understandable, rebuildable, and revocable. + +## Non-negotiables + +- **Authority boundary:** critical actions originate from `op-core-vm`. +- **No plaintext secrets in Git:** `vault/` contains encrypted artifacts only; `20-identity/keys/public/` is public keys only. +- **Naming is reality:** everything is named `--`. +- **Nuke test:** if losing it causes panic, reduce scope or remove it. + +## Read first + +- `00-doctrine/operator-charter.md` +- `00-doctrine/personal-operating-doctrine.md` + +## Layout + +- `00-doctrine/` – the law (stable, rarely changed) +- `10-inventory/` – what exists (hosts, networks, services) +- `20-identity/` – roles, policies, leases, public keys +- `30-infra/` – infrastructure as code + config management +- `40-apps/` – application-specific ops (deploy, config, lifecycle) +- `50-runbooks/` – executable procedures (panic day, provision, incident, decom) +- `60-backups/` – backup manifests + restore proofs +- `70-audits/` – checklists + reports +- `80-automation/` – scripts and containers that make the above repeatable +- `90-scratch/` – disposable working area (not source of truth) +- `vault/` – encrypted-only artifacts (safe to leak without keys) + +## What does not belong here + +Meaning-bearing material (books, media, personal archives) stays in cold storage, not `ops/`. + + +``` + + +Diff: /Users/sovereign/work-core/ops/00-doctrine/operator-charter.md +``` +# The Operator Charter (One Page) + +*(v1.0)* + +## I. Prime Directive + +I do not optimize for convenience. I optimize for **clarity, recoverability, and sovereignty**. + +If a system cannot be understood, rebuilt, or revoked, it does not belong. + +## II. The Core + +There is **one core of authority**. + +- `op-core-vm` is the origin of action. +- It is disposable, but authoritative. +- No critical action occurs outside it. + +The host is a console. The phone is a witness. + +## III. Identity Law + +Identity is finite. + +- Roles over personalities. +- Least privilege, always. +- Devices receive leases, never permanence. + +Anything that cannot be revoked cleanly is a liability. + +## IV. Naming Is Reality + +If it cannot be named correctly, it is not understood. + +Format: + +``` +-- +``` + +No myth names. No ambiguity. Renaming precedes deletion. + +## V. Infrastructure Doctrine + +Infrastructure is **cattle, not pets**. + +- Nodes are replaceable. +- Loss is expected. +- Rebuilds are boring. + +Sentiment is reserved for people, not machines. + +## VI. 
Separation of Meaning + +Meaning and infrastructure never mix. + +- Knowledge, media, philosophy → cold storage. +- Keys, infra, authority → clean core. + +What matters must be portable. What operates must be disposable. + +## VII. Backup Rule + +Backups exist for **calm recovery**, not comfort. + +- Encrypt before upload. +- Cloud storage is a vault, never a brain. +- No live sync for the core. + +If losing a backup causes panic, it is wrongly scoped. + +## VIII. The Nuke Test + +Every system must answer: + +> “If this disappears today, can I rebuild without panic?” + +If not: reduce scope, split responsibility, document recovery, or remove it. + +## IX. Tool Discipline + +Every tool must earn its place. + +- Fewer tools, deeper mastery. +- No duplicates without reason. +- No installs without intent. + +Bloat is deferred failure. + +## X. Drift Control + +Entropy is natural. Drift is optional. + +Regularly: audit identities, review devices, correct names, delete without regret. + +Maintenance is freedom. + +## XI. Authority Boundary + +Critical actions occur only: + +- from the core +- with intent +- with traceability + +No shortcuts. No “just this once”. + +## XII. Final Law + +I build systems I am **not afraid to touch**. + +If fear appears, I stop — not to hesitate, but to **restore clarity**. + + +``` + + +Diff: /Users/sovereign/work-core/ops/00-doctrine/backup-doctrine.md +``` +# Backup Doctrine + +## Principles + +- Backups exist for **calm recovery**, not comfort. +- Encrypt backups **before** they leave the system. +- A backup that cannot be safely lost is incorrectly scoped. +- Restores are verified: a backup without a restore test is a hope, not a backup. + +## In-repo boundaries + +- What is backed up, where it is stored, and how it is restored is recorded in `60-backups/manifests/`. +- Proof that restores work (restore drills, checksums, validation notes) lives in `60-backups/restore-tests/`. + +## Minimum standard + +For each backup set, record: + +- Source (what) +- Destination (where) +- Method (how) +- Frequency (when) +- Restore procedure (how to get it back) +- Validation (how you know it worked) + + +``` + + +Diff: /Users/sovereign/work-core/ops/00-doctrine/identity-law.md +``` +# Identity Law + +## Principles + +- Operate via **roles**, not personalities. +- Issue access as **leases** (time-bound, revocable), not permanence. +- Prefer **short-lived credentials** over long-lived secrets. +- Public keys are safe to store; private keys are not. + +## In-repo boundaries + +- Role definitions live in `20-identity/roles/`. +- Policies and intent live in `20-identity/policies/`. +- Leases (who/what has access, until when) live in `20-identity/leases/`. +- Public keys live in `20-identity/keys/public/`. +- Private keys and plaintext secrets never live in this repo. + +## Required properties (revocation) + +Every identity mechanism must support: + +1. **Revoke**: shut it off quickly. +2. **Rotate**: replace it predictably. +3. **Prove**: show what changed and when. + +If any of the above is not true, the mechanism does not belong in the core. + + +``` + + +## Assistant + + + + +**Updating the plan** + +I've marked Step 2 as completed. Now, I need to shift my focus to Step 3 and keep that in progress. It's important to stay organized and track my progress effectively! I want to ensure everything is clearly marked and that I don't lose track of what comes next. Checking things off and moving ahead helps me feel accomplished! Let’s keep this momentum going! 
+ + +**Tool Call: Edit /Users/sovereign/work-core/ops/50-runbooks/README.md, /Users/sovereign/work-core/ops/70-audits/checklists/README.md, /Users/sovereign/work-core/ops/50-runbooks/10-provision/README.md, /Users/sovereign/work-core/ops/70-audits/checklists/quarterly.md, /Users/sovereign/work-core/ops/70-audits/reports/README.md, /Users/sovereign/work-core/ops/50-runbooks/00-panic-day/README.md, /Users/sovereign/work-core/ops/50-runbooks/90-decom/README.md, /Users/sovereign/work-core/ops/70-audits/checklists/weekly.md, /Users/sovereign/work-core/ops/70-audits/README.md, /Users/sovereign/work-core/ops/50-runbooks/_template.md, /Users/sovereign/work-core/ops/50-runbooks/20-incident/README.md** +Status: Completed + +Diff: /Users/sovereign/work-core/ops/50-runbooks/README.md +``` +# Runbooks + +Runbooks are executable documents: written to be followed under pressure. + +Minimum required sections: + +- Purpose +- Preconditions +- Steps (exact commands) +- Validation (how you know it worked) +- Rollback / abort criteria +- Evidence (what to record) + +Use `ops/50-runbooks/_template.md` for new runbooks. + + +``` + + +Diff: /Users/sovereign/work-core/ops/70-audits/checklists/README.md +``` +# Checklists + +Use these to perform regular drift control. + +When you run a checklist, file the result under `70-audits/reports/` with: + +- date/time +- operator role used +- what changed +- what was revoked/rotated +- any failures and follow-ups + + +``` + + +Diff: /Users/sovereign/work-core/ops/50-runbooks/10-provision/README.md +``` +# Provision + +Create and bootstrap new nodes/services in a repeatable way. + + +``` + + +Diff: /Users/sovereign/work-core/ops/70-audits/checklists/quarterly.md +``` +# Quarterly Drift Control + +## Identity + +- Rotate high-value credentials and invalidate old material. +- Re-evaluate role scopes; remove privileges that became “default”. + +## Backup reality check + +- Perform a full restore drill of at least one critical system. +- Verify restore validation steps still match reality. + +## Infra sanity + +- Identify and remove orphaned resources (accounts, hosts, services). +- Confirm all critical infrastructure is reproducible from `ops/`. + + +``` + + +Diff: /Users/sovereign/work-core/ops/70-audits/reports/README.md +``` +# Reports + +Store audit outcomes and evidence here. + +Suggested format: + +- `YYYY-MM-DD-weekly.md` +- `YYYY-Q#-quarterly.md` + + +``` + + +Diff: /Users/sovereign/work-core/ops/50-runbooks/00-panic-day/README.md +``` +# Panic Day + +This folder contains rebuild-from-zero drills. + +Rule: each drill must be runnable with only this repo, cold storage, and the vault keys. + +Suggested drills: + +- Rebuild `op-core-vm` from a clean base image. +- Restore this repo, validate integrity, and re-establish authority boundary. +- Rotate critical credentials and invalidate all prior device leases. +- Restore one representative backup set end-to-end and record proof. + + +``` + + +Diff: /Users/sovereign/work-core/ops/50-runbooks/90-decom/README.md +``` +# Decommission + +Safe teardown, revocation, and evidence capture for retired systems. + + +``` + + +Diff: /Users/sovereign/work-core/ops/70-audits/checklists/weekly.md +``` +# Weekly Drift Control + +## Identity + +- Review active leases; revoke anything unused or unclear. +- Confirm least-privilege matches reality (roles/policies still correct). +- Confirm no new long-lived credentials exist without rotation plan. + +## Backups + +- Verify latest backups completed for all critical sets. 
+- Confirm at least one restore proof is recent (per backup cadence). + +## Inventory / Naming + +- Confirm new systems/services are inventoried and named `--`. +- Rename unclear entries before deleting anything. + +## Core boundary + +- Confirm critical changes still originate only from `op-core-vm`. + + +``` + + +Diff: /Users/sovereign/work-core/ops/70-audits/README.md +``` +# Audits + +Audits are how drift is prevented. + +- Checklists live in `70-audits/checklists/`. +- Evidence and outcomes live in `70-audits/reports/`. + +If a checklist item cannot be verified, treat it as failed until proven otherwise. + + +``` + + +Diff: /Users/sovereign/work-core/ops/50-runbooks/_template.md +``` +# Runbook: + +## Purpose + +What this runbook does and when to use it. + +## Preconditions + +- Access/role required: +- Systems required: +- Expected safe state: +- Time estimate: + +## Steps + +1. +2. +3. + +## Validation + +How to prove the procedure succeeded (commands + expected output). + +## Rollback / Abort + +What to do if validation fails, and when to stop. + +## Evidence + +What you record (timestamps, change IDs, logs, screenshots). + +## Notes + +Gotchas, dependencies, links. + + +``` + + +Diff: /Users/sovereign/work-core/ops/50-runbooks/20-incident/README.md +``` +# Incident + +Containment, diagnosis, recovery, and postmortem procedures. + + +``` + + +## Assistant + +<thinking> + + +**Adding guardrails for security** + +I'm focusing on implementing guardrails for the vault in a codebase. It seems like a good start to add a `.gitignore` file in the ops root to ensure things like `90-scratch` are ignored. I also want to create a `README.md` to outline rules and clarify boundaries for public keys. While I’ll include patterns for secrets, like `*.pem`, I'm cautious not to restrict legitimate files like `.env.example`. I aim to minimize risk of accidentally committing plaintext secrets, knowing that `.gitignore` alone isn't foolproof. + +**Designing guardrails for the vault** + +I'm considering a pre-commit hook or using `gitleaks`, though that might be a bit too much. I'd like to include an optional `pre-commit` config, but since it wasn't requested, I'll keep things minimal. Creating a `.gitignore` for `ops/vault` seems like a sensible approach to ignore everything except for encrypted files and README. It's crucial to enforce that only specific file types are included, while allowing the `.sops.yaml` file for configuration, as that can safely contain public keys. + +**Updating guardrails for scratch files** + +I'm thinking it's better to create a `README.md` in `ops/90-scratch` to describe its purpose instead of keeping the `.gitkeep`. I'll update the `.gitignore` to ignore everything else but allow this `README.md` and potentially keep the `.gitkeep` as well for good measure. Next, I want to make sure we ignore common sensitive files like `*.pem`, `*.key`, while still allowing public key files, such as `.pub`. For the vault, I'll ignore all files but allow specific encrypted extensions to be retained. + +**Deciding on vault file handling** + +This is tricky! I can't just rely on file extensions like `.age` or `.yaml` for determining whether content is encrypted because sops can encrypt files with the same extension. It seems like we should either support `age` or allow sops-encrypted files, but not just based on the extension due to potential conflicts. Maybe we could forgo ignoring the `vault/` directory altogether and instead implement a rule and a pre-commit check. 
While I still want to add a `.gitignore` for common plaintext patterns, I'll need to think about how we treat all files in `vault/` as encrypted based on extensions. + +**Structuring vault and scratch directories** + +They might prefer using sops for YAML files, so I could require `.sops.yaml` for encrypted files, which complicates the `.gitignore`. I’ll consider setting up a vault directory layout with `ciphertext` for committed encrypted files and `plaintext` for git-ignored temporary files. I'll create a `README.md` to clarify that only ciphertext is allowed. For efficiency, I can include per-directory `.gitignore` files to manage what’s tracked in both `vault/tmp/` and `ops/90-scratch`—this will simplify the overall structure and avoid unnecessary complexity with files like `.gitkeep`.**Designing guardrails for the vault** + +I'm considering a pre-commit hook or using `gitleaks`, though that might be a bit too much. I'd like to include an optional `pre-commit` config, but since it wasn't requested, I'll keep things minimal. Creating a `.gitignore` for `ops/vault` seems like a sensible approach to ignore everything except for encrypted files and README. It's crucial to enforce that only specific file types are included, while allowing the `.sops.yaml` file for configuration, as that can safely contain public keys.**Updating guardrails for scratch files** + +I'm thinking it's better to create a `README.md` in `ops/90-scratch` to describe its purpose instead of keeping the `.gitkeep`. I'll update the `.gitignore` to ignore everything else but allow this `README.md` and potentially keep the `.gitkeep` as well for good measure. Next, I want to make sure we ignore common sensitive files like `*.pem`, `*.key`, while still allowing public key files, such as `.pub`. For the vault, I'll ignore all files but allow specific encrypted extensions to be retained.**Deciding on vault file handling** + +This is tricky! I can't just rely on file extensions like `.age` or `.yaml` for determining whether content is encrypted because sops can encrypt files with the same extension. It seems like we should either support `age` or allow sops-encrypted files, but not just based on the extension due to potential conflicts. Maybe we could forgo ignoring the `vault/` directory altogether and instead implement a rule and a pre-commit check. While I still want to add a `.gitignore` for common plaintext patterns, I'll need to think about how we treat all files in `vault/` as encrypted based on extensions.**Structuring vault and scratch directories** + +They might prefer using sops for YAML files, so I could require `.sops.yaml` for encrypted files, which complicates the `.gitignore`. I’ll consider setting up a vault directory layout with `ciphertext` for committed encrypted files and `plaintext` for git-ignored temporary files. I'll create a `README.md` to clarify that only ciphertext is allowed. For efficiency, I can include per-directory `.gitignore` files to manage what’s tracked in both `vault/tmp/` and `ops/90-scratch`—this will simplify the overall structure and avoid unnecessary complexity with files like `.gitkeep`. +</thinking> + +**Tool Call: Search find . 
-maxdepth 3 -type f** +Status: Completed + +```sh +ops/00-doctrine/.gitkeep +ops/00-doctrine/README.md +ops/00-doctrine/backup-doctrine.md +ops/00-doctrine/identity-law.md +ops/00-doctrine/operator-charter.md +ops/00-doctrine/personal-operating-doctrine.md +ops/40-apps/.gitkeep +ops/50-runbooks/README.md +ops/50-runbooks/_template.md +ops/70-audits/README.md +ops/90-scratch/.gitkeep +ops/README.md +ops/vault/.gitkeep +``` + + +## Assistant + +<thinking> + + +**Crafting .gitignore** + +I’m thinking about how to set up a `.gitignore` file for the `ops` directory. It's fine to have a `.gitignore` at the root, but we need to list what to ignore here. + +I want to include OS files like `.DS_Store`, editor backups, and various credential files. Also, I need to consider SSH private keys, and I’m debating how specific I should be with patterns. Plus, we should ignore `.env` files while allowing `.env.example`, but I have to be cautious about potentially sensitive `.tfvars` files. This is quite a balancing act! + +**Creating the .gitignore and README** + +I'm considering what to include in the `.gitignore` for the `ops` directory. Ignoring state files like `terraform.tfstate*`, temporary directories like `.terraform/`, and log files is standard practice. However, I want to keep it safe, so only specific vault temp files should be ignored. + +I’m also thinking about structuring the root `.gitignore` with essential patterns. Plus, I need to create a `README.md` and a specific `.gitignore` for `ops/vault/tmp/` to clarify its safety. It's all about making sure things are organized and secure! + +**Handling Sensitive Information** + +I’m considering where to store sensitive information safely. If something isn’t meant to leak, it clearly doesn’t belong in the wrong place. It’s recommended to use encrypted formats such as `*.age` or SOPS for YAML files. Any plaintext secrets should go into `vault/tmp`, which I’ll ignore in git and ensure it’s wiped after use. + +Private keys must not be committed; they should be kept in a secure store. I’ll also set up a `.gitignore` and `README.md` for the `90-scratch` directory, detailing that it's for temporary working notes. I must keep track of what’s important while ignoring the rest! + +**Managing .gitkeep Files** + +I’m considering the use of `.gitkeep` files in both the `90-scratch` and `vault` directories. For `vault`, we might not need `.gitkeep`, but if we treat it as committed, then having it tracked is fine. Still, I think removing it could help reduce clutter. Instead of `.gitkeep`, I’d prefer to have a `README.md` and `.gitignore` to give those directories more purpose. + +For other directories, maybe we can keep `.gitkeep` for now, but it's also worth thinking about adding minimal README files in key places like `inventory` and `identity` later on. It could really help clarify their purposes! + +**Structuring Operations Directories** + +The user is asking for a proposed structure, and they might want a skeleton set up. I’m thinking we should keep things minimal for now but plan to add details later. This includes creating `README.md` files in various directories such as `ops/20-identity/keys/public`, which will specify that it contains only public keys. + +I want to include placeholder README files in several top-level directories, which will help make navigation easier and clarify the purpose of each section. 
It's all part of building a coherent structure, so I’ll implement this patch now!**Creating the .gitignore and README** + +I'm considering what to include in the `.gitignore` for the `ops` directory. Ignoring state files like `terraform.tfstate*`, temporary directories like `.terraform/`, and log files is standard practice. However, I want to keep it safe, so only specific vault temp files should be ignored. + +I’m also thinking about structuring the root `.gitignore` with essential patterns. Plus, I need to create a `README.md` and a specific `.gitignore` for `ops/vault/tmp/` to clarify its safety. It's all about making sure things are organized and secure!**Handling Sensitive Information** + +I’m considering where to store sensitive information safely. If something isn’t meant to leak, it clearly doesn’t belong in the wrong place. It’s recommended to use encrypted formats such as `*.age` or SOPS for YAML files. Any plaintext secrets should go into `vault/tmp`, which I’ll ignore in git and ensure it’s wiped after use. + +Private keys must not be committed; they should be kept in a secure store. I’ll also set up a `.gitignore` and `README.md` for the `90-scratch` directory, detailing that it's for temporary working notes. I must keep track of what’s important while ignoring the rest!**Managing .gitkeep Files** + +I’m considering the use of `.gitkeep` files in both the `90-scratch` and `vault` directories. For `vault`, we might not need `.gitkeep`, but if we treat it as committed, then having it tracked is fine. Still, I think removing it could help reduce clutter. Instead of `.gitkeep`, I’d prefer to have a `README.md` and `.gitignore` to give those directories more purpose. + +For other directories, maybe we can keep `.gitkeep` for now, but it's also worth thinking about adding minimal README files in key places like `inventory` and `identity` later on. It could really help clarify their purposes!**Structuring Operations Directories** + +The user is asking for a proposed structure, and they might want a skeleton set up. I’m thinking we should keep things minimal for now but plan to add details later. This includes creating `README.md` files in various directories such as `ops/20-identity/keys/public`, which will specify that it contains only public keys. + +I want to include placeholder README files in several top-level directories, which will help make navigation easier and clarify the purpose of each section. It's all part of building a coherent structure, so I’ll implement this patch now! +</thinking> + +**Tool Call: Edit /Users/sovereign/work-core/ops/10-inventory/README.md, /Users/sovereign/work-core/ops/vault/README.md, /Users/sovereign/work-core/ops/80-automation/README.md, /Users/sovereign/work-core/ops/20-identity/keys/public/README.md, /Users/sovereign/work-core/ops/.gitignore, /Users/sovereign/work-core/ops/40-apps/README.md, /Users/sovereign/work-core/ops/90-scratch/.gitignore, /Users/sovereign/work-core/ops/vault/.gitkeep, /Users/sovereign/work-core/ops/90-scratch/.gitkeep, /Users/sovereign/work-core/ops/vault/tmp/.gitignore, /Users/sovereign/work-core/ops/20-identity/README.md, /Users/sovereign/work-core/ops/30-infra/README.md, /Users/sovereign/work-core/ops/60-backups/README.md, /Users/sovereign/work-core/ops/90-scratch/README.md** +Status: Completed + +Diff: /Users/sovereign/work-core/ops/10-inventory/README.md +``` +# Inventory + +Inventory is the map of reality. + +- Hosts live under `10-inventory/hosts/<role>-<scope>-<id>/`. 
+- Networks and services are recorded under `10-inventory/networks/` and `10-inventory/services/`. + +If it exists and matters operationally, it is inventoried and named. + + +``` + + +Diff: /Users/sovereign/work-core/ops/vault/README.md +``` +# Vault (encrypted-only) + +This directory contains **encrypted artifacts only**. + +Rules: + +- Never commit plaintext secrets (even temporarily). +- Never commit private keys. +- Treat `vault/` as safe-to-leak ciphertext: if leaking it would break you, it is mis-scoped. + +Decryption/working material belongs in `vault/tmp/` (gitignored) and should be wiped after use. + + +``` + + +Diff: /Users/sovereign/work-core/ops/80-automation/README.md +``` +# Automation + +Scripts and containers that make operations repeatable live here. + + +``` + + +Diff: /Users/sovereign/work-core/ops/20-identity/keys/public/README.md +``` +# Public keys only + +Store public keys here (e.g., `*.pub`). + +- Name keys to match the role/device: `<role>-<scope>-<id>`. +- Never commit private keys. + + +``` + + +Diff: /Users/sovereign/work-core/ops/.gitignore +``` +.DS_Store +*.swp +*.swo +*~ +.vscode/ +.idea/ + +# Terraform +**/.terraform/ +**/*.tfstate +**/*.tfstate.* +**/crash.log +**/crash.*.log + +# Common secret material (keep out of Git) +*.pem +*.key +*.p12 +*.pfx +*.jks +*.kubeconfig +*.ovpn +*.mobileconfig +.env +.env.* +!.env.example +*.tfvars +*.tfvars.json +!.tfvars.example +!.tfvars.json.example + +# Decrypted scratch areas +vault/tmp/ + + +``` + + +Diff: /Users/sovereign/work-core/ops/40-apps/README.md +``` +# Apps + +Application-specific operational material (deploy, config, lifecycle) lives here. + + +``` + + +Diff: /Users/sovereign/work-core/ops/90-scratch/.gitignore +``` +* +!.gitignore +!README.md + + +``` + + +Diff: /Users/sovereign/work-core/ops/vault/.gitkeep +``` + +``` + + +Diff: /Users/sovereign/work-core/ops/90-scratch/.gitkeep +``` + +``` + + +Diff: /Users/sovereign/work-core/ops/vault/tmp/.gitignore +``` +* +!.gitignore + + +``` + + +Diff: /Users/sovereign/work-core/ops/20-identity/README.md +``` +# Identity + +Identity is operated as roles + leases. + +In this repo: + +- roles: `20-identity/roles/` +- policies: `20-identity/policies/` +- leases (time-bound grants): `20-identity/leases/` +- public keys only: `20-identity/keys/public/` + +Private keys and plaintext secrets do not belong here. + + +``` + + +Diff: /Users/sovereign/work-core/ops/30-infra/README.md +``` +# Infrastructure + +Infrastructure is cattle. + +Store infrastructure-as-code and configuration management here. + +Guardrail: keep state and generated artifacts out of Git (e.g., Terraform state). + + +``` + + +Diff: /Users/sovereign/work-core/ops/60-backups/README.md +``` +# Backups + +- Backup manifests live in `60-backups/manifests/`. +- Restore proofs live in `60-backups/restore-tests/`. + +Backups are only real when restores are verified. + + +``` + + +Diff: /Users/sovereign/work-core/ops/90-scratch/README.md +``` +# Scratch + +Disposable working area. + +Nothing here is source of truth. Everything here is safe to delete. + + +``` + + +## Assistant + +<thinking> + + +**Checking directory structure** + +I’m on step five now, which involves checking the structure to make sure everything is in order. I think it’s important to verify the directory tree and check if all the files exist and the `.gitignore` is functioning properly. I’ll run a `find` command to show the files, and I need to ensure there aren’t any stray `.gitkeep` files left around. 
I also want to make sure that `ops/90-scratch/.gitignore` isn’t ignoring important files like README. I'll print the directory tree up to depth four to verify everything is good. +</thinking> + +**Tool Call: Search find ops -type f** +Status: Completed + +```sh +ops/.gitignore +ops/00-doctrine/.gitkeep +ops/00-doctrine/README.md +ops/00-doctrine/backup-doctrine.md +ops/00-doctrine/identity-law.md +ops/00-doctrine/operator-charter.md +ops/00-doctrine/personal-operating-doctrine.md +ops/10-inventory/README.md +ops/10-inventory/hosts/.gitkeep +ops/10-inventory/networks/.gitkeep +ops/10-inventory/services/.gitkeep +ops/20-identity/README.md +ops/20-identity/keys/public/.gitkeep +ops/20-identity/keys/public/README.md +ops/20-identity/leases/.gitkeep +ops/20-identity/policies/.gitkeep +ops/20-identity/roles/.gitkeep +ops/20-identity/templates/.gitkeep +ops/30-infra/README.md +ops/30-infra/config/.gitkeep +ops/30-infra/k8s/.gitkeep +ops/30-infra/terraform/.gitkeep +ops/40-apps/.gitkeep +ops/40-apps/README.md +ops/50-runbooks/00-panic-day/.gitkeep +ops/50-runbooks/00-panic-day/README.md +ops/50-runbooks/10-provision/.gitkeep +ops/50-runbooks/10-provision/README.md +ops/50-runbooks/20-incident/.gitkeep +ops/50-runbooks/20-incident/README.md +ops/50-runbooks/90-decom/.gitkeep +ops/50-runbooks/90-decom/README.md +ops/50-runbooks/README.md +ops/50-runbooks/_template.md +ops/60-backups/README.md +ops/60-backups/manifests/.gitkeep +ops/60-backups/restore-tests/.gitkeep +ops/70-audits/README.md +ops/70-audits/checklists/.gitkeep +ops/70-audits/checklists/README.md +ops/70-audits/checklists/quarterly.md +ops/70-audits/checklists/weekly.md +ops/70-audits/reports/.gitkeep +ops/70-audits/reports/README.md +ops/80-automation/README.md +ops/80-automation/containers/.gitkeep +ops/80-automation/scripts/.gitkeep +ops/90-scratch/.gitignore +ops/90-scratch/README.md +ops/README.md +ops/vault/README.md +ops/vault/tmp/.gitignore +``` + + +## Assistant + +<thinking> + + +**Clarifying .gitignore behavior** + +If the repository root is `work-core` and `ops` is a subfolder, the `.gitignore` in `ops` will only apply if they treat `ops` as a separate repo. However, I realize that `.gitignore` files work relative to the repository root. This means Git reads `.gitignore` files in subdirectories like `ops/`, and they will apply within those directories. So the rules in `ops/.gitignore` will indeed function properly within the `ops/` directory, and the same applies for subfolders like `ops/90-scratch`. That's good! + +**Finalizing repo organization** + +If `ops` is meant to be the repo root and they run Git inside `ops/`, then having the `.gitignore` there is good. I’m thinking about adding a root-level `README.md` to explain the existence of `ops/`, but it's not strictly necessary since the user didn't request it. Our working directory is empty but now has the `ops/` folder. It could be helpful to create a `README.md` in `ops/10-inventory/hosts/_template/`, but it’s not essential. I’ll ensure no trailing whitespace and update plan step 5 to completed.**Finalizing repo organization** + +If `ops` is meant to be the repo root and they run Git inside `ops/`, then having the `.gitignore` there is good. I’m thinking about adding a root-level `README.md` to explain the existence of `ops/`, but it's not strictly necessary since the user didn't request it. Our working directory is empty but now has the `ops/` folder. It could be helpful to create a `README.md` in `ops/10-inventory/hosts/_template/`, but it’s not essential. 
I’ll ensure no trailing whitespace and update plan step 5 to completed. + +**Deciding next steps** + +I need to figure out if `ops/` is the repository root. If it is, I’ll initiate it with `git init ops` and start working on the `10-inventory/hosts/` and writing the first panic-day runbook. Alternatively, I might create role/lease templates. I should ask the user if they want me to generate those templates and a panic-day runbook stub. I’ll keep the message clear by including file paths, but I may not need line references unless they’re essential. +</thinking> + +Created a ready-to-use `ops/` skeleton in your workspace: + +- Canonical doctrine + charter: `ops/00-doctrine/operator-charter.md`, `ops/00-doctrine/personal-operating-doctrine.md` +- Repo root + layout rules: `ops/README.md` +- Runbook system + template: `ops/50-runbooks/README.md`, `ops/50-runbooks/_template.md` +- Drift-control checklists: `ops/70-audits/checklists/weekly.md`, `ops/70-audits/checklists/quarterly.md` +- Guardrails: `ops/.gitignore`, encrypted-only vault rules + tmp: `ops/vault/README.md`, `ops/vault/tmp/.gitignore`, scratch is disposable: `ops/90-scratch/.gitignore` + +Next move options: make your first `10-inventory/hosts/<role>-<scope>-<id>/` entry, or I can generate `20-identity/templates/` (role + lease templates) and a first “panic day” runbook stub. + +## User + +set it up - https://docs.gitlab.com/cli/ Logged in as vaultsovereign + +## Assistant + +<thinking> + + +**Setting up Git repository** + +I'm considering initializing Git at the "work-core" level, which would create an "ops" subtree along with potential future directories. Since the user asked for the folder structure for operations, it seems the repo should be named "ops." It might be best to set the Git root to "/Users/sovereign/work-core/ops" for a contained structure. Since the user is interested in creating a remote repo via GitLab, I'll be looking into whether they have a project already named "ops" or "op-core." I'll default to "ops." + +**Creating a GitLab repository** + +I think I should ask the user if they want a specific name and namespace for the repository. I can also list available groups with `glab repo list`, but since the user is logged in as "vaultsovereign," I can assume they want to create the "ops" repo under that namespace. I plan to create the repo using `glab repo create ops --private --source . --remote origin --push` once I'm in the ops directory, but I might need to initialize Git first. It seems like consulting the documentation could clarify things! + +**Planning GitLab setup** + +I think the default branch on GitLab should be set to "main," and I might create an initial commit as well. Our repository has several .gitkeep files, which is fine. I need to create a plan since this is a multi-step task. The steps would include verifying that glab is installed and authenticated, initializing Git, creating the repo, pushing it, and potentially adding GitLab’s recommended settings like a protected branch. I’ll draft a new plan with these steps since the last one is completed. + +**Checking repository setup** + +I need to be cautious here since the user mentioned they are logged in as "vaultsovereign," which likely means `glab auth status` will return good results. I’ll run some commands to confirm this. We should also ensure that there are no secrets in the repo, as our skeleton already includes a `vault/` and a `.gitignore` for typical secret formats. 
While adding pre-commit hooks or setting up Git attributes could be useful, the user didn’t ask for those specifically. I’ll stick to keeping things minimal and focus on creating the repo and pushing it for now. + +**Preparing GitLab repo setup** + +I need to confirm that `.gitignore` properly includes `vault/tmp`, which we've added, and also check if the root `.gitignore` reflects this since it's currently in `ops/.gitignore`. Here's the plan: + +1. Check if glab is available with `glab --version` and verify authentication with `glab auth status`. +2. Initialize Git in the ops directory with `git init -b main`. I’ll need to check or set the user name and email. +3. If there's no existing repo, I'll configure the local user name/email with a placeholder if necessary. +4. Create the GitLab repo using `glab repo create` with appropriate flags, checking SSH or HTTPS preferences along the way. +5. Review glab documentation for any additional steps or flags needed. + +Now, I'll proceed with these commands!**Creating a GitLab repository** + +I think I should ask the user if they want a specific name and namespace for the repository. I can also list available groups with `glab repo list`, but since the user is logged in as "vaultsovereign," I can assume they want to create the "ops" repo under that namespace. I plan to create the repo using `glab repo create ops --private --source . --remote origin --push` once I'm in the ops directory, but I might need to initialize Git first. It seems like consulting the documentation could clarify things!**Planning GitLab setup** + +I think the default branch on GitLab should be set to "main," and I might create an initial commit as well. Our repository has several .gitkeep files, which is fine. I need to create a plan since this is a multi-step task. The steps would include verifying that glab is installed and authenticated, initializing Git, creating the repo, pushing it, and potentially adding GitLab’s recommended settings like a protected branch. I’ll draft a new plan with these steps since the last one is completed.**Checking repository setup** + +I need to be cautious here since the user mentioned they are logged in as "vaultsovereign," which likely means `glab auth status` will return good results. I’ll run some commands to confirm this. We should also ensure that there are no secrets in the repo, as our skeleton already includes a `vault/` and a `.gitignore` for typical secret formats. While adding pre-commit hooks or setting up Git attributes could be useful, the user didn’t ask for those specifically. I’ll stick to keeping things minimal and focus on creating the repo and pushing it for now.**Preparing GitLab repo setup** + +I need to confirm that `.gitignore` properly includes `vault/tmp`, which we've added, and also check if the root `.gitignore` reflects this since it's currently in `ops/.gitignore`. Here's the plan: + +1. Check if glab is available with `glab --version` and verify authentication with `glab auth status`. +2. Initialize Git in the ops directory with `git init -b main`. I’ll need to check or set the user name and email. +3. If there's no existing repo, I'll configure the local user name/email with a placeholder if necessary. +4. Create the GitLab repo using `glab repo create` with appropriate flags, checking SSH or HTTPS preferences along the way. +5. Review glab documentation for any additional steps or flags needed. + +Now, I'll proceed with these commands! 
+</thinking> + +**Tool Call: Run glab --version** +Status: Completed + +Terminal: +``` +glab 1.79.0 (19f58080) + + + + + +``` + + +**Tool Call: Run glab auth status** +Status: Completed + +Terminal: +``` +gitlab.com + ✓ Logged in to gitlab.com as vaultsovereign (/Users/sovereign/Library/Application Support/glab-cli/config.yml) + ✓ Git operations for gitlab.com configured to use https protocol. + ✓ API calls for gitlab.com are made over https protocol. + ✓ REST API Endpoint: https://gitlab.com/api/v4/ + ✓ GraphQL Endpoint: https://gitlab.com/api/graphql/ + ✓ Token found: ************************** + +``` + + +**Tool Call: Run git --version** +Status: Completed + +Terminal: +``` +git version 2.39.5 (Apple Git-154) + + + + + +``` + + +**Tool Call: Run git -C ops status** +Status: Failed + +Terminal: +``` +fatal: not a git repository (or any of the parent directories): .git + + + + + +``` + + +## Assistant + +<thinking> + + +**Moving forward with setup** + +I've installed and logged into glab as vaultsovereign, using HTTPS. Git is installed, but it seems ops isn't a git repo, which I expected. So my next step is to initialize git in ops. However, I also want to consider if there are any AGENTS instructions, though it appears unnecessary since there are no files. I'll mark step 1 as completed and move on to updating the plan for step 2. + +**Initializing git repository** + +I need to set up a git repository in the ops directory, and I'll run `git -C ops init -b main`. Oh, right, I need to check if the ops directory exists, but I remember it does. While doing this, I should ensure the `.gitignore` file includes `vault/tmp`. I’ll also look into whether we should remove `.gitkeep` and maybe create a `LICENSE` file, even though it's not requested. After initializing, I'll check the status and configure user settings, being careful about personal info. Finally, I’ll prepare to set the remote later on. + +**Creating git repository** + +I’m considering using glab to create a repository directly from the current directory and potentially push it. However, I'm not entirely sure if it initializes git as well. I think it’s safer to initialize the git repository myself. So, I’ll go ahead and run `git -C ops init -b main` to take charge of the setup. Ensuring I do it correctly feels important, especially when managing the repository!**Creating git repository** + +I’m considering using glab to create a repository directly from the current directory and potentially push it. However, I'm not entirely sure if it initializes git as well. I think it’s safer to initialize the git repository myself. So, I’ll go ahead and run `git -C ops init -b main` to take charge of the setup. Ensuring I do it correctly feels important, especially when managing the repository! +</thinking> + +**Tool Call: Run git -C ops init -b main** +Status: Completed + +Terminal: +``` +Initialized empty Git repository in /Users/sovereign/work-core/ops/.git/ + + + + + +``` + + +## Assistant + +<thinking> + + +**Committing files** + +I need to check the git status for the operations. This will help me understand which files are staged, unstaged, or anything that needs my attention before I commit. It's always good practice to look at the status first to avoid missing something important. Once I have that information, I can proceed confidently with committing the files. Let’s get started on this! +</thinking> + +**Tool Call: Run git -C ops status '--porcelain=v1'** +Status: Completed + +Terminal: +``` +?? .gitignore +?? 00-doctrine/ +?? 
10-inventory/ +?? 20-identity/ +?? 30-infra/ +?? 40-apps/ +?? 50-runbooks/ +?? 60-backups/ +?? 70-audits/ +?? 80-automation/ +?? 90-scratch/ +?? README.md +?? vault/ + +``` + + +## Assistant + +<thinking> + + +**Checking git configuration** + +I need to add and commit changes first. But wait, I should check if the git configuration for user.name and user.email exists. I'll run commands like `git -C ops config user.name` and `git -C ops config user.email` to check locally. If they aren't set, I’ll fall back to the global configuration. Alright, time to run those commands and see what I find! +</thinking> + +**Tool Call: Run git -C ops config --get user.name** +Status: Completed + +Terminal: +``` + + + + + + +``` + + +**Tool Call: Run git -C ops config --get user.email** +Status: Completed + +Terminal: +``` + + + + + + +``` + + +**Tool Call: Run git config --global --get user.name** +Status: Completed + +Terminal: +``` + + + + + + +``` + + +**Tool Call: Run git config --global --get user.email** +Status: Completed + +Terminal: +``` + + + + + + +``` + + +**Tool Call: Run git -C ops config user.name vaultsovereign, Run git -C ops config user.email vaultsovereign@users.noreply.gitlab.com** +Status: Completed + +Terminal: +``` + + + + + +``` + + +**Tool Call: Run git -C ops add .** +Status: Completed + +Terminal: +``` + + + + + + +``` + + +**Tool Call: Run git -C ops commit -m 'Initialize ops repository'** +Status: Completed + +Terminal: +``` +[main (root-commit) f375d21] Initialize ops repository + 51 files changed, 590 insertions(+) + create mode 100644 .gitignore + create mode 100644 00-doctrine/.gitkeep + create mode 100644 00-doctrine/README.md + create mode 100644 00-doctrine/backup-doctrine.md + create mode 100644 00-doctrine/identity-law.md + create mode 100644 00-doctrine/operator-charter.md + create mode 100644 00-doctrine/personal-operating-doctrine.md + create mode 100644 10-inventory/README.md + create mode 100644 10-inventory/hosts/.gitkeep + create mode 100644 10-inventory/networks/.gitkeep + create mode 100644 10-inventory/services/.gitkeep + create mode 100644 20-identity/README.md + create mode 100644 20-identity/keys/public/.gitkeep + create mode 100644 20-identity/keys/public/README.md + create mode 100644 20-identity/leases/.gitkeep + create mode 100644 20-identity/policies/.gitkeep + create mode 100644 20-identity/roles/.gitkeep + create mode 100644 20-identity/templates/.gitkeep + create mode 100644 30-infra/README.md + create mode 100644 30-infra/config/.gitkeep + create mode 100644 30-infra/k8s/.gitkeep + create mode 100644 30-infra/terraform/.gitkeep + create mode 100644 40-apps/.gitkeep + create mode 100644 40-apps/README.md + create mode 100644 50-runbooks/00-panic-day/.gitkeep + create mode 100644 50-runbooks/00-panic-day/README.md + create mode 100644 50-runbooks/10-provision/.gitkeep + create mode 100644 50-runbooks/10-provision/README.md + create mode 100644 50-runbooks/20-incident/.gitkeep + create mode 100644 50-runbooks/20-incident/README.md + create mode 100644 50-runbooks/90-decom/.gitkeep + create mode 100644 50-runbooks/90-decom/README.md + create mode 100644 50-runbooks/README.md + create mode 100644 50-runbooks/_template.md + create mode 100644 60-backups/README.md + create mode 100644 60-backups/manifests/.gitkeep + create mode 100644 60-backups/restore-tests/.gitkeep + create mode 100644 70-audits/README.md + create mode 100644 70-audits/checklists/.gitkeep + create mode 100644 70-audits/checklists/README.md + create mode 100644 
70-audits/checklists/quarterly.md + create mode 100644 70-audits/checklists/weekly.md + create mode 100644 70-audits/reports/.gitkeep + create mode 100644 70-audits/reports/README.md + create mode 100644 80-automation/README.md + create mode 100644 80-automation/containers/.gitkeep + create mode 100644 80-automation/scripts/.gitkeep + create mode 100644 90-scratch/.gitignore + create mode 100644 90-scratch/README.md + create mode 100644 README.md + create mode 100644 vault/README.md + +``` + + +## Assistant + +<thinking> + + +**Creating a repository** + +I’m thinking about using the command to create a private repository for operations. The command looks like this: `glab repo create ops --private --source . --remote origin --push --defaultBranch main --description "Operational source of truth."` However, it’s wise to check the help documentation first to confirm that I’m using the correct flag names. So, I’ll run `glab repo create --help` to make sure everything is right. +</thinking> + +**Tool Call: Run glab repo create --help** +Status: Completed + +Terminal: +``` + + Creates the new project with your first configured host in your `glab` + configuration. The host defaults to `GitLab.com` if not set. To set a host, + provide either: + + - A `GITLAB_HOST` environment variable. + - A full URL for the project. + + + USAGE + + glab repo create [path] [--flags] + + EXAMPLES + + # Create a repository under your account using the current directory name. + $ glab repo create + + # Create a repository under a group using the current directory name. + $ glab repo create --group glab-cli + + # Create a repository with a specific name. + $ glab repo create my-project + + # Create a repository for a group. + $ glab repo create glab-cli/my-project + + # Create on a host other than gitlab.com. + $ GITLAB_HOST=example.com glab repo create + $ glab repo create <host>/path/to/repository + + FLAGS + + --defaultBranch Default branch of the project. Defaults to `master` if not provided. + -d --description Description of the new project. + -g --group Namespace or group for the new project. Defaults to the current user's namespace. + -h --help Show help for this command. + --internal Make project internal: visible to any authenticated user. Default. + -n --name Name of the new project. + -p --private Make project private: visible only to project members. + -P --public Make project public: visible without any authentication. + --readme Initialize project with `README.md`. + --remoteName Remote name for the Git repository you're in. Defaults to `origin` if not provided. (origin) + -s --skipGitInit Skip run 'git init'. + -t --tag The list of tags for the project. + + +``` + + +## Assistant + +<thinking> + + +**Creating a GitLab project** + +I’m thinking about how to create a project using the GitLab API. I’ll make an API call with the relevant parameters like name and visibility. We also might need the namespace id, but if we create it under a user, I just need the name. Including an option to initialize with a README is nice, but since I already have one locally, that’s fine too. I’ll check for existing repos to avoid clashes but I can go with the name "ops" for now. + +**Setting up the repository** + +It seems like the user wants to set up the "work-core" repository with an "ops" folder. Since they're logged in as vaultsovereign, I can create a remote and push to GitLab. I’ll proceed with creating the project as private. 
While branch protection is an option, the user didn't specifically ask for it, but it could still be helpful to protect the main branch and require merge requests. Since these configurations might complicate things, I'll keep it simple for now and focus on creating the project and pushing it. + +**Updating the project plan** + +I’m moving on to the next step, which is to create the GitLab project. I’ll make sure to update my plan to reflect this. It’s important to keep everything organized and clear as I go along. Getting this project set up correctly is essential! Alright, let’s ensure all the necessary details are included in the updated plan, so we can proceed smoothly to the next steps of this process.**Setting up the repository** + +It seems like the user wants to set up the "work-core" repository with an "ops" folder. Since they're logged in as vaultsovereign, I can create a remote and push to GitLab. I’ll proceed with creating the project as private. While branch protection is an option, the user didn't specifically ask for it, but it could still be helpful to protect the main branch and require merge requests. Since these configurations might complicate things, I'll keep it simple for now and focus on creating the project and pushing it.**Updating the project plan** + +I’m moving on to the next step, which is to create the GitLab project. I’ll make sure to update my plan to reflect this. It’s important to keep everything organized and clear as I go along. Getting this project set up correctly is essential! Alright, let’s ensure all the necessary details are included in the updated plan, so we can proceed smoothly to the next steps of this process. + +**Creating a GitLab project** + +I want to create a GitLab project. Option one is to use a command with specific parameters like `glab repo create ops --private --defaultBranch main --description ... --remoteName origin --name ops`. But since I'm already in the ops repository where there's a .git folder, I should specify the path argument carefully. + +I could run `glab repo create --name ops --private --defaultBranch main --description "Operational source of truth" --remoteName origin` from inside the ops directory. But I'm not sure if the command will ask for the path. Let's decide the best approach! + +**Using GitLab API for project creation** + +I'm considering running the command with `--private` and checking the output. If it prompts for input, we could face issues since interactive commands might hang waiting for a response. To avoid this, I should ensure the command runs non-interactively. + +I could use `glab api` to directly create the project with the necessary flags, which will definitely not prompt me. I need to look at the `glab api` command by running `glab api --help` to find the right API call. Let's dive in!**Using GitLab API for project creation** + +I'm considering running the command with `--private` and checking the output. If it prompts for input, we could face issues since interactive commands might hang waiting for a response. To avoid this, I should ensure the command runs non-interactively. + +I could use `glab api` to directly create the project with the necessary flags, which will definitely not prompt me. I need to look at the `glab api` command by running `glab api --help` to find the right API call. Let's dive in! +</thinking> + +**Tool Call: Run glab api --help** +Status: Completed + +Terminal: +``` + + Makes an authenticated HTTP request to the GitLab API, and prints the response. 
+ The endpoint argument should either be a path of a GitLab API v4 endpoint, or + `graphql` to access the GitLab GraphQL API. + + - [GitLab REST API documentation](https://docs.gitlab.com/api/) + - [GitLab GraphQL documentation](https://docs.gitlab.com/api/graphql/) + + If the current directory is a Git directory, uses the GitLab authenticated host in the current + directory. Otherwise, `gitlab.com` will be used. + To override the GitLab hostname, use `--hostname`. + + These placeholder values, when used in the endpoint argument, are + replaced with values from the repository of the current directory: + + - `:branch` + - `:fullpath` + - `:group` + - `:id` + - `:namespace` + - `:repo` + - `:user` + - `:username` + + Methods: the default HTTP request method is `GET`, if no parameters are added, + and `POST` otherwise. Override the method with `--method`. + + Pass one or more `--raw-field` values in `key=value` format to add + JSON-encoded string parameters to the `POST` body. + + The `--field` flag behaves like `--raw-field` with magic type conversion based + on the format of the value: + + - Literal values `true`, `false`, `null`, and integer numbers are converted to + appropriate JSON types. + - Placeholder values `:namespace`, `:repo`, and `:branch` are populated with values + from the repository of the current directory. + - If the value starts with `@`, the rest of the value is interpreted as a + filename to read the value from. Pass `-` to read from standard input. + + For GraphQL requests, all fields other than `query` and `operationName` are + interpreted as GraphQL variables. + + Raw request body can be passed from the outside via a file specified by `--input`. + Pass `-` to read from standard input. In this mode, parameters specified with + `--field` flags are serialized into URL query parameters. + + In `--paginate` mode, all pages of results are requested sequentially until + no more pages of results remain. For GraphQL requests: + + - The original query must accept an `$endCursor: String` variable. + - The query must fetch the `pageInfo{ hasNextPage, endCursor }` set of fields from a collection. + + + USAGE + + glab api <endpoint> [--flags] + + EXAMPLES + + $ glab api projects/:fullpath/releases + $ glab api projects/gitlab-com%2Fwww-gitlab-com/issues + $ glab api issues --paginate + $ glab api graphql -f query="query { currentUser { username } }" + $ glab api graphql -f query=' + query { + project(fullPath: "gitlab-org/gitlab-docs") { + name + forksCount + statistics { + wikiSize + } + issuesEnabled + boards { + nodes { + id + name + } + } + } + } + ' + + $ glab api graphql --paginate -f query=' + query($endCursor: String) { + project(fullPath: "gitlab-org/graphql-sandbox") { + name + issues(first: 2, after: $endCursor) { + edges { + node { + title + } + } + pageInfo { + endCursor + hasNextPage + } + } + } + }' + + FLAGS + + -F --field Add a parameter of inferred type. Changes the default HTTP method to "POST". + -H --header Add an additional HTTP request header. + -h --help Show help for this command. + --hostname The GitLab hostname for the request. Defaults to 'gitlab.com', or the authenticated host in the current Git directory. + -i --include Include HTTP response headers in the output. + --input The file to use as the body for the HTTP request. + -X --method The HTTP method for the request. (GET) + --paginate Make additional HTTP requests to fetch all pages of results. + -f --raw-field Add a string parameter. + --silent Do not print the response body. 
+ + +``` + + +## Assistant + +<thinking> + + +**Considering project creation steps** + +To prevent hanging, I can set `timeout_ms`, maybe around 5000, and test a command to create a project. If a prompt appears, it might not finish. I wonder if I can supply `--yes` through an environment variable. Another option could be to use `printf` piped to glab, but I’m unsure about prompts. Using the glab API to create the project noninteractively may be the simplest solution. I can then set the default branch as needed. + +**Choosing the API approach** + +I think the best way forward is using the API. First, I need to determine if the project already exists by calling `glab api projects/vaultsovereign%2Fops`. If it exists, I shouldn't create it but can set the remote and push. I'll try running the command with `--silent`. If the exit code is 1, it likely means the project isn’t found. I’ll also consider using `--include` to check the status while parsing the output appropriately. + +**Testing project existence** + +I wonder if the exit code will be 0 for success and 1 for a 404 error. To check, I’ll run a command like `glab api -i projects/vaultsovereign%2Fops --silent`, which would include headers. Alternatively, there's the option to use `glab repo view vaultsovereign/ops --json`, though that might cause an error. I think using `glab api` is a safer bet, so I’ll go ahead and run that while setting a timeout.**Choosing the API approach** + +I think the best way forward is using the API. First, I need to determine if the project already exists by calling `glab api projects/vaultsovereign%2Fops`. If it exists, I shouldn't create it but can set the remote and push. I'll try running the command with `--silent`. If the exit code is 1, it likely means the project isn’t found. I’ll also consider using `--include` to check the status while parsing the output appropriately.**Testing project existence** + +I wonder if the exit code will be 0 for success and 1 for a 404 error. To check, I’ll run a command like `glab api -i projects/vaultsovereign%2Fops --silent`, which would include headers. Alternatively, there's the option to use `glab repo view vaultsovereign/ops --json`, though that might cause an error. I think using `glab api` is a safer bet, so I’ll go ahead and run that while setting a timeout. 
+</thinking> + +**Tool Call: Run glab api -i 'projects/vaultsovereign%2Fops' --silent** +Status: Failed + +Terminal: +``` +HTTP/2.0 404 Not Found +Cache-Control: no-cache +Cf-Cache-Status: MISS +Cf-Ray: 9af75ef94c4417f5-DUB +Content-Length: 35 +Content-Security-Policy: default-src 'none' +Content-Type: application/json +Date: Wed, 17 Dec 2025 15:14:35 GMT +Gitlab-Lb: haproxy-main-60-lb-gprd +Gitlab-Sv: api-gke-us-east1-d +Nel: {"max_age": 0} +Ratelimit-Limit: 2000 +Ratelimit-Name: throttle_authenticated_api +Ratelimit-Observed: 1 +Ratelimit-Remaining: 1999 +Ratelimit-Reset: 1765984500 +Referrer-Policy: strict-origin-when-cross-origin +Server: cloudflare +Set-Cookie: __cf_bm=xXvFx_ZDxB7jXJFA7Q05NV1kUhfBe.xdH3uDVNJaUgM-1765984475-1.0.1.1-cv1nJNbijuLJhgqw_y64D0hNzuOoa6DBMgperglZUeFyMBU5O9BiWnasHIc5cYJGyJd6qXttpK30ctlTjygH.7WWNT3kFvBLi7NA3WipwUw; path=/; expires=Wed, 17-Dec-25 15:44:35 GMT; domain=.gitlab.com; HttpOnly; Secure; SameSite=None, _cfuvid=nwIh8dVeMZEC80tFY.o_kEaPA1QQS_PbrtepQdzNa60-1765984475289-0.0.1.1-604800000; path=/; domain=.gitlab.com; HttpOnly; Secure; SameSite=None +Strict-Transport-Security: max-age=31536000 +Vary: Origin, Accept-Encoding +X-Content-Type-Options: nosniff +X-Frame-Options: SAMEORIGIN +X-Gitlab-Meta: {"correlation_id":"9af75ef9621c17f5-ATL","version":"1"} +X-Request-Id: 9af75ef9621c17f5-ATL +X-Runtime: 0.058429 + +glab: 404 Project Not Found (HTTP 404) + +``` + + +**Tool Call: Run glab api projects -X POST -f name="ops" -f visibility="private" -f description="Operational source of truth"** +Status: Completed + +Terminal: +``` +{"id":77173462,"description":"Operational source of truth","name":"ops","name_with_namespace":"sovereign operator / ops","path":"ops","path_with_namespace":"vaultsovereign/ops","created_at":"2025-12-17T15:14:48.964Z","default_branch":"main","tag_list":[],"topics":[],"ssh_url_to_repo":"git@gitlab.com:vaultsovereign/ops.git","http_url_to_repo":"https://gitlab.com/vaultsovereign/ops.git","web_url":"https://gitlab.com/vaultsovereign/ops","readme_url":null,"forks_count":0,"avatar_url":null,"star_count":0,"last_activity_at":"2025-12-17T15:14:48.887Z","visibility":"private","namespace":{"id":121078566,"name":"sovereign operator","path":"vaultsovereign","kind":"user","full_path":"vaultsovereign","parent_id":null,"avatar_url":"https://secure.gravatar.com/avatar/1d8acaf320ce89eeaf7fef17ce32804c6c8293d1b21e6d1c7388f52c8f25c342?s=80\u0026d=identicon","web_url":"https://gitlab.com/vaultsovereign"},"container_registry_image_prefix":"registry.gitlab.com/vaultsovereign/ops","_links":{"self":"https://gitlab.com/api/v4/projects/77173462","issues":"https://gitlab.com/api/v4/projects/77173462/issues","merge_requests":"https://gitlab.com/api/v4/projects/77173462/merge_requests","repo_branches":"https://gitlab.com/api/v4/projects/77173462/repository/branches","labels":"https://gitlab.com/api/v4/projects/77173462/labels","events":"https://gitlab.com/api/v4/projects/77173462/events","members":"https://gitlab.com/api/v4/projects/77173462/members","cluster_agents":"https://gitlab.com/api/v4/projects/77173462/cluster_agents"},"marked_for_deletion_at":null,"marked_for_deletion_on":null,"packages_enabled":true,"empty_repo":true,"archived":false,"owner":{"id":32412904,"username":"vaultsovereign","public_email":null,"name":"sovereign 
operator","state":"active","locked":false,"avatar_url":"https://secure.gravatar.com/avatar/1d8acaf320ce89eeaf7fef17ce32804c6c8293d1b21e6d1c7388f52c8f25c342?s=80\u0026d=identicon","web_url":"https://gitlab.com/vaultsovereign"},"resolve_outdated_diff_discussions":false,"container_expiration_policy":{"cadence":"1d","enabled":false,"keep_n":10,"older_than":"90d","name_regex":".*","name_regex_keep":null,"next_run_at":"2025-12-18T15:14:48.980Z"},"repository_object_format":"sha1","issues_enabled":true,"merge_requests_enabled":true,"wiki_enabled":true,"jobs_enabled":true,"snippets_enabled":true,"container_registry_enabled":true,"service_desk_enabled":true,"service_desk_address":"contact-project+vaultsovereign-ops-77173462-issue-@incoming.gitlab.com","can_create_merge_request_in":true,"issues_access_level":"enabled","repository_access_level":"enabled","merge_requests_access_level":"enabled","forking_access_level":"enabled","wiki_access_level":"enabled","builds_access_level":"enabled","snippets_access_level":"enabled","pages_access_level":"private","analytics_access_level":"enabled","container_registry_access_level":"enabled","security_and_compliance_access_level":"private","releases_access_level":"enabled","environments_access_level":"enabled","feature_flags_access_level":"enabled","infrastructure_access_level":"enabled","monitor_access_level":"enabled","model_experiments_access_level":"enabled","model_registry_access_level":"enabled","package_registry_access_level":"enabled","emails_disabled":false,"emails_enabled":true,"show_diff_preview_in_email":true,"shared_runners_enabled":true,"lfs_enabled":true,"creator_id":32412904,"import_url":null,"import_type":null,"import_status":"none","import_error":null,"open_issues_count":0,"description_html":"\u003cp data-sourcepos=\"1:1-1:27\" dir=\"auto\"\u003eOperational source of 
truth\u003c/p\u003e","updated_at":"2025-12-17T15:14:48.964Z","ci_default_git_depth":20,"ci_delete_pipelines_in_seconds":null,"ci_forward_deployment_enabled":true,"ci_forward_deployment_rollback_allowed":true,"ci_job_token_scope_enabled":false,"ci_separated_caches":true,"ci_allow_fork_pipelines_to_run_in_parent_project":true,"ci_id_token_sub_claim_components":["project_path","ref_type","ref"],"build_git_strategy":"fetch","keep_latest_artifact":true,"restrict_user_defined_variables":true,"ci_pipeline_variables_minimum_override_role":"no_one_allowed","runner_token_expiration_interval":null,"group_runners_enabled":true,"resource_group_default_process_mode":"unordered","auto_cancel_pending_pipelines":"enabled","build_timeout":3600,"auto_devops_enabled":false,"auto_devops_deploy_strategy":"continuous","ci_push_repository_for_job_token_allowed":false,"runners_token":"GR1348941BPMxPCfHQF5brqxfz7qu","ci_config_path":"","public_jobs":true,"shared_with_groups":[],"only_allow_merge_if_pipeline_succeeds":false,"allow_merge_on_skipped_pipeline":null,"request_access_enabled":true,"only_allow_merge_if_all_discussions_are_resolved":false,"remove_source_branch_after_merge":true,"printing_merge_request_link_enabled":true,"merge_method":"merge","merge_request_title_regex":null,"merge_request_title_regex_description":null,"squash_option":"default_off","enforce_auth_checks_on_uploads":true,"suggestion_commit_message":null,"merge_commit_template":null,"squash_commit_template":null,"issue_branch_template":null,"warn_about_potentially_unwanted_characters":true,"autoclose_referenced_issues":true,"max_artifacts_size":null,"external_authorization_classification_label":"","requirements_enabled":false,"requirements_access_level":"enabled","security_and_compliance_enabled":true,"compliance_frameworks":[],"duo_remote_flows_enabled":true,"duo_foundational_flows_enabled":true,"web_based_commit_signing_enabled":false} +``` + + +## Assistant + +<thinking> + + +**Setting up the project** + +I've created a project and confirmed that the default branch is "main," which is good. It also shows that the repository is empty, so that's great! Next, I need to add the remote origin and push it. Since I'm using HTTPS for the Git protocol, I'll use the provided `http_url_to_repo` for the origin. My plan is to check if the remote exists with `git -C ops remote -v` before adding it. +</thinking> + +**Tool Call: Run git -C ops remote -v** +Status: Completed + +Terminal: +``` + + + + + +``` + + +## Assistant + +<thinking> + + +**Checking glab authentication commands** + +I’m exploring the commands available in glab, like `glab auth login` and `glab auth status`. The user mentioned they're logged in, so they likely already used `glab auth login` to set everything up. To make sure that pushing works without needing to interact, I think I should check if `glab auth setup-git` can be used to set up Git credentials. I might also want to run `glab auth --help` for more assistance. +</thinking> + +**Tool Call: Run glab auth --help** +Status: Completed + +Terminal: +``` + + Manage glab's authentication state. + + USAGE + + glab auth <command> [command] [--flags] + + COMMANDS + + configure-docker Register glab as a Docker credential helper + docker-helper A Docker credential helper for GitLab container registries + dpop-gen [--flags] Generates a DPoP (demonstrating-proof-of-possession) proof JWT. (EXPERIMENTAL) + login [--flags] Authenticate with a GitLab instance. + logout [--flags] Logout from a GitLab instance. 
+ status [--flags] View authentication status. + + FLAGS + + -h --help Show help for this command. + + +``` + + +## Assistant + +<thinking> + + +**Checking GitLab Token Configuration** + +I'm trying to understand the setup for the GitLab token — specifically how `glab config` sets the `GITLAB_TOKEN` environment variable. It seems that a `git push` wouldn't automatically use this token without integrating a credential helper. I should look into whether `glab` sets `git config` for the credential helper as `!glab auth git-credential`. I need to check my global git config for the credential helper and run some specific commands to gather the information. Let's dive into that! +</thinking> + +**Tool Call: Run git config --global --get-regexp '^credential'"\\..*gitlab\\.com.*"** +Status: Completed + +Terminal: +``` + + + + + +``` + + +**Tool Call: Run git config --global --get credential.helper** +Status: Completed + +Terminal: +``` + + + + + +``` + + +**Tool Call: Run git config --show-origin --get-all credential.helper** +Status: Completed + +Terminal: +``` +file:/Library/Developer/CommandLineTools/usr/share/git-core/gitconfig osxkeychain + + + +``` + + +## Assistant + +<thinking> + + +**Considering GitHub credentials** + +I’m looking at how the system gitconfig uses osxkeychain, which is good because it lets Git use the Keychain credential helper when pushing to HTTPS remotes. I know that in non-interactive environments, prompts might cause issues. I could use the `GIT_ASKPASS` variable for tokens. The process with `glab` could generate a token, but embedding it in the URL is insecure. It might be better to rely on the keychain since glab already stores it. + +**Exploring GitHub token usage** + +I’m considering extracting a token and using the `git credential` command to approve credentials in the osxkeychain for gitlab.com. That seems secure, but I’m a bit hesitant to manipulate the user’s keychain. Since the user asked to "set it up," it might involve storing credentials. I could attempt a push first; if it fails due to authentication, I can configure the credential then. It’s important to see how the environment handles TTY, so I’ll give `git push` a try and see what happens. + +**Considering push methods with glab** + +I'm looking into how to push using the `glab` command, thinking about creating a remote or possibly using `glab repo set-url`, but that doesn't seem right. I could switch to using an SSH remote with SSH keys, but I’m unsure if those keys are available in this environment. Since glab seems set for HTTPS, I’ll try that for the push first. If authentication fails, I can configure git with the glab token by setting `credential.helper`, but I'm not entirely sure if that command exists. + +**Exploring glab authentication options** + +I'm considering how to set up glab for Git operations. It might be `glab auth login --git-protocol https` that configures it, but the auth status indicates it's already set to use the HTTPS protocol, suggesting that glab manages the configuration differently. I recall that `glab auth login` includes an option like `--token`, which might link to the credential helper. Checking the glab config file could clarify things, but I want to avoid printing any sensitive tokens. I think I can use `glab config list` or check the help with `glab config --help`.**Exploring GitHub token usage** + +I’m considering extracting a token and using the `git credential` command to approve credentials in the osxkeychain for gitlab.com. 
That seems secure, but I’m a bit hesitant to manipulate the user’s keychain. Since the user asked to "set it up," it might involve storing credentials. I could attempt a push first; if it fails due to authentication, I can configure the credential then. It’s important to see how the environment handles TTY, so I’ll give `git push` a try and see what happens.**Considering push methods with glab** + +I'm looking into how to push using the `glab` command, thinking about creating a remote or possibly using `glab repo set-url`, but that doesn't seem right. I could switch to using an SSH remote with SSH keys, but I’m unsure if those keys are available in this environment. Since glab seems set for HTTPS, I’ll try that for the push first. If authentication fails, I can configure git with the glab token by setting `credential.helper`, but I'm not entirely sure if that command exists.**Exploring glab authentication options** + +I'm considering how to set up glab for Git operations. It might be `glab auth login --git-protocol https` that configures it, but the auth status indicates it's already set to use the HTTPS protocol, suggesting that glab manages the configuration differently. I recall that `glab auth login` includes an option like `--token`, which might link to the credential helper. Checking the glab config file could clarify things, but I want to avoid printing any sensitive tokens. I think I can use `glab config list` or check the help with `glab config --help`. +</thinking> + +**Tool Call: Run glab config --help** +Status: Completed + +Terminal: +``` + + Manage key/value strings. + + Current respected settings: + + - browser: If unset, uses the default browser. Override with environment variable $BROWSER. + - check_update: If true, notifies of new versions of glab. Defaults to true. Override with environment variable + $GLAB_CHECK_UPDATE. + - display_hyperlinks: If true, and using a TTY, outputs hyperlinks for issues and merge request lists. Defaults to + false. + - editor: If unset, uses the default editor. Override with environment variable $EDITOR. + - glab_pager: Your desired pager command to use, such as 'less -R'. + - glamour_style: Your desired Markdown renderer style. Options are dark, light, notty. Custom styles are available + using [glamour](https://github.com/charmbracelet/glamour#styles). + - host: If unset, defaults to `https://gitlab.com`. + - token: Your GitLab access token. Defaults to environment variables. + - visual: Takes precedence over 'editor'. If unset, uses the default editor. Override with environment variable + $VISUAL. + + + USAGE + + glab config [command] [--flags] + + COMMANDS + + edit [--flags] Opens the glab configuration file. + get <key> [--flags] Prints the value of a given configuration key. + set <key> <value> [--flags] Updates configuration with the value of a given key. + + FLAGS + + -g --global Use global config file. + -h --help Show help for this command. + + +``` + + +**Tool Call: Run git -C ops remote add origin https://gitlab.com/vaultsovereign/ops.git** +Status: Completed + +Terminal: +``` + + + + + +``` + + +**Tool Call: Run git -C ops push -u origin main** +Status: Completed + +Terminal: +``` +To https://gitlab.com/vaultsovereign/ops.git + * [new branch] main -> main +branch 'main' set up to track 'origin/main'. + + +``` + + +## Assistant + +<thinking> + + +**Updating project status** + +I see that we've pushed and completed step 4, while step 5 is currently in progress. It’s time to update the plan after verifying everything. 
I’m planning to run some verify commands like checking the remote and branch status in the ops directory. I might also take a look at the repository details. Running `glab repo view vaultsovereign/ops` might provide the info I need without opening a browser. Let's proceed! +</thinking> + +**Tool Call: Run git -C ops remote -v** +Status: Completed + +Terminal: +``` +origin https://gitlab.com/vaultsovereign/ops.git (fetch) +origin https://gitlab.com/vaultsovereign/ops.git (push) + + + +``` + + +**Tool Call: Run git -C ops branch -vv** +Status: Completed + +Terminal: +``` +* main f375d21 [origin/main] Initialize ops repository + + + + +``` + + +**Tool Call: Run glab repo view vaultsovereign/ops** +Status: Completed + +Terminal: +``` +name: sovereign operator / ops +description: Operational source of truth +--- +# ops + +Single source of operational truth: everything here must be understandable, rebuildable, and revocable. + +## Non-negotiables + +- **Authority boundary:** critical actions originate from `op-core-vm`. +- **No plaintext secrets in Git:** `vault/` contains encrypted artifacts only; `20-identity/keys/public/` is public keys only. +- **Naming is reality:** everything is named `<role>-<scope>-<id>`. +- **Nuke test:** if losing it causes panic, reduce scope or remove it. + +## Read first + +- `00-doctrine/operator-charter.md` +- `00-doctrine/personal-operating-doctrine.md` + +## Layout + +- `00-doctrine/` – the law (stable, rarely changed) +- `10-inventory/` – what exists (hosts, networks, services) +- `20-identity/` – roles, policies, leases, public keys +- `30-infra/` – infrastructure as code + config management +- `40-apps/` – application-specific ops (deploy, config, lifecycle) +- `50-runbooks/` – executable procedures (panic day, provision, incident, decom) +- `60-backups/` – backup manifests + restore proofs +- `70-audits/` – checklists + reports +- `80-automation/` – scripts and containers that make the above repeatable +- `90-scratch/` – disposable working area (not source of truth) +- `vault/` – encrypted-only artifacts (safe to leak without keys) + +## What does not belong here + +Meaning-bearing material (books, media, personal archives) stays in cold storage, not `ops/`. + + + +``` + + +## Assistant + +<thinking> + + +**Summarizing GitLab setup** + +I need to respond to the user who asked about setting things up with GitLab. I've created the GitLab project, pushed the ops repository, and configured the remote. I'll summarize the project URL, where the repo is located (/Users/sovereign/work-core/ops), and make sure to include commands for next steps like cloning to other machines or adding a `.gitlab-ci.yml` file. Both HTTPS and SSH options will be mentioned, but the user didn't ask for that. I'll confirm we didn't commit any secrets either. + +**Asking about GitLab project hardening** + +I can check in with the user to see if they'd like to harden their GitLab project. This could include protecting the main branch, requiring merge requests, enabling signed commits, and adding CI guards for secrets. I'll see if the user has a group named "sovereign operator / ops". My final answer will be concise, with bullet points and monospace for file paths like `ops/README.md`. I'll provide the repo URL and local path while ensuring no tokens are revealed. + +**Clarifying next steps for GitLab project** + +I need to suggest some next steps for the user regarding their GitLab project. 
I can ask, “Want me to protect `main`, enable squash+MR-only, or add a CI secret scan guardrail?” While the user is interested in "setting it up" specifically with GitLab CLI, I can still mention we created the project using `glab api`. I should provide commands like `glab repo view vaultsovereign/ops` and include the repo link as `https://gitlab.com/vaultsovereign/ops`. I'll wrap commands and file paths in monospace for clarity.**Asking about GitLab project hardening** + +I can check in with the user to see if they'd like to harden their GitLab project. This could include protecting the main branch, requiring merge requests, enabling signed commits, and adding CI guards for secrets. I'll see if the user has a group named "sovereign operator / ops". My final answer will be concise, with bullet points and monospace for file paths like `ops/README.md`. I'll provide the repo URL and local path while ensuring no tokens are revealed.**Clarifying next steps for GitLab project** + +I need to suggest some next steps for the user regarding their GitLab project. I can ask, “Want me to protect `main`, enable squash+MR-only, or add a CI secret scan guardrail?” While the user is interested in "setting it up" specifically with GitLab CLI, I can still mention we created the project using `glab api`. I should provide commands like `glab repo view vaultsovereign/ops` and include the repo link as `https://gitlab.com/vaultsovereign/ops`. I'll wrap commands and file paths in monospace for clarity. +</thinking> + +- Initialized a Git repo in `ops/` and committed the scaffold (`main`) +- Created the private GitLab project `vaultsovereign/ops`: `https://gitlab.com/vaultsovereign/ops` +- Set `origin` to `https://gitlab.com/vaultsovereign/ops.git` and pushed `main` (tracking `origin/main`) + +Want me to harden the GitLab side next (protect `main`, MR-only, squash policy, basic CI guardrails)? + +## User + +You’re **fully stood up** now — clean and correct. + +## What’s done and verified + +* ✅ `ops/` is a real git repo (`main` initialized) +* ✅ Scaffold committed (`Initialize ops repository`) +* ✅ Private GitLab project created: `vaultsovereign/ops` +* ✅ Remote set + pushed: `origin/main` tracking is live +* ✅ Repo README already carries the **non-negotiables** (authority boundary, no plaintext secrets, naming law, nuke test) + +That’s the “source of truth” backbone in place. + +--- + +## What to do next (recommended hardening) + +This is where most ops repos quietly fail later. Two moves fix it: + +### 1) Protect `main` (no direct pushes) + +Make `main` protected so changes flow through review (even if “review” = you + future you). + +**Option A: GitLab UI (fastest)** +Project → **Settings → Repository → Protected branches** + +* Protect `main` +* Allow merge: Maintainers +* Allow push: No one (or Maintainers if you must) + +**Option B: `glab api` (scriptable)** + +```bash +# Protect main (GitLab API) +glab api projects/vaultsovereign%2Fops/protected_branches \ + -X POST \ + -f name=main \ + -F push_access_level=0 \ + -F merge_access_level=40 +``` + +(If it errors because it already exists, that’s fine — it means it’s already protected.) + +### 2) Add guardrails so secrets can’t slip in + +You already have the doctrine, now add the *tripwires*: + +#### A. Add a “never commit secrets” CI check + +Create `.gitlab-ci.yml` with a single job that scans for obvious secret patterns. 
Minimal example: + +```yaml +stages: [verify] + +verify:no_secrets: + stage: verify + image: alpine:latest + script: + - apk add --no-cache git grep + - | + ! git grep -nE "(BEGIN (RSA|OPENSSH|EC) PRIVATE KEY|aws_secret_access_key|xox[baprs]-|ghp_|glpat-|-----BEGIN PGP PRIVATE KEY BLOCK-----)" -- . \ + || (echo "Potential secret detected. Remove it or encrypt it into vault/." && exit 1) +``` + +This isn’t perfect, but it stops “oops” commits immediately. + +#### B. Enforce “vault/ is encrypted only” + +Add a **README + policy** (you already did), and optionally a repo check that fails if plaintext appears under `vault/` (same grep idea). + +--- + +## Repo hygiene that pays off later + +### Commit signing (recommended) + +Make commits from `op-core-vm` provable. + +* If you want the simplest path on macOS: **SSH signing** (no GPG pain). + +```bash +git config --global gpg.format ssh +git config --global user.signingkey ~/.ssh/id_op_core.pub +git config --global commit.gpgsign true +``` + +(If you don’t have that key yet, generate it and use the public part.) + +### Add CODEOWNERS (optional, but clean) + +Even solo, it forces “pause + review” by default later. + +`CODEOWNERS` + +```text +* @vaultsovereign +``` + +--- + +## Cold storage boundary reminder + +What you’re doing with Internxt is correct **only if**: + +* you upload **encrypted archives** +* you treat it as **vault, not sync** +* you never push plaintext identity material from the core + +--- + +## Choose the next forge + +Pick one and I’ll give you the exact commands/files: + +1. **Protect main + set MR-only flow** +2. **Add CI secret tripwire + vault plaintext guard** +3. **Enable commit signing (SSH) + document key lifecycle in `20-identity/`** +4. **Panic-day runbook + restore validation harness** + +Say the number. + +## Assistant + +<thinking> + + +**Deciding on the number choice** + +The user asked me to choose a number from options 1-4 and indicated they would provide exact commands/files afterward. I see I have flexibility to implement multiple tasks if needed. They want me to say the number, which suggests they need me to select the next step. Considering we can implement tasks in the repository, I'd choose 2 (CI secret tripwire + vault plaintext guard) since it's easy to implement quickly. Option 1 (protect main) is also viable through the glab API. + +**Choosing the number to pick** + +The user asked me to choose a single number. I think the best choice is to pick 2 first (the tripwire), as it provides immediate guardrails. Though I could suggest combining it with 1, I need to stick with their request for one option. So, I’ll go with 2. I could mention that it’s the CI secret tripwire + vault plaintext guard, but since they emphasized saying just the number, I'll keep it concise and respond with "2."**Choosing the number to pick** + +The user asked me to choose a single number. I think the best choice is to pick 2 first (the tripwire), as it provides immediate guardrails. Though I could suggest combining it with 1, I need to stick with their request for one option. So, I’ll go with 2. I could mention that it’s the CI secret tripwire + vault plaintext guard, but since they emphasized saying just the number, I'll keep it concise and respond with "2." +</thinking> + +2 + +## User (checkpoint) + +Good choice. This is the **right guardrail at the right time**. + +Below is a **minimal, effective CI tripwire** that does two things: + +1. **Blocks obvious secrets anywhere in the repo** +2. 
**Enforces that `vault/` never contains plaintext** + +No bloat. No false ceremony. + +--- + +## Step 2 — CI Secret Tripwire + Vault Guard + +### 1️⃣ Create `.gitlab-ci.yml` at repo root + +```yaml +stages: [verify] + +verify:no_secrets: + stage: verify + image: alpine:latest + script: + - apk add --no-cache git grep + + # Global secret scan (cheap but effective) + - | + ! git grep -nE \ + "(BEGIN (RSA|OPENSSH|EC) PRIVATE KEY|\ + -----BEGIN PGP PRIVATE KEY BLOCK-----|\ + aws_secret_access_key|\ + AKIA[0-9A-Z]{16}|\ + xox[baprs]-|\ + ghp_|\ + glpat-|\ + password\s*=\s*['\"]|\ + secret\s*=\s*['\"])" -- . \ + || (echo "❌ Potential secret detected. Remove it or encrypt it into vault/." && exit 1) + + # Vault plaintext guard + - | + ! find vault -type f \ + ! -name "*.age" \ + ! -name "*.sops.*" \ + ! -name "README.md" \ + ! -name ".gitkeep" \ + -print | grep . \ + || (echo "❌ Plaintext file detected in vault/. Encrypt before commit." && exit 1) +``` + +What this does: + +* ❌ Blocks private keys, tokens, obvious secrets +* ❌ Fails if **any plaintext file** sneaks into `vault/` +* ✅ Allows encrypted files only (`*.age`, `*.sops.*`) +* ✅ Allows documentation (`README.md`, `.gitkeep`) + +This is **cheap insurance** that stops 90% of mistakes. + +--- + +## 2️⃣ Commit it (signed, from the core) + +```bash +git -C ops add .gitlab-ci.yml +git -C ops commit -m "Add CI secret tripwire and vault plaintext guard" +git -C ops push +``` + +After push: + +* GitLab will run the pipeline automatically +* Any future mistake fails **before merge** + +--- + +## 3️⃣ Document the rule (small but important) + +Add this to `vault/README.md` (or confirm it’s already there): + +```md +## Vault Rule + +This directory contains **encrypted artifacts only**. + +Allowed: +- *.age +- *.sops.* +- README.md +- .gitkeep + +Plaintext secrets are forbidden. +If CI fails, encrypt the file before committing. +``` + +This keeps future-you honest. + +--- + +## Resulting State (this matters) + +You now have: + +* 🔒 **Mechanical enforcement** of your doctrine +* 🔥 A repo that is *safe to push to without fear* +* 🧠 Less cognitive load (the system yells before you screw up) + +This is how you keep ops calm. + +--- + +### Next logical upgrades (when you’re ready) + +* **Protect `main`** (MR-only, no direct pushes) +* **SSH commit signing** (cryptographic authorship) +* **Panic-day runbook** that proves rebuildability + +Say the next number when ready. + +## Assistant + +<thinking> + + +**Organizing GitLab structure** + +I need to make sure that the `.gitlab-ci.yml` file is in the root of the GitLab project. In my local workspace, the root is `/Users/sovereign/work-core`, but the GitLab repo root is the `ops/` folder, so I’ll create `ops/.gitlab-ci.yml`. I might also need to update `vault/README.md` to list allowed file extensions since we hadn't included those specifics before. Overall, I’ll make necessary adjustments and ensure the CI file is set up correctly. + +**Implementing patch and dependencies** + +I’m thinking about using `apply_patch`, but I need to ensure that secret detection patterns don't include characters that could break the YAML syntax. I’ll use the `|` for multi-line scripts and keep everything properly indented. To minimize dependencies, I can skip installing `grep` since busybox has it, but I’ll use `apk add --no-cache git` since `git grep` relies on Git. We need to check if `find` exists too; busybox's find should work just fine. Additionally, I need to refine our `find` command to avoid including unwanted files. 
+ +**Evaluating `.gitignore` inclusion** + +In my repo, I see that `vault/tmp/.gitignore` exists, and the find command will detect it as a file type `f`. It doesn't match any excluded names, so it will be printed, causing the pipeline to fail! We created this `.gitignore` to ignore decrypted working materials, but it seems it violates the guard rule that allows only encrypted files plus the README. However, I think we might choose to allow `.gitignore` since it's not secret. Our guard should allow that file, as the intent is to prevent plaintext secrets from being committed. + +**Adjusting guard for `.gitignore`** + +I’m considering whether to modify the guard to exclude `vault/tmp/.gitignore` or to allow `.gitignore` files in general. If we allow them, there's a slight risk someone might commit a plaintext secret named `.gitignore`, although that's improbable. A better approach might be to allow specific file names like `README.md`, `.gitkeep`, and `.gitignore`. In GitLab CI, it’s important to focus on tracked files, so perhaps we could use `git ls-files vault` to verify that no plaintext secrets are present in tracked files. + +**Refining vault guard and `.gitignore` handling** + +I’m thinking it’s better to run the vault plaintext guard on tracked files only. I want to use `git ls-files vault | grep -vE '\.age$|\.sops\.'`, but I also need to allow `README.md`, `.gitignore`, and `.gitkeep`. We had removed `.gitkeep` for vault but still have `vault/tmp/.gitignore`. I remember our `ops/.gitignore` includes `vault/tmp/`, so normally, Git wouldn’t track anything inside that directory. It’s tricky because, despite being ignored, we’ve created `vault/tmp/.gitignore`. I need to review the commit history since it seems it wasn’t tracked, even though it exists in the working tree.**Implementing patch and dependencies** + +I’m thinking about using `apply_patch`, but I need to ensure that secret detection patterns don't include characters that could break the YAML syntax. I’ll use the `|` for multi-line scripts and keep everything properly indented. To minimize dependencies, I can skip installing `grep` since busybox has it, but I’ll use `apk add --no-cache git` since `git grep` relies on Git. We need to check if `find` exists too; busybox's find should work just fine. Additionally, I need to refine our `find` command to avoid including unwanted files.**Evaluating `.gitignore` inclusion** + +In my repo, I see that `vault/tmp/.gitignore` exists, and the find command will detect it as a file type `f`. It doesn't match any excluded names, so it will be printed, causing the pipeline to fail! We created this `.gitignore` to ignore decrypted working materials, but it seems it violates the guard rule that allows only encrypted files plus the README. However, I think we might choose to allow `.gitignore` since it's not secret. Our guard should allow that file, as the intent is to prevent plaintext secrets from being committed.**Adjusting guard for `.gitignore`** + +I’m considering whether to modify the guard to exclude `vault/tmp/.gitignore` or to allow `.gitignore` files in general. If we allow them, there's a slight risk someone might commit a plaintext secret named `.gitignore`, although that's improbable. A better approach might be to allow specific file names like `README.md`, `.gitkeep`, and `.gitignore`. 
In GitLab CI, it’s important to focus on tracked files, so perhaps we could use `git ls-files vault` to verify that no plaintext secrets are present in tracked files.**Refining vault guard and `.gitignore` handling** + +I’m thinking it’s better to run the vault plaintext guard on tracked files only. I want to use `git ls-files vault | grep -vE '\.age$|\.sops\.'`, but I also need to allow `README.md`, `.gitignore`, and `.gitkeep`. We had removed `.gitkeep` for vault but still have `vault/tmp/.gitignore`. I remember our `ops/.gitignore` includes `vault/tmp/`, so normally, Git wouldn’t track anything inside that directory. It’s tricky because, despite being ignored, we’ve created `vault/tmp/.gitignore`. I need to review the commit history since it seems it wasn’t tracked, even though it exists in the working tree. +</thinking> + +**Tool Call: Run git -C ops status '--porcelain=v1'** +Status: Completed + +Terminal: +``` + + + + + + +``` + + +## Assistant + +<thinking> + + +**Analyzing git commit issues** + +I noticed the file exists now, and the status is clean, which suggests it was committed. If it was, the commit output should list it, but maybe it got truncated? However, it seemed complete, ending with creating mode 100644 for vault/README.md, while not including vault/tmp. It’s possible vault/tmp/.gitignore didn’t exist at commit but was created afterward. The patch created vault/tmp/.gitignore, but due to the ignore rule in ops/.gitignore, it wasn’t added. + +**Clarifying git ignore behavior** + +When I ran `git status`, it showed `?? vault/tmp/`, likely because that directory is ignored. After the commit, vault/tmp/.gitignore remained untracked due to this. My `find ops -type f` output includes it, but `git status` is clean since it’s ignored. We created a `.gitignore` there, but since the whole directory is ignored, it also remains untracked. This explains why `find` shows it; it exists but is not tracked by Git, meaning it won't be pushed to GitLab. + +**Adjusting git ignore settings** + +In the CI, the vault/tmp directory won't be present, which isn't ideal because I wanted to keep it in the repo. Since it's ignored, I can't commit it, leading to a mismatch. In GitLab, vault/tmp won't exist, but that's fine; I can create it locally as needed. To track the directory while ignoring its contents, I should remove `vault/tmp/` from ops/.gitignore and use vault/tmp/.gitignore to ignore everything inside instead. This way, the directory remains tracked while its contents are ignored. + +**Planning directory tracking in CI** + +I can create and track the vault/tmp/.gitignore file, but the CI guard needs to allow this path. I might decide not to track vault/tmp at all, though having a standard tmp directory with an ignore file could be useful. It makes sense to track vault/tmp/.gitignore and possibly a README. To do this, I should remove the ignore rule from the root .gitignore and put in the directory's .gitignore. This way, users who clone the repo will also get the vault/tmp directory. We may want to track the 90-scratch directory similarly since we already have its .gitignore and README tracked. Let's verify that ops/.gitignore still includes `vault/tmp/`—and yes, it does. Also, I should consider GitLab's secret scanning features by adding `.gitlab-ci.yml` in the ops root and updating vault/README.md with the allowed list. + +**Setting up CI for secret detection** + +We'll update vault/README.md to include allowed file extensions and mention CI rules. 
I plan to create a CI job with a verify stage where I will use the Alpine image, keeping installation minimal with just `git` and `grep`. The script will run `git grep` for secret patterns. I need to handle regex quoting in YAML carefully, especially since `\s` isn't recognized in basic `grep`. + +To simplify, I'll restructure the regex without new lines and ensure I check for errors properly. I'll define a clean regex string and verify the exit status of `git grep`. If matches are found, it'll indicate potential secrets; I need to ensure the script exits correctly and handles errors gracefully. + +**Refining secret detection patterns** + +I'll implement a robust approach for secret detection in our CI. First, I'm ensuring that any file within the vault directory adheres to allowed formats, while verifying that plaintext files aren’t present. I’m establishing specific regex patterns to identify various tokens and private key formats. These patterns will help reduce false positives, especially in documentation. + +I’ll consider minimizing the impact of potential mismatches caused by placeholder values while focusing on actual secret patterns. To tighten this, I can introduce restrictions based on file extensions and possibly include a whitelist for known examples, while remaining mindful of not overly complicating the rules. + +**Setting up vault guardrails** + +The user has provided guidelines, but I have the flexibility to refine them for accuracy and functionality without blocking the existing repository processes. I'm inspecting the repository to ensure our guardrails will not interfere with current documentation or files. + +I see I need to track `vault/tmp/`, so I'll tweak the `.gitignore` to accommodate that. The CI system will also check for plaintext files within the vault and ensure that we maintain a tidy workspace while allowing necessary files. I'll implement a guard for this, include a local pre-commit hook for added protection, and adjust the README accordingly.**Clarifying git ignore behavior** + +When I ran `git status`, it showed `?? vault/tmp/`, likely because that directory is ignored. After the commit, vault/tmp/.gitignore remained untracked due to this. My `find ops -type f` output includes it, but `git status` is clean since it’s ignored. We created a `.gitignore` there, but since the whole directory is ignored, it also remains untracked. This explains why `find` shows it; it exists but is not tracked by Git, meaning it won't be pushed to GitLab.**Adjusting git ignore settings** + +In the CI, the vault/tmp directory won't be present, which isn't ideal because I wanted to keep it in the repo. Since it's ignored, I can't commit it, leading to a mismatch. In GitLab, vault/tmp won't exist, but that's fine; I can create it locally as needed. To track the directory while ignoring its contents, I should remove `vault/tmp/` from ops/.gitignore and use vault/tmp/.gitignore to ignore everything inside instead. This way, the directory remains tracked while its contents are ignored.**Planning directory tracking in CI** + +I can create and track the vault/tmp/.gitignore file, but the CI guard needs to allow this path. I might decide not to track vault/tmp at all, though having a standard tmp directory with an ignore file could be useful. It makes sense to track vault/tmp/.gitignore and possibly a README. To do this, I should remove the ignore rule from the root .gitignore and put in the directory's .gitignore. 
This way, users who clone the repo will also get the vault/tmp directory. We may want to track the 90-scratch directory similarly since we already have its .gitignore and README tracked. Let's verify that ops/.gitignore still includes `vault/tmp/`—and yes, it does. Also, I should consider GitLab's secret scanning features by adding `.gitlab-ci.yml` in the ops root and updating vault/README.md with the allowed list.**Setting up CI for secret detection** + +We'll update vault/README.md to include allowed file extensions and mention CI rules. I plan to create a CI job with a verify stage where I will use the Alpine image, keeping installation minimal with just `git` and `grep`. The script will run `git grep` for secret patterns. I need to handle regex quoting in YAML carefully, especially since `\s` isn't recognized in basic `grep`. + +To simplify, I'll restructure the regex without new lines and ensure I check for errors properly. I'll define a clean regex string and verify the exit status of `git grep`. If matches are found, it'll indicate potential secrets; I need to ensure the script exits correctly and handles errors gracefully.**Refining secret detection patterns** + +I'll implement a robust approach for secret detection in our CI. First, I'm ensuring that any file within the vault directory adheres to allowed formats, while verifying that plaintext files aren’t present. I’m establishing specific regex patterns to identify various tokens and private key formats. These patterns will help reduce false positives, especially in documentation. + +I’ll consider minimizing the impact of potential mismatches caused by placeholder values while focusing on actual secret patterns. To tighten this, I can introduce restrictions based on file extensions and possibly include a whitelist for known examples, while remaining mindful of not overly complicating the rules.**Setting up vault guardrails** + +The user has provided guidelines, but I have the flexibility to refine them for accuracy and functionality without blocking the existing repository processes. I'm inspecting the repository to ensure our guardrails will not interfere with current documentation or files. + +I see I need to track `vault/tmp/`, so I'll tweak the `.gitignore` to accommodate that. The CI system will also check for plaintext files within the vault and ensure that we maintain a tidy workspace while allowing necessary files. I'll implement a guard for this, include a local pre-commit hook for added protection, and adjust the README accordingly. +</thinking> + +**Tool Call: Read README.md** +Status: Completed + +# Vault (encrypted-only) + +This directory contains **encrypted artifacts only**. + +Rules: + +- Never commit plaintext secrets (even temporarily). +- Never commit private keys. +- Treat `vault/` as safe-to-leak ciphertext: if leaking it would break you, it is mis-scoped. + +Decryption/working material belongs in `vault/tmp/` (gitignored) and should be wiped after use. + + + +## Assistant + +<thinking> + + +**Evaluating file patterns for scanning** + +I'm considering which file patterns to exclude. It seems reasonable to exclude `.md` files and `README.md` since they may contain example tokens. However, I wonder if we should allow prefixes like `glpat-` in documentation. There’s a risk of missing real secrets if we exclude `.md` entirely. I could also see the accidental inclusion of tokens in these documents. Maybe we should scan everything but refine the patterns to minimize false positives. 
+ +**Refining scanning patterns** + +I'm thinking about requiring at least 20 characters after the `glpat-` prefix in documentation, which should help differentiate it from actual tokens. I’d apply a similar rule for `ghp_`, needing 36 characters. For private key headers, mentioning "BEGIN RSA PRIVATE KEY" could match, but perhaps that's less likely to appear in docs. I’m concerned about patterns like `password = "..."`, which we want to avoid, but we also want to capture common configurations in files like `*.tfvars` and `.env`, even though they might be ignored. + +**Proposing scanning patterns** + +I'm thinking about implementing patterns to check for several types of sensitive information, like PEM private keys, AWS Access Key IDs, and tokens from GitHub, GitLab, and Slack. It's also worth considering generic strings like "BEGIN PRIVATE KEY." I might check certain file extensions, such as `.pem`, `.key`, and `.p12`, but they’re often ignored in commits. We could use a blocklist to flag these file types in a way that minimizes false positives, keeping in mind that some certificates might still be legitimate to store. + +**Evaluating file storage and security** + +I'm considering that while sensitive files like `.pem` should generally be blocked to ensure security, there could be cases where encrypted versions are acceptable. Public certificates could be derived, and maybe we can allow `.crt` files. Instead of focusing just on file extensions, I might implement a scan for "PRIVATE KEY" and token patterns to keep things simple. + +Next, I’m thinking of vault plaintext guard policies, where files under the `vault/` directory should only include allowed names like `README.md`, `.gitkeep`, and possibly a temporary `.gitignore`. For encrypted files, I’ll allow extensions like `.age` and those marked with `.sops.`, particularly for `.yaml` files. + +**Implementing vault file checks** + +I'm thinking about setting up a regex for allowed files in the vault, which will include things like `README.md`, `.gitkeep`, and any files with `.age` or `.sops.` extensions. The plan is to check for any unauthorized files and print a warning if there are plaintext files detected. + +If the vault directory is empty, the check will still pass, but maybe we want to ensure that `README.md` is present. While this might seem strict, it could help maintain organization. I also want to ensure no unencrypted files like `secrets.yml` are in the root. After committing and pushing changes, I'll implement the CI pipeline with a potential update to the README. Let’s apply the patch!**Refining scanning patterns** + +I'm thinking about requiring at least 20 characters after the `glpat-` prefix in documentation, which should help differentiate it from actual tokens. I’d apply a similar rule for `ghp_`, needing 36 characters. For private key headers, mentioning "BEGIN RSA PRIVATE KEY" could match, but perhaps that's less likely to appear in docs. I’m concerned about patterns like `password = "..."`, which we want to avoid, but we also want to capture common configurations in files like `*.tfvars` and `.env`, even though they might be ignored.**Proposing scanning patterns** + +I'm thinking about implementing patterns to check for several types of sensitive information, like PEM private keys, AWS Access Key IDs, and tokens from GitHub, GitLab, and Slack. It's also worth considering generic strings like "BEGIN PRIVATE KEY." 
I might check certain file extensions, such as `.pem`, `.key`, and `.p12`, but they’re often ignored in commits. We could use a blocklist to flag these file types in a way that minimizes false positives, keeping in mind that some certificates might still be legitimate to store.**Evaluating file storage and security** + +I'm considering that while sensitive files like `.pem` should generally be blocked to ensure security, there could be cases where encrypted versions are acceptable. Public certificates could be derived, and maybe we can allow `.crt` files. Instead of focusing just on file extensions, I might implement a scan for "PRIVATE KEY" and token patterns to keep things simple. + +Next, I’m thinking of vault plaintext guard policies, where files under the `vault/` directory should only include allowed names like `README.md`, `.gitkeep`, and possibly a temporary `.gitignore`. For encrypted files, I’ll allow extensions like `.age` and those marked with `.sops.`, particularly for `.yaml` files.**Implementing vault file checks** + +I'm thinking about setting up a regex for allowed files in the vault, which will include things like `README.md`, `.gitkeep`, and any files with `.age` or `.sops.` extensions. The plan is to check for any unauthorized files and print a warning if there are plaintext files detected. + +If the vault directory is empty, the check will still pass, but maybe we want to ensure that `README.md` is present. While this might seem strict, it could help maintain organization. I also want to ensure no unencrypted files like `secrets.yml` are in the root. After committing and pushing changes, I'll implement the CI pipeline with a potential update to the README. Let’s apply the patch! +</thinking> + +**Tool Call: Edit /Users/sovereign/work-core/ops/.gitlab-ci.yml, /Users/sovereign/work-core/ops/vault/README.md** +Status: Completed + +Diff: /Users/sovereign/work-core/ops/.gitlab-ci.yml +``` +stages: [verify] + +verify:no_secrets: + stage: verify + image: alpine:latest + script: + - apk add --no-cache git grep + + # Global secret scan (cheap but effective) + - | + set +e + secret_re='(BEGIN (RSA|OPENSSH|EC) PRIVATE KEY|-----BEGIN PGP PRIVATE KEY BLOCK-----|aws_secret_access_key|AKIA[0-9A-Z]{16}|xox[baprs]-[0-9A-Za-z-]{10,}|ghp_[A-Za-z0-9]{36}|glpat-[A-Za-z0-9_\\-]{20,})' + git grep -nE "$secret_re" -- . + status=$? + set -e + + if [ "$status" -eq 0 ]; then + echo "❌ Potential secret detected. Remove it or encrypt it into vault/." + exit 1 + elif [ "$status" -ne 1 ]; then + echo "❌ Secret scan failed (git grep exit $status)." + exit "$status" + fi + + # Vault plaintext guard (tracked files only) + - | + set -euo pipefail + + allowed_vault_re='(^vault/README\\.md$|^vault/\\.gitkeep$|^vault/tmp/\\.gitignore$|\\.age$|\\.sops\\.)' + bad_vault_files="$(git ls-files vault | grep -vE "$allowed_vault_re" || true)" + + if [ -n "$bad_vault_files" ]; then + echo "❌ Plaintext file detected in vault/. Encrypt before commit:" + echo "$bad_vault_files" + exit 1 + fi + + +``` + + +Diff: /Users/sovereign/work-core/ops/vault/README.md +``` +# Vault (encrypted-only) + +This directory contains **encrypted artifacts only**. + +Rules: + +- Never commit plaintext secrets (even temporarily). +- Never commit private keys. +- Treat `vault/` as safe-to-leak ciphertext: if leaking it would break you, it is mis-scoped. + +Decryption/working material belongs in `vault/tmp/` (gitignored) and should be wiped after use. + +## Allowed files + +The vault is for ciphertext, plus documentation. 
+ +Allowed: + +- `*.age` +- `*.sops.*` +- `README.md` +- `.gitkeep` (if used) + +Anything else under `vault/` is treated as plaintext and is blocked by CI. + +``` + + +## Assistant + +<thinking> + + +**Examining regex behavior** + +I'm looking into how backslashes work in regex engines. A backslash usually escapes a hyphen, but with two backslashes, it might just treat the second as a literal backslash. Inside a character class, the backslash can escape certain characters, but I'm unsure about ERE. According to POSIX, backslashes have specific meanings, but they're somewhat undefined. It seems like two backslashes could produce a single one in regex, especially considering we’re in shell single quotes where there's no escape processing. + +**Clarifying regex interpretation** + +In the ERE, `\\-` could be seen as `\-`, since the regex parser interprets `\\` as an escape for `\`. However, I can simplify this. If I place the hyphen at the end of the character class, like this: `[A-Za-z0-9_-]{20,}`, it’s treated literally in most regex engines, including ERE. For matching a literal dot, I need to use `\.`. I originally had `\\.sops\\.` which isn't quite right; it should just be `\.sops\.`. + +**Assessing regex backslashes** + +I'm looking into how `\\.` is interpreted in regex. In ERE, a backslash escapes special characters, so `\\` becomes a literal backslash, and the dot remains unescaped, meaning it matches any character. That means `\\.sops\\.` could match a backslash followed by any character, which isn't what I want. Since YAML doesn't interpret backslash sequences in a block scalar, the script goes to the shell with its original formatting. Therefore, I need to adjust my regex to use single backslashes and update `.gitlab-ci.yml` accordingly. + +**Evaluating shell options** + +I'm realizing that we used `set -euo pipefail` in the second script, but since the image uses Busybox `sh`, I need to consider compatibility. Busybox ash, which is the default shell for GitLab CI in Alpine, doesn't support `pipefail`. Although I thought it might support that option since version 1.22, I’m not entirely sure. To play it safe, I'll avoid using `pipefail`, as it's unnecessary for our checks. Instead, I can just use `set -eu`, since we already handle errors with `|| true`. + +**Adjusting script settings** + +I’m noticing that using `set -euo pipefail` could lead to job failures if `pipefail` isn't recognized, so let's switch it to `set -eu`. In the first script, we applied `set +e` followed by `set -e`, which worked fine. We also installed the `grep` package, which should give us support for GNU grep and the `-E` option. However, since the job is using `git grep`, I need to ensure our patterns include braces since that's supported. Finally, we should adjust `.gitlab-ci.yml` accordingly for our patterns, like including `vault/README.md`.**Clarifying regex interpretation** + +In the ERE, `\\-` could be seen as `\-`, since the regex parser interprets `\\` as an escape for `\`. However, I can simplify this. If I place the hyphen at the end of the character class, like this: `[A-Za-z0-9_-]{20,}`, it’s treated literally in most regex engines, including ERE. For matching a literal dot, I need to use `\.`. I originally had `\\.sops\\.` which isn't quite right; it should just be `\.sops\.`.**Assessing regex backslashes** + +I'm looking into how `\\.` is interpreted in regex. 
In ERE, a backslash escapes special characters, so `\\` becomes a literal backslash, and the dot remains unescaped, meaning it matches any character. That means `\\.sops\\.` could match a backslash followed by any character, which isn't what I want. Since YAML doesn't interpret backslash sequences in a block scalar, the script goes to the shell with its original formatting. Therefore, I need to adjust my regex to use single backslashes and update `.gitlab-ci.yml` accordingly.**Evaluating shell options** + +I'm realizing that we used `set -euo pipefail` in the second script, but since the image uses Busybox `sh`, I need to consider compatibility. Busybox ash, which is the default shell for GitLab CI in Alpine, doesn't support `pipefail`. Although I thought it might support that option since version 1.22, I’m not entirely sure. To play it safe, I'll avoid using `pipefail`, as it's unnecessary for our checks. Instead, I can just use `set -eu`, since we already handle errors with `|| true`.**Adjusting script settings** + +I’m noticing that using `set -euo pipefail` could lead to job failures if `pipefail` isn't recognized, so let's switch it to `set -eu`. In the first script, we applied `set +e` followed by `set -e`, which worked fine. We also installed the `grep` package, which should give us support for GNU grep and the `-E` option. However, since the job is using `git grep`, I need to ensure our patterns include braces since that's supported. Finally, we should adjust `.gitlab-ci.yml` accordingly for our patterns, like including `vault/README.md`. +</thinking> + +**Tool Call: Edit /Users/sovereign/work-core/ops/.gitlab-ci.yml** +Status: Completed + +Diff: /Users/sovereign/work-core/ops/.gitlab-ci.yml +``` +stages: [verify] + +verify:no_secrets: + stage: verify + image: alpine:latest + script: + - apk add --no-cache git grep + + # Global secret scan (cheap but effective) + - | + set +e + secret_re='(BEGIN (RSA|OPENSSH|EC) PRIVATE KEY|-----BEGIN PGP PRIVATE KEY BLOCK-----|aws_secret_access_key|AKIA[0-9A-Z]{16}|xox[baprs]-[0-9A-Za-z-]{10,}|ghp_[A-Za-z0-9]{36}|glpat-[A-Za-z0-9_-]{20,})' + git grep -nE "$secret_re" -- . + status=$? + set -e + + if [ "$status" -eq 0 ]; then + echo "❌ Potential secret detected. Remove it or encrypt it into vault/." + exit 1 + elif [ "$status" -ne 1 ]; then + echo "❌ Secret scan failed (git grep exit $status)." + exit "$status" + fi + + # Vault plaintext guard (tracked files only) + - | + set -eu + + allowed_vault_re='(^vault/README\.md$|^vault/\.gitkeep$|^vault/tmp/\.gitignore$|\.age$|\.sops\.)' + bad_vault_files="$(git ls-files vault | grep -vE "$allowed_vault_re" || true)" + + if [ -n "$bad_vault_files" ]; then + echo "❌ Plaintext file detected in vault/. Encrypt before commit:" + echo "$bad_vault_files" + exit 1 + fi + +``` + + +## Assistant + +<thinking> + + +**Reviewing .gitlab-ci.yml changes** + +I’ve updated `.gitlab-ci.yml` to use regex with the correct backslashes, which is great. However, I noticed that `allowed_vault_re` includes `^vault/tmp/\.gitignore$`. If we’re not tracking that file, it's fine, but if we decide to later, it would still be allowed. Now, I should commit these changes and push them. Let's also check the git status to confirm everything’s in order before proceeding. +</thinking> + +**Tool Call: Run git -C ops status '--porcelain=v1'** +Status: Completed + +Terminal: +``` + M vault/README.md +?? 
.gitlab-ci.yml + + + + +``` + + +## Assistant + +<thinking> + + +**Committing changes** + +I’m planning to add and commit with a message like "Add CI secret tripwire and vault guard." But I’m wondering if the user wanted a signed commit since they didn’t specifically request one. I guess since it’s not mentioned, I’ll just go ahead and commit without that extra step. I hope that’s okay! It’s always good to check in on these details to make sure I’m aligned with the user’s needs. +</thinking> + +**Tool Call: Run git -C ops add .gitlab-ci.yml vault/README.md** +Status: Completed + +Terminal: +``` + + + + + + +``` + + +**Tool Call: Run git -C ops commit -m 'Add CI secret tripwire and vault guard'** +Status: Completed + +Terminal: +``` +[main f3bef9d] Add CI secret tripwire and vault guard + 2 files changed, 48 insertions(+) + create mode 100644 .gitlab-ci.yml + + + +``` + + +**Tool Call: Run git -C ops push** +Status: Completed + +Terminal: +``` +To https://gitlab.com/vaultsovereign/ops.git + f375d21..f3bef9d main -> main + + + + +``` + + +## Assistant + +<thinking> + + +**Updating plan statuses** + +I'm thinking that we should update the plan statuses. We've completed all the steps for the repository setup, but we haven't created a new plan for the next task. Maybe we don’t really need it, since the user asked to implement two items. However, we might want to summarize our final response later. First, I'd like to verify if the pipeline has started and succeeded. I can check that using the `glab pipeline list` command. +</thinking> + +**Tool Call: Run glab pipeline --help** +Status: Completed + +Terminal: +``` + + Work with GitLab CI/CD pipelines and jobs. + + USAGE + + glab ci <command> [command] [--flags] + + COMMANDS + + artifact <refName> <jobName> [--flags] Download all artifacts from the last pipeline. + cancel <command> [command] Cancel a running pipeline or job. + config <command> [command] [--flags] Work with GitLab CI/CD configuration. + delete <id> [--flags] Delete CI/CD pipelines. + get [--flags] Get JSON of a running CI/CD pipeline on the current or other specified branch. + lint [--flags] Checks if your `.gitlab-ci.yml` file is valid. + list [--flags] Get the list of CI/CD pipelines. + retry <job-id> [--flags] Retry a CI/CD job. + run [--flags] Create or run a new CI/CD pipeline. + run-trig [--flags] Run a CI/CD pipeline trigger. + status [--flags] View a running CI/CD pipeline on current or other branch specified. + trace [<job-id>] [--flags] Trace a CI/CD job log in real time. + trigger <job-id> [--flags] Trigger a manual CI/CD job. + view [branch/tag] [--flags] View, run, trace, log, and cancel CI/CD job's current pipeline. + + FLAGS + + -h --help Show help for this command. + -R --repo Select another repository. Can use either `OWNER/REPO` or `GROUP/NAMESPACE/REPO` format. Also accepts full URL or Git URL. + + +``` + + +## Assistant + +<thinking> + + +**Running CI status check** + +I’m considering using `glab ci status`, and I might run `glab ci status -R vaultsovereign/ops` from that repo. It seems straightforward, so I’ll go ahead with `glab ci status`. I want to make sure I’m checking the right repo and getting the correct information. It’s important to be meticulous when running this command to avoid any confusion or mistakes! 
+</thinking> + +**Tool Call: Run glab ci status** +Status: Failed + +Terminal: +``` + +https://gitlab.com/vaultsovereign/ops/-/pipelines/2220346613 +SHA: f3bef9dfb164584bcd75292feba65cfe2fffce5c +Pipeline state: failed + + +``` + + +## Assistant + +<thinking> + + +**Inspecting pipeline failure** + +The pipeline failed, and I need to check the logs to figure out why. I’ll use commands like `glab ci view` or `glab ci trace` to get more details. I'll start by running `glab ci view`, which might show the jobs involved, and I can also check `glab ci view --help` to explore my options. It seems like using `glab ci view main --job` could provide me with the necessary insights. +</thinking> + +**Tool Call: Run glab ci view --help** +Status: Completed + +Terminal: +``` + + Supports viewing, running, tracing, and canceling jobs. + + Use arrow keys to navigate jobs and logs. + + - `Enter` to toggle through a job's logs / traces, or display a child pipeline. + Trigger jobs are marked with a `»`. + - `Esc` or `q` to close the logs or trace, or return to the parent pipeline. + - `Ctrl+R`, `Ctrl+P` to run, retry, or play a job. Use `Tab` or arrow keys to + navigate the modal, and `Enter` to confirm. + - `Ctrl+D` to cancel a job. If the selected job isn't running or pending, + quits the CI/CD view. + - `Ctrl+Q` to quit the CI/CD view. + - `Ctrl+Space` to suspend application and view the logs. Similar to `glab pipeline ci trace`. + - Supports `vi` style bindings and arrow keys for navigating jobs and logs. + + + USAGE + + glab ci view [branch/tag] [--flags] + + EXAMPLES + + # Uses current branch + $ glab pipeline ci view + + # Get latest pipeline on main branch + $ glab pipeline ci view main + + # Just like the second example + $ glab pipeline ci view -b main + + # Get latest pipeline on main branch of myusername/glab repo + $ glab pipeline ci view -b main -R myusername/glab + + FLAGS + + -b --branch Check pipeline status for a branch or tag. Defaults to the current branch. + -h --help Show help for this command. + -p --pipelineid Check pipeline status for a specific pipeline ID. + -R --repo Select another repository. Can use either `OWNER/REPO` or `GROUP/NAMESPACE/REPO` format. Also accepts full URL or Git URL. + -w --web Open pipeline in a browser. Uses default browser, or browser specified in BROWSER variable. + + +``` + + +## Assistant + +<thinking> + + +**Exploring CI options** + +I'm considering how to view the CI jobs using the `glab` tool. The interactive TUI might not be the best approach; instead, I could try using `glab ci trace <job-id>`. I’ll list the jobs in the pipeline, possibly using `glab ci get` or `glab ci list`. The `glab ci get` command can return JSON, or I could use `glab api` for job information. I'll attempt `glab ci get` to check the pipeline ID and job details. +</thinking> + +**Tool Call: Run glab ci get --branch main** +Status: Completed + +Terminal: +``` +# Pipeline: +id: 2220346613 +status: failed +source: push +ref: main +sha: f3bef9dfb164584bcd75292feba65cfe2fffce5c +tag: false +yaml Errors: +user: vaultsovereign +created: 2025-12-17 15:24:12.175 +0000 UTC +started: <nil> +updated: 2025-12-17 15:24:12.175 +0000 UTC + +# Jobs: + + +``` + + +## Assistant + +<thinking> + + +**Troubleshooting pipeline issues** + +I’m thinking it doesn’t list jobs possibly because the pipeline didn’t start due to a config error. It shows `started: <nil>`, which suggests the pipeline was created but not started and that the status is failed. 
I’m considering if there's an invalid YAML configuration, but glab shows no errors. It could be that the pipeline failed due to the misconfiguration of stages. I’ll need to double-check the usage of `stages: [verify]` too. + +**Investigating pipeline issues** + +I'm considering if the command `apk add --no-cache git grep` might be failing because the container can't pull the image due to a restricted network, but the network is enabled. Another thought is the use of `image: alpine:latest`, which GitLab's shared runners might not support. If the pipeline failed before starting, it could be due to the lack of available runners, but then it would show as 'pending.' I need to check the pipeline details using the GitLab API, so I’ll query it.**Investigating pipeline issues** + +I'm considering if the command `apk add --no-cache git grep` might be failing because the container can't pull the image due to a restricted network, but the network is enabled. Another thought is the use of `image: alpine:latest`, which GitLab's shared runners might not support. If the pipeline failed before starting, it could be due to the lack of available runners, but then it would show as 'pending.' I need to check the pipeline details using the GitLab API, so I’ll query it. +</thinking> + +**Tool Call: Run glab api projects/77173462/pipelines/2220346613** +Status: Completed + +Terminal: +``` +{"id":2220346613,"iid":1,"project_id":77173462,"sha":"f3bef9dfb164584bcd75292feba65cfe2fffce5c","ref":"main","status":"failed","source":"push","created_at":"2025-12-17T15:24:12.175Z","updated_at":"2025-12-17T15:24:12.175Z","web_url":"https://gitlab.com/vaultsovereign/ops/-/pipelines/2220346613","before_sha":"f375d21a9e391502ecb170583c4abc7fabbd7ddf","tag":false,"yaml_errors":null,"user":{"id":32412904,"username":"vaultsovereign","public_email":null,"name":"sovereign operator","state":"active","locked":false,"avatar_url":"https://secure.gravatar.com/avatar/1d8acaf320ce89eeaf7fef17ce32804c6c8293d1b21e6d1c7388f52c8f25c342?s=80\u0026d=identicon","web_url":"https://gitlab.com/vaultsovereign"},"started_at":null,"finished_at":"2025-12-17T15:24:12.174Z","committed_at":null,"duration":null,"queued_duration":null,"coverage":null,"detailed_status":{"icon":"status_failed","text":"Failed","label":"failed","group":"failed","tooltip":"failed","has_details":false,"details_path":"/vaultsovereign/ops/-/pipelines/2220346613","illustration":null,"favicon":"/assets/ci_favicons/favicon_status_failed-41304d7f7e3828808b0c26771f0309e55296819a9beea3ea9fbf6689d9857c12.png"},"archived":false,"name":null} +``` + + +## Assistant + +<thinking> + + +**Investigating CI runners in GitLab** + +I'm thinking the project might not have any CI runners enabled. Typically, shared runners are on by default for GitLab.com, but perhaps they're disabled for private projects or new free tier accounts? Historically, free tier included them with limits, but they might need to be activated. + +If they're disabled, the pipeline could get stuck in pending without any runners. I'm also considering whether there might be an issue with the job name or if "CI/CD" isn't configured properly. It’s worth checking the pipeline jobs endpoint to find out more. + +**Checking pipeline job issues** + +I'm considering that if there are no jobs, the pipeline creation might have failed due to the message "No stages/jobs for this pipeline." This often happens when the `.gitlab-ci.yml` file only has `stages: [verify]`, and a job is not included because it appears hidden due to a leading dot. 
+ +But, I don't think that's the case here. I wonder if the trailing blank line in the `.gitlab-ci.yml` file could be causing issues. There's a lot to unravel here!**Checking pipeline job issues** + +I'm considering that if there are no jobs, the pipeline creation might have failed due to the message "No stages/jobs for this pipeline." This often happens when the `.gitlab-ci.yml` file only has `stages: [verify]`, and a job is not included because it appears hidden due to a leading dot. + +But, I don't think that's the case here. I wonder if the trailing blank line in the `.gitlab-ci.yml` file could be causing issues. There's a lot to unravel here! +</thinking> + +**Tool Call: Run glab api projects/77173462/pipelines/2220346613/jobs** +Status: Completed + +Terminal: +``` +[] + + + + + +``` + + +## Assistant + +<thinking> + + +**Analyzing GitLab CI Jobs** + +I’m looking into why no jobs are created in the pipeline. It seems like the issue might be with the `.gitlab-ci.yml` configuration, especially if `only/except` rules are excluding everything. I wonder if GitLab sees a job as hidden because of the colon in its name. However, I know that colons are allowed, so maybe the YAML parser is treating `verify:no_secrets:` as a key with nested mapping. This needs careful checking regarding YAML syntax and whether colons in keys require quoting. + +**Investigating YAML Key Parsing** + +I'm examining the situation with the colons in the key names. In `verify:no_secrets:`, the colon inside the key might lead to confusion, especially since it’s followed by another colon without a space. Is it possible that GitLab’s CI parser interprets this differently? For example, in `foo:bar: 1`, the first colon is just part of the key, allowing it as a plain scalar. However, I wonder if GitLab's parser treats colons in job names in a unique way. I'm still puzzled as to why no jobs are created. + +**Troubleshooting GitLab Pipeline Jobs** + +I’m troubleshooting the issue with the pipeline not showing any jobs. We added the `.gitlab-ci.yml` file in the ops/ directory, and I pushed it to GitLab, so it should be recognized. But it seems there's still a problem—maybe it has only hidden jobs or rules that exclude all jobs. It’s also possible that the CI configuration is empty because the file didn't exist at the time of the last commit. I need to ensure the pipeline config is set up correctly for it to create jobs. + +**Checking Stages and Job Configuration** + +I'm considering other possibilities for why there are no jobs in the pipeline. We defined `stages: [verify]`, but I'm not sure if it's being recognized correctly. Maybe there are exclusions like `only/except` or issues with how the job is defined—perhaps it needs specific tags? Since `stages: [verify]` should be fine, the next step is to lint the configuration using `glab ci lint`. That should help me identify if the YAML is valid and spot any errors or misconfigurations.**Investigating YAML Key Parsing** + +I'm examining the situation with the colons in the key names. In `verify:no_secrets:`, the colon inside the key might lead to confusion, especially since it’s followed by another colon without a space. Is it possible that GitLab’s CI parser interprets this differently? For example, in `foo:bar: 1`, the first colon is just part of the key, allowing it as a plain scalar. However, I wonder if GitLab's parser treats colons in job names in a unique way. 
I'm still puzzled as to why no jobs are created.**Troubleshooting GitLab Pipeline Jobs** + +I’m troubleshooting the issue with the pipeline not showing any jobs. We added the `.gitlab-ci.yml` file in the ops/ directory, and I pushed it to GitLab, so it should be recognized. But it seems there's still a problem—maybe it has only hidden jobs or rules that exclude all jobs. It’s also possible that the CI configuration is empty because the file didn't exist at the time of the last commit. I need to ensure the pipeline config is set up correctly for it to create jobs.**Checking Stages and Job Configuration** + +I'm considering other possibilities for why there are no jobs in the pipeline. We defined `stages: [verify]`, but I'm not sure if it's being recognized correctly. Maybe there are exclusions like `only/except` or issues with how the job is defined—perhaps it needs specific tags? Since `stages: [verify]` should be fine, the next step is to lint the configuration using `glab ci lint`. That should help me identify if the YAML is valid and spot any errors or misconfigurations. +</thinking> + +**Tool Call: Run glab ci lint** +Status: Completed + +Terminal: +``` +Validating... +✓ CI/CD YAML is valid! + + + + +``` + + +**Tool Call: Read .gitlab-ci.yml** +Status: Completed + +```yml +stages: [verify] + +verify:no_secrets: + stage: verify + image: alpine:latest + script: + - apk add --no-cache git grep + + # Global secret scan (cheap but effective) + - | + set +e + secret_re='(BEGIN (RSA|OPENSSH|EC) PRIVATE KEY|-----BEGIN PGP PRIVATE KEY BLOCK-----|aws_secret_access_key|AKIA[0-9A-Z]{16}|xox[baprs]-[0-9A-Za-z-]{10,}|ghp_[A-Za-z0-9]{36}|glpat-[A-Za-z0-9_-]{20,})' + git grep -nE "$secret_re" -- . + status=$? + set -e + + if [ "$status" -eq 0 ]; then + echo "❌ Potential secret detected. Remove it or encrypt it into vault/." + exit 1 + elif [ "$status" -ne 1 ]; then + echo "❌ Secret scan failed (git grep exit $status)." + exit "$status" + fi + + # Vault plaintext guard (tracked files only) + - | + set -eu + + allowed_vault_re='(^vault/README\.md$|^vault/\.gitkeep$|^vault/tmp/\.gitignore$|\.age$|\.sops\.)' + bad_vault_files="$(git ls-files vault | grep -vE "$allowed_vault_re" || true)" + + if [ -n "$bad_vault_files" ]; then + echo "❌ Plaintext file detected in vault/. Encrypt before commit:" + echo "$bad_vault_files" + exit 1 + fi +``` + + +## Assistant + +<thinking> + + +**Checking pipeline issues** + +I’m trying to understand why the pipeline has no jobs. It seems it’s marked as "failed" at creation time, which could be due to running out of CI minutes. If the minutes are exceeded, I think it wouldn’t run or create jobs. But I’m curious if the pipeline would still show a "failed" status without jobs. I want to check project features for shared runner minutes or if the pipeline is disabled. I'll look into the detailed status and failure reason through the GitLab API as well. + +**Investigating pipeline creation** + +I’m considering whether the pipeline was created but didn’t start because of the `workflow:rules` set to `when: never`. It could also be that the pipeline was initiated by a push to the default branch, but there's no `only` constraint. Maybe GitLab couldn’t create jobs because it couldn’t fetch the repository or there’s no runner for a new project. With `started_at` being null and `finished_at` set, it seems the pipeline failed instantly. That usually happens due to invalid YAML or configuration, but there are no YAML errors. So I wonder if it’s just a lack of jobs causing the failure? 
+ +**Exploring pipeline errors** + +I came across a "Pipeline could not be created" error, but it seems like we have at least one job, so that raises questions. Without being able to open a browser, I think I can use the API to fetch details with `detailed_status.details_path`. The failure could be due to running out of CI/CD minutes, potentially marking it as failed with a `ci_quota_exceeded` error, leading to no jobs created. I’ll examine endpoints that could provide the failure reason or error details; I may need to check the pipeline creation event too. + +**Investigating API endpoints** + +I’m exploring whether the `GET /projects/:id/pipelines/:pipeline_id` endpoint includes `detailed_status` but not the `failure_reason`. There’s also a chance it might show `duration` and a status of `failed`. I think the `failure_reason` could be part of the pipeline object in GitLab 13, but I’m not certain. It’s definitely present in the job object. I wonder if any jobs were created but then deleted, as the `/jobs` endpoint returns an empty array. I should check the CI configuration for the pipeline using the `glab ci config` command.**Investigating pipeline creation** + +I’m considering whether the pipeline was created but didn’t start because of the `workflow:rules` set to `when: never`. It could also be that the pipeline was initiated by a push to the default branch, but there's no `only` constraint. Maybe GitLab couldn’t create jobs because it couldn’t fetch the repository or there’s no runner for a new project. With `started_at` being null and `finished_at` set, it seems the pipeline failed instantly. That usually happens due to invalid YAML or configuration, but there are no YAML errors. So I wonder if it’s just a lack of jobs causing the failure?**Exploring pipeline errors** + +I came across a "Pipeline could not be created" error, but it seems like we have at least one job, so that raises questions. Without being able to open a browser, I think I can use the API to fetch details with `detailed_status.details_path`. The failure could be due to running out of CI/CD minutes, potentially marking it as failed with a `ci_quota_exceeded` error, leading to no jobs created. I’ll examine endpoints that could provide the failure reason or error details; I may need to check the pipeline creation event too.**Investigating API endpoints** + +I’m exploring whether the `GET /projects/:id/pipelines/:pipeline_id` endpoint includes `detailed_status` but not the `failure_reason`. There’s also a chance it might show `duration` and a status of `failed`. I think the `failure_reason` could be part of the pipeline object in GitLab 13, but I’m not certain. It’s definitely present in the job object. I wonder if any jobs were created but then deleted, as the `/jobs` endpoint returns an empty array. I should check the CI configuration for the pipeline using the `glab ci config` command. +</thinking> + +**Tool Call: Run glab ci config --help** +Status: Completed + +Terminal: +``` + + Work with GitLab CI/CD configuration. + + USAGE + + glab ci config <command> [command] [--flags] + + COMMANDS + + compile View the fully expanded CI/CD configuration. + + FLAGS + + -h --help Show help for this command. + -R --repo Select another repository. Can use either `OWNER/REPO` or `GROUP/NAMESPACE/REPO` format. Also accepts full URL or Git URL. + + +``` + + +## Assistant + +<thinking> + + +I'm considering using `glab ci config compile` to check the expanded configuration for the commit. 
This could help confirm whether the job is actually present. It seems like a logical step, so I think I’ll go ahead and run that compile command. This should provide me with the needed details to move forward. Let's see what it reveals! +</thinking> + +**Tool Call: Run glab ci config compile --branch main** +Status: Failed + +Terminal: +``` + + ERROR + + Unknown flag: --branch. + + Try --help for usage. + + +``` + + +**Tool Call: Run glab ci config compile --help** +Status: Completed + +Terminal: +``` + + View the fully expanded CI/CD configuration. + + USAGE + + glab ci config compile [--flags] + + EXAMPLES + + # Uses .gitlab-ci.yml in the current directory + $ glab ci config compile + $ glab ci config compile .gitlab-ci.yml + $ glab ci config compile path/to/.gitlab-ci.yml + + FLAGS + + -h --help Show help for this command. + + +``` + + +## Assistant + +<thinking> + + +**Checking GitLab CI/CD minutes** + +I’m looking into the "CI/CD minutes quota" for GitLab.com. If that quota is set to 0, it's likely that pipelines could be blocked, and I might see a "quota_exceeded" error right away, preventing job creation. It’s important to check the namespace CI minutes usage. So, I want to verify with the GitLab API, specifically looking at the `GET /namespace/:id/ci_minutes` or potentially the `GET /projects/:id` endpoint to find the minutes limit and usage. Let's go ahead and query that. +</thinking> + +**Tool Call: Run glab api projects/77173462, Run jq '{shared_runners_enabled, shared_runners_minutes_limit, shared_runners_minutes_used, build_timeout, visibility}'** +Status: Completed + +Terminal: +``` +{ + "shared_runners_enabled": true, + "shared_runners_minutes_limit": null, + "shared_runners_minutes_used": null, + "build_timeout": 3600, + "visibility": "private" +} + +``` + + +## Assistant + +<thinking> + + +**Troubleshooting pipeline jobs** + +I'm noticing that the pipeline jobs list is empty, and I think it could be due to a couple of reasons. One possibility is that my `.gitlab-ci.yml` only defines `rules:exists` or `rules:if` that evaluate to false. But we have job keys. I’m also wondering if maybe GitLab is interpreting `verify:no_secrets` as a hidden anchor or extend. Yet, I know using colons in job names is common, like `test:unit`. So, why is there no jobs? I should check if there's an unnoticed error in `.gitlab-ci.yml` that's causing issues during evaluation. + +**Investigating pipeline issues** + +I’m thinking that maybe the pipeline was created but then failed because the configuration only included a job in the verify stage, which I thought existed. Perhaps the pipeline is empty due to no diffs since it was created for a commit. I also considered that the first pipeline in a new project might need shared runners enabled, but those are already enabled. It's possible the `.gitlab-ci.yml` file is in a subdirectory since we created it in `ops/`, and that’s where the repo root is during the push. + +**Checking for pipeline errors** + +I’m wondering if the pipeline failed due to an internal error, which might just be temporary. I’ll check the pipeline status, but since the `finished_at` timestamp matches the creation time, it looks like the pipeline has ended. I need to investigate the failure reason, possibly using GraphQL. I can query the pipeline by its ID while including errors. I’ll run a GraphQL query to see if the `failureReason` is available. 
Let’s go ahead and attempt that!**Investigating pipeline issues** + +I’m thinking that maybe the pipeline was created but then failed because the configuration only included a job in the verify stage, which I thought existed. Perhaps the pipeline is empty due to no diffs since it was created for a commit. I also considered that the first pipeline in a new project might need shared runners enabled, but those are already enabled. It's possible the `.gitlab-ci.yml` file is in a subdirectory since we created it in `ops/`, and that’s where the repo root is during the push.**Checking for pipeline errors** + +I’m wondering if the pipeline failed due to an internal error, which might just be temporary. I’ll check the pipeline status, but since the `finished_at` timestamp matches the creation time, it looks like the pipeline has ended. I need to investigate the failure reason, possibly using GraphQL. I can query the pipeline by its ID while including errors. I’ll run a GraphQL query to see if the `failureReason` is available. Let’s go ahead and attempt that! +</thinking> + +**Tool Call: Run glab api graphql -f query='query { project(fullPath: "vaultsovereign/ops") { pipelines(first: 5) { nodes { iid status failureReason detailedStatus { label detailsPath } } } } }'** +Status: Completed + +Terminal: +``` +{"data":{"project":{"pipelines":{"nodes":[{"iid":"1","status":"FAILED","failureReason":"The pipeline failed due to the user not being verified.","detailedStatus":{"label":"failed","detailsPath":"/vaultsovereign/ops/-/pipelines/2220346613"}}]}}}} + + + +``` + + +## Assistant + +<thinking> + + +**Identifying pipeline issues** + +I realize the pipeline failed because the user isn’t verified. GitLab requires account verification—either email or phone—to use pipelines and shared runners. So, nothing's wrong with our CI configuration, just that the user needs to verify their account. I think we should inform them to complete that step. Although they asked for setup help, we can always rerun the pipeline later. Testing `glab ci lint` is a good idea. The pipeline will work post-verification! + +**Considering verification solutions** + +I'm thinking about how to handle pipeline failures when a user isn't verified. One way could be using local hooks or a self-hosted runner. But honestly, the simplest solution would be for the user to verify their GitLab account or set up a runner. The user selected number two and wants guardrails. Even if GitLab CI isn't functional because of this verification issue, we could suggest a pre-commit hook or a local scanning script as a fallback. Options are there, but getting the user to verify is key! + +**Addressing pipeline verification issues** + +The pipeline isn't running because the user hasn't verified their account, which may be a global issue for using pipelines. However, maybe a project can still execute pipelines with a verified user. So, I should instruct the user to verify their email in the GitLab UI. We could also consider setting CI/CD variables or using GitLab's secret detection template, but that requires Ultimate features. Ultimately, our job is done: the configuration is in place, but I wonder if adding a local tripwire could be beneficial for immediate feedback, even though the user didn't ask. + +**Organizing guardrails and CI setup** + +The user previously asked for guardrails to prevent secrets from slipping through, especially since the CI isn't currently running. 
So, I can provide a `pre-commit` example and maybe a `make verify` command, but they specifically requested CI, which we've implemented. They may need to verify their account for the pipeline to work, which is an external constraint. I could check if GitLab permits pipelines for unverified users, but it’s likely not an option. Lastly, I’ll confirm that `.gitlab-ci.yml` is at the root of the repository and consider any necessary updates to our plan. + +**Adjusting CI for secret detection** + +We didn’t originally plan for this subtask, but I can respond accordingly. The user wants to set things up with GitLab CI, and I need to highlight that the pipeline is failing due to account verification. I’ll suggest they verify their account, provide commands like `glab ci run`, and inform them about file changes like `ops/.gitlab-ci.yml` and `ops/vault/README.md`. + +For detecting secrets, we should adjust the CI job to avoid leaking sensitive data in logs. Instead of printing matched lines, I’ll suggest using `git grep -lE` for file names without content. This ensures safety while still providing useful diagnostics. I'll make the necessary updates to the configuration to protect information effectively.**Considering verification solutions** + +I'm thinking about how to handle pipeline failures when a user isn't verified. One way could be using local hooks or a self-hosted runner. But honestly, the simplest solution would be for the user to verify their GitLab account or set up a runner. The user selected number two and wants guardrails. Even if GitLab CI isn't functional because of this verification issue, we could suggest a pre-commit hook or a local scanning script as a fallback. Options are there, but getting the user to verify is key!**Addressing pipeline verification issues** + +The pipeline isn't running because the user hasn't verified their account, which may be a global issue for using pipelines. However, maybe a project can still execute pipelines with a verified user. So, I should instruct the user to verify their email in the GitLab UI. We could also consider setting CI/CD variables or using GitLab's secret detection template, but that requires Ultimate features. Ultimately, our job is done: the configuration is in place, but I wonder if adding a local tripwire could be beneficial for immediate feedback, even though the user didn't ask.**Organizing guardrails and CI setup** + +The user previously asked for guardrails to prevent secrets from slipping through, especially since the CI isn't currently running. So, I can provide a `pre-commit` example and maybe a `make verify` command, but they specifically requested CI, which we've implemented. They may need to verify their account for the pipeline to work, which is an external constraint. I could check if GitLab permits pipelines for unverified users, but it’s likely not an option. Lastly, I’ll confirm that `.gitlab-ci.yml` is at the root of the repository and consider any necessary updates to our plan.**Adjusting CI for secret detection** + +We didn’t originally plan for this subtask, but I can respond accordingly. The user wants to set things up with GitLab CI, and I need to highlight that the pipeline is failing due to account verification. I’ll suggest they verify their account, provide commands like `glab ci run`, and inform them about file changes like `ops/.gitlab-ci.yml` and `ops/vault/README.md`. + +For detecting secrets, we should adjust the CI job to avoid leaking sensitive data in logs. 
Instead of printing matched lines, I’ll suggest using `git grep -lE` for file names without content. This ensures safety while still providing useful diagnostics. I'll make the necessary updates to the configuration to protect information effectively. +</thinking> + +**Tool Call: Edit /Users/sovereign/work-core/ops/.gitlab-ci.yml** +Status: Completed + +Diff: /Users/sovereign/work-core/ops/.gitlab-ci.yml +``` +stages: [verify] + +verify:no_secrets: + stage: verify + image: alpine:latest + script: + - apk add --no-cache git grep + + # Global secret scan (cheap but effective) + - | + set +e + secret_re='(BEGIN (RSA|OPENSSH|EC) PRIVATE KEY|-----BEGIN PGP PRIVATE KEY BLOCK-----|aws_secret_access_key|AKIA[0-9A-Z]{16}|xox[baprs]-[0-9A-Za-z-]{10,}|ghp_[A-Za-z0-9]{36}|glpat-[A-Za-z0-9_-]{20,})' + matches="$(git grep -lE "$secret_re" -- .)" + status=$? + set -e + + if [ "$status" -eq 0 ]; then + echo "❌ Potential secret detected in:" + echo "$matches" + echo + echo "Remove it or encrypt it into vault/." + exit 1 + elif [ "$status" -ne 1 ]; then + echo "❌ Secret scan failed (git grep exit $status)." + exit "$status" + fi + + # Vault plaintext guard (tracked files only) + - | + set -eu + + allowed_vault_re='(^vault/README\.md$|^vault/\.gitkeep$|^vault/tmp/\.gitignore$|\.age$|\.sops\.)' + bad_vault_files="$(git ls-files vault | grep -vE "$allowed_vault_re" || true)" + + if [ -n "$bad_vault_files" ]; then + echo "❌ Plaintext file detected in vault/. Encrypt before commit:" + echo "$bad_vault_files" + exit 1 + fi + +``` + + +## Assistant + +<thinking> + + +**Committing changes** + +I'm noticing that the git status shows I have a modified file. This means I need to commit the changes before pushing them again. I should double-check that everything looks good and then proceed with the commit. Once I'm confident, I can push the updates to the repository. I want to make sure I’ve captured everything needed in this commit, so I'll take a moment to consider any final edits. +</thinking> + +**Tool Call: Run git -C ops status '--porcelain=v1'** +Status: Completed + +Terminal: +``` + M .gitlab-ci.yml + + + + + +``` + + +**Tool Call: Run git -C ops add .gitlab-ci.yml, Run git -C ops commit -m 'Harden CI scan output'** +Status: Completed + +Terminal: +``` +[main 58c7ffa] Harden CI scan output + 1 file changed, 5 insertions(+), 2 deletions(-) + + + +``` + + +**Tool Call: Run git -C ops push** +Status: Completed + +Terminal: +``` +To https://gitlab.com/vaultsovereign/ops.git + f3bef9d..58c7ffa main -> main + + + + +``` + + +## Assistant + +<thinking> + + +**Checking pipeline status** + +The pipeline will likely fail again because the user isn't verified. That's alright! I should mention that the CI configuration is set up, but GitLab requires email verification to run pipelines. Verifying the email will help, or they could use their own runner. It might be useful to check the pipeline status to confirm the failure reason, but it's not strictly necessary. A quick command run like `glab ci status` can give me the current pipeline state. 
+</thinking> + +**Tool Call: Run glab ci status** +Status: Failed + +Terminal: +``` + +https://gitlab.com/vaultsovereign/ops/-/pipelines/2220354107 +SHA: 58c7ffaad03c04a51920fa7cc22c982d3e2a11c3 +Pipeline state: failed + + +``` + + +**Tool Call: Run glab api graphql -f query='query { project(fullPath: "vaultsovereign/ops") { pipelines(first: 1) { nodes { iid status failureReason webPath } } } }'** +Status: Failed + +Terminal: +``` +{"errors":[{"message":"Field 'webPath' doesn't exist on type 'Pipeline'","locations":[{"line":1,"column":106}],"path":["query","project","pipelines","nodes","webPath"],"extensions":{"code":"undefinedField","typeName":"Pipeline","fieldName":"webPath"}}]}glab: Field 'webPath' doesn't exist on type 'Pipeline' + +``` + + +**Tool Call: Run glab api graphql -f query='query { project(fullPath: "vaultsovereign/ops") { pipelines(first: 1) { nodes { iid status failureReason } } } }'** +Status: Completed + +Terminal: +``` +{"data":{"project":{"pipelines":{"nodes":[{"iid":"2","status":"FAILED","failureReason":"The pipeline failed due to the user not being verified."}]}}}} + + + +``` + + +## Assistant + +<thinking> + + +**Providing verification steps** + +Alright, I want to provide clear steps for verifying the GitLab account since the pipeline is failing. I’ll mention both verifying the email in the account settings and possibly the phone. Typically, verifying the email is done during signup, so it’s likely that’s what’s needed here. Once they verify, the user can rerun the pipeline by pushing a commit or using the `glab ci run --branch main` command. I'll check the help section on `glab ci run` just to make sure. +</thinking> + +**Tool Call: Run glab ci run --help** +Status: Completed + +Terminal: +``` + + The `--branch` option is available for all pipeline types. + + The options for variables are incompatible with merge request pipelines. + If used with merge request pipelines, the command fails with a message like `ERROR: if any flags in the group [output + output-format] are set none of the others can be` + + Specify one or more pipeline inputs using the `-i` or `--input` flag for each + input. Each input flag uses the format `key:value`. + + The values are typed and will default to `string` unless a type is explicitly + specified. To specify a type, use the `type(value)` syntax. For example, + `key:string(value)` will pass the string `value` as the input. + + Valid types are: + + - `string`: A string value. This is the default type. For example, `key:string(value)`. + - `int`: An integer value. For example, `key:int(42)`. + - `float`: A floating-point value. For example, `key:float(3.14)`. + - `bool`: A boolean value. For example, `key:bool(true)`. + - `array`: An array of strings. For example, `key:array(foo,bar)`. + + An array of strings can be specified with a trailing comma. For example, + `key:array(foo,bar,)` will pass the array `[foo, bar]`. `array()` specifies an + empty array. To pass an array with the empty string, use `array(,)`. + + Value arguments containing parentheses should be escaped from the shell with + quotes. For example, `--input key:array(foo,bar)` should be written as + `--input 'key:array(foo,bar)'`. 
+ + + USAGE + + glab ci run [--flags] + + EXAMPLES + + $ glab ci run + $ glab ci run --variables \"key1:value,with,comma\" + $ glab ci run -b main + $ glab ci run --web + $ glab ci run --mr + + # Specify CI variables + $ glab ci run -b main --variables-env key1:val1 + $ glab ci run -b main --variables-env key1:val1,key2:val2 + $ glab ci run -b main --variables-env key1:val1 --variables-env key2:val2 + $ glab ci run -b main --variables-file MYKEY:file1 --variables KEY2:some_value + + # Specify CI inputs + $ glab ci run -b main --input key1:val1 --input key2:val2 + $ glab ci run -b main --input "replicas:int(3)" --input "debug:bool(false)" --input "regions:array(us-east,eu-w… + + // For an example of 'glab ci run -f' with a variables file, see + // [Run a CI/CD pipeline with variables from a file](https://docs.gitlab.com/editor_extensions/gitlab_cli/#run-… + // in the GitLab documentation. + + FLAGS + + -b --branch Create pipeline on branch/ref <string>. + -h --help Show help for this command. + -i --input Pass inputs to pipeline in format '<key>:<value>'. Cannot be used for merge request pipelines. See documentation for examples. + --mr Run merge request pipeline instead of branch pipeline. + -R --repo Select another repository. Can use either `OWNER/REPO` or `GROUP/NAMESPACE/REPO` format. Also accepts full URL or Git URL. + --variables Pass variables to pipeline in format <key>:<value>. Cannot be used for MR pipelines. + --variables-env Pass variables to pipeline in format <key>:<value>. Cannot be used for MR pipelines. + --variables-file Pass file contents as a file variable to pipeline in format <key>:<filename>. Cannot be used for MR pipelines. + -f --variables-from Json file with variables for pipeline execution. Expects array of hashes, each with at least 'key' and 'value'. Cannot be used for MR pipelines. + -w --web Open pipeline in a browser. Uses default browser, or browser specified in BROWSER environment variable. + + +``` + + +## Assistant + +<thinking> + + +**Verifying CI configuration** + +I need to ensure that we can rerun with `glab ci run -b main`. It's also important to check that the `.gitlab-ci.yml` file is correct and functional. We added `apk add --no-cache git grep`, but maybe `grep` isn't necessary. Although I could verify that this command works, testing it locally in Docker isn't essential right now. Since the user instructed us to "Choose the next forge," and they selected 2, I can confirm that implementation is complete! + +**Summarizing final message** + +The final message should highlight that I've added `ops/.gitlab-ci.yml` with the `verify:no_secrets` job to scan for tokens/keys and ensure only encrypted files in `vault/` are tracked. I've also updated `ops/vault/README.md` to document allowed files and CI mentions, and pushed commits to GitLab with proper commit messages. Pipelines currently fail because the account isn't verified. To rerun, users should verify their email, then click retry or run `glab ci run -b main`. The scan only prints file paths, and patterns can be extended in `.gitlab-ci.yml`. + +**Clarifying GitLab email verification** + +I need to be cautious when instructing users to "verify your GitLab account email." The issue stems from the requirement for the `vaultsovereign` account to be verified before the GitLab pipelines can run. I’ll mention that users should go to user settings -> emails to complete this process. Regarding untracked files, it appears the working directory root is empty aside from the ops directory. 
Now, I’ll create a final answer with a concise bullet list, referencing file paths and line numbers as needed. + +**Reviewing file referencing rules** + +I see that I need to remember to include the relevant start line when referencing files in my response. The guidelines mention using inline code for clickable file paths, and each reference should include a standalone path like `ops/.gitlab-ci.yml:1`. That's clear. I’ll make sure to provide the relevant start lines. It might be helpful to include the command `glab ci run -b main` as well. Plus, since the user mentioned "Logged in as vaultsovereign" and requested setup, I'll ensure our changes are pushed correctly. + +**Final message summary** + +The final message should include points like this: + +- We've added `ops/.gitlab-ci.yml:1` to create a verifying job that checks for likely secrets, ensuring that only encrypted files under `vault/` are tracked (`*.age`/`*.sops.*`) alongside documentation. +- I've updated `ops/vault/README.md:1` to document the allowed file types and CI enforcement. +- Currently, GitLab pipelines fail due to the "user not verified" message for `vaultsovereign`. Users need to verify their account email and then rerun the pipeline using either `glab ci run -b main` or the retry option in the UI.**Summarizing final message** + +The final message should highlight that I've added `ops/.gitlab-ci.yml` with the `verify:no_secrets` job to scan for tokens/keys and ensure only encrypted files in `vault/` are tracked. I've also updated `ops/vault/README.md` to document allowed files and CI mentions, and pushed commits to GitLab with proper commit messages. Pipelines currently fail because the account isn't verified. To rerun, users should verify their email, then click retry or run `glab ci run -b main`. The scan only prints file paths, and patterns can be extended in `.gitlab-ci.yml`.**Clarifying GitLab email verification** + +I need to be cautious when instructing users to "verify your GitLab account email." The issue stems from the requirement for the `vaultsovereign` account to be verified before the GitLab pipelines can run. I’ll mention that users should go to user settings -> emails to complete this process. Regarding untracked files, it appears the working directory root is empty aside from the ops directory. Now, I’ll create a final answer with a concise bullet list, referencing file paths and line numbers as needed.**Reviewing file referencing rules** + +I see that I need to remember to include the relevant start line when referencing files in my response. The guidelines mention using inline code for clickable file paths, and each reference should include a standalone path like `ops/.gitlab-ci.yml:1`. That's clear. I’ll make sure to provide the relevant start lines. It might be helpful to include the command `glab ci run -b main` as well. Plus, since the user mentioned "Logged in as vaultsovereign" and requested setup, I'll ensure our changes are pushed correctly.**Final message summary** + +The final message should include points like this: + +- We've added `ops/.gitlab-ci.yml:1` to create a verifying job that checks for likely secrets, ensuring that only encrypted files under `vault/` are tracked (`*.age`/`*.sops.*`) alongside documentation. +- I've updated `ops/vault/README.md:1` to document the allowed file types and CI enforcement. +- Currently, GitLab pipelines fail due to the "user not verified" message for `vaultsovereign`. 
Users need to verify their account email and then rerun the pipeline using either `glab ci run -b main` or the retry option in the UI. +</thinking> + +- Added CI tripwire at `ops/.gitlab-ci.yml:1`: + - Fails on likely secrets (private key blocks + common token formats) without printing secret contents (only file paths). + - Enforces `vault/` is encrypted-only for tracked files (`*.age`, `*.sops.*`, docs). +- Documented allowed vault contents in `ops/vault/README.md:1`. + +Note: GitLab CI is currently failing because the `vaultsovereign` account isn’t verified on GitLab.com; once verified, rerun with `glab ci run -b main` (or retry the pipeline in the UI). + diff --git a/.codex/output-guardrails.md b/.codex/output-guardrails.md new file mode 100644 index 0000000..9db085d --- /dev/null +++ b/.codex/output-guardrails.md @@ -0,0 +1,25 @@ +# Output Guardrails (for Codex / agents) + +These rules prevent config-dumps and keep agent output fast, deterministic, and reviewable. + +## Operating rule + +- Plan (max 3 bullets) → Execute 1 step → Report (max 5 lines). +- Never paste full configs by default. Prefer diffs + file paths. +- Only export full config when explicitly requested (`full=true`) and still cap output. +- Ask for confirmation only for destructive actions. + +## Preferred report format + +- Goal: +- Observed: +- Change: +- Command(s): +- Result: +- Next: + +## Tooling rule + +- Prefer `cf_snapshot` + `cf_config_diff` + `cf_export_config(full=false)` over any “dump” tool. +- If output would exceed limits, write artifacts to disk and return the path. + diff --git a/.env.example b/.env.example index e211616..f6e00ec 100644 --- a/.env.example +++ b/.env.example @@ -27,7 +27,7 @@ export GITLAB_URL="https://gitlab.com" # Or your self-hosted URL # ============================================================================ # API Token: https://dash.cloudflare.com/profile/api-tokens # Account ID: https://dash.cloudflare.com/ (right sidebar) -export CLOUDFLARE_API_TOKEN="your_cloudflare_api_token_here" +export CLOUDFLARE_API_TOKEN="nJBp4q4AxiVO29TAxwFgRYcIJYh6CY4bPPP8mW-D export CLOUDFLARE_ACCOUNT_ID="your_account_id_here" # Optional (for specific zone queries): export CLOUDFLARE_ZONE_ID="your_zone_id_here" diff --git a/.github/workflows/registry_validation.yml b/.github/workflows/registry_validation.yml new file mode 100644 index 0000000..1d9e4d0 --- /dev/null +++ b/.github/workflows/registry_validation.yml @@ -0,0 +1,84 @@ +name: Cloudflare Registry Validation + +on: + push: + paths: + - 'cloudflare/**' + - '.github/workflows/registry_validation.yml' + pull_request: + paths: + - 'cloudflare/**' + - '.github/workflows/registry_validation.yml' + schedule: + # Daily validation to catch drift + - cron: '0 6 * * *' + +jobs: + validate-registry: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + # Add any MCP server dependencies here if needed + + - name: Set PYTHONPATH + run: echo "PYTHONPATH=$PWD" >> $GITHUB_ENV + + - name: Run Tool Name Parity Check + run: | + cd cloudflare + python3 ci_check_tool_names.py + + - name: Run Entrypoint Sanity Check + run: | + cd cloudflare + python3 ci_check_entrypoints.py + + - name: Generate Fresh Registry + run: | + cd cloudflare + python3 generate_capability_registry_v2.py + + - name: Validate Registry Format + run: | + cd cloudflare + python3 -c " 
+ import json + with open('capability_registry_v2.json', 'r') as f: + registry = json.load(f) + # Basic validation + assert 'mcp_servers' in registry + assert 'terraform_resources' in registry + assert 'gitops_tools' in registry + print('✅ Registry format is valid') + " + + - name: Check for Registry Changes + id: registry_changes + run: | + cd cloudflare + # Check if registry changed during validation + if git diff --name-only capability_registry_v2.json; then + echo "changes_detected=true" >> $GITHUB_OUTPUT + echo "⚠️ Registry changed during validation - manual review recommended" + else + echo "changes_detected=false" >> $GITHUB_OUTPUT + echo "✅ Registry is stable" + fi + + - name: Upload Registry Artifact + uses: actions/upload-artifact@v4 + with: + name: capability-registry + path: cloudflare/capability_registry_v2.json + retention-days: 30 \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index b79d48d..3756bd6 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -41,6 +41,38 @@ infra_invariants: - "terraform/**/*" - "scripts/infra-invariants.sh" +# ============================================================================ +# WAF + PLAN INVARIANTS +# ============================================================================ +# Enforces WAF Intel regression + deterministic Terraform plan gating. + +waf_plan_invariants: + stage: validate + image: hashicorp/terraform:latest + before_script: + - | + set -euo pipefail + if command -v apk >/dev/null 2>&1; then + apk add --no-cache python3 py3-pip + elif command -v apt-get >/dev/null 2>&1; then + apt-get update + apt-get install -y python3 python3-pip + rm -rf /var/lib/apt/lists/* + else + echo "No supported package manager found to install python3/pip." >&2 + exit 1 + fi + - python3 -m pip install --no-cache-dir -r requirements-dev.txt + script: + - bash scripts/waf-and-plan-invariants.sh + rules: + - changes: + - "mcp/waf_intelligence/**/*" + - "scripts/waf-and-plan-invariants.sh" + - "tests/test_waf_intelligence_analyzer.py" + - "requirements-dev.txt" + - "terraform/**/*" + # ============================================================================ # PYTHON SYNTAX CHECK # ============================================================================ diff --git a/ASSURANCE.md b/ASSURANCE.md new file mode 100644 index 0000000..ae2b415 --- /dev/null +++ b/ASSURANCE.md @@ -0,0 +1,81 @@ +# Assurance Run — 2025-12-18 + +- Commit: 7f2e60e1c514fbe2f459d6c2080841db7e167d85 +- Tooling: `terraform v1.5.7`, `python3 3.14.2` + +| Check | Status | Notes | +| --- | --- | --- | +| `terraform fmt -recursive` | ✅ | Ran from repo root; terraform rewrote any files that diverged from canonical formatting (see `git status` for changes, if any). | +| `terraform validate` | ⚠️ | After `terraform init`, validation succeeded but emitted deprecation warnings (`cloudflare_access_application` and `cloudflare_record.value` usage). No fixes applied. | +| `python3 -m py_compile layer0/security_classifier.py scripts/*.py` | ✅ | All Layer0 + scripts modules compiled. | + +Additional context: +- `terraform init` was executed to download `cloudflare/cloudflare v4.52.5` so that validation could run; `.terraform/` and `.terraform.lock.hcl` were created/updated. +- No other files were modified manually during this pass. + +--- + +## Canonical Gates (CI / Audit) + +These are the *operator-safe, auditor-grade* checks expected to pass on every sweep. 
+ +### 1) WAF Intel regression + CLI sanity + +From `cloudflare/`: + +```bash +# Install dev deps (once) +python3 -m pip install -r requirements-dev.txt + +# Full test suite +python3 -m pytest -q + +# Analyzer regression only +python3 -m pytest -q tests/test_waf_intelligence_analyzer.py + +# WAF Intel CLI (must not emit false "no managed WAF" warnings) +python3 -m mcp.waf_intelligence --file terraform/waf.tf --format json --limit 5 | python3 -m json.tool +``` + +Acceptance: +- Exit code 0 +- JSON parses +- `insights` is `[]` (or informational-only; no false `"No managed WAF rules detected"` warning) + +### 2) Terraform hardening correctness (empty-list safety + plan gates) + +From `cloudflare/terraform/`: + +```bash +terraform fmt -recursive +terraform init +terraform validate + +# Free-plan path (managed WAF + bot mgmt must be gated off even if flags are true) +terraform plan -refresh=false -var-file=assurance_free.tfvars + +# Paid-plan path (managed WAF + bot mgmt appear when flags are true) +terraform plan -refresh=false -var-file=assurance_pro.tfvars +``` + +Acceptance: +- Both plans succeed (no `{}` expression errors) +- Paid-plan run includes `cloudflare_ruleset.managed_waf` / `cloudflare_bot_management.domains` +- Free-plan run does not include those resources + +One-shot (runs all gates + JSON-plan assertions): +```bash +bash scripts/waf-and-plan-invariants.sh +``` + +### Notes for sandboxed runs + +Some sandboxed execution environments block Terraform provider plugins from binding unix sockets, which surfaces as: + +``` +Unrecognized remote plugin message +... +listen unix ...: bind: operation not permitted +``` + +Run Terraform with the necessary OS permissions (or outside the sandbox) in that case. diff --git a/CAPABILITY_REGISTRY.md b/CAPABILITY_REGISTRY.md new file mode 100644 index 0000000..084b1fa --- /dev/null +++ b/CAPABILITY_REGISTRY.md @@ -0,0 +1,135 @@ +# Cloudflare Control Plane Capability Registry + +Generated: 2025-12-18T02:19:38.165161+00:00 +Version: 1.0.0 + +## MCP Servers + +### cloudflare_safe +**Module**: `cloudflare.mcp.cloudflare_safe` +**Purpose**: Secure Cloudflare API operations + +**Capabilities**: +- dns_record_management +- waf_rule_configuration +- tunnel_health_monitoring +- zone_analytics_query +- terraform_state_synchronization + +### waf_intelligence +**Module**: `cloudflare.mcp.waf_intelligence` +**Purpose**: WAF rule analysis and synthesis + +**Capabilities**: +- waf_config_analysis +- threat_intelligence_integration +- compliance_mapping +- rule_gap_identification +- terraform_ready_rule_generation + +### oracle_answer +**Module**: `cloudflare.mcp.oracle_answer` +**Purpose**: Security decision support + +**Capabilities**: +- security_classification +- routing_decision_support +- threat_assessment +- pre_execution_screening + +## Terraform Resources + +### dns_management +**Files**: dns.tf + +**Capabilities**: +- automated_dns_provisioning +- spf_dmarc_mx_configuration +- tunnel_based_routing +- proxied_record_management + +### waf_security +**Files**: waf.tf + +**Capabilities**: +- custom_waf_rules +- managed_ruleset_integration +- bot_management +- rate_limiting +- country_blocking + +### tunnel_infrastructure +**Files**: tunnels.tf + +**Capabilities**: +- multi_service_tunnel_routing +- ingress_rule_management +- health_monitoring +- credential_rotation + +## GitOps Tools + +### waf_rule_proposer +**File**: gitops/waf_rule_proposer.py +**Purpose**: Automated WAF rule generation + +**Capabilities**: +- threat_intel_driven_rules +- 
gitlab_ci_integration +- automated_mr_creation +- compliance_mapping + +### invariant_checker +**File**: scripts/invariant_checker_py.py +**Purpose**: Real-time state validation + +**Capabilities**: +- dns_integrity_checks +- waf_compliance_validation +- tunnel_health_monitoring +- drift_detection + +### drift_guardian +**File**: scripts/drift_guardian_py.py +**Purpose**: Automated remediation + +**Capabilities**: +- state_reconciliation +- auto_remediation +- ops_notification + +## Security Framework + +### layer0 +**Components**: entrypoint.py, shadow_classifier.py, preboot_logger.py + +**Capabilities**: +- pre_execution_security_classification +- threat_assessment +- security_event_logging +- routing_decision_support + +**Classification Levels**: +- catastrophic +- forbidden +- ambiguous +- blessed + +## Operational Tools + +### systemd_services +**Services**: autonomous-remediator, drift-guardian, tunnel-rotation + +**Capabilities**: +- continuous_monitoring +- automated_remediation +- scheduled_operations + +### test_suites +**Test Suites**: layer0_validation, mcp_integration, cloudflare_safe_ingress + +**Capabilities**: +- security_classification_testing +- mcp_server_validation +- api_integration_testing + diff --git a/CAPABILITY_REGISTRY_V2.md b/CAPABILITY_REGISTRY_V2.md new file mode 100644 index 0000000..ab89c88 --- /dev/null +++ b/CAPABILITY_REGISTRY_V2.md @@ -0,0 +1,174 @@ +# Cloudflare Control Plane Capability Registry v2 + +Generated: 2025-12-18T02:38:01.740122+00:00 +Version: 1.0.1 + +## MCP Servers + +### cloudflare_safe +**Module**: `cloudflare.mcp.cloudflare_safe` +**Entrypoint**: `cloudflare.mcp.cloudflare_safe` +**Purpose**: Secure Cloudflare API operations + +**Tools**: +- cf_snapshot (read/write token required) +- cf_refresh (write token required) +- cf_config_diff (read; requires snapshot_id) +- cf_export_config (read) +- cf_tunnel_status (read) +- cf_tunnel_ingress_summary (read) +- cf_access_policy_list (read) + +**Auth/Env**: CLOUDFLARE_API_TOKEN, CLOUDFLARE_ACCOUNT_ID +**Side Effects**: read-only unless token present; cf_refresh/cf_snapshot are mutating +**Outputs**: json, terraform_hcl + +**Capabilities**: +- dns_record_management +- waf_rule_configuration +- tunnel_health_monitoring +- zone_analytics_query +- terraform_state_synchronization + +### waf_intelligence +**Module**: `cloudflare.mcp.waf_intelligence` +**Entrypoint**: `cloudflare.mcp.waf_intelligence.mcp_server` +**Purpose**: WAF rule analysis and synthesis + +**Tools**: +- waf_capabilities (read) +- waf_analyze (read) +- waf_assess (read) +- waf_generate_gitops_proposals (propose) + +**Auth/Env**: +**Side Effects**: propose-only; generates GitOps proposals +**Outputs**: json, terraform_hcl, gitops_mr + +**Capabilities**: +- waf_config_analysis +- threat_intelligence_integration +- compliance_mapping +- rule_gap_identification +- terraform_ready_rule_generation + +### oracle_answer +**Module**: `cloudflare.mcp.oracle_answer` +**Entrypoint**: `cloudflare.mcp.oracle_answer` +**Purpose**: Security decision support + +**Tools**: +- oracle_answer (read) + +**Auth/Env**: +**Side Effects**: read-only; security classification only +**Outputs**: json, security_classification + +**Capabilities**: +- security_classification +- routing_decision_support +- threat_assessment +- pre_execution_screening + +## Terraform Resources + +### dns_management +**Files**: dns.tf + +**Capabilities**: +- automated_dns_provisioning +- spf_dmarc_mx_configuration +- tunnel_based_routing +- proxied_record_management + +### 
waf_security +**Files**: waf.tf + +**Capabilities**: +- custom_waf_rules +- managed_ruleset_integration +- bot_management +- rate_limiting +- country_blocking + +### tunnel_infrastructure +**Files**: tunnels.tf + +**Capabilities**: +- multi_service_tunnel_routing +- ingress_rule_management +- health_monitoring +- credential_rotation + +## GitOps Tools + +### waf_rule_proposer +**File**: gitops/waf_rule_proposer.py +**Purpose**: Automated WAF rule generation +**Side Effects**: creates GitLab merge requests +**Outputs**: terraform_hcl, gitops_mr + +**Capabilities**: +- threat_intel_driven_rules +- gitlab_ci_integration +- automated_mr_creation +- compliance_mapping + +### invariant_checker +**File**: scripts/invariant_checker_py.py +**Purpose**: Real-time state validation +**Side Effects**: generates anomaly reports +**Outputs**: json, anomaly_report + +**Capabilities**: +- dns_integrity_checks +- waf_compliance_validation +- tunnel_health_monitoring +- drift_detection + +### drift_guardian +**File**: scripts/drift_guardian_py.py +**Purpose**: Automated remediation +**Side Effects**: applies Terraform changes +**Outputs**: terraform_apply, remediation_report + +**Capabilities**: +- state_reconciliation +- auto_remediation +- ops_notification + +## Security Framework + +### layer0 +**Components**: entrypoint.py, shadow_classifier.py, preboot_logger.py + +**Capabilities**: +- pre_execution_security_classification +- threat_assessment +- security_event_logging +- routing_decision_support + +**Classification Levels**: +- catastrophic +- forbidden +- ambiguous +- blessed + +## Operational Tools + +### systemd_services +**Services**: autonomous-remediator, drift-guardian, tunnel-rotation + +**Capabilities**: +- continuous_monitoring +- automated_remediation +- scheduled_operations + +### test_suites +**Test Suites**: layer0_validation, mcp_integration, cloudflare_safe_ingress + +**Capabilities**: +- security_classification_testing +- mcp_server_validation +- api_integration_testing + diff --git a/CONTROL_PLANE_README.md b/CONTROL_PLANE_README.md new file mode 100644 index 0000000..d0cf729 --- /dev/null +++ b/CONTROL_PLANE_README.md @@ -0,0 +1,151 @@ +# Cloudflare Control Plane + +A programmable, verifiable, policy-driven Cloudflare operating system with MCP as the primary interface layer. 
+ +## 🏛️ Architecture Overview + +``` +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Intent Layer │◄──►│ State Layer │◄──►│ Verify Layer │ +│ (MCP Servers) │ │ (Terraform) │ │ (Invariants) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ GitOps Layer │◄──►│ Cloudflare API │◄──►│ Security Layer │ +│ (Automation) │ │ (Live State) │ │ (Layer0) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ +``` + +## 🎯 Core Components + +### MCP Interface Layer (Intent) +- **`cloudflare.mcp.cloudflare_safe`**: State mutation operations +- **`cloudflare.mcp.waf_intelligence`**: Analysis & rule synthesis +- **`cloudflare.mcp.oracle_answer`**: Security decision support + +### Terraform State Layer (Desired State) +- **DNS Management**: Automated DNS, SPF/DMARC, tunnel routing +- **WAF Security**: Custom rules + managed rulesets +- **Tunnel Infrastructure**: Multi-service ingress routing +- **Bot Management**: Automated detection & mitigation + +### GitOps Automation Layer (Change Propagation) +- **WAF Rule Proposer**: Threat-intel driven rule generation +- **Invariant Checker**: Real-time state validation +- **Drift Guardian**: Automated remediation +- **CI/CD Integration**: GitHub Actions + GitLab CI + +### Security Framework Layer (Verification) +- **Layer0**: Pre-execution security classification +- **Shadow Classifier**: Threat assessment +- **Preboot Logger**: Security event tracking +- **Invariant Validation**: Continuous compliance checking + +## 🔄 Operational Flows + +### Threat Intelligence → WAF Enforcement +``` +Threat Intel → WAF Proposal → MR Review → Terraform Apply → Invariant Check → Remediation +``` + +### DNS/Tunnel Management +``` +Service Definition → Tunnel Config → DNS Routing → Health Monitoring → Rotation +``` + +### Security Classification +``` +Query → Layer0 Classification → Routing Decision → Execution/Block → Logging +``` + +## 🛡️ Security Posture + +### Risk Mitigations +- **Token Scoping**: Least-privilege API tokens +- **Rate Limiting**: API call throttling +- **Audit Trail**: Comprehensive logging +- **Invariant Checks**: Real-time compliance validation + +### Compliance Frameworks +- PCI-DSS 6.6 +- OWASP-ASVS 13 +- Zero-trust architecture + +## 🚀 MCP Server Capabilities + +### Cloudflare Safe MCP +```bash +# Tools available +- dns_record_manage +- waf_rule_configure +- tunnel_health_check +- zone_analytics_query +``` + +### WAF Intelligence MCP +```bash +# Tools available +- waf_config_analyze +- threat_intel_integrate +- compliance_map_generate +- rule_gap_identify +``` + +### Oracle Answer MCP +```bash +# Tools available +- security_classify +- routing_decide +- threat_assess +- decision_support +``` + +## 📊 Monitoring & Observability + +### Key Metrics +- DNS resolution latency +- WAF rule effectiveness +- Tunnel health status +- API rate limit utilization +- Invariant compliance rate + +### Alerting Triggers +- Invariant violations +- Tunnel connectivity issues +- WAF rule deployment failures +- Security classification anomalies + +## 🔧 Development & Extension + +### Adding New MCP Servers +1. Follow wrapper pattern in `/.secret/mcp/template.sh` +2. Add health checks and PYTHONPATH injection +3. Register in OpenCode configuration +4. 
Add to smoke test (`/test_mcp_servers.sh`) + +### Extending Terraform Modules +- Maintain compatibility with existing state +- Add corresponding invariant checks +- Update GitOps automation + +### Security Framework Integration +- Extend Layer0 classification rules +- Add new threat intelligence sources +- Enhance compliance mappings + +## 🎯 Production Readiness + +### ✅ Completed +- Deterministic MCP interfaces +- GitOps automation pipeline +- Real-time invariant checking +- Security classification framework + +### 🔄 Operational Excellence +- Automated remediation +- Comprehensive monitoring +- Audit trail preservation +- Compliance validation + +This control plane represents a **foundational infrastructure layer** that can support higher-level automation, agent systems, and compliance proofs without architectural changes. \ No newline at end of file diff --git a/LAYER0_SHADOW.md b/LAYER0_SHADOW.md index 6195390..938b13d 100644 --- a/LAYER0_SHADOW.md +++ b/LAYER0_SHADOW.md @@ -1,6 +1,7 @@ # LAYER 0 SHADOW Pre-Boot Cognition Guard | Ouroboric Gate +Public label: Intent Safety Kernel Version: 1.0 (Rubedo Seal) Status: Active Primitive Implements: Nigredo -> Rubedo (pre-form cognition) @@ -27,6 +28,13 @@ Guarantees: - Ambiguous intent does not awaken the wrong agent chain. - Catastrophic requests are contained and recorded, not processed. +### 2.1 Invariant Guarantees (Immutables) + +Layer 0 is intentionally constrained. These invariants are non-negotiable: +- Layer 0 does not load doctrine, select agents, or invoke MCP tools. +- Layer 0 produces no side effects beyond preboot anomaly logging for forbidden/catastrophic outcomes. +- Telemetry-driven learning may only add/strengthen detections (escalate); it must not relax catastrophic boundaries without replay validation and explicit review. + --- ## 3. Classification Model @@ -105,6 +113,10 @@ Notes: - blessed and ambiguous queries are not logged here; only violations appear. - catastrophic requests reveal no additional context to the requester. +### 6.1 Risk Score Semantics + +`risk_score` is an ordinal signal (0-5) used for triage and audit correlation. It is monotonic under learning, may be context-weighted (e.g., production accounts), and does not decay without replay validation. + --- ## 7. 
Interaction With Higher Layers diff --git a/MULTI_ACCOUNT_AUTH.md b/MULTI_ACCOUNT_AUTH.md index a84d3b1..f152e0c 100644 --- a/MULTI_ACCOUNT_AUTH.md +++ b/MULTI_ACCOUNT_AUTH.md @@ -91,7 +91,7 @@ Each account becomes its own MCP entry, wired to its own env vars: // Production Cloudflare account "cloudflare_prod": { "type": "local", - "command": ["npx", "-y", "@modelcontextprotocol/server-cloudflare"], + "command": ["python3", "-m", "mcp.cloudflare_safe"], "environment": { "CLOUDFLARE_API_TOKEN": "{env:CLOUDFLARE_API_TOKEN_PRODUCTION}", "CLOUDFLARE_ACCOUNT_ID": "{env:CLOUDFLARE_ACCOUNT_ID_PRODUCTION}" @@ -102,7 +102,7 @@ Each account becomes its own MCP entry, wired to its own env vars: // Staging Cloudflare account "cloudflare_staging": { "type": "local", - "command": ["npx", "-y", "@modelcontextprotocol/server-cloudflare"], + "command": ["python3", "-m", "mcp.cloudflare_safe"], "environment": { "CLOUDFLARE_API_TOKEN": "{env:CLOUDFLARE_API_TOKEN_STAGING}", "CLOUDFLARE_ACCOUNT_ID": "{env:CLOUDFLARE_ACCOUNT_ID_STAGING}" @@ -253,8 +253,8 @@ Cursor IDE itself uses a single account (your Cursor subscription), but Cursor A } }, "cloudflare_prod": { - "command": "npx", - "args": ["-y", "@modelcontextprotocol/server-cloudflare"], + "command": "python3", + "args": ["-m", "mcp.cloudflare_safe"], "env": { "CLOUDFLARE_API_TOKEN": "prod_token", "CLOUDFLARE_ACCOUNT_ID": "prod_account_id" diff --git a/OPERATIONAL_FLOWS.md b/OPERATIONAL_FLOWS.md new file mode 100644 index 0000000..1d8629b --- /dev/null +++ b/OPERATIONAL_FLOWS.md @@ -0,0 +1,153 @@ +# Cloudflare Control Plane Operational Flows + +## 🔄 Threat Intelligence → WAF Enforcement Flow + +``` +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Threat Intel │───►│ WAF Intel MCP │───►│ GitOps MR │ +│ Collector │ │ (Analysis) │ │ (Proposal) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Classification │◄──►│ Rule Synthesis │◄──►│ MR Automation │ +│ (ML/Intel) │ │ (Generator) │ │ (CI/CD) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Compliance Map │───►│ Terraform Apply │───►│ Invariant Check │ +│ (Mapper) │ │ (Safe MCP) │ │ (Validator) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Audit Trail │◄───│ Live State │◄───│ Remediation │ +│ (Logger) │ │ (Cloudflare) │ │ (Guardian) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ +``` + +### Flow Steps: +1. **Threat Intel Collection**: Gather indicators from external sources +2. **WAF Intelligence Analysis**: ML classification + rule gap analysis +3. **Rule Proposal**: Generate Terraform-ready WAF rules +4. **GitOps MR**: Automated merge request creation +5. **Compliance Mapping**: Attach PCI-DSS/OWASP compliance data +6. **Terraform Apply**: Safe MCP server applies changes +7. **Invariant Validation**: Real-time state verification +8. 
**Remediation**: Automated fix if invariants violated + +## 🌐 DNS/Tunnel Management Flow + +``` +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Service Def │───►│ Tunnel Config │───►│ DNS Routing │ +│ (Manifest) │ │ (Terraform) │ │ (Records) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Health Monitor │◄──►│ Safe MCP Apply │◄──►│ Invariant Check │ +│ (Checker) │ │ (Mutation) │ │ (DNS/Tunnel) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Rotation Sched │───►│ Credential Rot │───►│ Audit Logging │ +│ (Timer) │ │ (Automation) │ │ (Compliance) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ +``` + +### Flow Steps: +1. **Service Definition**: Define service endpoints and requirements +2. **Tunnel Configuration**: Create Cloudflare Tunnel ingress rules +3. **DNS Routing**: Point domains/subdomains to tunnel endpoints +4. **Health Monitoring**: Continuous tunnel connectivity checks +5. **Safe MCP Operations**: Programmatic DNS/tunnel management +6. **Invariant Validation**: DNS integrity + tunnel health checks +7. **Credential Rotation**: Automated tunnel secret rotation +8. **Audit Logging**: Comprehensive operational tracking + +## 🛡️ Security Classification Flow + +``` +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ User Query │───►│ Layer0 Classify │───►│ Routing Decision │ +│ (Input) │ │ (Pre-exec) │ │ (Action) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Shadow Eval │◄──►│ Oracle Answer │◄──►│ Security Context │ +│ (Classifier) │ │ (MCP Server) │ │ (Environment) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Preboot Log │───►│ Execute/Block │───►│ Audit Trail │ +│ (Security) │ │ (Decision) │ │ (Compliance) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ +``` + +### Flow Steps: +1. **User Query Input**: Receive command/query from user/agent +2. **Layer0 Classification**: Pre-execution security assessment +3. **Routing Decision**: Determine allow/block/redirect action +4. **Shadow Evaluation**: ML-based threat assessment +5. **Oracle Answer**: Security decision support via MCP +6. **Preboot Logging**: Security event recording +7. **Execution/Block**: Allow safe operations, block dangerous ones +8. **Audit Trail**: Comprehensive security event tracking + +## 🔄 Continuous Verification Loop + +``` +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Live State │───►│ Invariant Check │───►│ Anomalies │ +│ (Cloudflare) │ │ (Validator) │ │ (Detection) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ Drift Detect │◄──►│ Auto Remediate │◄──►│ Notify Ops │ +│ (Guardian) │ │ (Fixer) │ │ (Alerting) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ State Update │───►│ Re-check Inv │───►│ Close Loop │ +│ (Terraform) │ │ (Validation) │ │ (Complete) │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ +``` + +### Flow Steps: +1. **Live State Monitoring**: Continuous Cloudflare API polling +2. 
**Invariant Validation**: Check against desired state + security policies +3. **Anomaly Detection**: Identify configuration drift or violations +4. **Drift Analysis**: Determine root cause and severity +5. **Auto Remediation**: Apply fixes via Safe MCP server +6. **Ops Notification**: Alert human operators if needed +7. **State Update**: Apply Terraform changes if remediation successful +8. **Re-validation**: Confirm invariants are restored + +## 🎯 Key Operational Principles + +### Separation of Concerns +- **MCP = Intent**: What should happen +- **Terraform = State**: What the desired state is +- **GitOps = Change**: How changes propagate +- **Layer0 = Security**: Whether actions are safe + +### Deterministic Operations +- Same inputs → same outputs +- No ambient dependencies +- Explicit environment configuration +- Version-controlled everything + +### Continuous Verification +- Real-time state validation +- Automated remediation +- Comprehensive audit trails +- Security classification at every step + +These flows represent a **production-grade operational model** where each component has clear responsibilities and the system self-corrects when deviations occur. \ No newline at end of file diff --git a/OUROBOROS_LOOP_EXPLAINED.md b/OUROBOROS_LOOP_EXPLAINED.md index 9f6a488..5b4480e 100644 --- a/OUROBOROS_LOOP_EXPLAINED.md +++ b/OUROBOROS_LOOP_EXPLAINED.md @@ -251,6 +251,10 @@ class ShadowClassifier: } ``` +Notes: +- `layer0_risk_score` is an ordinal signal (0-5) used for triage and audit correlation, and may be context-weighted (e.g., production accounts). +- Telemetry-driven learning should be monotonic (escalate-only) unless replay validation explicitly approves relaxation. + ### Key Metrics for Learning 1. **Classification Accuracy** diff --git a/REGISTRY_ENHANCEMENT_SUMMARY.md b/REGISTRY_ENHANCEMENT_SUMMARY.md new file mode 100644 index 0000000..b80e5bd --- /dev/null +++ b/REGISTRY_ENHANCEMENT_SUMMARY.md @@ -0,0 +1,121 @@ +# Cloudflare Control Plane Registry Enhancement Summary + +## ✅ Enhanced Capability Registry (v1.0.1) + +**Key improvements implemented:** + +### 1. **Exact MCP Tool Names & Entrypoints** +- **Cloudflare Safe**: `cf_snapshot`, `cf_refresh`, `cf_config_diff`, etc. +- **WAF Intelligence**: `waf_capabilities`, `waf_analyze`, `waf_assess`, etc. +- **Oracle Answer**: `oracle_answer` +- **Entrypoints**: Exact Python module paths for execution + +### 2. **Operational Metadata** +- **Auth/Env**: Required environment variables per server +- **Side Effects**: Clear indication of read-only vs. mutating operations +- **Outputs**: Specific output formats (JSON, Terraform HCL, GitOps MRs) + +### 3. **Drift Prevention** +- **Tools section**: Exact MCP tool names prevent registry/source mismatch +- **Entrypoint specification**: Prevents confusion between modules and runnable servers +- **File references**: Uses actual file names (e.g., `invariant_checker_py.py`) + +### 4. 
**Machine-Checkable Contract** +The registry now serves as a **verifiable contract** between: +- **Documentation**: What capabilities are claimed +- **Implementation**: What tools are actually exposed +- **Operations**: What side effects and auth are required + +## 🎯 Registry Structure + +### MCP Server Template +```yaml +server_name: + module: "exact.python.module.path" + entrypoint: "runnable.server.path" + purpose: "clear operational purpose" + tools: ["exact_tool_name (operation_type)"] + auth_env: ["REQUIRED_VARS"] + side_effects: "read-only | propose | mutate" + outputs: ["json", "terraform_hcl", "gitops_mr"] +``` + +### GitOps Tool Template +```yaml +tool_name: + file: "exact/file/path.py" + purpose: "specific operational function" + side_effects: "creates MRs | applies changes | generates reports" + outputs: ["terraform_apply", "gitops_mr", "anomaly_report"] +``` + +## 🔒 Audit-Grade Features + +### Compliance Validation +- **Tool enumeration**: Every MCP tool is explicitly listed +- **Access control**: Auth requirements clearly documented +- **Change tracking**: Versioned registry with generation timestamps + +### Operational Transparency +- **Side effects**: Clear about mutating vs. read-only operations +- **Output formats**: Specific about what each component produces +- **Dependencies**: Environmental requirements explicitly stated + +### Drift Detection +- **File references**: Uses actual file names to prevent rename drift +- **Module paths**: Exact Python module paths prevent import confusion +- **Tool names**: Exact MCP tool names prevent capability mismatch + +## 🚀 Usage Examples + +### For Auditors +```bash +# Verify MCP server capabilities match documentation +cat capability_registry_v2.json | jq '.mcp_servers.cloudflare_safe.tools' + +# Check operational requirements +cat capability_registry_v2.json | jq '.mcp_servers.cloudflare_safe.auth_env' +``` + +### For Developers +```bash +# Validate new MCP server against registry template +python3 generate_capability_registry_v2.py + +# Check for capability drift +diff capability_registry_v2.json capability_registry.json +``` + +### For Operations +```bash +# Verify side effects before deployment +cat capability_registry_v2.json | jq '.mcp_servers.cloudflare_safe.side_effects' + +# Check output formats for integration +cat capability_registry_v2.json | jq '.gitops_tools.waf_rule_proposer.outputs' +``` + +## 📊 Registry Files Generated + +1. **`capability_registry_v2.json`** - Machine-readable contract +2. **`CAPABILITY_REGISTRY_V2.md`** - Human-readable documentation +3. **`generate_capability_registry_v2.py`** - Regeneration script + +## 🎯 Next Steps + +### Continuous Validation +- Add CI check to validate MCP tool names against registry +- Automated drift detection between registry and source code +- Periodic registry regeneration as capabilities evolve + +### Extended Metadata +- Add performance characteristics (timeouts, rate limits) +- Include error handling patterns +- Add recovery procedures for failed operations + +### Integration Testing +- Use registry to generate comprehensive test suites +- Validate auth/env requirements in test environment +- Verify side effects and outputs match expectations + +This enhanced registry transforms the Cloudflare control plane from **documented infrastructure** to **verifiable, auditable, and drift-resistant infrastructure**. 
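+
+A minimal shell sketch that wires these checks together in CI (assumes the generator, the two `ci_check_*.py` scripts, and `capability_registry_v2.json` live at the repository root as shown above; adapt paths to your layout):
+
+```bash
+#!/usr/bin/env bash
+# Illustrative registry drift gate for CI.
+set -euo pipefail
+
+# Regenerate the registry and docs, then fail if the committed copies have drifted.
+python3 generate_capability_registry_v2.py
+git diff --exit-code capability_registry_v2.json CAPABILITY_REGISTRY_V2.md
+
+# Verify tool-name parity and runnable entrypoints against the registry.
+python3 ci_check_tool_names.py
+python3 ci_check_entrypoints.py
+```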
\ No newline at end of file diff --git a/SECURITY_WAF_INTEL.md b/SECURITY_WAF_INTEL.md index c4826b7..ba6e20a 100644 --- a/SECURITY_WAF_INTEL.md +++ b/SECURITY_WAF_INTEL.md @@ -109,6 +109,8 @@ A local MCP server is registered in `opencode.jsonc` as `waf_intel`: } ``` +`waf_intel_mcp.py` delegates to the in-repo MCP stdio JSON-RPC implementation (`mcp.waf_intelligence.mcp_server`), so it does not require installing a separate Python MCP SDK. + The `security-audit` agent has `waf_intel` enabled in its tools section: ```jsonc diff --git a/USAGE_GUIDE.md b/USAGE_GUIDE.md new file mode 100644 index 0000000..8e6b06b --- /dev/null +++ b/USAGE_GUIDE.md @@ -0,0 +1,326 @@ +# Cloudflare MCP Tools Usage Guide + +## 🚀 Quick Start + +### 1. Configure Environment + +```bash +# Copy and edit the environment file +cp .env.example .env + +# Edit with your Cloudflare credentials +nano .env +``` + +**Required Credentials:** +- `CLOUDFLARE_API_TOKEN`: API token with Zone:Read, Zone:Write permissions +- `CLOUDFLARE_ACCOUNT_ID`: Your Cloudflare account ID + +### 2. Load Environment + +```bash +# Source the environment +source .env + +# Set Python path for MCP servers +export PYTHONPATH="/Users/sovereign/work-core" +``` + +## 🔧 Available MCP Tools + +### Cloudflare Safe MCP (`cloudflare.mcp.cloudflare_safe`) + +**Tools for managing Cloudflare infrastructure:** + +#### 1. Take Snapshot of Current State +```bash +python3 -c " +from cloudflare.mcp.cloudflare_safe.server import CloudflareServer +import os + +# Set environment +os.environ['CLOUDFLARE_API_TOKEN'] = 'your_token' +os.environ['CLOUDFLARE_ACCOUNT_ID'] = 'your_account_id' + +server = CloudflareServer() +result = server.cf_snapshot(scopes=['zones', 'tunnels', 'access_apps']) +print('Snapshot ID:', result['data']['snapshot_id']) +print('Summary:', result['summary']) +" +``` + +#### 2. List DNS Zones +```bash +python3 -c " +from cloudflare.mcp.cloudflare_safe.server import CloudflareServer +import os + +os.environ['CLOUDFLARE_API_TOKEN'] = 'your_token' +os.environ['CLOUDFLARE_ACCOUNT_ID'] = 'your_account_id' + +server = CloudflareServer() +result = server.cf_snapshot(scopes=['zones']) +zones = result['data']['counts']['zones'] +print(f'Found {zones} DNS zones') +" +``` + +#### 3. Check Tunnel Status +```bash +python3 -c " +from cloudflare.mcp.cloudflare_safe.server import CloudflareServer +import os + +os.environ['CLOUDFLARE_API_TOKEN'] = 'your_token' +os.environ['CLOUDFLARE_ACCOUNT_ID'] = 'your_account_id' + +server = CloudflareServer() +result = server.cf_tunnel_status() +print('Tunnel status:', result) +" +``` + +### WAF Intelligence MCP (`cloudflare.mcp.waf_intelligence.mcp_server`) + +**Tools for security analysis and rule generation:** + +#### 1. Analyze WAF Configuration +```bash +python3 -m cloudflare.mcp.waf_intelligence.mcp_server --file terraform/waf.tf --format text +``` + +#### 2. Generate Security Rules +```bash +python3 -c " +from cloudflare.mcp.waf_intelligence.orchestrator import WAFIntelligence + +waf_intel = WAFIntelligence() +analysis = waf_intel.analyze_and_recommend('terraform/waf.tf') +print('Security recommendations:', analysis) +" +``` + +## 🌐 Setting Up Domains + +### 1. 
Configure DNS Records via Terraform + +**Example DNS Configuration:** + +```hcl +# terraform/dns.tf +resource "cloudflare_zone" "domains" { + for_each = toset(["vaultmesh.org", "offsec.global"]) + + zone = each.key + plan = "free" +} + +resource "cloudflare_record" "root_a" { + for_each = cloudflare_zone.domains + + zone_id = each.value.id + name = "@" + value = "192.168.1.100" # Your server IP + type = "A" + proxied = true +} +``` + +### 2. Apply DNS Configuration + +```bash +# Initialize Terraform +terraform init + +# Plan changes +terraform plan + +# Apply DNS configuration +terraform apply +``` + +## 🛡️ Configuring WAF Security + +### 1. Basic WAF Rules + +```hcl +# terraform/waf.tf +resource "cloudflare_ruleset" "security_rules" { + for_each = cloudflare_zone.domains + + zone_id = each.value.id + name = "Security Rules" + kind = "zone" + phase = "http_request_firewall_custom" + + # Block admin access from untrusted IPs + rules { + action = "block" + expression = "(http.request.uri.path contains '/admin') and not (ip.src in {192.168.1.1 10.0.0.1})" + description = "Block admin access from untrusted IPs" + enabled = true + } +} +``` + +### 2. Enable Managed WAF + +```hcl +resource "cloudflare_ruleset" "managed_waf" { + for_each = cloudflare_zone.domains + + zone_id = each.value.id + name = "Managed WAF" + kind = "zone" + phase = "http_request_firewall_managed" + + # Cloudflare Managed Ruleset + rules { + action = "execute" + action_parameters { + id = "efb7b8c949ac4650a09736fc376e9aee" + } + expression = "true" + description = "Execute Cloudflare Managed Ruleset" + enabled = true + } +} +``` + +## 🌉 Setting Up Cloudflare Tunnels + +### 1. Configure Tunnels + +```hcl +# terraform/tunnels.tf +resource "cloudflare_tunnel" "vaultmesh" { + account_id = local.account_id + name = "vaultmesh-tunnel" + secret = var.tunnel_secret_vaultmesh +} + +resource "cloudflare_tunnel_config" "vaultmesh" { + account_id = local.account_id + tunnel_id = cloudflare_tunnel.vaultmesh.id + + config { + # API endpoint + ingress_rule { + hostname = "api.vaultmesh.org" + service = "http://localhost:8080" + } + + # Dashboard + ingress_rule { + hostname = "dash.vaultmesh.org" + service = "http://localhost:3000" + } + } +} +``` + +### 2. Generate Tunnel Secrets + +```bash +# Generate secure tunnel secrets +openssl rand -base64 32 + +# Add to your .env file +TUNNEL_SECRET_VAULTMESH="generated_secret_here" +``` + +## 🔍 Monitoring and Validation + +### 1. Check Current State + +```bash +# Use the invariant checker to validate configuration +python3 scripts/invariant_checker_py.py +``` + +### 2. Monitor Tunnel Health + +```bash +# Check tunnel status via MCP +python3 -c " +from cloudflare.mcp.cloudflare_safe.server import CloudflareServer +import os + +os.environ.update({ + 'CLOUDFLARE_API_TOKEN': 'your_token', + 'CLOUDFLARE_ACCOUNT_ID': 'your_account_id' +}) + +server = CloudflareServer() +status = server.cf_tunnel_status() +print('Tunnel health:', status) +" +``` + +## 🚨 Common Operations + +### Adding New Domain + +1. **Add to Terraform zones list** +2. **Run `terraform apply`** +3. **Verify DNS propagation** +4. **Configure WAF rules** + +### Updating Security Rules + +1. **Modify `terraform/waf.tf`** +2. **Run `terraform plan` to preview** +3. **Apply with `terraform apply`** +4. **Validate with WAF Intelligence MCP** + +### Tunnel Management + +1. **Generate new tunnel secret** +2. **Update Terraform configuration** +3. **Apply changes** +4. 
**Verify connectivity** + +## 📊 Best Practices + +### Security +- Use least-privilege API tokens +- Enable 2FA on Cloudflare account +- Regular security audits with WAF Intelligence +- Monitor access logs + +### Operations +- Test changes in staging first +- Use Terraform for all infrastructure changes +- Regular backups of Terraform state +- Monitor tunnel health + +### Monitoring +- Set up Cloudflare analytics +- Monitor WAF rule effectiveness +- Track DNS resolution times +- Alert on security events + +## 🆘 Troubleshooting + +### Common Issues + +**API Token Errors** +```bash +# Verify token permissions +curl -X GET "https://api.cloudflare.com/client/v4/user/tokens/verify" \ + -H "Authorization: Bearer $CLOUDFLARE_API_TOKEN" +``` + +**Tunnel Connectivity** +```bash +# Check cloudflared service status +cloudflared tunnel list +``` + +**DNS Issues** +```bash +# Verify DNS resolution +dig yourdomain.com +``` + +This guide provides the foundation for managing your Cloudflare infrastructure using the MCP tools. Start with basic DNS setup, then progressively add WAF rules and tunnels as needed. \ No newline at end of file diff --git a/capability_registry.json b/capability_registry.json new file mode 100644 index 0000000..ab72caf --- /dev/null +++ b/capability_registry.json @@ -0,0 +1,185 @@ +{ + "metadata": { + "generated_at": "2025-12-18T02:19:38.165161+00:00", + "version": "1.0.0", + "scope": "Cloudflare Control Plane" + }, + "mcp_servers": { + "cloudflare_safe": { + "module": "cloudflare.mcp.cloudflare_safe", + "purpose": "Secure Cloudflare API operations", + "capabilities": [ + "dns_record_management", + "waf_rule_configuration", + "tunnel_health_monitoring", + "zone_analytics_query", + "terraform_state_synchronization" + ], + "security": { + "token_redaction": true, + "error_handling": true, + "rate_limiting": true + } + }, + "waf_intelligence": { + "module": "cloudflare.mcp.waf_intelligence", + "purpose": "WAF rule analysis and synthesis", + "capabilities": [ + "waf_config_analysis", + "threat_intelligence_integration", + "compliance_mapping", + "rule_gap_identification", + "terraform_ready_rule_generation" + ], + "intelligence": { + "ml_classification": true, + "threat_intel": true, + "compliance_frameworks": [ + "PCI-DSS 6.6", + "OWASP-ASVS 13" + ] + } + }, + "oracle_answer": { + "module": "cloudflare.mcp.oracle_answer", + "purpose": "Security decision support", + "capabilities": [ + "security_classification", + "routing_decision_support", + "threat_assessment", + "pre_execution_screening" + ], + "integration": { + "layer0_framework": true, + "shadow_classifier": true, + "preboot_logging": true + } + } + }, + "terraform_resources": { + "dns_management": { + "files": [ + "dns.tf" + ], + "resources": [ + "cloudflare_record", + "cloudflare_zone" + ], + "capabilities": [ + "automated_dns_provisioning", + "spf_dmarc_mx_configuration", + "tunnel_based_routing", + "proxied_record_management" + ] + }, + "waf_security": { + "files": [ + "waf.tf" + ], + "resources": [ + "cloudflare_ruleset", + "cloudflare_bot_management" + ], + "capabilities": [ + "custom_waf_rules", + "managed_ruleset_integration", + "bot_management", + "rate_limiting", + "country_blocking" + ] + }, + "tunnel_infrastructure": { + "files": [ + "tunnels.tf" + ], + "resources": [ + "cloudflare_tunnel", + "cloudflare_tunnel_config" + ], + "capabilities": [ + "multi_service_tunnel_routing", + "ingress_rule_management", + "health_monitoring", + "credential_rotation" + ] + } + }, + "gitops_tools": { + "waf_rule_proposer": { + "file": 
"gitops/waf_rule_proposer.py", + "purpose": "Automated WAF rule generation", + "capabilities": [ + "threat_intel_driven_rules", + "gitlab_ci_integration", + "automated_mr_creation", + "compliance_mapping" + ] + }, + "invariant_checker": { + "file": "scripts/invariant_checker_py.py", + "purpose": "Real-time state validation", + "capabilities": [ + "dns_integrity_checks", + "waf_compliance_validation", + "tunnel_health_monitoring", + "drift_detection" + ] + }, + "drift_guardian": { + "file": "scripts/drift_guardian_py.py", + "purpose": "Automated remediation", + "capabilities": [ + "state_reconciliation", + "auto_remediation", + "ops_notification" + ] + } + }, + "security_framework": { + "layer0": { + "components": [ + "entrypoint.py", + "shadow_classifier.py", + "preboot_logger.py" + ], + "capabilities": [ + "pre_execution_security_classification", + "threat_assessment", + "security_event_logging", + "routing_decision_support" + ], + "classification_levels": [ + "catastrophic", + "forbidden", + "ambiguous", + "blessed" + ] + } + }, + "operational_tools": { + "systemd_services": { + "services": [ + "autonomous-remediator", + "drift-guardian", + "tunnel-rotation" + ], + "capabilities": [ + "continuous_monitoring", + "automated_remediation", + "scheduled_operations" + ] + }, + "test_suites": { + "suites": [ + "layer0_validation", + "mcp_integration", + "cloudflare_safe_ingress" + ], + "capabilities": [ + "security_classification_testing", + "mcp_server_validation", + "api_integration_testing" + ] + } + } +} \ No newline at end of file diff --git a/capability_registry_v2.json b/capability_registry_v2.json new file mode 100644 index 0000000..ae5933b --- /dev/null +++ b/capability_registry_v2.json @@ -0,0 +1,243 @@ +{ + "metadata": { + "generated_at": "2025-12-18T02:38:01.740122+00:00", + "version": "1.0.1", + "scope": "Cloudflare Control Plane" + }, + "mcp_servers": { + "cloudflare_safe": { + "module": "cloudflare.mcp.cloudflare_safe", + "entrypoint": "cloudflare.mcp.cloudflare_safe", + "purpose": "Secure Cloudflare API operations", + "tools": [ + "cf_snapshot (read/write token required)", + "cf_refresh (write token required)", + "cf_config_diff (read; requires snapshot_id)", + "cf_export_config (read)", + "cf_tunnel_status (read)", + "cf_tunnel_ingress_summary (read)", + "cf_access_policy_list (read)" + ], + "auth_env": [ + "CLOUDFLARE_API_TOKEN", + "CLOUDFLARE_ACCOUNT_ID" + ], + "side_effects": "read-only unless token present; cf_refresh/cf_snapshot are mutating", + "outputs": [ + "json", + "terraform_hcl" + ], + "capabilities": [ + "dns_record_management", + "waf_rule_configuration", + "tunnel_health_monitoring", + "zone_analytics_query", + "terraform_state_synchronization" + ], + "security": { + "token_redaction": true, + "error_handling": true, + "rate_limiting": true + } + }, + "waf_intelligence": { + "module": "cloudflare.mcp.waf_intelligence", + "entrypoint": "cloudflare.mcp.waf_intelligence.mcp_server", + "purpose": "WAF rule analysis and synthesis", + "tools": [ + "waf_capabilities (read)", + "waf_analyze (read)", + "waf_assess (read)", + "waf_generate_gitops_proposals (propose)" + ], + "auth_env": [], + "side_effects": "propose-only; generates GitOps proposals", + "outputs": [ + "json", + "terraform_hcl", + "gitops_mr" + ], + "capabilities": [ + "waf_config_analysis", + "threat_intelligence_integration", + "compliance_mapping", + "rule_gap_identification", + "terraform_ready_rule_generation" + ], + "intelligence": { + "ml_classification": true, + "threat_intel": true, + 
"compliance_frameworks": [ + "PCI-DSS 6.6", + "OWASP-ASVS 13" + ] + } + }, + "oracle_answer": { + "module": "cloudflare.mcp.oracle_answer", + "entrypoint": "cloudflare.mcp.oracle_answer", + "purpose": "Security decision support", + "tools": [ + "oracle_answer (read)" + ], + "auth_env": [], + "side_effects": "read-only; security classification only", + "outputs": [ + "json", + "security_classification" + ], + "capabilities": [ + "security_classification", + "routing_decision_support", + "threat_assessment", + "pre_execution_screening" + ], + "integration": { + "layer0_framework": true, + "shadow_classifier": true, + "preboot_logging": true + } + } + }, + "terraform_resources": { + "dns_management": { + "files": [ + "dns.tf" + ], + "resources": [ + "cloudflare_record", + "cloudflare_zone" + ], + "capabilities": [ + "automated_dns_provisioning", + "spf_dmarc_mx_configuration", + "tunnel_based_routing", + "proxied_record_management" + ] + }, + "waf_security": { + "files": [ + "waf.tf" + ], + "resources": [ + "cloudflare_ruleset", + "cloudflare_bot_management" + ], + "capabilities": [ + "custom_waf_rules", + "managed_ruleset_integration", + "bot_management", + "rate_limiting", + "country_blocking" + ] + }, + "tunnel_infrastructure": { + "files": [ + "tunnels.tf" + ], + "resources": [ + "cloudflare_tunnel", + "cloudflare_tunnel_config" + ], + "capabilities": [ + "multi_service_tunnel_routing", + "ingress_rule_management", + "health_monitoring", + "credential_rotation" + ] + } + }, + "gitops_tools": { + "waf_rule_proposer": { + "file": "gitops/waf_rule_proposer.py", + "purpose": "Automated WAF rule generation", + "side_effects": "creates GitLab merge requests", + "outputs": [ + "terraform_hcl", + "gitops_mr" + ], + "capabilities": [ + "threat_intel_driven_rules", + "gitlab_ci_integration", + "automated_mr_creation", + "compliance_mapping" + ] + }, + "invariant_checker": { + "file": "scripts/invariant_checker_py.py", + "purpose": "Real-time state validation", + "side_effects": "generates anomaly reports", + "outputs": [ + "json", + "anomaly_report" + ], + "capabilities": [ + "dns_integrity_checks", + "waf_compliance_validation", + "tunnel_health_monitoring", + "drift_detection" + ] + }, + "drift_guardian": { + "file": "scripts/drift_guardian_py.py", + "purpose": "Automated remediation", + "side_effects": "applies Terraform changes", + "outputs": [ + "terraform_apply", + "remediation_report" + ], + "capabilities": [ + "state_reconciliation", + "auto_remediation", + "ops_notification" + ] + } + }, + "security_framework": { + "layer0": { + "components": [ + "entrypoint.py", + "shadow_classifier.py", + "preboot_logger.py" + ], + "capabilities": [ + "pre_execution_security_classification", + "threat_assessment", + "security_event_logging", + "routing_decision_support" + ], + "classification_levels": [ + "catastrophic", + "forbidden", + "ambiguous", + "blessed" + ] + } + }, + "operational_tools": { + "systemd_services": { + "services": [ + "autonomous-remediator", + "drift-guardian", + "tunnel-rotation" + ], + "capabilities": [ + "continuous_monitoring", + "automated_remediation", + "scheduled_operations" + ] + }, + "test_suites": { + "suites": [ + "layer0_validation", + "mcp_integration", + "cloudflare_safe_ingress" + ], + "capabilities": [ + "security_classification_testing", + "mcp_server_validation", + "api_integration_testing" + ] + } + } +} \ No newline at end of file diff --git a/ci_check_entrypoints.py b/ci_check_entrypoints.py new file mode 100755 index 0000000..6f97b1e --- /dev/null +++ 
b/ci_check_entrypoints.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +""" +CI Entrypoint Sanity Check + +Validates that all MCP server entrypoints are runnable. +Fails CI if any entrypoint has import or startup errors. +""" + +import json +import subprocess +import sys +import os +from pathlib import Path + + +def get_registry_entrypoints(): + """Load entrypoints from capability registry.""" + with open("capability_registry_v2.json", "r") as f: + registry = json.load(f) + + entrypoints = {} + for server_name, server_info in registry["mcp_servers"].items(): + entrypoints[server_name] = server_info["entrypoint"] + + return entrypoints + + +def check_entrypoint(server_name: str, entrypoint: str) -> tuple[bool, str]: + """Check if an entrypoint is runnable.""" + try: + # Test with --help flag or equivalent + env = os.environ.copy() + env["PYTHONPATH"] = "/Users/sovereign/work-core" + + result = subprocess.run( + ["python3", "-m", entrypoint, "--help"], + capture_output=True, + text=True, + timeout=10, + env=env, + ) + + if result.returncode == 0: + return True, f"✅ {server_name}: Entrypoint '{entrypoint}' is runnable" + else: + return ( + False, + f"❌ {server_name}: Entrypoint '{entrypoint}' failed with exit code {result.returncode}\n{result.stderr}", + ) + + except subprocess.TimeoutExpired: + return False, f"❌ {server_name}: Entrypoint '{entrypoint}' timed out" + except FileNotFoundError: + return False, f"❌ {server_name}: Entrypoint '{entrypoint}' not found" + except Exception as e: + return ( + False, + f"❌ {server_name}: Entrypoint '{entrypoint}' failed with error: {e}", + ) + + +def main(): + """Main CI check function.""" + print("🔍 CI Entrypoint Sanity Check") + print("=" * 50) + + entrypoints = get_registry_entrypoints() + errors = [] + successes = [] + + for server_name, entrypoint in entrypoints.items(): + success, message = check_entrypoint(server_name, entrypoint) + if success: + successes.append(message) + else: + errors.append(message) + + # Print results + for success in successes: + print(success) + + for error in errors: + print(error) + + if errors: + print(f"\n❌ {len(errors)} entrypoint(s) failed") + print("💡 Fix: Update capability_registry_v2.json with correct entrypoints") + sys.exit(1) + else: + print(f"\n✅ All {len(successes)} entrypoints are runnable") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/ci_check_tool_names.py b/ci_check_tool_names.py new file mode 100755 index 0000000..6123697 --- /dev/null +++ b/ci_check_tool_names.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python3 +""" +CI Tool Name Parity Check + +Validates that MCP server tool names match the capability registry. 
+Fails CI if: +- A tool exists but isn't registered +- A registered tool no longer exists +""" + +import json +import subprocess +import sys +import os +import re +from pathlib import Path + + +def get_registry_tools(): + """Load tool names from capability registry.""" + with open("capability_registry_v2.json", "r") as f: + registry = json.load(f) + + registry_tools = {} + for server_name, server_info in registry["mcp_servers"].items(): + # Extract base tool names (remove operation type) + tools = [] + for tool_desc in server_info["tools"]: + tool_name = tool_desc.split(" (")[0] # Remove "(operation_type)" + tools.append(tool_name) + registry_tools[server_name] = set(tools) + + return registry_tools + + +def get_actual_tools(): + """Get actual tool names from MCP servers by examining source code.""" + actual_tools = {} + + # Extract tools from Cloudflare Safe server source + try: + with open("mcp/cloudflare_safe/server.py", "r") as f: + content = f.read() + + # Look for tool function definitions + import re + + tool_pattern = r"def (cf_\w+)\(" + tools_found = set(re.findall(tool_pattern, content)) + + # Filter out internal functions + valid_tools = {tool for tool in tools_found if not tool.startswith("_")} + actual_tools["cloudflare_safe"] = valid_tools + except Exception as e: + print(f"⚠️ Could not extract tools from cloudflare_safe: {e}") + + # Extract tools from WAF Intelligence server source + try: + with open("mcp/waf_intelligence/mcp_server.py", "r") as f: + content = f.read() + + tool_pattern = r"def (waf_\w+)\(" + tools_found = set(re.findall(tool_pattern, content)) + + valid_tools = {tool for tool in tools_found if not tool.startswith("_")} + actual_tools["waf_intelligence"] = valid_tools + except Exception as e: + print(f"⚠️ Could not extract tools from waf_intelligence: {e}") + + # Extract tools from Oracle Answer server source + try: + with open("mcp/oracle_answer/server.py", "r") as f: + content = f.read() + + tool_pattern = r"def (\w+)\(" + tools_found = set(re.findall(tool_pattern, content)) + + # Look for oracle_answer specifically + oracle_tools = {tool for tool in tools_found if "oracle" in tool.lower()} + actual_tools["oracle_answer"] = oracle_tools + except Exception as e: + print(f"⚠️ Could not extract tools from oracle_answer: {e}") + + return actual_tools + + +def check_tool_parity(): + """Compare registry tools with actual tools.""" + registry_tools = get_registry_tools() + actual_tools = get_actual_tools() + + errors = [] + + for server_name in set(registry_tools.keys()) | set(actual_tools.keys()): + reg_tools = registry_tools.get(server_name, set()) + act_tools = actual_tools.get(server_name, set()) + + # Check for tools in registry but not in actual + missing_in_actual = reg_tools - act_tools + if missing_in_actual: + errors.append( + f"❌ {server_name}: Tools registered but not found: {missing_in_actual}" + ) + + # Check for tools in actual but not in registry + missing_in_registry = act_tools - reg_tools + if missing_in_registry: + errors.append( + f"❌ {server_name}: Tools found but not registered: {missing_in_registry}" + ) + + # Report parity + if not missing_in_actual and not missing_in_registry: + print(f"✅ {server_name}: Tool parity verified ({len(reg_tools)} tools)") + + return errors + + +def main(): + """Main CI check function.""" + print("🔍 CI Tool Name Parity Check") + print("=" * 50) + + errors = check_tool_parity() + + if errors: + print("\n❌ Registry drift detected:") + for error in errors: + print(error) + print( + "\n💡 Fix: Update 
capability_registry_v2.json to match actual MCP server tools" + ) + sys.exit(1) + else: + print("\n✅ All MCP server tools match capability registry") + sys.exit(0) + + +if __name__ == "__main__": + main() diff --git a/generate_capability_registry.py b/generate_capability_registry.py new file mode 100644 index 0000000..f6ff42d --- /dev/null +++ b/generate_capability_registry.py @@ -0,0 +1,350 @@ +#!/usr/bin/env python3 +""" +Cloudflare Control Plane Capability Registry Generator + +Generates a machine-readable registry of all MCP server capabilities, +Terraform resources, and operational tools for auditability and documentation. +""" + +import json +from pathlib import Path +from datetime import datetime, timezone +from typing import Dict, List, Any + +# Registry structure +CAPABILITY_REGISTRY = { + "metadata": { + "generated_at": datetime.now(timezone.utc).isoformat(), + "version": "1.0.0", + "scope": "Cloudflare Control Plane", + }, + "mcp_servers": {}, + "terraform_resources": {}, + "gitops_tools": {}, + "security_framework": {}, + "operational_tools": {}, +} + +# MCP Server capabilities (from analysis) +MCP_CAPABILITIES = { + "cloudflare_safe": { + "module": "cloudflare.mcp.cloudflare_safe", + "entrypoint": "cloudflare.mcp.cloudflare_safe", + "purpose": "Secure Cloudflare API operations", + "tools": [ + "cf_snapshot (read/write token required)", + "cf_refresh (write token required)", + "cf_config_diff (read; requires snapshot_id)", + "cf_export_config (read)", + "cf_tunnel_status (read)", + "cf_tunnel_ingress_summary (read)", + "cf_access_policy_list (read)" + ], + "auth_env": ["CLOUDFLARE_API_TOKEN", "CLOUDFLARE_ACCOUNT_ID"], + "side_effects": "read-only unless token present; cf_refresh/cf_snapshot are mutating", + "outputs": ["json", "terraform_hcl"], + "capabilities": [ + "dns_record_management", + "waf_rule_configuration", + "tunnel_health_monitoring", + "zone_analytics_query", + "terraform_state_synchronization" + ], + "security": { + "token_redaction": True, + "error_handling": True, + "rate_limiting": True + } + }, + "waf_intelligence": { + "module": "cloudflare.mcp.waf_intelligence", + "entrypoint": "cloudflare.mcp.waf_intelligence.mcp_server", + "purpose": "WAF rule analysis and synthesis", + "tools": [ + "waf_capabilities (read)", + "waf_analyze (read)", + "waf_assess (read)", + "waf_generate_gitops_proposals (propose)" + ], + "auth_env": [], + "side_effects": "propose-only; generates GitOps proposals", + "outputs": ["json", "terraform_hcl", "gitops_mr"], + "capabilities": [ + "waf_config_analysis", + "threat_intelligence_integration", + "compliance_mapping", + "rule_gap_identification", + "terraform_ready_rule_generation" + ], + "intelligence": { + "ml_classification": True, + "threat_intel": True, + "compliance_frameworks": ["PCI-DSS 6.6", "OWASP-ASVS 13"] + } + }, + "oracle_answer": { + "module": "cloudflare.mcp.oracle_answer", + "entrypoint": "cloudflare.mcp.oracle_answer", + "purpose": "Security decision support", + "tools": ["oracle_answer (read)"], + "auth_env": [], + "side_effects": "read-only; security classification only", + "outputs": ["json", "security_classification"], + "capabilities": [ + "security_classification", + "routing_decision_support", + "threat_assessment", + "pre_execution_screening" + ], + "integration": { + "layer0_framework": True, + "shadow_classifier": True, + "preboot_logging": True + } + } +} + }, + }, + "waf_intelligence": { + "module": "cloudflare.mcp.waf_intelligence", + "purpose": "WAF rule analysis and synthesis", + "capabilities": [ + 
"waf_config_analysis", + "threat_intelligence_integration", + "compliance_mapping", + "rule_gap_identification", + "terraform_ready_rule_generation", + ], + "intelligence": { + "ml_classification": True, + "threat_intel": True, + "compliance_frameworks": ["PCI-DSS 6.6", "OWASP-ASVS 13"], + }, + }, + "oracle_answer": { + "module": "cloudflare.mcp.oracle_answer", + "purpose": "Security decision support", + "capabilities": [ + "security_classification", + "routing_decision_support", + "threat_assessment", + "pre_execution_screening", + ], + "integration": { + "layer0_framework": True, + "shadow_classifier": True, + "preboot_logging": True, + }, + }, +} + +# Terraform resources (from analysis) +TERRAFORM_RESOURCES = { + "dns_management": { + "files": ["dns.tf"], + "resources": ["cloudflare_record", "cloudflare_zone"], + "capabilities": [ + "automated_dns_provisioning", + "spf_dmarc_mx_configuration", + "tunnel_based_routing", + "proxied_record_management", + ], + }, + "waf_security": { + "files": ["waf.tf"], + "resources": ["cloudflare_ruleset", "cloudflare_bot_management"], + "capabilities": [ + "custom_waf_rules", + "managed_ruleset_integration", + "bot_management", + "rate_limiting", + "country_blocking", + ], + }, + "tunnel_infrastructure": { + "files": ["tunnels.tf"], + "resources": ["cloudflare_tunnel", "cloudflare_tunnel_config"], + "capabilities": [ + "multi_service_tunnel_routing", + "ingress_rule_management", + "health_monitoring", + "credential_rotation", + ], + }, +} + +# GitOps tools +GITOPS_TOOLS = { + "waf_rule_proposer": { + "file": "gitops/waf_rule_proposer.py", + "purpose": "Automated WAF rule generation", + "capabilities": [ + "threat_intel_driven_rules", + "gitlab_ci_integration", + "automated_mr_creation", + "compliance_mapping", + ], + }, + "invariant_checker": { + "file": "scripts/invariant_checker_py.py", + "purpose": "Real-time state validation", + "capabilities": [ + "dns_integrity_checks", + "waf_compliance_validation", + "tunnel_health_monitoring", + "drift_detection", + ], + }, + "drift_guardian": { + "file": "scripts/drift_guardian_py.py", + "purpose": "Automated remediation", + "capabilities": [ + "state_reconciliation", + "auto_remediation", + "ops_notification", + ], + }, +} + +# Security framework +SECURITY_FRAMEWORK = { + "layer0": { + "components": ["entrypoint.py", "shadow_classifier.py", "preboot_logger.py"], + "capabilities": [ + "pre_execution_security_classification", + "threat_assessment", + "security_event_logging", + "routing_decision_support", + ], + "classification_levels": ["catastrophic", "forbidden", "ambiguous", "blessed"], + } +} + +# Operational tools +OPERATIONAL_TOOLS = { + "systemd_services": { + "services": ["autonomous-remediator", "drift-guardian", "tunnel-rotation"], + "capabilities": [ + "continuous_monitoring", + "automated_remediation", + "scheduled_operations", + ], + }, + "test_suites": { + "suites": ["layer0_validation", "mcp_integration", "cloudflare_safe_ingress"], + "capabilities": [ + "security_classification_testing", + "mcp_server_validation", + "api_integration_testing", + ], + }, +} + + +def generate_registry(): + """Generate the complete capability registry.""" + + CAPABILITY_REGISTRY["mcp_servers"] = MCP_CAPABILITIES + CAPABILITY_REGISTRY["terraform_resources"] = TERRAFORM_RESOURCES + CAPABILITY_REGISTRY["gitops_tools"] = GITOPS_TOOLS + CAPABILITY_REGISTRY["security_framework"] = SECURITY_FRAMEWORK + CAPABILITY_REGISTRY["operational_tools"] = OPERATIONAL_TOOLS + + return CAPABILITY_REGISTRY + + +def 
save_registry_formats(): + """Save registry in multiple formats for different use cases.""" + + registry = generate_registry() + + # JSON format (machine-readable) + with open("capability_registry.json", "w") as f: + json.dump(registry, f, indent=2) + + # Markdown format (documentation) + markdown_content = generate_markdown_doc(registry) + with open("CAPABILITY_REGISTRY.md", "w") as f: + f.write(markdown_content) + + print("✅ Capability registry generated:") + print(" - capability_registry.json (machine-readable)") + print(" - CAPABILITY_REGISTRY.md (documentation)") + + +def generate_markdown_doc(registry: Dict[str, Any]) -> str: + """Generate Markdown documentation from registry.""" + + md = f"""# Cloudflare Control Plane Capability Registry + +Generated: {registry["metadata"]["generated_at"]} +Version: {registry["metadata"]["version"]} + +## MCP Servers + +""" + + for server_name, server_info in registry["mcp_servers"].items(): + md += f"### {server_name}\n" + md += f"**Module**: `{server_info['module']}` \n" + md += f"**Purpose**: {server_info['purpose']} \n\n" + + md += "**Capabilities**:\n" + for cap in server_info["capabilities"]: + md += f"- {cap}\n" + md += "\n" + + md += "## Terraform Resources\n\n" + for resource_name, resource_info in registry["terraform_resources"].items(): + md += f"### {resource_name}\n" + md += f"**Files**: {', '.join(resource_info['files'])} \n\n" + + md += "**Capabilities**:\n" + for cap in resource_info["capabilities"]: + md += f"- {cap}\n" + md += "\n" + + md += "## GitOps Tools\n\n" + for tool_name, tool_info in registry["gitops_tools"].items(): + md += f"### {tool_name}\n" + md += f"**File**: {tool_info['file']} \n" + md += f"**Purpose**: {tool_info['purpose']} \n\n" + + md += "**Capabilities**:\n" + for cap in tool_info["capabilities"]: + md += f"- {cap}\n" + md += "\n" + + md += "## Security Framework\n\n" + for framework_name, framework_info in registry["security_framework"].items(): + md += f"### {framework_name}\n" + md += f"**Components**: {', '.join(framework_info['components'])} \n\n" + + md += "**Capabilities**:\n" + for cap in framework_info["capabilities"]: + md += f"- {cap}\n" + md += "\n" + + md += "**Classification Levels**:\n" + for level in framework_info["classification_levels"]: + md += f"- {level}\n" + md += "\n" + + md += "## Operational Tools\n\n" + for tool_category, tool_info in registry["operational_tools"].items(): + md += f"### {tool_category}\n" + if "services" in tool_info: + md += f"**Services**: {', '.join(tool_info['services'])} \n\n" + elif "suites" in tool_info: + md += f"**Test Suites**: {', '.join(tool_info['suites'])} \n\n" + + md += "**Capabilities**:\n" + for cap in tool_info["capabilities"]: + md += f"- {cap}\n" + md += "\n" + + return md + + +if __name__ == "__main__": + save_registry_formats() diff --git a/generate_capability_registry_v2.py b/generate_capability_registry_v2.py new file mode 100644 index 0000000..c834ced --- /dev/null +++ b/generate_capability_registry_v2.py @@ -0,0 +1,332 @@ +#!/usr/bin/env python3 +""" +Cloudflare Control Plane Capability Registry Generator v2 + +Enhanced with exact MCP tool names, entrypoints, and operational details +for audit-grade documentation and drift prevention. 
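+
+Typical invocation (illustrative; run from the repository root so the relative
+output paths resolve):
+
+    python3 generate_capability_registry_v2.py
+    python3 ci_check_tool_names.py
+    python3 ci_check_entrypoints.py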
+""" + +import json +from pathlib import Path +from datetime import datetime, timezone + +# Registry structure +CAPABILITY_REGISTRY = { + "metadata": { + "generated_at": datetime.now(timezone.utc).isoformat(), + "version": "1.0.1", + "scope": "Cloudflare Control Plane", + }, + "mcp_servers": {}, + "terraform_resources": {}, + "gitops_tools": {}, + "security_framework": {}, + "operational_tools": {}, +} + +# MCP Server capabilities with exact tool names +MCP_CAPABILITIES = { + "cloudflare_safe": { + "module": "cloudflare.mcp.cloudflare_safe", + "entrypoint": "cloudflare.mcp.cloudflare_safe", + "purpose": "Secure Cloudflare API operations", + "tools": [ + "cf_snapshot (read/write token required)", + "cf_refresh (write token required)", + "cf_config_diff (read; requires snapshot_id)", + "cf_export_config (read)", + "cf_tunnel_status (read)", + "cf_tunnel_ingress_summary (read)", + "cf_access_policy_list (read)", + ], + "auth_env": ["CLOUDFLARE_API_TOKEN", "CLOUDFLARE_ACCOUNT_ID"], + "side_effects": "read-only unless token present; cf_refresh/cf_snapshot are mutating", + "outputs": ["json", "terraform_hcl"], + "capabilities": [ + "dns_record_management", + "waf_rule_configuration", + "tunnel_health_monitoring", + "zone_analytics_query", + "terraform_state_synchronization", + ], + "security": { + "token_redaction": True, + "error_handling": True, + "rate_limiting": True, + }, + }, + "waf_intelligence": { + "module": "cloudflare.mcp.waf_intelligence", + "entrypoint": "cloudflare.mcp.waf_intelligence.mcp_server", + "purpose": "WAF rule analysis and synthesis", + "tools": [ + "waf_capabilities (read)", + "waf_analyze (read)", + "waf_assess (read)", + "waf_generate_gitops_proposals (propose)", + ], + "auth_env": [], + "side_effects": "propose-only; generates GitOps proposals", + "outputs": ["json", "terraform_hcl", "gitops_mr"], + "capabilities": [ + "waf_config_analysis", + "threat_intelligence_integration", + "compliance_mapping", + "rule_gap_identification", + "terraform_ready_rule_generation", + ], + "intelligence": { + "ml_classification": True, + "threat_intel": True, + "compliance_frameworks": ["PCI-DSS 6.6", "OWASP-ASVS 13"], + }, + }, + "oracle_answer": { + "module": "cloudflare.mcp.oracle_answer", + "entrypoint": "cloudflare.mcp.oracle_answer", + "purpose": "Security decision support", + "tools": ["oracle_answer (read)"], + "auth_env": [], + "side_effects": "read-only; security classification only", + "outputs": ["json", "security_classification"], + "capabilities": [ + "security_classification", + "routing_decision_support", + "threat_assessment", + "pre_execution_screening", + ], + "integration": { + "layer0_framework": True, + "shadow_classifier": True, + "preboot_logging": True, + }, + }, +} + +# Terraform resources (from analysis) +TERRAFORM_RESOURCES = { + "dns_management": { + "files": ["dns.tf"], + "resources": ["cloudflare_record", "cloudflare_zone"], + "capabilities": [ + "automated_dns_provisioning", + "spf_dmarc_mx_configuration", + "tunnel_based_routing", + "proxied_record_management", + ], + }, + "waf_security": { + "files": ["waf.tf"], + "resources": ["cloudflare_ruleset", "cloudflare_bot_management"], + "capabilities": [ + "custom_waf_rules", + "managed_ruleset_integration", + "bot_management", + "rate_limiting", + "country_blocking", + ], + }, + "tunnel_infrastructure": { + "files": ["tunnels.tf"], + "resources": ["cloudflare_tunnel", "cloudflare_tunnel_config"], + "capabilities": [ + "multi_service_tunnel_routing", + "ingress_rule_management", + "health_monitoring", + 
"credential_rotation", + ], + }, +} + +# GitOps tools with operational details +GITOPS_TOOLS = { + "waf_rule_proposer": { + "file": "gitops/waf_rule_proposer.py", + "purpose": "Automated WAF rule generation", + "side_effects": "creates GitLab merge requests", + "outputs": ["terraform_hcl", "gitops_mr"], + "capabilities": [ + "threat_intel_driven_rules", + "gitlab_ci_integration", + "automated_mr_creation", + "compliance_mapping", + ], + }, + "invariant_checker": { + "file": "scripts/invariant_checker_py.py", + "purpose": "Real-time state validation", + "side_effects": "generates anomaly reports", + "outputs": ["json", "anomaly_report"], + "capabilities": [ + "dns_integrity_checks", + "waf_compliance_validation", + "tunnel_health_monitoring", + "drift_detection", + ], + }, + "drift_guardian": { + "file": "scripts/drift_guardian_py.py", + "purpose": "Automated remediation", + "side_effects": "applies Terraform changes", + "outputs": ["terraform_apply", "remediation_report"], + "capabilities": [ + "state_reconciliation", + "auto_remediation", + "ops_notification", + ], + }, +} + +# Security framework +SECURITY_FRAMEWORK = { + "layer0": { + "components": ["entrypoint.py", "shadow_classifier.py", "preboot_logger.py"], + "capabilities": [ + "pre_execution_security_classification", + "threat_assessment", + "security_event_logging", + "routing_decision_support", + ], + "classification_levels": ["catastrophic", "forbidden", "ambiguous", "blessed"], + } +} + +# Operational tools +OPERATIONAL_TOOLS = { + "systemd_services": { + "services": ["autonomous-remediator", "drift-guardian", "tunnel-rotation"], + "capabilities": [ + "continuous_monitoring", + "automated_remediation", + "scheduled_operations", + ], + }, + "test_suites": { + "suites": ["layer0_validation", "mcp_integration", "cloudflare_safe_ingress"], + "capabilities": [ + "security_classification_testing", + "mcp_server_validation", + "api_integration_testing", + ], + }, +} + + +def generate_registry(): + """Generate the complete capability registry.""" + + CAPABILITY_REGISTRY["mcp_servers"] = MCP_CAPABILITIES + CAPABILITY_REGISTRY["terraform_resources"] = TERRAFORM_RESOURCES + CAPABILITY_REGISTRY["gitops_tools"] = GITOPS_TOOLS + CAPABILITY_REGISTRY["security_framework"] = SECURITY_FRAMEWORK + CAPABILITY_REGISTRY["operational_tools"] = OPERATIONAL_TOOLS + + return CAPABILITY_REGISTRY + + +def save_registry_formats(): + """Save registry in multiple formats for different use cases.""" + + registry = generate_registry() + + # JSON format (machine-readable) + with open("capability_registry_v2.json", "w") as f: + json.dump(registry, f, indent=2) + + # Markdown format (documentation) + markdown_content = generate_markdown_doc(registry) + with open("CAPABILITY_REGISTRY_V2.md", "w") as f: + f.write(markdown_content) + + print("✅ Enhanced capability registry generated:") + print(" - capability_registry_v2.json (machine-readable)") + print(" - CAPABILITY_REGISTRY_V2.md (documentation)") + + +def generate_markdown_doc(registry: dict) -> str: + """Generate Markdown documentation from registry.""" + + md = f"""# Cloudflare Control Plane Capability Registry v2 + +Generated: {registry["metadata"]["generated_at"]} +Version: {registry["metadata"]["version"]} + +## MCP Servers + +""" + + for server_name, server_info in registry["mcp_servers"].items(): + md += f"### {server_name}\n" + md += f"**Module**: `{server_info['module']}` \n" + md += f"**Entrypoint**: `{server_info['entrypoint']}` \n" + md += f"**Purpose**: {server_info['purpose']} \n\n" + + md += 
"**Tools**:\n" + for tool in server_info["tools"]: + md += f"- {tool}\n" + + md += f"\n**Auth/Env**: {', '.join(server_info['auth_env'])}\n" + md += f"**Side Effects**: {server_info['side_effects']}\n" + md += f"**Outputs**: {', '.join(server_info['outputs'])}\n\n" + + md += "**Capabilities**:\n" + for cap in server_info["capabilities"]: + md += f"- {cap}\n" + md += "\n" + + md += "## Terraform Resources\n\n" + for resource_name, resource_info in registry["terraform_resources"].items(): + md += f"### {resource_name}\n" + md += f"**Files**: {', '.join(resource_info['files'])} \n\n" + + md += "**Capabilities**:\n" + for cap in resource_info["capabilities"]: + md += f"- {cap}\n" + md += "\n" + + md += "## GitOps Tools\n\n" + for tool_name, tool_info in registry["gitops_tools"].items(): + md += f"### {tool_name}\n" + md += f"**File**: {tool_info['file']} \n" + md += f"**Purpose**: {tool_info['purpose']} \n" + md += f"**Side Effects**: {tool_info['side_effects']} \n" + md += f"**Outputs**: {', '.join(tool_info['outputs'])} \n\n" + + md += "**Capabilities**:\n" + for cap in tool_info["capabilities"]: + md += f"- {cap}\n" + md += "\n" + + md += "## Security Framework\n\n" + for framework_name, framework_info in registry["security_framework"].items(): + md += f"### {framework_name}\n" + md += f"**Components**: {', '.join(framework_info['components'])} \n\n" + + md += "**Capabilities**:\n" + for cap in framework_info["capabilities"]: + md += f"- {cap}\n" + md += "\n" + + md += "**Classification Levels**:\n" + for level in framework_info["classification_levels"]: + md += f"- {level}\n" + md += "\n" + + md += "## Operational Tools\n\n" + for tool_category, tool_info in registry["operational_tools"].items(): + md += f"### {tool_category}\n" + if "services" in tool_info: + md += f"**Services**: {', '.join(tool_info['services'])} \n\n" + elif "suites" in tool_info: + md += f"**Test Suites**: {', '.join(tool_info['suites'])} \n\n" + + md += "**Capabilities**:\n" + for cap in tool_info["capabilities"]: + md += f"- {cap}\n" + md += "\n" + + return md + + +if __name__ == "__main__": + save_registry_formats() diff --git a/layer0/learn.py b/layer0/learn.py new file mode 100644 index 0000000..2600618 --- /dev/null +++ b/layer0/learn.py @@ -0,0 +1,392 @@ +from __future__ import annotations + +import argparse +import hashlib +import json +import os +import sqlite3 +import uuid +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Iterable + +from .pattern_store import ( + normalize_query_for_matching, + pattern_dict, + write_pattern_snapshot, +) + +THIS_FILE = Path(__file__).resolve() +LAYER0_DIR = THIS_FILE.parent +REPO_ROOT = LAYER0_DIR.parent.parent + + +def _utc_now_iso_z() -> str: + return ( + datetime.now(timezone.utc) + .replace(microsecond=0) + .isoformat() + .replace("+00:00", "Z") + ) + + +def _default_db_path() -> Path: + for key in ("LEDGER_DB_PATH", "VAULTMESH_LEDGER_DB"): + v = (os.environ.get(key) or "").strip() + if v: + return Path(v).expanduser().resolve() + return (REPO_ROOT / ".state" / "ledger.sqlite").resolve() + + +def _default_candidate_path() -> Path: + return (REPO_ROOT / ".state" / "layer0_patterns_candidate.json").resolve() + + +def _read_jsonl(paths: Iterable[Path]) -> list[dict[str, Any]]: + events: list[dict[str, Any]] = [] + for path in paths: + if not path.exists(): + continue + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line: + continue + try: + obj = 
json.loads(line) + except Exception: + continue + if isinstance(obj, dict): + events.append(obj) + return events + + +def _telemetry_actor(event: dict[str, Any]) -> str | None: + v = event.get("actor") or event.get("user") or event.get("account") + if isinstance(v, str) and v.strip(): + return v.strip() + meta = event.get("metadata") + if isinstance(meta, dict): + v2 = meta.get("actor") or meta.get("account") + if isinstance(v2, str) and v2.strip(): + return v2.strip() + return None + + +def _telemetry_trace_id(event: dict[str, Any]) -> str | None: + for k in ("trace_id", "layer0_trace_id", "trace", "id"): + v = event.get(k) + if isinstance(v, str) and v.strip(): + return v.strip() + return None + + +def _telemetry_ts(event: dict[str, Any]) -> str | None: + for k in ("timestamp", "ts", "time"): + v = event.get(k) + if isinstance(v, str) and v.strip(): + return v.strip() + return None + + +def _telemetry_query(event: dict[str, Any]) -> str: + v = event.get("query") or event.get("prompt") or event.get("input") + if isinstance(v, str): + return v + meta = event.get("metadata") + if isinstance(meta, dict) and isinstance(meta.get("query"), str): + return str(meta.get("query")) + return "" + + +def _outcome(event: dict[str, Any]) -> str | None: + v = event.get("outcome") or event.get("result") or event.get("status") + if isinstance(v, str) and v.strip(): + return v.strip() + return None + + +def _layer0_classification(event: dict[str, Any]) -> str | None: + v = event.get("layer0_classification") or event.get("classification") + if isinstance(v, str) and v.strip(): + return v.strip() + return None + + +def _infer_target_from_event( + event: dict[str, Any], *, include_relaxations: bool +) -> tuple[str, str] | None: + """ + Returns (mode, classification) or None. + + mode: + - "escalate": adds/strengthens detection immediately + - "relax": can reduce severity only after replay + explicit approval + """ + outcome = (_outcome(event) or "").lower() + l0 = (_layer0_classification(event) or "").lower() + + # Ground-truth blocked downstream: L0 should tighten. + if outcome in { + "blocked_by_guardrails", + "blocked_by_policy", + "blocked", + "denied", + } and l0 in {"blessed", "ambiguous"}: + return ("escalate", "forbidden") + + if ( + outcome in {"fail_closed", "catastrophic", "blocked_catastrophic"} + and l0 != "catastrophic" + ): + return ("escalate", "catastrophic") + + # Preboot logs (already blocked) can still be used to learn more specific signatures. + if not outcome and l0 in {"forbidden", "catastrophic"}: + return ("escalate", l0) + + # False positives: relax only after replay + approval. 
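+    # Illustrative mappings derived from the branches above and below:
+    #   outcome="blocked_by_guardrails", l0="blessed"   -> ("escalate", "forbidden")
+    #   outcome="fail_closed",           l0="forbidden" -> ("escalate", "catastrophic")
+    #   outcome="success",               l0="forbidden" -> ("relax", "blessed")  [opt-in only]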
+ if include_relaxations and outcome in {"success", "ok"} and l0 in {"forbidden"}: + return ("relax", "blessed") + + return None + + +def _default_risk_score(classification: str) -> int: + if classification == "catastrophic": + return 5 + if classification == "forbidden": + return 3 + if classification == "ambiguous": + return 1 + return 0 + + +@dataclass +class _Bucket: + traces: set[str] + actors: set[str] + last_seen: str | None + + +def _ensure_ledger_schema(conn: sqlite3.Connection) -> None: + conn.execute( + """ + CREATE TABLE IF NOT EXISTS migrations ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL UNIQUE, + applied_at TEXT NOT NULL DEFAULT (datetime('now')) + ); + """ + ) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS proof_artifacts ( + id TEXT PRIMARY KEY, + ts TEXT NOT NULL DEFAULT (datetime('now')), + kind TEXT NOT NULL, + path TEXT, + sha256_hex TEXT, + blake3_hex TEXT, + size_bytes INTEGER, + meta_json TEXT, + trace_id TEXT + ); + """ + ) + + +def _log_artifact( + *, + kind: str, + path: Path | None, + meta: dict[str, Any], + trace_id: str | None, + db_path: Path, +) -> str: + try: + from ledger.db import log_proof_artifact # type: ignore + + return log_proof_artifact( + kind=kind, + path=path, + meta=meta, + trace_id=trace_id, + db_path=db_path, + ) + except Exception: + pass + + artifact_id = str(uuid.uuid4()) + rel_path: str | None = None + sha256_hex: str | None = None + size_bytes: int | None = None + + if path is not None: + try: + rel_path = str(path.resolve().relative_to(REPO_ROOT)) + except Exception: + rel_path = str(path) + if path.exists() and path.is_file(): + data = path.read_bytes() + sha256_hex = hashlib.sha256(data).hexdigest() + size_bytes = len(data) + + db_path.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(str(db_path), isolation_level=None) + try: + _ensure_ledger_schema(conn) + conn.execute( + """ + INSERT INTO proof_artifacts ( + id, ts, kind, path, sha256_hex, blake3_hex, size_bytes, meta_json, trace_id + ) + VALUES (?, ?, ?, ?, ?, NULL, ?, ?, ?); + """, + ( + artifact_id, + _utc_now_iso_z(), + kind, + rel_path, + sha256_hex, + size_bytes, + json.dumps(meta, ensure_ascii=False, sort_keys=True), + trace_id, + ), + ) + finally: + conn.close() + + return artifact_id + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description="Layer0: build candidate patterns from telemetry." + ) + parser.add_argument( + "--telemetry-jsonl", + action="append", + default=[], + help="Path to telemetry JSONL (repeatable). 
Defaults include anomalies/preboot_shield.jsonl if present.", + ) + parser.add_argument("--min-support", type=int, default=3) + parser.add_argument("--min-actors", type=int, default=2) + parser.add_argument("--max-tokens", type=int, default=8) + parser.add_argument( + "--include-relaxations", + action="store_true", + help="Generate relaxation candidates (still requires replay + explicit promotion).", + ) + parser.add_argument("--out", type=str, default=str(_default_candidate_path())) + parser.add_argument("--db", type=str, default=None) + args = parser.parse_args(argv) + + paths: list[Path] = [] + for p in args.telemetry_jsonl: + if p: + paths.append(Path(p).expanduser()) + + default_preboot = REPO_ROOT / "anomalies" / "preboot_shield.jsonl" + if default_preboot.exists() and default_preboot not in paths: + paths.append(default_preboot) + + events = _read_jsonl(paths) + + buckets: dict[tuple[str, str, tuple[str, ...]], _Bucket] = {} + for ev in events: + inferred = _infer_target_from_event( + ev, include_relaxations=bool(args.include_relaxations) + ) + if not inferred: + continue + mode, target = inferred + + norm = normalize_query_for_matching(_telemetry_query(ev)) + tokens = norm.split() + if len(tokens) < 2: + continue + if args.max_tokens and len(tokens) > args.max_tokens: + tokens = tokens[: int(args.max_tokens)] + + key = (mode, target, tuple(tokens)) + b = buckets.get(key) + if b is None: + b = _Bucket(traces=set(), actors=set(), last_seen=None) + buckets[key] = b + + trace = _telemetry_trace_id(ev) + if trace: + b.traces.add(trace) + actor = _telemetry_actor(ev) + if actor: + b.actors.add(actor) + ts = _telemetry_ts(ev) + if ts and (b.last_seen is None or ts > b.last_seen): + b.last_seen = ts + + patterns: list[dict[str, Any]] = [] + for (mode, target, tokens), bucket in buckets.items(): + support = len(bucket.traces) if bucket.traces else 0 + actors = len(bucket.actors) + if support < int(args.min_support): + continue + if actors and actors < int(args.min_actors): + continue + + patterns.append( + pattern_dict( + tokens_all=tokens, + classification=target, + reason="telemetry_learned", + risk_score=_default_risk_score(target), + flags=["telemetry_learned"], + min_support=support, + last_seen=bucket.last_seen, + source={"support_traces": support, "support_actors": actors}, + mode=mode, + pattern_id=str(uuid.uuid4()), + ) + ) + + # Deterministic ordering: most severe, then most specific/support. 
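+    # e.g. any "catastrophic" candidate sorts ahead of every "forbidden" one; ties
+    # within a class break on specificity, then support, then most recent last_seen.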
+ severity_rank = { + "blessed": 0, + "ambiguous": 1, + "forbidden": 2, + "catastrophic": 3, + } + patterns.sort( + key=lambda p: ( + severity_rank.get(p["classification"], 0), + int(p.get("specificity_score") or 0), + int(p.get("min_support") or 0), + str(p.get("last_seen") or ""), + ), + reverse=True, + ) + + out_path = Path(args.out).expanduser().resolve() + write_pattern_snapshot(out_path, patterns) + + db_path = Path(args.db).expanduser().resolve() if args.db else _default_db_path() + artifact_id = _log_artifact( + kind="shadow_pattern_candidate", + path=out_path, + meta={ + "patterns": len(patterns), + "min_support": int(args.min_support), + "min_actors": int(args.min_actors), + "inputs": [str(p) for p in paths], + }, + trace_id=None, + db_path=db_path, + ) + + print(f"Wrote {len(patterns)} candidate patterns to {out_path}") + print(f"Logged artifact {artifact_id} to {db_path}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/layer0/pattern_store.py b/layer0/pattern_store.py new file mode 100644 index 0000000..cc1946b --- /dev/null +++ b/layer0/pattern_store.py @@ -0,0 +1,331 @@ +from __future__ import annotations + +import json +import os +import re +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Iterable, Sequence + +THIS_FILE = Path(__file__).resolve() +LAYER0_DIR = THIS_FILE.parent +REPO_ROOT = LAYER0_DIR.parent.parent + + +_RE_URL = re.compile(r"\bhttps?://\S+\b", re.IGNORECASE) +_RE_EMAIL = re.compile(r"\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b", re.IGNORECASE) +_RE_IPV4 = re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b") +_RE_IPV6 = re.compile(r"\b(?:[0-9a-f]{0,4}:){2,}[0-9a-f]{0,4}\b", re.IGNORECASE) +_RE_UUID = re.compile( + r"\b[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\b", + re.IGNORECASE, +) +_RE_HEX_LONG = re.compile(r"\b[0-9a-f]{32,}\b", re.IGNORECASE) +_RE_BASE64ISH = re.compile(r"\b[A-Za-z0-9+/]{28,}={0,2}\b") +_RE_PATHISH = re.compile(r"(?:(?:\.\.?/)|/)[A-Za-z0-9._~/-]{2,}") +_RE_NUMBER = re.compile(r"\b\d+\b") +_RE_TOKEN = re.compile(r"[a-z][a-z_-]{1,31}", re.IGNORECASE) + + +SAFE_VOCAB = { + # Governance / safety verbs + "disable", + "override", + "bypass", + "skip", + "ignore", + "evade", + "break", + "force", + "apply", + "deploy", + "destroy", + "delete", + "drop", + "remove", + "exfiltrate", + # Critical nouns / domains + "guardrails", + "permissions", + "governance", + "git", + "gitops", + "dashboard", + "manual", + "prod", + "production", + "staging", + "terraform", + "waf", + "dns", + "tunnel", + "access", + "token", + "secret", + "key", + "credential", + "admin", + "root", + # Phrases often seen in L0 rules (tokenized) + "self", + "modifying", + "directly", +} + + +def _utc_now_iso_z() -> str: + return ( + datetime.now(timezone.utc) + .replace(microsecond=0) + .isoformat() + .replace("+00:00", "Z") + ) + + +def normalize_query_for_matching(query: str) -> str: + """ + Produce a low-leakage normalized string suitable for storing and matching. + + Invariants: + - Never stores raw URLs, IPs, emails, long hex strings, base64ish blobs, UUIDs, or paths. + - Numbers are stripped to <NUM>. + - Only safe vocabulary tokens are preserved; other words are dropped. + """ + q = (query or "").lower().strip() + if not q: + return "" + + # Keep placeholders lowercase to make matching stable across sources. 
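A minimal sketch of the intended behaviour of `normalize_query_for_matching`, assuming the SAFE_VOCAB and placeholder regexes committed above and that `layer0` is importable from the repo root:

```python
from layer0.pattern_store import normalize_query_for_matching

raw = (
    "Please disable guardrails and apply directly to prod "
    "via https://dash.example.com now 42 times"
)

# The URL and the number collapse to placeholders; only safe-vocabulary tokens
# survive, de-duplicated and kept in their original order.
assert normalize_query_for_matching(raw) == "disable guardrails apply directly prod <url> <num>"
```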
+ q = _RE_URL.sub("<url>", q) + q = _RE_EMAIL.sub("<email>", q) + q = _RE_IPV4.sub("<ip>", q) + q = _RE_IPV6.sub("<ip>", q) + q = _RE_UUID.sub("<uuid>", q) + q = _RE_PATHISH.sub("<path>", q) + q = _RE_HEX_LONG.sub("<hex>", q) + q = _RE_BASE64ISH.sub("<b64>", q) + q = _RE_NUMBER.sub("<num>", q) + + # Tokenize; keep placeholders and a tight safe vocabulary. + tokens: list[str] = [] + for raw in re.split(r"[^a-z0-9_<>\-_/]+", q): + t = raw.strip() + if not t: + continue + if t.startswith("<") and t.endswith(">"): + tokens.append(t) + continue + if _RE_TOKEN.fullmatch(t) and t in SAFE_VOCAB: + tokens.append(t) + + # De-dupe while preserving order. + seen: set[str] = set() + out: list[str] = [] + for t in tokens: + if t in seen: + continue + seen.add(t) + out.append(t) + return " ".join(out) + + +def normalized_tokens(query: str) -> list[str]: + s = normalize_query_for_matching(query) + return s.split() if s else [] + + +@dataclass(frozen=True) +class LearnedPattern: + pattern_id: str + tokens_all: tuple[str, ...] + classification: str + reason: str | None + risk_score: int + flags: tuple[str, ...] + specificity_score: int + min_support: int + last_seen: str | None + source: dict[str, Any] | None + mode: str # "escalate" | "relax" + + def matches(self, normalized_query: str) -> bool: + if not normalized_query: + return False + hay = set(normalized_query.split()) + return all(t in hay for t in self.tokens_all) + + +def _default_active_path() -> Path: + configured = os.environ.get("LAYER0_ACTIVE_PATTERNS_PATH") + if configured: + return Path(configured).expanduser().resolve() + return (REPO_ROOT / ".state" / "layer0_patterns_active.json").resolve() + + +class PatternStore: + """ + Read-only active pattern snapshot. + + This is intentionally immutable during request handling; mutations happen in + offline jobs (learn/replay) that write a new snapshot and log an artifact. 
+ """ + + def __init__(self, active_path: Path | None = None): + self._active_path = active_path or _default_active_path() + self._active: list[LearnedPattern] = [] + self._loaded = False + + @property + def active_path(self) -> Path: + return self._active_path + + def load(self) -> None: + if self._loaded: + return + self._loaded = True + self._active = self._load_patterns_file(self._active_path) + + def patterns(self) -> list[LearnedPattern]: + self.load() + return list(self._active) + + def match_ordered(self, normalized_query: str) -> list[LearnedPattern]: + self.load() + matched = [p for p in self._active if p.matches(normalized_query)] + severity_rank = { + "blessed": 0, + "ambiguous": 1, + "forbidden": 2, + "catastrophic": 3, + } + matched.sort( + key=lambda p: ( + severity_rank.get(p.classification, 0), + p.specificity_score, + p.min_support, + p.last_seen or "", + ), + reverse=True, + ) + return matched + + @staticmethod + def _load_patterns_file(path: Path) -> list[LearnedPattern]: + if not path.exists(): + return [] + data = json.loads(path.read_text(encoding="utf-8")) + items = data.get("patterns") if isinstance(data, dict) else data + if not isinstance(items, list): + return [] + + patterns: list[LearnedPattern] = [] + for item in items: + if not isinstance(item, dict): + continue + tokens = item.get("tokens_all") or item.get("tokens") or [] + if not isinstance(tokens, list) or not tokens: + continue + tokens_norm = tuple( + t.lower() if isinstance(t, str) else "" + for t in tokens + if isinstance(t, str) + and t + and (t.startswith("<") or t.lower() in SAFE_VOCAB) + ) + if not tokens_norm: + continue + + classification = item.get("classification") + if classification not in { + "blessed", + "ambiguous", + "forbidden", + "catastrophic", + }: + continue + + flags = item.get("flags") or [] + if not isinstance(flags, list): + flags = [] + + mode = item.get("mode") or "escalate" + if mode not in {"escalate", "relax"}: + mode = "escalate" + + min_support = int(item.get("min_support") or item.get("support") or 0) + specificity = int(item.get("specificity_score") or len(tokens_norm)) + risk_score = int(item.get("risk_score") or 0) + + patterns.append( + LearnedPattern( + pattern_id=str(item.get("pattern_id") or item.get("id") or ""), + tokens_all=tokens_norm, + classification=classification, + reason=item.get("reason"), + risk_score=risk_score, + flags=tuple(str(f) for f in flags if isinstance(f, str)), + specificity_score=specificity, + min_support=min_support, + last_seen=item.get("last_seen"), + source=item.get("source") + if isinstance(item.get("source"), dict) + else None, + mode=mode, + ) + ) + + severity_rank = { + "blessed": 0, + "ambiguous": 1, + "forbidden": 2, + "catastrophic": 3, + } + patterns.sort( + key=lambda p: ( + severity_rank.get(p.classification, 0), + p.specificity_score, + p.min_support, + p.last_seen or "", + ), + reverse=True, + ) + return patterns + + +def pattern_dict( + *, + tokens_all: Sequence[str], + classification: str, + reason: str | None, + risk_score: int, + flags: Sequence[str], + min_support: int, + last_seen: str | None = None, + source: dict[str, Any] | None = None, + mode: str = "escalate", + pattern_id: str | None = None, +) -> dict[str, Any]: + tokens = [t for t in tokens_all if isinstance(t, str) and t] + return { + "pattern_id": pattern_id or "", + "tokens_all": tokens, + "classification": classification, + "reason": reason, + "risk_score": int(risk_score), + "flags": list(flags), + "specificity_score": int(len(tokens)), + "min_support": 
int(min_support), + "last_seen": last_seen or _utc_now_iso_z(), + "source": source or {}, + "mode": mode, + } + + +def write_pattern_snapshot(path: Path, patterns: Iterable[dict[str, Any]]) -> None: + path.parent.mkdir(parents=True, exist_ok=True) + payload = {"generated_at": _utc_now_iso_z(), "patterns": list(patterns)} + path.write_text( + json.dumps(payload, ensure_ascii=False, sort_keys=True, indent=2) + "\n", + encoding="utf-8", + ) diff --git a/layer0/preboot_logger.py b/layer0/preboot_logger.py index d1d6990..801e4f4 100644 --- a/layer0/preboot_logger.py +++ b/layer0/preboot_logger.py @@ -1,22 +1,134 @@ import datetime +import hashlib import json import os +import re +import sqlite3 from typing import Optional -from .shadow_classifier import ShadowEvalResult, Classification +from .pattern_store import normalize_query_for_matching +from .shadow_classifier import Classification, ShadowEvalResult class PrebootLogger: LOG_PATH = "anomalies/preboot_shield.jsonl" + @staticmethod + def _ledger_db_path() -> str | None: + return os.getenv("VAULTMESH_LEDGER_DB") or os.getenv("LEDGER_DB_PATH") + + @staticmethod + def _normalize_for_shadow_receipt(query: str) -> str: + """ + Poison-resistant normalizer for ShadowReceipt emission. + + Goals: + - Normalize casing/whitespace + - Replace common secret/identifier carriers with placeholders + - Keep output stable and compact + """ + s = (query or "").lower().strip() + s = re.sub(r"\s+", " ", s) + s = re.sub(r"\bhttps?://\S+\b", "<URL>", s) + s = re.sub(r"\b\d{1,3}(?:\.\d{1,3}){3}\b", "<IP>", s) + s = re.sub( + r"\b[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}\b", + "<HEX>", + s, + flags=re.IGNORECASE, + ) + s = re.sub(r"(?:(?:\.\.?/)|/|~\/)[A-Za-z0-9._~/-]{2,}", "<PATH>", s) + s = re.sub(r"\b[0-9a-f]{16,}\b", "<HEX>", s, flags=re.IGNORECASE) + s = re.sub(r"\b\d+\b", "<N>", s) + return s.strip() + + @staticmethod + def _sha256_hex(text: str) -> str: + return hashlib.sha256(text.encode("utf-8", errors="ignore")).hexdigest() + + @staticmethod + def _try_emit_shadow_receipt( + *, + query: str, + classification: str, + reason: str | None, + flags: list[str], + trace_id: str | None, + ) -> None: + """ + Best-effort ShadowReceipt emission into the local-first SQLite ledger. 
+ + Hard constraints: + - No dependency on vaultmesh-orgine-mobile code + - Fail silently on any error (Layer 0 must never crash) + """ + db_path = PrebootLogger._ledger_db_path() + if not db_path: + return + + try: + norm = PrebootLogger._normalize_for_shadow_receipt(query) + cf_hash = PrebootLogger._sha256_hex(norm) + + placeholders: list[str] = [] + for p in ("<URL>", "<IP>", "<PATH>", "<HEX>", "<N>"): + if p in norm: + placeholders.append(p) + + meta = { + "ts_utc": datetime.datetime.now(datetime.timezone.utc) + .replace(microsecond=0) + .isoformat() + .replace("+00:00", "Z"), + "classification": classification, + "reason": reason, + "flags": (flags or [])[:64], + "normalized_query_features": { + "placeholders": placeholders, + "length": len(norm), + }, + } + + conn = sqlite3.connect(db_path, timeout=0.25) + try: + conn.execute("PRAGMA foreign_keys=ON;") + conn.execute( + """ + INSERT INTO shadow_receipts ( + id, horizon_id, counterfactual_hash, entropy_delta, + reason_unrealized, observer_signature, trace_id, meta_json + ) + VALUES (?, ?, ?, NULL, ?, NULL, ?, ?); + """, + ( + PrebootLogger._sha256_hex( + meta["ts_utc"] + "|" + (trace_id or "") + "|" + cf_hash + ), + "layer0_block", + cf_hash, + "layer0_block", + trace_id, + json.dumps(meta, separators=(",", ":"), ensure_ascii=False), + ), + ) + conn.commit() + finally: + conn.close() + except Exception: + return + @staticmethod def log(event: ShadowEvalResult, query: str, reason_override: Optional[str] = None): - if event.classification not in (Classification.CATASTROPHIC, Classification.FORBIDDEN): + if event.classification not in ( + Classification.CATASTROPHIC, + Classification.FORBIDDEN, + ): return # Only violations get logged record = { "timestamp": datetime.datetime.utcnow().isoformat() + "Z", - "query": query, + # Store a normalized, low-leakage representation (never raw strings). 
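The INSERT above assumes a `shadow_receipts` table already exists in the ledger; the emitter never creates it and fails silently when it is absent. A hypothetical column layout compatible with that statement, for illustration only (the real ledger migration may add constraints, indexes, or extra columns):

```python
import sqlite3

# Hypothetical shape only, matching the column list used by the INSERT above.
def ensure_shadow_receipts(conn: sqlite3.Connection) -> None:
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS shadow_receipts (
            id TEXT PRIMARY KEY,
            horizon_id TEXT NOT NULL,
            counterfactual_hash TEXT NOT NULL,
            entropy_delta REAL,
            reason_unrealized TEXT,
            observer_signature TEXT,
            trace_id TEXT,
            meta_json TEXT
        );
        """
    )
```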
+ "query": normalize_query_for_matching(query), "classification": event.classification.value, "reason": reason_override or event.reason, "trace_id": event.trace_id, @@ -31,3 +143,11 @@ class PrebootLogger: with open(PrebootLogger.LOG_PATH, "a", encoding="utf-8") as f: f.write(json.dumps(record) + "\n") + + PrebootLogger._try_emit_shadow_receipt( + query=query, + classification=event.classification.value, + reason=reason_override or event.reason, + flags=event.flags, + trace_id=event.trace_id, + ) diff --git a/layer0/replay.py b/layer0/replay.py new file mode 100644 index 0000000..6f71d6c --- /dev/null +++ b/layer0/replay.py @@ -0,0 +1,443 @@ +from __future__ import annotations + +import argparse +import hashlib +import json +import os +import sqlite3 +import uuid +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Iterable + +from .pattern_store import PatternStore, write_pattern_snapshot +from .shadow_classifier import Classification, ShadowClassifier + +THIS_FILE = Path(__file__).resolve() +LAYER0_DIR = THIS_FILE.parent +REPO_ROOT = LAYER0_DIR.parent.parent + + +def _utc_now_iso_z() -> str: + return ( + datetime.now(timezone.utc) + .replace(microsecond=0) + .isoformat() + .replace("+00:00", "Z") + ) + + +def _default_db_path() -> Path: + for key in ("LEDGER_DB_PATH", "VAULTMESH_LEDGER_DB"): + v = (os.environ.get(key) or "").strip() + if v: + return Path(v).expanduser().resolve() + return (REPO_ROOT / ".state" / "ledger.sqlite").resolve() + + +def _read_jsonl(paths: Iterable[Path], *, limit: int | None) -> list[dict[str, Any]]: + rows: list[dict[str, Any]] = [] + for path in paths: + if not path.exists(): + continue + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.strip() + if not line: + continue + try: + obj = json.loads(line) + except Exception: + continue + if isinstance(obj, dict): + rows.append(obj) + + if limit is not None and limit > 0 and len(rows) > limit: + return rows[-limit:] + return rows + + +def _telemetry_query(event: dict[str, Any]) -> str: + v = event.get("query") or event.get("prompt") or event.get("input") + return v if isinstance(v, str) else "" + + +def _outcome(event: dict[str, Any]) -> str | None: + v = event.get("outcome") or event.get("result") or event.get("status") + if isinstance(v, str) and v.strip(): + return v.strip() + return None + + +def _ground_truth(event: dict[str, Any]) -> Classification | None: + outcome = (_outcome(event) or "").lower() + if outcome in {"success", "ok"}: + return Classification.BLESSED + if outcome in {"blocked_by_guardrails", "blocked_by_policy", "blocked", "denied"}: + return Classification.FORBIDDEN + if outcome in {"fail_closed", "catastrophic", "blocked_catastrophic"}: + return Classification.CATASTROPHIC + return None + + +def _ensure_ledger_schema(conn: sqlite3.Connection) -> None: + conn.execute( + """ + CREATE TABLE IF NOT EXISTS migrations ( + id INTEGER PRIMARY KEY, + name TEXT NOT NULL UNIQUE, + applied_at TEXT NOT NULL DEFAULT (datetime('now')) + ); + """ + ) + conn.execute( + """ + CREATE TABLE IF NOT EXISTS proof_artifacts ( + id TEXT PRIMARY KEY, + ts TEXT NOT NULL DEFAULT (datetime('now')), + kind TEXT NOT NULL, + path TEXT, + sha256_hex TEXT, + blake3_hex TEXT, + size_bytes INTEGER, + meta_json TEXT, + trace_id TEXT + ); + """ + ) + + +def _log_artifact( + *, + kind: str, + path: Path | None, + meta: dict[str, Any], + trace_id: str | None, + db_path: Path, +) -> str: + try: + from ledger.db import 
log_proof_artifact # type: ignore + + return log_proof_artifact( + kind=kind, + path=path, + meta=meta, + trace_id=trace_id, + db_path=db_path, + ) + except Exception: + pass + + artifact_id = str(uuid.uuid4()) + rel_path: str | None = None + sha256_hex: str | None = None + size_bytes: int | None = None + + if path is not None: + try: + rel_path = str(path.resolve().relative_to(REPO_ROOT)) + except Exception: + rel_path = str(path) + if path.exists() and path.is_file(): + data = path.read_bytes() + sha256_hex = hashlib.sha256(data).hexdigest() + size_bytes = len(data) + + db_path.parent.mkdir(parents=True, exist_ok=True) + conn = sqlite3.connect(str(db_path), isolation_level=None) + try: + _ensure_ledger_schema(conn) + conn.execute( + """ + INSERT INTO proof_artifacts ( + id, ts, kind, path, sha256_hex, blake3_hex, size_bytes, meta_json, trace_id + ) + VALUES (?, ?, ?, ?, ?, NULL, ?, ?, ?); + """, + ( + artifact_id, + _utc_now_iso_z(), + kind, + rel_path, + sha256_hex, + size_bytes, + json.dumps(meta, ensure_ascii=False, sort_keys=True), + trace_id, + ), + ) + finally: + conn.close() + + return artifact_id + + +def _load_patterns_file(path: Path) -> list[dict[str, Any]]: + if not path.exists(): + return [] + data = json.loads(path.read_text(encoding="utf-8")) + items = data.get("patterns") if isinstance(data, dict) else data + return items if isinstance(items, list) else [] + + +def _merge_patterns( + active: list[dict[str, Any]], extra: list[dict[str, Any]] +) -> list[dict[str, Any]]: + """ + Candidate patterns win on identical (mode, tokens_all, classification). + """ + + def key(p: dict[str, Any]) -> tuple[str, tuple[str, ...], str]: + mode = str(p.get("mode") or "escalate") + cls = str(p.get("classification") or "") + tokens = p.get("tokens_all") or p.get("tokens") or [] + if not isinstance(tokens, list): + tokens = [] + return (mode, tuple(str(t).lower() for t in tokens), cls) + + merged: dict[tuple[str, tuple[str, ...], str], dict[str, Any]] = {} + for p in active: + if isinstance(p, dict): + merged[key(p)] = p + for p in extra: + if isinstance(p, dict): + merged[key(p)] = p + return list(merged.values()) + + +@dataclass +class ReplayMetrics: + total: int + baseline_false_pos: int + baseline_false_neg: int + candidate_false_pos: int + candidate_false_neg: int + catastrophic_boundary_unchanged: bool + + +def _is_false_positive(pred: Classification, truth: Classification) -> bool: + return truth == Classification.BLESSED and pred in { + Classification.FORBIDDEN, + Classification.CATASTROPHIC, + } + + +def _is_false_negative(pred: Classification, truth: Classification) -> bool: + return truth in { + Classification.FORBIDDEN, + Classification.CATASTROPHIC, + } and pred in { + Classification.BLESSED, + Classification.AMBIGUOUS, + } + + +def _compute_metrics( + events: list[dict[str, Any]], + baseline: ShadowClassifier, + candidate: ShadowClassifier, +) -> ReplayMetrics: + total = 0 + b_fp = b_fn = 0 + c_fp = c_fn = 0 + catastrophic_ok = True + + for ev in events: + truth = _ground_truth(ev) + if truth is None: + continue + q = _telemetry_query(ev) + total += 1 + + b = baseline.classify(q).classification + c = candidate.classify(q).classification + + if _is_false_positive(b, truth): + b_fp += 1 + if _is_false_negative(b, truth): + b_fn += 1 + if _is_false_positive(c, truth): + c_fp += 1 + if _is_false_negative(c, truth): + c_fn += 1 + + if b == Classification.CATASTROPHIC and c != Classification.CATASTROPHIC: + catastrophic_ok = False + + return ReplayMetrics( + total=total, + 
baseline_false_pos=b_fp, + baseline_false_neg=b_fn, + candidate_false_pos=c_fp, + candidate_false_neg=c_fn, + catastrophic_boundary_unchanged=catastrophic_ok, + ) + + +def main(argv: list[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description="Layer0: replay candidate patterns against recent telemetry." + ) + parser.add_argument( + "--candidate", + required=True, + help="Candidate snapshot JSON (from layer0.learn).", + ) + parser.add_argument( + "--telemetry-jsonl", + action="append", + default=[], + help="Path to telemetry JSONL (repeatable). Must include outcome=success|blocked_by_guardrails|... for scoring.", + ) + parser.add_argument("--limit", type=int, default=2000) + parser.add_argument( + "--active", + type=str, + default=None, + help="Active patterns snapshot (defaults to .state).", + ) + parser.add_argument("--db", type=str, default=None) + parser.add_argument("--report-out", type=str, default=None) + parser.add_argument( + "--promote", + action="store_true", + help="If replay passes, write active snapshot update.", + ) + parser.add_argument( + "--allow-relaxations", + action="store_true", + help="Allow promotion of relaxation-mode patterns (requires replay pass).", + ) + parser.add_argument("--max-fp-increase", type=int, default=0) + args = parser.parse_args(argv) + + telemetry_paths = [Path(p).expanduser() for p in args.telemetry_jsonl if p] + if not telemetry_paths: + default_preboot = REPO_ROOT / "anomalies" / "preboot_shield.jsonl" + if default_preboot.exists(): + telemetry_paths = [default_preboot] + + events = _read_jsonl(telemetry_paths, limit=int(args.limit)) + + active_path = ( + Path(args.active).expanduser().resolve() + if args.active + else PatternStore().active_path + ) + active_patterns = _load_patterns_file(active_path) + candidate_path = Path(args.candidate).expanduser().resolve() + candidate_patterns_all = _load_patterns_file(candidate_path) + candidate_patterns = [ + p + for p in candidate_patterns_all + if isinstance(p, dict) + and (args.allow_relaxations or str(p.get("mode") or "escalate") != "relax") + ] + + baseline_classifier = ShadowClassifier( + pattern_store=PatternStore(active_path=active_path) + ) + + merged = _merge_patterns(active_patterns, candidate_patterns) + merged_path = ( + REPO_ROOT / ".state" / "layer0_patterns_merged_replay.json" + ).resolve() + write_pattern_snapshot(merged_path, merged) + candidate_classifier = ShadowClassifier( + pattern_store=PatternStore(active_path=merged_path) + ) + + metrics = _compute_metrics(events, baseline_classifier, candidate_classifier) + + passes = ( + metrics.catastrophic_boundary_unchanged + and metrics.candidate_false_pos + <= metrics.baseline_false_pos + int(args.max_fp_increase) + and metrics.candidate_false_neg <= metrics.baseline_false_neg + ) + + report = { + "generated_at": _utc_now_iso_z(), + "telemetry_inputs": [str(p) for p in telemetry_paths], + "candidate_snapshot": str(candidate_path), + "active_snapshot": str(active_path), + "merged_snapshot": str(merged_path), + "allow_relaxations": bool(args.allow_relaxations), + "max_fp_increase": int(args.max_fp_increase), + "metrics": { + "total_scored": metrics.total, + "baseline_false_positives": metrics.baseline_false_pos, + "baseline_false_negatives": metrics.baseline_false_neg, + "candidate_false_positives": metrics.candidate_false_pos, + "candidate_false_negatives": metrics.candidate_false_neg, + "catastrophic_boundary_unchanged": metrics.catastrophic_boundary_unchanged, + }, + "passes": passes, + "promotion": { + 
"requested": bool(args.promote), + "performed": False, + "active_written_to": str(active_path), + "patterns_added": len(candidate_patterns), + }, + } + + report_out = ( + Path(args.report_out).expanduser().resolve() + if args.report_out + else (REPO_ROOT / ".state" / "layer0_shadow_replay_report.json").resolve() + ) + report_out.parent.mkdir(parents=True, exist_ok=True) + report_out.write_text( + json.dumps(report, ensure_ascii=False, sort_keys=True, indent=2) + "\n", + encoding="utf-8", + ) + + db_path = Path(args.db).expanduser().resolve() if args.db else _default_db_path() + report_artifact_id = _log_artifact( + kind="shadow_replay_report", + path=report_out, + meta={ + "passes": passes, + "total_scored": metrics.total, + "baseline_fp": metrics.baseline_false_pos, + "baseline_fn": metrics.baseline_false_neg, + "candidate_fp": metrics.candidate_false_pos, + "candidate_fn": metrics.candidate_false_neg, + }, + trace_id=None, + db_path=db_path, + ) + + if args.promote and passes: + # Promotion = merged active snapshot (existing + candidates), written atomically. + tmp_path = active_path.with_suffix(active_path.suffix + ".tmp") + write_pattern_snapshot(tmp_path, merged) + tmp_path.replace(active_path) + promo_artifact_id = _log_artifact( + kind="shadow_pattern_promotion", + path=active_path, + meta={ + "added": len(candidate_patterns), + "source_candidate": str(candidate_path), + "merged_snapshot": str(merged_path), + }, + trace_id=None, + db_path=db_path, + ) + report["promotion"]["performed"] = True + report["promotion"]["artifact_id"] = promo_artifact_id + report_out.write_text( + json.dumps(report, ensure_ascii=False, sort_keys=True, indent=2) + "\n", + encoding="utf-8", + ) + + print(f"Replay report: {report_out} (passes={passes})") + print(f"Logged artifact {report_artifact_id} to {db_path}") + if args.promote: + print( + f"Promotion {'performed' if (args.promote and passes) else 'skipped'}; active={active_path}" + ) + return 0 if passes else 2 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/layer0/security_classifier.py b/layer0/security_classifier.py new file mode 100644 index 0000000..b9f7e4e --- /dev/null +++ b/layer0/security_classifier.py @@ -0,0 +1,376 @@ +#!/usr/bin/env python3 +""" +Enhanced Security Classification Framework for Layer0 +Provides advanced classification capabilities for Cloudflare infrastructure operations +""" + +from enum import Enum +from typing import Dict, List, Optional, Any +from dataclasses import dataclass +import re + + +class SecurityLevel(str, Enum): + """Security classification levels""" + + LOW_RISK = "low_risk" + MEDIUM_RISK = "medium_risk" + HIGH_RISK = "high_risk" + CRITICAL_RISK = "critical_risk" + + +class OperationType(str, Enum): + """Types of infrastructure operations""" + + READ_ONLY = "read_only" + CONFIGURATION_CHANGE = "configuration_change" + INFRASTRUCTURE_MODIFICATION = "infrastructure_modification" + SECURITY_MODIFICATION = "security_modification" + ACCESS_CONTROL_CHANGE = "access_control_change" + + +class ResourceType(str, Enum): + """Types of Cloudflare resources""" + + DNS_RECORD = "dns_record" + WAF_RULE = "waf_rule" + ACCESS_RULE = "access_rule" + TUNNEL = "tunnel" + ZONE_SETTINGS = "zone_settings" + ACCOUNT_SETTINGS = "account_settings" + + +@dataclass +class SecurityClassification: + """Result of security classification""" + + level: SecurityLevel + operation_type: OperationType + resource_type: ResourceType + confidence: float # 0.0 to 1.0 + flags: List[str] + rationale: str + requires_approval: 
bool + approval_threshold: Optional[str] = None + + +class SecurityClassifier: + """ + Advanced security classifier for Cloudflare infrastructure operations + Provides multi-dimensional risk assessment and classification + """ + + def __init__(self): + # Pattern definitions for different risk levels + self.critical_patterns = [ + r"delete.*all", + r"destroy.*infrastructure", + r"disable.*waf", + r"remove.*firewall", + r"bypass.*security", + r"expose.*credentials", + r"terraform.*destroy", + r"drop.*database", + ] + + self.high_risk_patterns = [ + r"modify.*dns", + r"change.*tunnel", + r"update.*waf", + r"create.*rule", + r"modify.*access", + r"terraform.*apply", + ] + + self.medium_risk_patterns = [ + r"create.*record", + r"update.*settings", + r"configure.*zone", + r"modify.*page", + r"change.*cache", + ] + + self.low_risk_patterns = [ + r"list.*records", + r"get.*status", + r"show.*config", + r"read.*logs", + r"monitor.*health", + ] + + # Operation type patterns + self.operation_patterns = { + OperationType.READ_ONLY: [ + r"list", + r"get", + r"show", + r"read", + r"monitor", + r"status", + ], + OperationType.CONFIGURATION_CHANGE: [ + r"configure", + r"update.*settings", + r"change.*config", + ], + OperationType.INFRASTRUCTURE_MODIFICATION: [ + r"create", + r"modify", + r"update", + r"delete", + r"destroy", + ], + OperationType.SECURITY_MODIFICATION: [ + r"waf", + r"firewall", + r"security", + r"block", + r"allow", + ], + OperationType.ACCESS_CONTROL_CHANGE: [ + r"access", + r"permission", + r"role", + r"policy", + ], + } + + # Resource type patterns + self.resource_patterns = { + ResourceType.DNS_RECORD: [r"dns", r"record", r"domain", r"zone"], + ResourceType.WAF_RULE: [r"waf", r"firewall", r"rule", r"security"], + ResourceType.ACCESS_RULE: [r"access", r"policy", r"permission"], + ResourceType.TUNNEL: [r"tunnel", r"connector", r"proxy"], + ResourceType.ZONE_SETTINGS: [r"zone.*settings", r"domain.*config"], + ResourceType.ACCOUNT_SETTINGS: [r"account.*settings", r"billing"], + } + + def classify_operation( + self, operation_description: str, context: Optional[Dict[str, Any]] = None + ) -> SecurityClassification: + """ + Classify an infrastructure operation based on description and context + """ + description_lower = operation_description.lower() + + # Determine security level + security_level = self._determine_security_level(description_lower) + + # Determine operation type + operation_type = self._determine_operation_type(description_lower) + + # Determine resource type + resource_type = self._determine_resource_type(description_lower) + + # Calculate confidence + confidence = self._calculate_confidence(description_lower, security_level) + + # Generate flags + flags = self._generate_flags(description_lower, security_level, context) + + # Generate rationale + rationale = self._generate_rationale( + security_level, operation_type, resource_type + ) + + # Determine if approval is required + requires_approval = security_level in [ + SecurityLevel.HIGH_RISK, + SecurityLevel.CRITICAL_RISK, + ] + approval_threshold = self._determine_approval_threshold(security_level) + + return SecurityClassification( + level=security_level, + operation_type=operation_type, + resource_type=resource_type, + confidence=confidence, + flags=flags, + rationale=rationale, + requires_approval=requires_approval, + approval_threshold=approval_threshold, + ) + + def _determine_security_level(self, description: str) -> SecurityLevel: + """Determine the security risk level""" + for pattern in self.critical_patterns: + if 
re.search(pattern, description): + return SecurityLevel.CRITICAL_RISK + + for pattern in self.high_risk_patterns: + if re.search(pattern, description): + return SecurityLevel.HIGH_RISK + + for pattern in self.medium_risk_patterns: + if re.search(pattern, description): + return SecurityLevel.MEDIUM_RISK + + for pattern in self.low_risk_patterns: + if re.search(pattern, description): + return SecurityLevel.LOW_RISK + + # Default to medium risk for unknown operations + return SecurityLevel.MEDIUM_RISK + + def _determine_operation_type(self, description: str) -> OperationType: + """Determine the type of operation""" + for op_type, patterns in self.operation_patterns.items(): + for pattern in patterns: + if re.search(pattern, description): + return op_type + + # Default to infrastructure modification for safety + return OperationType.INFRASTRUCTURE_MODIFICATION + + def _determine_resource_type(self, description: str) -> ResourceType: + """Determine the type of resource being operated on""" + for resource_type, patterns in self.resource_patterns.items(): + for pattern in patterns: + if re.search(pattern, description): + return resource_type + + # Default to DNS records (most common) + return ResourceType.DNS_RECORD + + def _calculate_confidence( + self, description: str, security_level: SecurityLevel + ) -> float: + """Calculate confidence score for classification""" + base_confidence = 0.7 + + # Increase confidence for longer, more specific descriptions + word_count = len(description.split()) + if word_count > 10: + base_confidence += 0.2 + elif word_count > 5: + base_confidence += 0.1 + + # Adjust based on security level + if security_level == SecurityLevel.CRITICAL_RISK: + base_confidence += 0.1 # Critical patterns are usually clear + + return min(1.0, base_confidence) + + def _generate_flags( + self, + description: str, + security_level: SecurityLevel, + context: Optional[Dict[str, Any]], + ) -> List[str]: + """Generate security flags for the operation""" + flags = [] + + # Basic flags based on security level + if security_level == SecurityLevel.CRITICAL_RISK: + flags.extend( + ["critical_risk", "requires_emergency_approval", "multi_factor_auth"] + ) + elif security_level == SecurityLevel.HIGH_RISK: + flags.extend(["high_risk", "requires_senior_approval", "audit_trail"]) + elif security_level == SecurityLevel.MEDIUM_RISK: + flags.extend(["medium_risk", "requires_standard_approval"]) + else: + flags.extend(["low_risk", "auto_approved"]) + + # Context-based flags + if context: + environment = context.get("environment", "") + if environment.lower() in ["prod", "production"]: + flags.append("production_environment") + + user_role = context.get("user_role", "") + if user_role.lower() in ["admin", "root"]: + flags.append("privileged_user") + + # Pattern-based flags + if re.search(r"delete|destroy|remove", description): + flags.append("destructive_operation") + + if re.search(r"waf|firewall|security", description): + flags.append("security_related") + + if re.search(r"dns|domain|zone", description): + flags.append("dns_related") + + return flags + + def _generate_rationale( + self, + security_level: SecurityLevel, + operation_type: OperationType, + resource_type: ResourceType, + ) -> str: + """Generate rationale for the classification""" + rationales = { + SecurityLevel.CRITICAL_RISK: "Critical risk operation involving infrastructure destruction or security bypass", + SecurityLevel.HIGH_RISK: "High risk operation modifying core infrastructure or security settings", + SecurityLevel.MEDIUM_RISK: 
"Medium risk operation involving configuration changes", + SecurityLevel.LOW_RISK: "Low risk read-only operation", + } + + base_rationale = rationales.get( + security_level, "Standard infrastructure operation" + ) + + # Add operation-specific details + if operation_type == OperationType.INFRASTRUCTURE_MODIFICATION: + base_rationale += " with infrastructure modification capabilities" + elif operation_type == OperationType.SECURITY_MODIFICATION: + base_rationale += " affecting security controls" + + # Add resource-specific details + if resource_type == ResourceType.DNS_RECORD: + base_rationale += " on DNS infrastructure" + elif resource_type == ResourceType.WAF_RULE: + base_rationale += " on WAF security rules" + + return base_rationale + + def _determine_approval_threshold( + self, security_level: SecurityLevel + ) -> Optional[str]: + """Determine the approval threshold required""" + thresholds = { + SecurityLevel.CRITICAL_RISK: "Emergency Change Advisory Board (ECAB)", + SecurityLevel.HIGH_RISK: "Senior Infrastructure Engineer", + SecurityLevel.MEDIUM_RISK: "Team Lead", + SecurityLevel.LOW_RISK: None, + } + return thresholds.get(security_level) + + +# Example usage and testing +def main(): + """Example usage of the security classifier""" + classifier = SecurityClassifier() + + # Test cases + test_cases = [ + "Delete all DNS records for domain example.com", + "Update WAF rule to allow traffic from China", + "Create new DNS record for subdomain", + "List all current tunnels and their status", + "Modify zone settings to enable development mode", + "Destroy all terraform infrastructure", + ] + + print("🔐 Security Classification Framework Test") + print("=" * 60) + + for test_case in test_cases: + classification = classifier.classify_operation(test_case) + + print(f"\nOperation: {test_case}") + print(f"Security Level: {classification.level.value}") + print(f"Operation Type: {classification.operation_type.value}") + print(f"Resource Type: {classification.resource_type.value}") + print(f"Confidence: {classification.confidence:.2f}") + print(f"Requires Approval: {classification.requires_approval}") + if classification.approval_threshold: + print(f"Approval Threshold: {classification.approval_threshold}") + print(f"Flags: {', '.join(classification.flags)}") + print(f"Rationale: {classification.rationale}") + + +if __name__ == "__main__": + main() diff --git a/layer0/shadow_classifier.py b/layer0/shadow_classifier.py index 718077b..5e20964 100644 --- a/layer0/shadow_classifier.py +++ b/layer0/shadow_classifier.py @@ -1,6 +1,8 @@ -from enum import Enum -from typing import Optional, List import uuid +from enum import Enum +from typing import Any, List, Mapping, Optional + +from .pattern_store import PatternStore, normalize_query_for_matching class Classification(str, Enum): @@ -39,55 +41,136 @@ class ShadowClassifier: Minimal doctrinal classifier for Layer 0 (Shadow Eval). """ - def classify(self, query: str) -> ShadowEvalResult: + def __init__(self, pattern_store: PatternStore | None = None): + self._patterns = pattern_store or PatternStore() + + def classify( + self, query: str, *, context: Mapping[str, Any] | None = None + ) -> ShadowEvalResult: """Return a doctrinal classification for the incoming query.""" - q = query.lower().strip() + q = (query or "").lower().strip() + q_norm = normalize_query_for_matching(query or "") - # 1. 
Catastrophic (fail closed) - if any(x in q for x in [ - "disable guardrails", - "override agent permissions", - "bypass governance", - "self-modifying", - ]): - return ShadowEvalResult( - classification=Classification.CATASTROPHIC, - reason="catastrophic_indicator", - risk_score=5, - flags=["permission_override", "guardrail_disable"], + # 0. Catastrophic boundary (fail closed): never relaxed at runtime. + if any( + x in q + for x in [ + "disable guardrails", + "override agent permissions", + "bypass governance", + "self-modifying", + ] + ): + return self._apply_context( + ShadowEvalResult( + classification=Classification.CATASTROPHIC, + reason="catastrophic_indicator", + risk_score=5, + flags=["permission_override", "guardrail_disable"], + ), + context, ) - # 2. Forbidden (governance violation) - if any(x in q for x in [ - "skip git", - "apply directly", - "dashboard", - "manual change", - ]): - return ShadowEvalResult( - classification=Classification.FORBIDDEN, - reason="governance_violation", - risk_score=3, - flags=["gitops_bypass"], + # 1. Learned patterns (highest specificity/support first) + learned = self._patterns.match_ordered(q_norm) + if learned: + p = learned[0] + return self._apply_context( + ShadowEvalResult( + classification=Classification(p.classification), + reason=p.reason or "telemetry_learned", + risk_score=int(p.risk_score), + flags=list(p.flags) + ["telemetry_learned"], + ), + context, ) - # 3. Ambiguous (needs clarification) - if any(x in q for x in [ - "fix it", - "change this", - "update stuff", - ]) or len(q.split()) <= 2: - return ShadowEvalResult( - classification=Classification.AMBIGUOUS, - reason="insufficient_context", - risk_score=1, - flags=["needs_clarification"], + # 2. Static patterns + + # 2a. Forbidden (governance violation) + if any( + x in q + for x in [ + "skip git", + "apply directly", + "dashboard", + "manual change", + ] + ): + return self._apply_context( + ShadowEvalResult( + classification=Classification.FORBIDDEN, + reason="governance_violation", + risk_score=3, + flags=["gitops_bypass"], + ), + context, + ) + + # 2b. Ambiguous (needs clarification) + if ( + any( + x in q + for x in [ + "fix it", + "change this", + "update stuff", + ] + ) + or len(q.split()) <= 2 + ): + return self._apply_context( + ShadowEvalResult( + classification=Classification.AMBIGUOUS, + reason="insufficient_context", + risk_score=1, + flags=["needs_clarification"], + ), + context, ) # 4. 
Blessed (valid + lawful) - return ShadowEvalResult( - classification=Classification.BLESSED, - reason=None, - risk_score=0, + return self._apply_context( + ShadowEvalResult( + classification=Classification.BLESSED, + reason=None, + risk_score=0, + ), + context, ) + + @staticmethod + def _apply_context( + result: ShadowEvalResult, context: Mapping[str, Any] | None + ) -> ShadowEvalResult: + if not context: + return result + + env = str(context.get("environment") or "").lower() + realm = str(context.get("realm") or "").lower() + capability = str(context.get("capability") or "").lower() + role = str(context.get("actor_role") or context.get("role") or "").lower() + + mult = 1.0 + if env in {"prod", "production"}: + mult *= 2.0 + elif env in {"staging", "stage"}: + mult *= 1.5 + elif env in {"dev", "development", "test"}: + mult *= 1.0 + + if capability in {"destroy", "delete", "write"}: + mult *= 1.5 + elif capability in {"read"}: + mult *= 1.0 + + if role in {"admin", "root"}: + mult *= 1.2 + + if realm in {"terraform", "gitops", "cloudflare"}: + mult *= 1.1 + + weighted = int(round(result.risk_score * mult)) + result.risk_score = max(0, min(5, weighted)) + return result diff --git a/mcp/__init__.py b/mcp/__init__.py index f3ed363..154c572 100644 --- a/mcp/__init__.py +++ b/mcp/__init__.py @@ -3,4 +3,6 @@ MCP tools for the CLOUDFLARE workspace. Currently: - oracle_answer: compliance / security oracle +- cloudflare_safe: summary-first Cloudflare state + tunnel helpers +- akash_docs: Akash docs fetch/search + SDL template helper """ diff --git a/mcp/akash_docs/__init__.py b/mcp/akash_docs/__init__.py new file mode 100644 index 0000000..6044c1c --- /dev/null +++ b/mcp/akash_docs/__init__.py @@ -0,0 +1,10 @@ +""" +Akash docs + deployment helpers exposed as an MCP server. 
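A worked example of the context weighting applied by `ShadowClassifier._apply_context` above: the multipliers compound, and the weighted score is clamped to the 0..5 range.

```python
# Forbidden query: baseline risk_score == 3
# context = {"environment": "prod", "capability": "destroy",
#            "actor_role": "admin", "realm": "terraform"}
weighted = int(round(3 * 2.0 * 1.5 * 1.2 * 1.1))  # 11.88 -> 12
final = max(0, min(5, weighted))                   # clamped to 5
```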
+ +Tools: +- akash_docs_list_routes: discover common docs routes from akash.network +- akash_docs_fetch: fetch a docs page (prefers GitHub markdown, falls back to site HTML) +- akash_docs_search: keyword search across discovered routes (cached) +- akash_sdl_snippet: generate a minimal Akash SDL template +""" + diff --git a/mcp/akash_docs/__main__.py b/mcp/akash_docs/__main__.py new file mode 100644 index 0000000..cd0fe5d --- /dev/null +++ b/mcp/akash_docs/__main__.py @@ -0,0 +1,7 @@ +from __future__ import annotations + +from .server import main + +if __name__ == "__main__": + main() + diff --git a/mcp/akash_docs/server.py b/mcp/akash_docs/server.py new file mode 100644 index 0000000..4e1af3e --- /dev/null +++ b/mcp/akash_docs/server.py @@ -0,0 +1,861 @@ +from __future__ import annotations + +import hashlib +import json +import os +import re +import sys +import urllib.error +import urllib.parse +import urllib.request +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple + + +AKASH_SITE_BASE = "https://akash.network" +AKASH_DOCS_BASE = f"{AKASH_SITE_BASE}/docs" + +AKASH_DOCS_GITHUB_OWNER = "akash-network" +AKASH_DOCS_GITHUB_REPO = "website-revamp" +AKASH_DOCS_GITHUB_REF_DEFAULT = "main" +AKASH_DOCS_GITHUB_DOCS_ROOT = "src/content/Docs" + +MAX_BYTES_DEFAULT = 32_000 + + +def _repo_root() -> Path: + # server.py -> akash_docs -> mcp -> cloudflare -> <repo root> + return Path(__file__).resolve().parents[3] + + +def _utc_now_iso() -> str: + return datetime.now(timezone.utc).isoformat() + + +def _max_bytes() -> int: + raw = (os.getenv("VM_MCP_MAX_BYTES") or "").strip() + if not raw: + return MAX_BYTES_DEFAULT + try: + return max(4_096, int(raw)) + except ValueError: + return MAX_BYTES_DEFAULT + + +def _sha256_hex(text: str) -> str: + return hashlib.sha256(text.encode("utf-8")).hexdigest() + + +def _http_get(url: str, *, timeout: int = 30) -> str: + req = urllib.request.Request( + url=url, + headers={ + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "User-Agent": "work-core-akamcp/0.1 (+https://akash.network)", + }, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + return resp.read().decode("utf-8", "replace") + + +def _normalize_route(route_or_url: str) -> Tuple[str, str]: + """ + Returns (route, canonical_url). + + route: "getting-started/what-is-akash" (no leading/trailing slashes) + canonical_url: https://akash.network/docs/<route> + """ + raw = (route_or_url or "").strip() + if not raw: + return "", AKASH_DOCS_BASE + "/" + + if raw.startswith("http://") or raw.startswith("https://"): + parsed = urllib.parse.urlparse(raw) + path = parsed.path or "" + # Normalize to docs route if possible. + if path in ("/docs", "/docs/"): + return "", AKASH_DOCS_BASE + "/" + if path.startswith("/docs/"): + route = path[len("/docs/") :].strip("/") + return route, f"{AKASH_DOCS_BASE}/{route}" + return path.strip("/"), raw + + # Accept "/docs/..." or "docs/..." 
+ route = raw.lstrip("/") + if route in ("docs", "docs/"): + return "", AKASH_DOCS_BASE + "/" + if route.startswith("docs/"): + route = route[len("docs/") :] + route = route.strip("/") + return route, f"{AKASH_DOCS_BASE}/{route}" if route else AKASH_DOCS_BASE + "/" + + +def _strip_frontmatter(markdown: str) -> str: + # Remove leading YAML frontmatter: ---\n...\n---\n + if not markdown.startswith("---"): + return markdown + m = re.match(r"^---\s*\n.*?\n---\s*\n", markdown, flags=re.S) + if not m: + return markdown + return markdown[m.end() :] + + +def _github_candidates(route: str) -> List[str]: + base = f"{AKASH_DOCS_GITHUB_DOCS_ROOT}/{route}".rstrip("/") + candidates = [ + f"{base}/index.md", + f"{base}/index.mdx", + f"{base}.md", + f"{base}.mdx", + ] + # Handle root docs landing if route is empty. + if not route: + candidates = [ + f"{AKASH_DOCS_GITHUB_DOCS_ROOT}/index.md", + f"{AKASH_DOCS_GITHUB_DOCS_ROOT}/index.mdx", + ] + return candidates + + +def _fetch_markdown_from_github(route: str, *, ref: str) -> Tuple[str, str, str]: + """ + Returns (markdown, raw_url, repo_path) or raises urllib.error.HTTPError. + """ + last_err: Optional[urllib.error.HTTPError] = None + for repo_path in _github_candidates(route): + raw_url = ( + f"https://raw.githubusercontent.com/{AKASH_DOCS_GITHUB_OWNER}/" + f"{AKASH_DOCS_GITHUB_REPO}/{ref}/{repo_path}" + ) + try: + return _http_get(raw_url), raw_url, repo_path + except urllib.error.HTTPError as e: + if e.code == 404: + last_err = e + continue + raise + if last_err: + raise last_err + raise urllib.error.HTTPError( + url="", + code=404, + msg="Not Found", + hdrs=None, + fp=None, + ) + + +def _extract_article_html(page_html: str) -> str: + m = re.search(r"<article\b[^>]*>(.*?)</article>", page_html, flags=re.S | re.I) + if m: + return m.group(1) + m = re.search(r"<main\b[^>]*>(.*?)</main>", page_html, flags=re.S | re.I) + if m: + return m.group(1) + return page_html + + +def _html_to_text(article_html: str) -> str: + # Drop scripts/styles + cleaned = re.sub( + r"<(script|style)\b[^>]*>.*?</\1>", "", article_html, flags=re.S | re.I + ) + + # Preserve code blocks a bit better (Astro uses <div class="ec-line"> for each line) + def _pre_repl(match: re.Match[str]) -> str: + pre = match.group(0) + pre = re.sub(r"</div>\s*", "\n", pre, flags=re.I) + pre = re.sub(r"<div\b[^>]*>", "", pre, flags=re.I) + pre = re.sub(r"<br\s*/?>", "\n", pre, flags=re.I) + pre = re.sub(r"<[^>]+>", "", pre) + return "\n```\n" + _html_unescape(pre).strip() + "\n```\n" + + cleaned = re.sub(r"<pre\b[^>]*>.*?</pre>", _pre_repl, cleaned, flags=re.S | re.I) + + # Newlines for common block tags + cleaned = re.sub( + r"</(p|h1|h2|h3|h4|h5|h6|li|blockquote)>", "\n", cleaned, flags=re.I + ) + cleaned = re.sub(r"<br\s*/?>", "\n", cleaned, flags=re.I) + cleaned = re.sub(r"<hr\b[^>]*>", "\n---\n", cleaned, flags=re.I) + + # Strip remaining tags + cleaned = re.sub(r"<[^>]+>", "", cleaned) + + text = _html_unescape(cleaned) + lines = [ln.rstrip() for ln in text.splitlines()] + # Collapse excessive blank lines + out: List[str] = [] + blank = False + for ln in lines: + if ln.strip() == "": + if blank: + continue + blank = True + out.append("") + continue + blank = False + out.append(ln.strip()) + return "\n".join(out).strip() + + +def _html_unescape(text: str) -> str: + # Avoid importing html module repeatedly; do it lazily. 
+ import html as _html # local import to keep global import list small + + return _html.unescape(text) + + +def _discover_routes_from_docs_index() -> List[str]: + html = _http_get(AKASH_DOCS_BASE + "/") + hrefs = set(re.findall(r'href=\"(/docs/[^\"#?]+)\"', html)) + routes: List[str] = [] + for href in sorted(hrefs): + route, _url = _normalize_route(href) + if route: + routes.append(route) + return routes + + +@dataclass(frozen=True) +class CachedDoc: + cache_key: str + fetched_at: str + source: str + route: str + url: str + ref: str + content_path: str + + +class DocStore: + def __init__(self, root_dir: Path) -> None: + self.root_dir = root_dir + self.pages_dir = root_dir / "pages" + self.index_path = root_dir / "index.json" + self.pages_dir.mkdir(parents=True, exist_ok=True) + self._index: Dict[str, Dict[str, Any]] = {} + if self.index_path.exists(): + try: + self._index = json.loads(self.index_path.read_text(encoding="utf-8")) + except Exception: + self._index = {} + + def _write_index(self) -> None: + tmp = self.index_path.with_suffix(".tmp") + tmp.write_text( + json.dumps(self._index, ensure_ascii=False, indent=2) + "\n", + encoding="utf-8", + ) + tmp.replace(self.index_path) + + def get(self, cache_key: str) -> Optional[CachedDoc]: + raw = self._index.get(cache_key) + if not raw: + return None + path = Path(raw.get("content_path") or "") + if not path.exists(): + return None + return CachedDoc( + cache_key=cache_key, + fetched_at=str(raw.get("fetched_at") or ""), + source=str(raw.get("source") or ""), + route=str(raw.get("route") or ""), + url=str(raw.get("url") or ""), + ref=str(raw.get("ref") or ""), + content_path=str(path), + ) + + def save( + self, + *, + cache_key: str, + source: str, + route: str, + url: str, + ref: str, + content: str, + ) -> CachedDoc: + content_hash = _sha256_hex(f"{source}:{ref}:{url}")[:20] + path = self.pages_dir / f"{content_hash}.txt" + path.write_text(content, encoding="utf-8") + entry = { + "fetched_at": _utc_now_iso(), + "source": source, + "route": route, + "url": url, + "ref": ref, + "content_path": str(path), + } + self._index[cache_key] = entry + self._write_index() + return self.get(cache_key) or CachedDoc( + cache_key=cache_key, + fetched_at=entry["fetched_at"], + source=source, + route=route, + url=url, + ref=ref, + content_path=str(path), + ) + + +def _default_state_dir() -> Path: + return _repo_root() / "archive_runtime" / "akash_docs_mcp" + + +def _truncate_to_max_bytes(text: str, *, max_bytes: int) -> Tuple[str, bool]: + blob = text.encode("utf-8") + if len(blob) <= max_bytes: + return text, False + # Reserve a bit for the truncation notice + reserve = min(512, max_bytes // 10) + head = blob[: max(0, max_bytes - reserve)].decode("utf-8", "replace") + head = head.rstrip() + "\n\n[TRUNCATED: response exceeded VM_MCP_MAX_BYTES]\n" + return head, True + + +def _mcp_text_result(text: str, *, is_error: bool = False) -> Dict[str, Any]: + text, _truncated = _truncate_to_max_bytes(text, max_bytes=_max_bytes()) + result: Dict[str, Any] = {"content": [{"type": "text", "text": text}]} + if is_error: + result["isError"] = True + return result + + +class AkashDocsTools: + def __init__(self) -> None: + state_dir = Path(os.getenv("VM_AKASH_DOCS_MCP_STATE_DIR") or _default_state_dir()) + self.store = DocStore(state_dir) + + def akash_docs_list_routes(self) -> Dict[str, Any]: + routes = _discover_routes_from_docs_index() + return { + "ok": True, + "summary": f"Discovered {len(routes)} docs route(s) from {AKASH_DOCS_BASE}/.", + "data": {"routes": routes}, + 
"next_steps": ["akash_docs_fetch(route_or_url=...)"], + } + + def akash_docs_fetch( + self, + *, + route_or_url: str, + source: str = "auto", + ref: str = AKASH_DOCS_GITHUB_REF_DEFAULT, + max_chars: int = 12_000, + refresh: bool = False, + strip_frontmatter: bool = True, + ) -> Dict[str, Any]: + route, canonical_url = _normalize_route(route_or_url) + source_norm = (source or "auto").strip().lower() + if source_norm not in ("auto", "github", "site"): + raise ValueError("source must be one of: auto, github, site") + + max_chars_int = max(0, int(max_chars)) + # Avoid flooding clients; open content_path for full content. + max_chars_int = min(max_chars_int, max(2_000, _max_bytes() - 8_000)) + + cache_key = f"{source_norm}:{ref}:{route or canonical_url}" + cached = self.store.get(cache_key) + if cached and not refresh: + content = Path(cached.content_path).read_text(encoding="utf-8") + if strip_frontmatter and cached.source == "github": + content = _strip_frontmatter(content) + truncated = len(content) > max_chars_int + return { + "ok": True, + "summary": "Returned cached docs content.", + "data": { + "source": cached.source, + "route": cached.route, + "url": cached.url, + "ref": cached.ref, + "cached": True, + "fetched_at": cached.fetched_at, + "content": content[:max_chars_int], + "truncated": truncated, + "content_path": cached.content_path, + }, + "next_steps": ["Set refresh=true to refetch."], + } + + attempted: List[Dict[str, Any]] = [] + + def _try_github() -> Optional[Tuple[str, str, str]]: + try: + md, raw_url, repo_path = _fetch_markdown_from_github(route, ref=ref) + return md, raw_url, repo_path + except urllib.error.HTTPError as e: + attempted.append({"source": "github", "status": getattr(e, "code", None), "detail": str(e)}) + return None + + def _try_site() -> Optional[Tuple[str, str]]: + try: + html = _http_get(canonical_url) + article = _extract_article_html(html) + text = _html_to_text(article) + return text, canonical_url + except urllib.error.HTTPError as e: + attempted.append({"source": "site", "status": getattr(e, "code", None), "detail": str(e)}) + return None + + content: str + final_source: str + final_url: str + extra: Dict[str, Any] = {} + + if source_norm in ("auto", "github"): + gh = _try_github() + if gh: + content, final_url, repo_path = gh + final_source = "github" + extra["repo_path"] = repo_path + elif source_norm == "github": + raise ValueError("GitHub fetch failed; try source='site' or verify the route/ref.") + else: + site = _try_site() + if not site: + raise ValueError(f"Fetch failed for route_or_url={route_or_url!r}. Attempts: {attempted}") + content, final_url = site + final_source = "site" + else: + site = _try_site() + if not site: + raise ValueError(f"Site fetch failed for route_or_url={route_or_url!r}. 
Attempts: {attempted}") + content, final_url = site + final_source = "site" + + cached_doc = self.store.save( + cache_key=cache_key, + source=final_source, + route=route, + url=final_url, + ref=ref, + content=content, + ) + + content_view = content + if strip_frontmatter and final_source == "github": + content_view = _strip_frontmatter(content_view) + truncated = len(content_view) > max_chars_int + content_out = content_view[:max_chars_int] + return { + "ok": True, + "summary": f"Fetched docs via {final_source}.", + "data": { + "source": final_source, + "route": route, + "url": final_url, + "ref": ref, + "cached": False, + "fetched_at": cached_doc.fetched_at, + "content": content_out, + "truncated": truncated, + "content_path": cached_doc.content_path, + "attempts": attempted, + **extra, + }, + "next_steps": [ + "akash_docs_search(query=..., refresh=false)", + ], + } + + def akash_docs_search( + self, + *, + query: str, + limit: int = 10, + refresh: bool = False, + ref: str = AKASH_DOCS_GITHUB_REF_DEFAULT, + ) -> Dict[str, Any]: + q = (query or "").strip() + if not q: + raise ValueError("query is required") + limit = max(1, min(50, int(limit))) + + routes = _discover_routes_from_docs_index() + hits: List[Dict[str, Any]] = [] + + for route in routes: + doc = self.akash_docs_fetch( + route_or_url=route, + source="github", + ref=ref, + max_chars=0, # search reads full content from content_path + refresh=refresh, + strip_frontmatter=True, + ) + data = doc.get("data") or {} + content_path = data.get("content_path") + if not content_path: + continue + try: + content = Path(str(content_path)).read_text(encoding="utf-8") + content = _strip_frontmatter(content) + except Exception: + continue + idx = content.lower().find(q.lower()) + if idx == -1: + continue + start = max(0, idx - 80) + end = min(len(content), idx + 160) + snippet = content[start:end].replace("\n", " ").strip() + hits.append( + { + "route": route, + "url": data.get("url"), + "source": data.get("source"), + "snippet": snippet, + } + ) + if len(hits) >= limit: + break + + return { + "ok": True, + "summary": f"Found {len(hits)} hit(s) across {len(routes)} route(s).", + "data": {"query": q, "hits": hits, "routes_searched": len(routes)}, + "next_steps": ["akash_docs_fetch(route_or_url=hits[0].route)"], + } + + def akash_sdl_snippet( + self, + *, + service_name: str, + container_image: str, + port: int, + cpu_units: float = 0.5, + memory_size: str = "512Mi", + storage_size: str = "512Mi", + denom: str = "uakt", + price_amount: int = 100, + ) -> Dict[str, Any]: + svc = (service_name or "").strip() + img = (container_image or "").strip() + if not svc: + raise ValueError("service_name is required") + if not img: + raise ValueError("container_image is required") + port_int = int(port) + if port_int <= 0 or port_int > 65535: + raise ValueError("port must be 1..65535") + + sdl = f"""version: \"2.0\" + +services: + {svc}: + image: {img} + expose: + - port: {port_int} + to: + - global: true + +profiles: + compute: + {svc}: + resources: + cpu: + units: {cpu_units} + memory: + size: {memory_size} + storage: + size: {storage_size} + placement: + akash: + pricing: + {svc}: + denom: {denom} + amount: {int(price_amount)} + +deployment: + {svc}: + akash: + profile: {svc} + count: 1 +""" + return { + "ok": True, + "summary": "Generated an Akash SDL template.", + "data": { + "service_name": svc, + "container_image": img, + "port": port_int, + "sdl": sdl, + }, + "next_steps": [ + "Save as deploy.yaml and deploy via Akash Console or akash CLI.", + ], + } + 
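Example invocation of the SDL helper above (all values are illustrative; the store directory comes from `VM_AKASH_DOCS_MCP_STATE_DIR` or the default state dir):

```python
from mcp.akash_docs.server import AkashDocsTools

tools = AkashDocsTools()
result = tools.akash_sdl_snippet(
    service_name="web",
    container_image="nginx:1.27",
    port=80,
)
print(result["summary"])      # "Generated an Akash SDL template."
print(result["data"]["sdl"])  # version "2.0", services.web on port 80, priced in uakt
```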
+ +TOOLS: List[Dict[str, Any]] = [ + { + "name": "akash_docs_list_routes", + "description": "Discover common Akash docs routes by scraping https://akash.network/docs/ (SSR HTML).", + "inputSchema": {"type": "object", "properties": {}}, + }, + { + "name": "akash_docs_fetch", + "description": "Fetch an Akash docs page (prefers GitHub markdown in akash-network/website-revamp; falls back to site HTML).", + "inputSchema": { + "type": "object", + "properties": { + "route_or_url": {"type": "string"}, + "source": { + "type": "string", + "description": "auto|github|site", + "default": "auto", + }, + "ref": {"type": "string", "default": AKASH_DOCS_GITHUB_REF_DEFAULT}, + "max_chars": {"type": "integer", "default": 12000}, + "refresh": {"type": "boolean", "default": False}, + "strip_frontmatter": {"type": "boolean", "default": True}, + }, + "required": ["route_or_url"], + }, + }, + { + "name": "akash_docs_search", + "description": "Keyword search across routes discovered from /docs (fetches + caches GitHub markdown).", + "inputSchema": { + "type": "object", + "properties": { + "query": {"type": "string"}, + "limit": {"type": "integer", "default": 10}, + "refresh": {"type": "boolean", "default": False}, + "ref": {"type": "string", "default": AKASH_DOCS_GITHUB_REF_DEFAULT}, + }, + "required": ["query"], + }, + }, + { + "name": "akash_sdl_snippet", + "description": "Generate a minimal Akash SDL manifest for a single service exposing one port.", + "inputSchema": { + "type": "object", + "properties": { + "service_name": {"type": "string"}, + "container_image": {"type": "string"}, + "port": {"type": "integer"}, + "cpu_units": {"type": "number", "default": 0.5}, + "memory_size": {"type": "string", "default": "512Mi"}, + "storage_size": {"type": "string", "default": "512Mi"}, + "denom": {"type": "string", "default": "uakt"}, + "price_amount": {"type": "integer", "default": 100}, + }, + "required": ["service_name", "container_image", "port"], + }, + }, +] + + +class StdioJsonRpc: + def __init__(self) -> None: + self._in = sys.stdin.buffer + self._out = sys.stdout.buffer + self._mode: str | None = None # "headers" | "line" + + def read_message(self) -> Optional[Dict[str, Any]]: + while True: + if self._mode == "line": + line = self._in.readline() + if not line: + return None + raw = line.decode("utf-8", "replace").strip() + if not raw: + continue + try: + msg = json.loads(raw) + except Exception: + continue + if isinstance(msg, dict): + return msg + continue + + first = self._in.readline() + if not first: + return None + + if first in (b"\r\n", b"\n"): + continue + + # Auto-detect newline-delimited JSON framing. 
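            # (Editor's note, illustrative, not in the original patch.) Two framings
            # are accepted on stdin:
            #   line mode:   b'{"jsonrpc":"2.0","id":1,"method":"tools/list"}\n'
            #   header mode: b'Content-Length: <N>\r\n\r\n' followed by N bytes of JSON
            # A first non-blank byte of "{" selects line mode; anything else is parsed
            # as LSP-style headers until a Content-Length header is found.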
+ if self._mode is None and first.lstrip().startswith(b"{"): + try: + msg = json.loads(first.decode("utf-8", "replace")) + except Exception: + msg = None + if isinstance(msg, dict): + self._mode = "line" + return msg + + headers: Dict[str, str] = {} + try: + text = first.decode("utf-8", "replace").strip() + except Exception: + continue + if ":" not in text: + continue + k, v = text.split(":", 1) + headers[k.lower().strip()] = v.strip() + + while True: + line = self._in.readline() + if not line: + return None + if line in (b"\r\n", b"\n"): + break + try: + text = line.decode("utf-8", "replace").strip() + except Exception: + continue + if ":" not in text: + continue + k, v = text.split(":", 1) + headers[k.lower().strip()] = v.strip() + + if "content-length" not in headers: + return None + try: + length = int(headers["content-length"]) + except ValueError: + return None + body = self._in.read(length) + if not body: + return None + self._mode = "headers" + msg = json.loads(body.decode("utf-8", "replace")) + if isinstance(msg, dict): + return msg + return None + + def write_message(self, message: Dict[str, Any]) -> None: + if self._mode == "line": + payload = json.dumps( + message, ensure_ascii=False, separators=(",", ":"), default=str + ).encode("utf-8") + self._out.write(payload + b"\n") + self._out.flush() + return + + body = json.dumps(message, ensure_ascii=False, separators=(",", ":")).encode( + "utf-8" + ) + header = f"Content-Length: {len(body)}\r\n\r\n".encode("utf-8") + self._out.write(header) + self._out.write(body) + self._out.flush() + + +def main() -> None: + tools = AkashDocsTools() + rpc = StdioJsonRpc() + + handlers: Dict[str, Callable[[Dict[str, Any]], Dict[str, Any]]] = { + "akash_docs_list_routes": lambda a: tools.akash_docs_list_routes(), + "akash_docs_fetch": lambda a: tools.akash_docs_fetch(**a), + "akash_docs_search": lambda a: tools.akash_docs_search(**a), + "akash_sdl_snippet": lambda a: tools.akash_sdl_snippet(**a), + } + + while True: + msg = rpc.read_message() + if msg is None: + return + + method = msg.get("method") + msg_id = msg.get("id") + params = msg.get("params") or {} + + try: + if method == "initialize": + result = { + "protocolVersion": "2024-11-05", + "serverInfo": {"name": "akash_docs", "version": "0.1.0"}, + "capabilities": {"tools": {}}, + } + rpc.write_message({"jsonrpc": "2.0", "id": msg_id, "result": result}) + continue + + if method == "tools/list": + rpc.write_message( + {"jsonrpc": "2.0", "id": msg_id, "result": {"tools": TOOLS}} + ) + continue + + if method == "tools/call": + tool_name = str(params.get("name") or "") + args = params.get("arguments") or {} + if tool_name not in handlers: + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + f"Unknown tool: {tool_name}\nKnown tools: {', '.join(sorted(handlers.keys()))}", + is_error=True, + ), + } + ) + continue + + try: + payload = handlers[tool_name](args) + # Split payload: meta JSON + optional raw content. + # If payload["data"]["content"] exists, emit it as a second text block for readability. 
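                    # (Editor's note, illustrative.) For akash_docs_fetch this yields
                    # two MCP text blocks:
                    #   content[0] -> the JSON envelope (summary, source, url, cached, ...)
                    #   content[1] -> the raw page markdown/HTML removed from data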
+ data = payload.get("data") if isinstance(payload, dict) else None + content_text = None + if isinstance(data, dict) and isinstance(data.get("content"), str): + content_text = data["content"] + data = dict(data) + data.pop("content", None) + payload = dict(payload) + payload["data"] = data + + blocks = [json.dumps(payload, ensure_ascii=False, indent=2)] + if content_text: + blocks.append(content_text) + result: Dict[str, Any] = { + "content": [{"type": "text", "text": b} for b in blocks] + } + rpc.write_message({"jsonrpc": "2.0", "id": msg_id, "result": result}) + except Exception as e: # noqa: BLE001 + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + f"Error: {e}", + is_error=True, + ), + } + ) + continue + + # Ignore notifications. + if msg_id is None: + continue + + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + f"Unsupported method: {method}", + is_error=True, + ), + } + ) + except Exception as e: # noqa: BLE001 + # Last-resort: avoid crashing the server. + if msg_id is not None: + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result(f"fatal error: {e}", is_error=True), + } + ) diff --git a/mcp/cloudflare_safe/__init__.py b/mcp/cloudflare_safe/__init__.py new file mode 100644 index 0000000..b3f679e --- /dev/null +++ b/mcp/cloudflare_safe/__init__.py @@ -0,0 +1,11 @@ +""" +cloudflare_safe MCP server. + +Summary-first Cloudflare tooling with hard output caps and default redaction. +""" + +from __future__ import annotations + +__all__ = ["__version__"] + +__version__ = "0.1.0" diff --git a/mcp/cloudflare_safe/__main__.py b/mcp/cloudflare_safe/__main__.py new file mode 100644 index 0000000..d998836 --- /dev/null +++ b/mcp/cloudflare_safe/__main__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .server import main + +if __name__ == "__main__": + main() diff --git a/mcp/cloudflare_safe/cloudflare_api.py b/mcp/cloudflare_safe/cloudflare_api.py new file mode 100644 index 0000000..c6182ad --- /dev/null +++ b/mcp/cloudflare_safe/cloudflare_api.py @@ -0,0 +1,496 @@ +from __future__ import annotations + +import hashlib +import json +import os +import urllib.error +import urllib.parse +import urllib.request +from dataclasses import dataclass +from datetime import datetime, timezone +from pathlib import Path +from typing import ( + Any, + Dict, + Iterable, + List, + Mapping, + MutableMapping, + Optional, + Sequence, + Tuple, +) + +CF_API_BASE = "https://api.cloudflare.com/client/v4" + + +def utc_now_iso() -> str: + return datetime.now(timezone.utc).isoformat() + + +def stable_hash(data: Any) -> str: + blob = json.dumps( + data, sort_keys=True, separators=(",", ":"), ensure_ascii=False + ).encode("utf-8") + return hashlib.sha256(blob).hexdigest() + + +class CloudflareError(RuntimeError): + pass + + +@dataclass(frozen=True) +class CloudflareContext: + api_token: str + account_id: str + + @staticmethod + def from_env() -> "CloudflareContext": + api_token = ( + os.getenv("CLOUDFLARE_API_TOKEN") + or os.getenv("CF_API_TOKEN") + or os.getenv("CLOUDFLARE_TOKEN") + or "" + ).strip() + account_id = ( + os.getenv("CLOUDFLARE_ACCOUNT_ID") or os.getenv("CF_ACCOUNT_ID") or "" + ).strip() + + if not api_token: + raise CloudflareError( + "Missing Cloudflare API token. Set CLOUDFLARE_API_TOKEN (or CF_API_TOKEN)." + ) + if not account_id: + raise CloudflareError( + "Missing Cloudflare account id. Set CLOUDFLARE_ACCOUNT_ID (or CF_ACCOUNT_ID)." 
+ ) + return CloudflareContext(api_token=api_token, account_id=account_id) + + +class CloudflareClient: + def __init__(self, *, api_token: str) -> None: + self.api_token = api_token + + def _request( + self, + method: str, + path: str, + *, + params: Optional[Mapping[str, str]] = None, + ) -> Dict[str, Any]: + url = f"{CF_API_BASE}{path}" + if params: + url = f"{url}?{urllib.parse.urlencode(params)}" + + req = urllib.request.Request( + url=url, + method=method, + headers={ + "Authorization": f"Bearer {self.api_token}", + "Accept": "application/json", + "Content-Type": "application/json", + }, + ) + + try: + with urllib.request.urlopen(req, timeout=30) as resp: + raw = resp.read() + except urllib.error.HTTPError as e: + raw = e.read() if hasattr(e, "read") else b"" + detail = raw.decode("utf-8", "replace") + raise CloudflareError( + f"Cloudflare API HTTP {e.code} for {path}: {detail}" + ) from e + except urllib.error.URLError as e: + raise CloudflareError( + f"Cloudflare API request failed for {path}: {e}" + ) from e + + try: + data = json.loads(raw.decode("utf-8", "replace")) + except json.JSONDecodeError: + raise CloudflareError( + f"Cloudflare API returned non-JSON for {path}: {raw[:200]!r}" + ) + + if not data.get("success", True): + raise CloudflareError( + f"Cloudflare API error for {path}: {data.get('errors')}" + ) + + return data + + def paginate( + self, + path: str, + *, + params: Optional[Mapping[str, str]] = None, + per_page: int = 100, + max_pages: int = 5, + ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]: + """ + Fetch a paginated Cloudflare endpoint. + + Returns (results, result_info). + """ + results: List[Dict[str, Any]] = [] + page = 1 + last_info: Dict[str, Any] = {} + + while True: + merged_params: Dict[str, str] = { + "page": str(page), + "per_page": str(per_page), + } + if params: + merged_params.update({k: str(v) for k, v in params.items()}) + + data = self._request("GET", path, params=merged_params) + batch = data.get("result") or [] + if not isinstance(batch, list): + batch = [batch] + results.extend(batch) + last_info = data.get("result_info") or {} + + total_pages = int(last_info.get("total_pages") or 1) + if page >= total_pages or page >= max_pages: + break + page += 1 + + return results, last_info + + def list_zones(self) -> List[Dict[str, Any]]: + zones, _info = self.paginate("/zones", max_pages=2) + return zones + + def list_dns_records_summary( + self, zone_id: str, *, max_pages: int = 1 + ) -> Tuple[List[Dict[str, Any]], Dict[str, Any]]: + return self.paginate(f"/zones/{zone_id}/dns_records", max_pages=max_pages) + + def list_tunnels(self, account_id: str) -> List[Dict[str, Any]]: + tunnels, _info = self.paginate( + f"/accounts/{account_id}/cfd_tunnel", max_pages=2 + ) + return tunnels + + def list_tunnel_connections( + self, account_id: str, tunnel_id: str + ) -> List[Dict[str, Any]]: + data = self._request( + "GET", f"/accounts/{account_id}/cfd_tunnel/{tunnel_id}/connections" + ) + result = data.get("result") or [] + return result if isinstance(result, list) else [result] + + def list_access_apps(self, account_id: str) -> List[Dict[str, Any]]: + apps, _info = self.paginate(f"/accounts/{account_id}/access/apps", max_pages=3) + return apps + + def list_access_policies( + self, account_id: str, app_id: str + ) -> List[Dict[str, Any]]: + policies, _info = self.paginate( + f"/accounts/{account_id}/access/apps/{app_id}/policies", + max_pages=3, + ) + return policies + + +@dataclass(frozen=True) +class SnapshotMeta: + snapshot_id: str + created_at: str + 
scopes: List[str] + snapshot_path: str + + +class SnapshotStore: + def __init__(self, root_dir: Path) -> None: + self.root_dir = root_dir + self.snapshots_dir = root_dir / "snapshots" + self.diffs_dir = root_dir / "diffs" + self.snapshots_dir.mkdir(parents=True, exist_ok=True) + self.diffs_dir.mkdir(parents=True, exist_ok=True) + self._index: Dict[str, SnapshotMeta] = {} + + def get(self, snapshot_id: str) -> SnapshotMeta: + if snapshot_id not in self._index: + raise CloudflareError(f"Unknown snapshot_id: {snapshot_id}") + return self._index[snapshot_id] + + def load_snapshot(self, snapshot_id: str) -> Dict[str, Any]: + meta = self.get(snapshot_id) + return json.loads(Path(meta.snapshot_path).read_text(encoding="utf-8")) + + def create_snapshot( + self, + *, + client: CloudflareClient, + ctx: CloudflareContext, + scopes: Sequence[str], + zone_id: Optional[str] = None, + zone_name: Optional[str] = None, + dns_max_pages: int = 1, + ) -> Tuple[SnapshotMeta, Dict[str, Any]]: + scopes_norm = sorted(set(scopes)) + created_at = utc_now_iso() + + zones = client.list_zones() + zones_min = [ + { + "id": z.get("id"), + "name": z.get("name"), + "status": z.get("status"), + "paused": z.get("paused"), + } + for z in zones + ] + + selected_zone_id = zone_id + if not selected_zone_id and zone_name: + for z in zones_min: + if z.get("name") == zone_name: + selected_zone_id = str(z.get("id")) + break + + snapshot: Dict[str, Any] = { + "meta": { + "snapshot_id": "", + "created_at": created_at, + "account_id": ctx.account_id, + "scopes": scopes_norm, + }, + "zones": zones_min, + } + + if "tunnels" in scopes_norm: + tunnels = client.list_tunnels(ctx.account_id) + tunnels_min: List[Dict[str, Any]] = [] + for t in tunnels: + tid = t.get("id") + name = t.get("name") + status = t.get("status") + connector_count: Optional[int] = None + last_seen: Optional[str] = None + if tid and status != "deleted": + conns = client.list_tunnel_connections(ctx.account_id, str(tid)) + connector_count = len(conns) + # Pick the most recent 'opened_at' if present. 
+ opened = [c.get("opened_at") for c in conns if isinstance(c, dict)] + opened = [o for o in opened if isinstance(o, str)] + last_seen = max(opened) if opened else None + + tunnels_min.append( + { + "id": tid, + "name": name, + "status": status, + "created_at": t.get("created_at"), + "deleted_at": t.get("deleted_at"), + "connector_count": connector_count, + "last_seen": last_seen, + } + ) + snapshot["tunnels"] = tunnels_min + + if "access_apps" in scopes_norm: + apps = client.list_access_apps(ctx.account_id) + apps_min = [ + { + "id": a.get("id"), + "name": a.get("name"), + "domain": a.get("domain"), + "type": a.get("type"), + "created_at": a.get("created_at"), + "updated_at": a.get("updated_at"), + } + for a in apps + ] + snapshot["access_apps"] = apps_min + + if "dns" in scopes_norm: + if selected_zone_id: + records, info = client.list_dns_records_summary( + selected_zone_id, max_pages=dns_max_pages + ) + records_min = [ + { + "id": r.get("id"), + "type": r.get("type"), + "name": r.get("name"), + "content": r.get("content"), + "proxied": r.get("proxied"), + "ttl": r.get("ttl"), + } + for r in records + ] + snapshot["dns"] = { + "zone_id": selected_zone_id, + "zone_name": zone_name, + "result_info": info, + "records_sample": records_min, + } + else: + snapshot["dns"] = { + "note": "dns scope requested but no zone_id/zone_name provided; only zones list included", + } + + snapshot_id = f"cf_{created_at.replace(':', '').replace('-', '').replace('.', '')}_{stable_hash(snapshot)[:10]}" + snapshot["meta"]["snapshot_id"] = snapshot_id + + path = self.snapshots_dir / f"{snapshot_id}.json" + path.write_text( + json.dumps(snapshot, indent=2, ensure_ascii=False), encoding="utf-8" + ) + + meta = SnapshotMeta( + snapshot_id=snapshot_id, + created_at=created_at, + scopes=scopes_norm, + snapshot_path=str(path), + ) + self._index[snapshot_id] = meta + return meta, snapshot + + def diff( + self, + *, + from_snapshot_id: str, + to_snapshot_id: str, + scopes: Optional[Sequence[str]] = None, + ) -> Dict[str, Any]: + before = self.load_snapshot(from_snapshot_id) + after = self.load_snapshot(to_snapshot_id) + + scopes_before = set(before.get("meta", {}).get("scopes") or []) + scopes_after = set(after.get("meta", {}).get("scopes") or []) + scopes_all = sorted(scopes_before | scopes_after) + scopes_use = sorted(set(scopes or scopes_all)) + + def index_by_id( + items: Iterable[Mapping[str, Any]], + ) -> Dict[str, Dict[str, Any]]: + out: Dict[str, Dict[str, Any]] = {} + for it in items: + _id = it.get("id") + if _id is None: + continue + out[str(_id)] = dict(it) + return out + + diff_out: Dict[str, Any] = { + "from": from_snapshot_id, + "to": to_snapshot_id, + "scopes": scopes_use, + "changes": {}, + } + + for scope in scopes_use: + if scope not in {"tunnels", "access_apps", "zones"}: + continue + b_items = before.get(scope) or [] + a_items = after.get(scope) or [] + if not isinstance(b_items, list) or not isinstance(a_items, list): + continue + b_map = index_by_id(b_items) + a_map = index_by_id(a_items) + added = [a_map[k] for k in sorted(set(a_map) - set(b_map))] + removed = [b_map[k] for k in sorted(set(b_map) - set(a_map))] + + changed: List[Dict[str, Any]] = [] + for k in sorted(set(a_map) & set(b_map)): + if stable_hash(a_map[k]) != stable_hash(b_map[k]): + changed.append({"id": k, "before": b_map[k], "after": a_map[k]}) + + diff_out["changes"][scope] = { + "added": [{"id": x.get("id"), "name": x.get("name")} for x in added], + "removed": [ + {"id": x.get("id"), "name": x.get("name")} for x in removed + ], + 
"changed": [ + {"id": x.get("id"), "name": x.get("after", {}).get("name")} + for x in changed + ], + "counts": { + "added": len(added), + "removed": len(removed), + "changed": len(changed), + }, + } + + diff_path = self.diffs_dir / f"{from_snapshot_id}_to_{to_snapshot_id}.json" + diff_path.write_text( + json.dumps(diff_out, indent=2, ensure_ascii=False), + encoding="utf-8", + ) + diff_out["diff_path"] = str(diff_path) + return diff_out + + +def parse_cloudflared_config_ingress(config_text: str) -> List[Dict[str, str]]: + """ + Best-effort parser for cloudflared YAML config ingress rules. + + We intentionally avoid a YAML dependency; this extracts common patterns: + - hostname: example.com + service: http://127.0.0.1:8080 + """ + rules: List[Dict[str, str]] = [] + lines = config_text.splitlines() + i = 0 + while i < len(lines): + line = lines[i] + stripped = line.lstrip() + if not stripped.startswith("-"): + i += 1 + continue + after_dash = stripped[1:].lstrip() + if not after_dash.startswith("hostname:"): + i += 1 + continue + + hostname = after_dash[len("hostname:") :].strip().strip('"').strip("'") + base_indent = len(line) - len(line.lstrip()) + + service = "" + j = i + 1 + while j < len(lines): + next_line = lines[j] + if next_line.strip() == "": + j += 1 + continue + + next_indent = len(next_line) - len(next_line.lstrip()) + if next_indent <= base_indent: + break + + next_stripped = next_line.lstrip() + if next_stripped.startswith("service:"): + service = next_stripped[len("service:") :].strip().strip('"').strip("'") + break + j += 1 + + rules.append({"hostname": hostname, "service": service}) + i = j + return rules + + +def ingress_summary_from_file( + *, + config_path: str, + max_rules: int = 50, +) -> Dict[str, Any]: + path = Path(config_path) + if not path.exists(): + raise CloudflareError(f"cloudflared config not found: {config_path}") + text = path.read_text(encoding="utf-8", errors="replace") + rules = parse_cloudflared_config_ingress(text) + hostnames = sorted({r["hostname"] for r in rules if r.get("hostname")}) + return { + "config_path": config_path, + "ingress_rule_count": len(rules), + "hostnames": hostnames[:max_rules], + "rules_sample": rules[:max_rules], + "truncated": len(rules) > max_rules, + } diff --git a/mcp/cloudflare_safe/server.py b/mcp/cloudflare_safe/server.py new file mode 100644 index 0000000..5a23f1d --- /dev/null +++ b/mcp/cloudflare_safe/server.py @@ -0,0 +1,725 @@ +from __future__ import annotations + +import json +import os +import sys +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple + +from .cloudflare_api import ( + CloudflareClient, + CloudflareContext, + CloudflareError, + SnapshotStore, + ingress_summary_from_file, +) + +MAX_BYTES_DEFAULT = 32_000 + + +def _repo_root() -> Path: + # server.py -> cloudflare_safe -> mcp -> <repo root> + return Path(__file__).resolve().parents[3] + + +def _max_bytes() -> int: + raw = (os.getenv("VM_MCP_MAX_BYTES") or "").strip() + if not raw: + return MAX_BYTES_DEFAULT + try: + return max(4_096, int(raw)) + except ValueError: + return MAX_BYTES_DEFAULT + + +def _redact(obj: Any) -> Any: + sensitive_keys = ("token", "secret", "password", "private", "key", "certificate") + + if isinstance(obj, dict): + out: Dict[str, Any] = {} + for k, v in obj.items(): + if any(s in str(k).lower() for s in sensitive_keys): + out[k] = "<REDACTED>" + else: + out[k] = _redact(v) + return out + if isinstance(obj, list): + return [_redact(v) for v in obj] + if isinstance(obj, str): + if 
obj.startswith("ghp_") or obj.startswith("github_pat_"): + return "<REDACTED>" + return obj + return obj + + +def _safe_json(payload: Dict[str, Any]) -> str: + payload = _redact(payload) + raw = json.dumps(payload, ensure_ascii=False, separators=(",", ":")) + if len(raw.encode("utf-8")) <= _max_bytes(): + return json.dumps(payload, ensure_ascii=False, indent=2) + + # Truncate: keep only summary + next_steps. + truncated = { + "ok": payload.get("ok", True), + "truncated": True, + "summary": payload.get("summary", "Response exceeded max size; truncated."), + "next_steps": payload.get( + "next_steps", + [ + "request a narrower scope (e.g., scopes=['tunnels'])", + "request an export path instead of inline content", + ], + ), + } + return json.dumps(truncated, ensure_ascii=False, indent=2) + + +def _mcp_text_result( + payload: Dict[str, Any], *, is_error: bool = False +) -> Dict[str, Any]: + result: Dict[str, Any] = { + "content": [{"type": "text", "text": _safe_json(payload)}] + } + if is_error: + result["isError"] = True + return result + + +def _default_state_dir() -> Path: + return _repo_root() / "archive_runtime" / "cloudflare_mcp" + + +class CloudflareSafeTools: + def __init__(self) -> None: + self.store = SnapshotStore( + Path(os.getenv("VM_CF_MCP_STATE_DIR") or _default_state_dir()) + ) + + def cf_snapshot( + self, + *, + scopes: Optional[Sequence[str]] = None, + zone_id: Optional[str] = None, + zone_name: Optional[str] = None, + dns_max_pages: int = 1, + ) -> Dict[str, Any]: + scopes_use = list(scopes or ["tunnels", "access_apps"]) + ctx = CloudflareContext.from_env() + client = CloudflareClient(api_token=ctx.api_token) + meta, snapshot = self.store.create_snapshot( + client=client, + ctx=ctx, + scopes=scopes_use, + zone_id=zone_id, + zone_name=zone_name, + dns_max_pages=dns_max_pages, + ) + + summary = ( + f"Snapshot {meta.snapshot_id} captured " + f"(scopes={','.join(meta.scopes)}) and written to {meta.snapshot_path}." 
+ ) + return { + "ok": True, + "summary": summary, + "data": { + "snapshot_id": meta.snapshot_id, + "created_at": meta.created_at, + "scopes": meta.scopes, + "snapshot_path": meta.snapshot_path, + "counts": { + "zones": len(snapshot.get("zones") or []), + "tunnels": len(snapshot.get("tunnels") or []), + "access_apps": len(snapshot.get("access_apps") or []), + }, + }, + "truncated": False, + "next_steps": [ + "cf_config_diff(from_snapshot_id=..., to_snapshot_id=...)", + "cf_export_config(full=false, snapshot_id=...)", + ], + } + + def cf_refresh( + self, + *, + snapshot_id: str, + scopes: Optional[Sequence[str]] = None, + dns_max_pages: int = 1, + ) -> Dict[str, Any]: + before_meta = self.store.get(snapshot_id) + before = self.store.load_snapshot(snapshot_id) + scopes_use = list(scopes or (before.get("meta", {}).get("scopes") or [])) + + ctx = CloudflareContext.from_env() + client = CloudflareClient(api_token=ctx.api_token) + + meta, _snapshot = self.store.create_snapshot( + client=client, + ctx=ctx, + scopes=scopes_use, + zone_id=(before.get("dns") or {}).get("zone_id"), + zone_name=(before.get("dns") or {}).get("zone_name"), + dns_max_pages=dns_max_pages, + ) + + return { + "ok": True, + "summary": f"Refreshed {before_meta.snapshot_id} -> {meta.snapshot_id} (scopes={','.join(meta.scopes)}).", + "data": { + "from_snapshot_id": before_meta.snapshot_id, + "to_snapshot_id": meta.snapshot_id, + "snapshot_path": meta.snapshot_path, + }, + "truncated": False, + "next_steps": [ + "cf_config_diff(from_snapshot_id=..., to_snapshot_id=...)", + ], + } + + def cf_config_diff( + self, + *, + from_snapshot_id: str, + to_snapshot_id: str, + scopes: Optional[Sequence[str]] = None, + ) -> Dict[str, Any]: + diff = self.store.diff( + from_snapshot_id=from_snapshot_id, + to_snapshot_id=to_snapshot_id, + scopes=scopes, + ) + + # Keep the response small; point to diff_path for full detail. + changes = diff.get("changes") or {} + counts = { + scope: (changes.get(scope) or {}).get("counts") + for scope in sorted(changes.keys()) + } + return { + "ok": True, + "summary": f"Diff computed and written to {diff.get('diff_path')}.", + "data": { + "from_snapshot_id": from_snapshot_id, + "to_snapshot_id": to_snapshot_id, + "scopes": diff.get("scopes"), + "counts": counts, + "diff_path": diff.get("diff_path"), + }, + "truncated": False, + "next_steps": [ + "Use filesystem MCP to open diff_path for full details", + "Run cf_export_config(full=false, snapshot_id=...) for a safe export path", + ], + } + + def cf_export_config( + self, + *, + snapshot_id: Optional[str] = None, + full: bool = False, + scopes: Optional[Sequence[str]] = None, + ) -> Dict[str, Any]: + if snapshot_id is None: + snap = self.cf_snapshot(scopes=scopes) + snapshot_id = str((snap.get("data") or {}).get("snapshot_id")) + + meta = self.store.get(snapshot_id) + if not full: + return { + "ok": True, + "summary": "Export is summary-first; full config requires full=true.", + "data": { + "snapshot_id": meta.snapshot_id, + "snapshot_path": meta.snapshot_path, + }, + "truncated": False, + "next_steps": [ + "Use filesystem MCP to open snapshot_path", + "If you truly need inline data, call cf_export_config(full=true, snapshot_id=...)", + ], + } + + snapshot = self.store.load_snapshot(snapshot_id) + return { + "ok": True, + "summary": "Full snapshot export (redacted + size-capped). 
Prefer snapshot_path for large data.", + "data": snapshot, + "truncated": False, + "next_steps": [ + f"Snapshot file: {meta.snapshot_path}", + ], + } + + def cf_tunnel_status( + self, + *, + snapshot_id: Optional[str] = None, + tunnel_name: Optional[str] = None, + tunnel_id: Optional[str] = None, + ) -> Dict[str, Any]: + if snapshot_id: + snap = self.store.load_snapshot(snapshot_id) + tunnels = snap.get("tunnels") or [] + else: + snap = self.cf_snapshot(scopes=["tunnels"]) + sid = str((snap.get("data") or {}).get("snapshot_id")) + tunnels = self.store.load_snapshot(sid).get("tunnels") or [] + + def matches(t: Dict[str, Any]) -> bool: + if tunnel_id and str(t.get("id")) != str(tunnel_id): + return False + if tunnel_name and str(t.get("name")) != str(tunnel_name): + return False + return True + + filtered = [t for t in tunnels if isinstance(t, dict) and matches(t)] + if not filtered and (tunnel_id or tunnel_name): + return { + "ok": False, + "summary": "Tunnel not found in snapshot.", + "data": {"tunnel_id": tunnel_id, "tunnel_name": tunnel_name}, + "truncated": False, + "next_steps": ["Call cf_snapshot(scopes=['tunnels']) and retry."], + } + + connectors = [t.get("connector_count") for t in filtered if isinstance(t, dict)] + connectors = [c for c in connectors if isinstance(c, int)] + return { + "ok": True, + "summary": f"Returned {len(filtered)} tunnel(s).", + "data": { + "tunnels": [ + { + "id": t.get("id"), + "name": t.get("name"), + "status": t.get("status"), + "connector_count": t.get("connector_count"), + "last_seen": t.get("last_seen"), + } + for t in filtered + ], + "connectors_total": sum(connectors) if connectors else 0, + }, + "truncated": False, + "next_steps": [ + "For local ingress hostnames, use cf_tunnel_ingress_summary(config_path='/etc/cloudflared/config.yml')", + ], + } + + def cf_tunnel_ingress_summary( + self, + *, + config_path: str = "/etc/cloudflared/config.yml", + full: bool = False, + max_rules: int = 50, + ) -> Dict[str, Any]: + summary = ingress_summary_from_file( + config_path=config_path, max_rules=max_rules + ) + if not full: + return { + "ok": True, + "summary": f"Parsed ingress hostnames from {config_path}.", + "data": { + "config_path": summary["config_path"], + "ingress_rule_count": summary["ingress_rule_count"], + "hostnames": summary["hostnames"], + "truncated": summary["truncated"], + }, + "truncated": False, + "next_steps": [ + "Call cf_tunnel_ingress_summary(full=true, ...) to include service mappings (still capped).", + ], + } + return { + "ok": True, + "summary": f"Ingress summary (full=true) for {config_path}.", + "data": summary, + "truncated": False, + "next_steps": [], + } + + def cf_access_policy_list( + self, + *, + app_id: Optional[str] = None, + ) -> Dict[str, Any]: + ctx = CloudflareContext.from_env() + client = CloudflareClient(api_token=ctx.api_token) + + if not app_id: + apps = client.list_access_apps(ctx.account_id) + apps_min = [ + { + "id": a.get("id"), + "name": a.get("name"), + "domain": a.get("domain"), + "type": a.get("type"), + } + for a in apps + ] + return { + "ok": True, + "summary": f"Returned {len(apps_min)} Access app(s). 
Provide app_id to list policies.", + "data": {"apps": apps_min}, + "truncated": False, + "next_steps": [ + "Call cf_access_policy_list(app_id=...)", + ], + } + + policies = client.list_access_policies(ctx.account_id, app_id) + policies_min = [ + { + "id": p.get("id"), + "name": p.get("name"), + "decision": p.get("decision"), + "precedence": p.get("precedence"), + } + for p in policies + ] + return { + "ok": True, + "summary": f"Returned {len(policies_min)} policy/policies for app_id={app_id}.", + "data": {"app_id": app_id, "policies": policies_min}, + "truncated": False, + "next_steps": [], + } + + +TOOLS: List[Dict[str, Any]] = [ + { + "name": "cf_snapshot", + "description": "Create a summary-first Cloudflare state snapshot (writes JSON to disk; returns snapshot_id + paths).", + "inputSchema": { + "type": "object", + "properties": { + "scopes": { + "type": "array", + "items": {"type": "string"}, + "description": "Scopes to fetch (default: ['tunnels','access_apps']). Supported: zones,tunnels,access_apps,dns", + }, + "zone_id": {"type": "string"}, + "zone_name": {"type": "string"}, + "dns_max_pages": {"type": "integer", "default": 1}, + }, + }, + }, + { + "name": "cf_refresh", + "description": "Refresh a prior snapshot (creates a new snapshot_id).", + "inputSchema": { + "type": "object", + "properties": { + "snapshot_id": {"type": "string"}, + "scopes": {"type": "array", "items": {"type": "string"}}, + "dns_max_pages": {"type": "integer", "default": 1}, + }, + "required": ["snapshot_id"], + }, + }, + { + "name": "cf_config_diff", + "description": "Diff two snapshots (summary counts inline; full diff written to disk).", + "inputSchema": { + "type": "object", + "properties": { + "from_snapshot_id": {"type": "string"}, + "to_snapshot_id": {"type": "string"}, + "scopes": {"type": "array", "items": {"type": "string"}}, + }, + "required": ["from_snapshot_id", "to_snapshot_id"], + }, + }, + { + "name": "cf_export_config", + "description": "Export snapshot config. 
Defaults to summary-only; full=true returns redacted + size-capped data.", + "inputSchema": { + "type": "object", + "properties": { + "snapshot_id": {"type": "string"}, + "full": {"type": "boolean", "default": False}, + "scopes": {"type": "array", "items": {"type": "string"}}, + }, + }, + }, + { + "name": "cf_tunnel_status", + "description": "Return tunnel status summary (connector count, last seen).", + "inputSchema": { + "type": "object", + "properties": { + "snapshot_id": {"type": "string"}, + "tunnel_name": {"type": "string"}, + "tunnel_id": {"type": "string"}, + }, + }, + }, + { + "name": "cf_tunnel_ingress_summary", + "description": "Parse cloudflared ingress hostnames from a local config file (never dumps full YAML unless full=true, still capped).", + "inputSchema": { + "type": "object", + "properties": { + "config_path": { + "type": "string", + "default": "/etc/cloudflared/config.yml", + }, + "full": {"type": "boolean", "default": False}, + "max_rules": {"type": "integer", "default": 50}, + }, + }, + }, + { + "name": "cf_access_policy_list", + "description": "List Access apps, or policies for a specific app_id (summary-only).", + "inputSchema": { + "type": "object", + "properties": { + "app_id": {"type": "string"}, + }, + }, + }, +] + + +class StdioJsonRpc: + def __init__(self) -> None: + self._in = sys.stdin.buffer + self._out = sys.stdout.buffer + self._mode: str | None = None # "headers" | "line" + + def read_message(self) -> Optional[Dict[str, Any]]: + while True: + if self._mode == "line": + line = self._in.readline() + if not line: + return None + raw = line.decode("utf-8", "replace").strip() + if not raw: + continue + try: + msg = json.loads(raw) + except Exception: + continue + if isinstance(msg, dict): + return msg + continue + + first = self._in.readline() + if not first: + return None + + if first in (b"\r\n", b"\n"): + continue + + # Auto-detect newline-delimited JSON framing. 
+ if self._mode is None and first.lstrip().startswith(b"{"): + try: + msg = json.loads(first.decode("utf-8", "replace")) + except Exception: + msg = None + if isinstance(msg, dict): + self._mode = "line" + return msg + + headers: Dict[str, str] = {} + try: + text = first.decode("utf-8", "replace").strip() + except Exception: + continue + if ":" not in text: + continue + k, v = text.split(":", 1) + headers[k.lower().strip()] = v.strip() + + while True: + line = self._in.readline() + if not line: + return None + if line in (b"\r\n", b"\n"): + break + try: + text = line.decode("utf-8", "replace").strip() + except Exception: + continue + if ":" not in text: + continue + k, v = text.split(":", 1) + headers[k.lower().strip()] = v.strip() + + if "content-length" not in headers: + return None + try: + length = int(headers["content-length"]) + except ValueError: + return None + body = self._in.read(length) + if not body: + return None + self._mode = "headers" + msg = json.loads(body.decode("utf-8", "replace")) + if isinstance(msg, dict): + return msg + return None + + def write_message(self, message: Dict[str, Any]) -> None: + if self._mode == "line": + payload = json.dumps( + message, ensure_ascii=False, separators=(",", ":"), default=str + ).encode("utf-8") + self._out.write(payload + b"\n") + self._out.flush() + return + + body = json.dumps(message, ensure_ascii=False, separators=(",", ":")).encode( + "utf-8" + ) + header = f"Content-Length: {len(body)}\r\n\r\n".encode("utf-8") + self._out.write(header) + self._out.write(body) + self._out.flush() + + +def main() -> None: + tools = CloudflareSafeTools() + rpc = StdioJsonRpc() + + handlers: Dict[str, Callable[[Dict[str, Any]], Dict[str, Any]]] = { + "cf_snapshot": lambda a: tools.cf_snapshot(**a), + "cf_refresh": lambda a: tools.cf_refresh(**a), + "cf_config_diff": lambda a: tools.cf_config_diff(**a), + "cf_export_config": lambda a: tools.cf_export_config(**a), + "cf_tunnel_status": lambda a: tools.cf_tunnel_status(**a), + "cf_tunnel_ingress_summary": lambda a: tools.cf_tunnel_ingress_summary(**a), + "cf_access_policy_list": lambda a: tools.cf_access_policy_list(**a), + } + + while True: + msg = rpc.read_message() + if msg is None: + return + + method = msg.get("method") + msg_id = msg.get("id") + params = msg.get("params") or {} + + try: + if method == "initialize": + result = { + "protocolVersion": "2024-11-05", + "serverInfo": {"name": "cloudflare_safe", "version": "0.1.0"}, + "capabilities": {"tools": {}}, + } + rpc.write_message({"jsonrpc": "2.0", "id": msg_id, "result": result}) + continue + + if method == "tools/list": + rpc.write_message( + {"jsonrpc": "2.0", "id": msg_id, "result": {"tools": TOOLS}} + ) + continue + + if method == "tools/call": + tool_name = str(params.get("name") or "") + args = params.get("arguments") or {} + if tool_name not in handlers: + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + { + "ok": False, + "summary": f"Unknown tool: {tool_name}", + "data": {"known_tools": sorted(handlers.keys())}, + "truncated": False, + "next_steps": ["Call tools/list"], + }, + is_error=True, + ), + } + ) + continue + + try: + payload = handlers[tool_name](args) + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result(payload), + } + ) + except CloudflareError as e: + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + { + "ok": False, + "summary": str(e), + "truncated": False, + "next_steps": [ + "Verify 
CLOUDFLARE_API_TOKEN and CLOUDFLARE_ACCOUNT_ID are set", + "Retry with a narrower scope", + ], + }, + is_error=True, + ), + } + ) + except Exception as e: # noqa: BLE001 + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + { + "ok": False, + "summary": f"Unhandled error: {e}", + "truncated": False, + "next_steps": ["Retry with a narrower scope"], + }, + is_error=True, + ), + } + ) + continue + + # Ignore notifications. + if msg_id is None: + continue + + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + { + "ok": False, + "summary": f"Unsupported method: {method}", + "truncated": False, + }, + is_error=True, + ), + } + ) + except Exception as e: # noqa: BLE001 + # Last-resort: avoid crashing the server. + if msg_id is not None: + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + { + "ok": False, + "summary": f"fatal error: {e}", + "truncated": False, + }, + ), + } + ) diff --git a/mcp/oracle_answer/__main__.py b/mcp/oracle_answer/__main__.py new file mode 100644 index 0000000..d998836 --- /dev/null +++ b/mcp/oracle_answer/__main__.py @@ -0,0 +1,6 @@ +from __future__ import annotations + +from .server import main + +if __name__ == "__main__": + main() diff --git a/mcp/oracle_answer/server.py b/mcp/oracle_answer/server.py new file mode 100644 index 0000000..5a3aff6 --- /dev/null +++ b/mcp/oracle_answer/server.py @@ -0,0 +1,386 @@ +from __future__ import annotations + +import asyncio +import json +import os +import sys +from typing import Any, Callable, Dict, List, Optional + +from layer0 import layer0_entry +from layer0.shadow_classifier import ShadowEvalResult + +from .tool import OracleAnswerTool + +MAX_BYTES_DEFAULT = 32_000 + + +def _max_bytes() -> int: + raw = (os.getenv("VM_MCP_MAX_BYTES") or "").strip() + if not raw: + return MAX_BYTES_DEFAULT + try: + return max(4_096, int(raw)) + except ValueError: + return MAX_BYTES_DEFAULT + + +def _redact(obj: Any) -> Any: + sensitive_keys = ("token", "secret", "password", "private", "key", "certificate") + + if isinstance(obj, dict): + out: Dict[str, Any] = {} + for k, v in obj.items(): + if any(s in str(k).lower() for s in sensitive_keys): + out[k] = "<REDACTED>" + else: + out[k] = _redact(v) + return out + if isinstance(obj, list): + return [_redact(v) for v in obj] + if isinstance(obj, str): + if obj.startswith("ghp_") or obj.startswith("github_pat_"): + return "<REDACTED>" + return obj + return obj + + +def _safe_json(payload: Dict[str, Any]) -> str: + payload = _redact(payload) + raw = json.dumps(payload, ensure_ascii=False, separators=(",", ":"), default=str) + if len(raw.encode("utf-8")) <= _max_bytes(): + return json.dumps(payload, ensure_ascii=False, indent=2, default=str) + + truncated = { + "ok": payload.get("ok", True), + "truncated": True, + "summary": payload.get("summary", "Response exceeded max size; truncated."), + "next_steps": payload.get( + "next_steps", + ["request narrower outputs (e.g., fewer frameworks or shorter question)"], + ), + } + return json.dumps(truncated, ensure_ascii=False, indent=2, default=str) + + +def _mcp_text_result( + payload: Dict[str, Any], *, is_error: bool = False +) -> Dict[str, Any]: + result: Dict[str, Any] = { + "content": [{"type": "text", "text": _safe_json(payload)}] + } + if is_error: + result["isError"] = True + return result + + +TOOLS: List[Dict[str, Any]] = [ + { + "name": "oracle_answer", + "description": "Answer a compliance/security question (optionally via NVIDIA 
LLM) and map to frameworks.", + "inputSchema": { + "type": "object", + "properties": { + "question": { + "type": "string", + "description": "The question to answer.", + }, + "frameworks": { + "type": "array", + "items": {"type": "string"}, + "description": "Frameworks to reference (e.g., ['NIST-CSF','ISO-27001','GDPR']).", + }, + "mode": { + "type": "string", + "enum": ["strict", "advisory"], + "default": "strict", + "description": "strict=conservative, advisory=exploratory.", + }, + "local_only": { + "type": "boolean", + "description": "If true, skip NVIDIA API calls (uses local-only mode). Defaults to true when NVIDIA_API_KEY is missing.", + }, + }, + "required": ["question"], + }, + } +] + + +class OracleAnswerTools: + async def oracle_answer( + self, + *, + question: str, + frameworks: Optional[List[str]] = None, + mode: str = "strict", + local_only: Optional[bool] = None, + ) -> Dict[str, Any]: + routing_action, shadow = layer0_entry(question) + if routing_action != "HANDOFF_TO_LAYER1": + return _layer0_payload(routing_action, shadow) + + local_only_use = ( + bool(local_only) + if local_only is not None + else not bool((os.getenv("NVIDIA_API_KEY") or "").strip()) + ) + + try: + tool = OracleAnswerTool( + default_frameworks=frameworks, + use_local_only=local_only_use, + ) + except Exception as e: # noqa: BLE001 + return { + "ok": False, + "summary": str(e), + "data": { + "local_only": local_only_use, + "has_nvidia_api_key": bool( + (os.getenv("NVIDIA_API_KEY") or "").strip() + ), + }, + "truncated": False, + "next_steps": [ + "Set NVIDIA_API_KEY to enable live answers", + "Or call oracle_answer(local_only=true, ...)", + ], + } + + resp = await tool.answer(question=question, frameworks=frameworks, mode=mode) + return { + "ok": True, + "summary": "Oracle answer generated.", + "data": { + "question": question, + "mode": mode, + "frameworks": frameworks or tool.default_frameworks, + "local_only": local_only_use, + "model": resp.model, + "answer": resp.answer, + "framework_hits": resp.framework_hits, + "reasoning": resp.reasoning, + }, + "truncated": False, + "next_steps": [ + "If the answer is incomplete, add more specifics to the question or include more frameworks.", + ], + } + + +class StdioJsonRpc: + def __init__(self) -> None: + self._in = sys.stdin.buffer + self._out = sys.stdout.buffer + self._mode: str | None = None # "headers" | "line" + + def read_message(self) -> Optional[Dict[str, Any]]: + while True: + if self._mode == "line": + line = self._in.readline() + if not line: + return None + raw = line.decode("utf-8", "replace").strip() + if not raw: + continue + try: + msg = json.loads(raw) + except Exception: + continue + if isinstance(msg, dict): + return msg + continue + + first = self._in.readline() + if not first: + return None + + if first in (b"\r\n", b"\n"): + continue + + # Auto-detect newline-delimited JSON framing. 
+ if self._mode is None and first.lstrip().startswith(b"{"): + try: + msg = json.loads(first.decode("utf-8", "replace")) + except Exception: + msg = None + if isinstance(msg, dict): + self._mode = "line" + return msg + + headers: Dict[str, str] = {} + try: + text = first.decode("utf-8", "replace").strip() + except Exception: + continue + if ":" not in text: + continue + k, v = text.split(":", 1) + headers[k.lower().strip()] = v.strip() + + while True: + line = self._in.readline() + if not line: + return None + if line in (b"\r\n", b"\n"): + break + try: + text = line.decode("utf-8", "replace").strip() + except Exception: + continue + if ":" not in text: + continue + k, v = text.split(":", 1) + headers[k.lower().strip()] = v.strip() + + if "content-length" not in headers: + return None + try: + length = int(headers["content-length"]) + except ValueError: + return None + body = self._in.read(length) + if not body: + return None + self._mode = "headers" + msg = json.loads(body.decode("utf-8", "replace")) + if isinstance(msg, dict): + return msg + return None + + def write_message(self, message: Dict[str, Any]) -> None: + if self._mode == "line": + payload = json.dumps( + message, ensure_ascii=False, separators=(",", ":"), default=str + ).encode("utf-8") + self._out.write(payload + b"\n") + self._out.flush() + return + + body = json.dumps( + message, ensure_ascii=False, separators=(",", ":"), default=str + ).encode("utf-8") + header = f"Content-Length: {len(body)}\r\n\r\n".encode("utf-8") + self._out.write(header) + self._out.write(body) + self._out.flush() + + +def main() -> None: + tools = OracleAnswerTools() + rpc = StdioJsonRpc() + + handlers: Dict[str, Callable[[Dict[str, Any]], Any]] = { + "oracle_answer": lambda a: tools.oracle_answer(**a), + } + + while True: + msg = rpc.read_message() + if msg is None: + return + + method = msg.get("method") + msg_id = msg.get("id") + params = msg.get("params") or {} + + try: + if method == "initialize": + result = { + "protocolVersion": "2024-11-05", + "serverInfo": {"name": "oracle_answer", "version": "0.1.0"}, + "capabilities": {"tools": {}}, + } + rpc.write_message({"jsonrpc": "2.0", "id": msg_id, "result": result}) + continue + + if method == "tools/list": + rpc.write_message( + {"jsonrpc": "2.0", "id": msg_id, "result": {"tools": TOOLS}} + ) + continue + + if method == "tools/call": + tool_name = str(params.get("name") or "") + args = params.get("arguments") or {} + handler = handlers.get(tool_name) + if not handler: + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + { + "ok": False, + "summary": f"Unknown tool: {tool_name}", + "data": {"known_tools": sorted(handlers.keys())}, + "truncated": False, + "next_steps": ["Call tools/list"], + }, + is_error=True, + ), + } + ) + continue + + payload = asyncio.run(handler(args)) # type: ignore[arg-type] + is_error = ( + not bool(payload.get("ok", True)) + if isinstance(payload, dict) + else False + ) + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result(payload, is_error=is_error), + } + ) + continue + + # Ignore notifications. 
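            # (Editor's note, illustrative.) A notification carries no "id", e.g.
            #   {"jsonrpc": "2.0", "method": "notifications/initialized"}
            # and per JSON-RPC 2.0 it must not receive a response, hence the bare continue.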
+ if msg_id is None: + continue + + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + {"ok": False, "summary": f"Unsupported method: {method}"}, + is_error=True, + ), + } + ) + except Exception as e: # noqa: BLE001 + if msg_id is not None: + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + {"ok": False, "summary": f"fatal error: {e}"}, + is_error=True, + ), + } + ) + + +def _layer0_payload(routing_action: str, shadow: ShadowEvalResult) -> Dict[str, Any]: + if routing_action == "FAIL_CLOSED": + return {"ok": False, "summary": "Layer 0: cannot comply with this request."} + if routing_action == "HANDOFF_TO_GUARDRAILS": + reason = shadow.reason or "governance_violation" + return { + "ok": False, + "summary": f"Layer 0: governance violation detected ({reason}).", + } + if routing_action == "PROMPT_FOR_CLARIFICATION": + return { + "ok": False, + "summary": "Layer 0: request is ambiguous. Please clarify and retry.", + } + return {"ok": False, "summary": "Layer 0: unrecognized routing action; refusing."} + + +if __name__ == "__main__": + main() diff --git a/mcp/oracle_answer/tool.py b/mcp/oracle_answer/tool.py index ffdb930..bfc4423 100644 --- a/mcp/oracle_answer/tool.py +++ b/mcp/oracle_answer/tool.py @@ -9,7 +9,11 @@ Separate from CLI/API wrapper for clean testability. from __future__ import annotations +import asyncio +import json import os +import urllib.error +import urllib.request from dataclasses import dataclass from typing import Any, Dict, List, Optional @@ -92,12 +96,10 @@ class OracleAnswerTool: if self.use_local_only: return "Local-only mode: skipping NVIDIA API call" - if not httpx: - raise ImportError("httpx not installed. Install with: pip install httpx") - headers = { "Authorization": f"Bearer {self.api_key}", "Accept": "application/json", + "Content-Type": "application/json", } payload = { @@ -108,18 +110,45 @@ class OracleAnswerTool: "max_tokens": 1024, } - try: - async with httpx.AsyncClient() as client: - response = await client.post( - f"{self.NVIDIA_API_BASE}/chat/completions", - json=payload, - headers=headers, - timeout=30.0, - ) - response.raise_for_status() - data = response.json() + # Prefer httpx when available; otherwise fall back to stdlib urllib to avoid extra deps. + if httpx: + try: + async with httpx.AsyncClient() as client: + response = await client.post( + f"{self.NVIDIA_API_BASE}/chat/completions", + json=payload, + headers=headers, + timeout=30.0, + ) + response.raise_for_status() + data = response.json() + return data["choices"][0]["message"]["content"] + except Exception as e: # noqa: BLE001 + return f"(API Error: {str(e)}) Falling back to local analysis..." + + def _urllib_post() -> str: + req = urllib.request.Request( + url=f"{self.NVIDIA_API_BASE}/chat/completions", + method="POST", + headers=headers, + data=json.dumps(payload, ensure_ascii=False).encode("utf-8"), + ) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + raw = resp.read().decode("utf-8", "replace") + data = json.loads(raw) return data["choices"][0]["message"]["content"] - except Exception as e: + except urllib.error.HTTPError as e: + detail = "" + try: + detail = e.read().decode("utf-8", "replace") + except Exception: + detail = str(e) + raise RuntimeError(f"HTTP {e.code}: {detail}") from e + + try: + return await asyncio.to_thread(_urllib_post) + except Exception as e: # noqa: BLE001 return f"(API Error: {str(e)}) Falling back to local analysis..." 
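Editor's note on the hunk above: it drops the hard `httpx` requirement and falls back to blocking `urllib` run on a worker thread via `asyncio.to_thread`, so the event loop stays responsive. A self-contained sketch of that pattern, assuming a placeholder URL and payload rather than the real NVIDIA endpoint contract:

```python
# Minimal sketch of the asyncio.to_thread + urllib fallback used in the hunk above.
import asyncio
import json
import urllib.request


def _blocking_post(url: str, payload: dict) -> dict:
    req = urllib.request.Request(
        url=url,
        method="POST",
        headers={"Content-Type": "application/json", "Accept": "application/json"},
        data=json.dumps(payload).encode("utf-8"),
    )
    with urllib.request.urlopen(req, timeout=30) as resp:  # blocking I/O
        return json.loads(resp.read().decode("utf-8", "replace"))


async def post_without_httpx(url: str, payload: dict) -> dict:
    # Run the blocking call on a thread so awaiting callers are not stalled.
    return await asyncio.to_thread(_blocking_post, url, payload)


# Example (placeholder URL):
# asyncio.run(post_without_httpx("https://example.invalid/v1/chat/completions", {"ping": 1}))
```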
async def answer( diff --git a/mcp/waf_intelligence/__init__.py b/mcp/waf_intelligence/__init__.py index 9ae990c..c9d5649 100644 --- a/mcp/waf_intelligence/__init__.py +++ b/mcp/waf_intelligence/__init__.py @@ -10,22 +10,24 @@ This module provides tools to: Export primary classes and functions: """ -from mcp.waf_intelligence.analyzer import ( - WAFRuleAnalyzer, - RuleViolation, +__version__ = "0.3.0" + +from .analyzer import ( AnalysisResult, + RuleViolation, + WAFRuleAnalyzer, ) -from mcp.waf_intelligence.generator import ( - WAFRuleGenerator, - GeneratedRule, -) -from mcp.waf_intelligence.compliance import ( +from .compliance import ( ComplianceMapper, FrameworkMapping, ) -from mcp.waf_intelligence.orchestrator import ( - WAFIntelligence, +from .generator import ( + GeneratedRule, + WAFRuleGenerator, +) +from .orchestrator import ( WAFInsight, + WAFIntelligence, ) __all__ = [ diff --git a/mcp/waf_intelligence/__main__.py b/mcp/waf_intelligence/__main__.py index 4dd4e06..9b07a23 100644 --- a/mcp/waf_intelligence/__main__.py +++ b/mcp/waf_intelligence/__main__.py @@ -10,6 +10,7 @@ from typing import Any, Dict, List from layer0 import layer0_entry from layer0.shadow_classifier import ShadowEvalResult +from . import __version__ as WAF_INTEL_VERSION from .orchestrator import WAFInsight, WAFIntelligence @@ -56,11 +57,18 @@ def run_cli(argv: List[str] | None = None) -> int: action="store_true", help="Exit with non-zero code if any error-severity violations are found.", ) + parser.add_argument( + "--version", + action="version", + version=f"%(prog)s {WAF_INTEL_VERSION}", + ) args = parser.parse_args(argv) # Layer 0: pre-boot Shadow Eval gate. - routing_action, shadow = layer0_entry(f"waf_intel_cli file={args.file} limit={args.limit}") + routing_action, shadow = layer0_entry( + f"waf_intel_cli file={args.file} limit={args.limit}" + ) if routing_action != "HANDOFF_TO_LAYER1": _render_layer0_block(routing_action, shadow) return 1 @@ -90,7 +98,9 @@ def run_cli(argv: List[str] | None = None) -> int: print(f"\nWAF Intelligence Report for: {path}\n{'-' * 72}") if not insights: - print("No high-severity, high-confidence issues detected based on current heuristics.") + print( + "No high-severity, high-confidence issues detected based on current heuristics." + ) return 0 for idx, insight in enumerate(insights, start=1): @@ -119,7 +129,9 @@ def run_cli(argv: List[str] | None = None) -> int: if insight.mappings: print("\nCompliance Mapping:") for mapping in insight.mappings: - print(f" - {mapping.framework} {mapping.control_id}: {mapping.description}") + print( + f" - {mapping.framework} {mapping.control_id}: {mapping.description}" + ) print() diff --git a/mcp/waf_intelligence/analyzer.py b/mcp/waf_intelligence/analyzer.py index 4f7de91..2291c22 100644 --- a/mcp/waf_intelligence/analyzer.py +++ b/mcp/waf_intelligence/analyzer.py @@ -1,9 +1,16 @@ from __future__ import annotations +import re from dataclasses import dataclass, field from pathlib import Path from typing import Any, Dict, List, Optional +MANAGED_WAF_RULESET_IDS = ( + # Cloudflare managed WAF ruleset IDs (last updated 2025-12-18). + "efb7b8c949ac4650a09736fc376e9aee", # Cloudflare Managed Ruleset + "4814384a9e5d4991b9815dcfc25d2f1f", # OWASP Core Ruleset +) + @dataclass class RuleViolation: @@ -57,6 +64,20 @@ class WAFRuleAnalyzer: Analyze Cloudflare WAF rules from Terraform with a quality-first posture. 
""" + def _has_managed_waf_rules(self, text: str) -> bool: + text_lower = text.lower() + + if "managed_rules" in text_lower: + return True + + if re.search(r'phase\s*=\s*"http_request_firewall_managed"', text_lower): + return True + + if "cf.waf" in text_lower: + return True + + return any(ruleset_id in text_lower for ruleset_id in MANAGED_WAF_RULESET_IDS) + def analyze_file( self, path: str | Path, @@ -70,7 +91,7 @@ class WAFRuleAnalyzer: violations: List[RuleViolation] = [] # Example heuristic: no managed rules present - if "managed_rules" not in text: + if not self._has_managed_waf_rules(text): violations.append( RuleViolation( rule_id=None, @@ -102,7 +123,7 @@ class WAFRuleAnalyzer: violations=violations, metadata={ "file_size": path.stat().st_size, - "heuristics_version": "0.2.0", + "heuristics_version": "0.3.0", }, ) @@ -125,7 +146,7 @@ class WAFRuleAnalyzer: tmp_path = Path(source_name) violations: List[RuleViolation] = [] - if "managed_rules" not in text: + if not self._has_managed_waf_rules(text): violations.append( RuleViolation( rule_id=None, @@ -141,7 +162,7 @@ class WAFRuleAnalyzer: result = AnalysisResult( source=str(tmp_path), violations=violations, - metadata={"heuristics_version": "0.2.0"}, + metadata={"heuristics_version": "0.3.0"}, ) result.violations = result.top_violations( @@ -161,27 +182,37 @@ class WAFRuleAnalyzer: ) -> AnalysisResult: """ Enhanced analysis using threat intelligence data. - + Args: path: WAF config file path threat_indicators: List of ThreatIndicator objects from threat_intel module min_severity: Minimum severity to include min_confidence: Minimum confidence threshold - + Returns: AnalysisResult with violations informed by threat intel """ # Start with base analysis - base_result = self.analyze_file(path, min_severity=min_severity, min_confidence=min_confidence) - + base_result = self.analyze_file( + path, min_severity=min_severity, min_confidence=min_confidence + ) + path = Path(path) text = path.read_text(encoding="utf-8") text_lower = text.lower() - + # Check if threat indicators are addressed by existing rules - critical_ips = [i for i in threat_indicators if i.indicator_type == "ip" and i.severity in ("critical", "high")] - critical_patterns = [i for i in threat_indicators if i.indicator_type == "pattern" and i.severity in ("critical", "high")] - + critical_ips = [ + i + for i in threat_indicators + if i.indicator_type == "ip" and i.severity in ("critical", "high") + ] + critical_patterns = [ + i + for i in threat_indicators + if i.indicator_type == "pattern" and i.severity in ("critical", "high") + ] + # Check for IP blocking coverage if critical_ips: ip_block_present = "ip.src" in text_lower or "cf.client.ip" in text_lower @@ -197,14 +228,14 @@ class WAFRuleAnalyzer: hint=f"Add IP blocking rules for identified threat actors. 
Sample IPs: {', '.join(i.value for i in critical_ips[:3])}", ) ) - + # Check for pattern-based attack coverage attack_types_seen = set() for ind in critical_patterns: for tag in ind.tags: if tag in ("sqli", "xss", "rce", "path_traversal"): attack_types_seen.add(tag) - + # Check managed ruleset coverage for attack_type in attack_types_seen: if attack_type not in text_lower and f'"{attack_type}"' not in text_lower: @@ -219,13 +250,12 @@ class WAFRuleAnalyzer: hint=f"Enable Cloudflare managed rules for {attack_type.upper()} protection.", ) ) - + # Update metadata with threat intel stats base_result.metadata["threat_intel"] = { "critical_ips": len(critical_ips), "critical_patterns": len(critical_patterns), "attack_types_seen": list(attack_types_seen), } - - return base_result + return base_result diff --git a/mcp/waf_intelligence/mcp_server.py b/mcp/waf_intelligence/mcp_server.py new file mode 100644 index 0000000..813f41e --- /dev/null +++ b/mcp/waf_intelligence/mcp_server.py @@ -0,0 +1,632 @@ +from __future__ import annotations + +import glob +import json +import os +import sys +from dataclasses import asdict +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional + +from cloudflare.layer0 import layer0_entry +from cloudflare.layer0.shadow_classifier import ShadowEvalResult + +from .orchestrator import ThreatAssessment, WAFInsight, WAFIntelligence + +MAX_BYTES_DEFAULT = 32_000 + + +def _cloudflare_root() -> Path: + # mcp_server.py -> waf_intelligence -> mcp -> cloudflare + return Path(__file__).resolve().parents[2] + + +def _max_bytes() -> int: + raw = (os.getenv("VM_MCP_MAX_BYTES") or "").strip() + if not raw: + return MAX_BYTES_DEFAULT + try: + return max(4_096, int(raw)) + except ValueError: + return MAX_BYTES_DEFAULT + + +def _redact(obj: Any) -> Any: + sensitive_keys = ("token", "secret", "password", "private", "key", "certificate") + + if isinstance(obj, dict): + out: Dict[str, Any] = {} + for k, v in obj.items(): + if any(s in str(k).lower() for s in sensitive_keys): + out[k] = "<REDACTED>" + else: + out[k] = _redact(v) + return out + if isinstance(obj, list): + return [_redact(v) for v in obj] + if isinstance(obj, str): + if obj.startswith("ghp_") or obj.startswith("github_pat_"): + return "<REDACTED>" + return obj + return obj + + +def _safe_json(payload: Dict[str, Any]) -> str: + payload = _redact(payload) + raw = json.dumps(payload, ensure_ascii=False, separators=(",", ":"), default=str) + if len(raw.encode("utf-8")) <= _max_bytes(): + return json.dumps(payload, ensure_ascii=False, indent=2, default=str) + + truncated = { + "ok": payload.get("ok", True), + "truncated": True, + "summary": payload.get("summary", "Response exceeded max size; truncated."), + "next_steps": payload.get( + "next_steps", + [ + "request fewer files/insights (limit=...)", + "use higher min_severity to reduce output", + ], + ), + } + return json.dumps(truncated, ensure_ascii=False, indent=2, default=str) + + +def _mcp_text_result( + payload: Dict[str, Any], *, is_error: bool = False +) -> Dict[str, Any]: + result: Dict[str, Any] = { + "content": [{"type": "text", "text": _safe_json(payload)}] + } + if is_error: + result["isError"] = True + return result + + +def _insight_to_dict(insight: WAFInsight) -> Dict[str, Any]: + return asdict(insight) + + +def _assessment_to_dict(assessment: ThreatAssessment) -> Dict[str, Any]: + violations = [] + if assessment.analysis_result and getattr( + assessment.analysis_result, "violations", None + ): + violations = 
list(assessment.analysis_result.violations) + + severity_counts = {"error": 0, "warning": 0, "info": 0} + for v in violations: + sev = getattr(v, "severity", "info") + if sev in severity_counts: + severity_counts[sev] += 1 + + return { + "risk_score": assessment.risk_score, + "risk_level": assessment.risk_level, + "classification_summary": assessment.classification_summary, + "recommended_actions": assessment.recommended_actions, + "analysis": { + "has_config_analysis": assessment.analysis_result is not None, + "violations_total": len(violations), + "violations_by_severity": severity_counts, + }, + "has_threat_intel": assessment.threat_report is not None, + "generated_at": str(assessment.generated_at), + } + + +TOOLS: List[Dict[str, Any]] = [ + { + "name": "waf_capabilities", + "description": "List available WAF Intelligence capabilities.", + "inputSchema": {"type": "object", "properties": {}}, + }, + { + "name": "analyze_waf", + "description": "Analyze Terraform WAF file(s) and return curated insights (legacy alias for waf_analyze).", + "inputSchema": { + "type": "object", + "properties": { + "file": { + "type": "string", + "description": "Single file path to analyze.", + }, + "files": { + "type": "array", + "items": {"type": "string"}, + "description": "List of file paths or glob patterns to analyze.", + }, + "limit": { + "type": "integer", + "default": 3, + "description": "Max insights per file.", + }, + "severity_threshold": { + "type": "string", + "enum": ["info", "warning", "error"], + "default": "warning", + "description": "Minimum severity to include (alias for min_severity).", + }, + }, + }, + }, + { + "name": "waf_analyze", + "description": "Analyze Terraform WAF file(s) and return curated insights (requires file or files).", + "inputSchema": { + "type": "object", + "properties": { + "file": { + "type": "string", + "description": "Single file path to analyze.", + }, + "files": { + "type": "array", + "items": {"type": "string"}, + "description": "List of file paths or glob patterns to analyze.", + }, + "limit": { + "type": "integer", + "default": 3, + "description": "Max insights per file.", + }, + "min_severity": { + "type": "string", + "enum": ["info", "warning", "error"], + "default": "warning", + "description": "Minimum severity to include.", + }, + }, + }, + }, + { + "name": "waf_assess", + "description": "Run a broader assessment (optionally includes threat intel collection).", + "inputSchema": { + "type": "object", + "properties": { + "waf_config_path": { + "type": "string", + "description": "Path to Terraform WAF config (default: terraform/waf.tf).", + }, + "include_threat_intel": { + "type": "boolean", + "default": False, + "description": "If true, attempt to collect threat intel (may require network and credentials).", + }, + }, + }, + }, + { + "name": "waf_generate_gitops_proposals", + "description": "Generate GitOps-ready rule proposals (best-effort; requires threat intel to produce output).", + "inputSchema": { + "type": "object", + "properties": { + "waf_config_path": { + "type": "string", + "description": "Path to Terraform WAF config (default: terraform/waf.tf).", + }, + "include_threat_intel": { + "type": "boolean", + "default": True, + "description": "Attempt to collect threat intel before proposing rules.", + }, + "max_proposals": { + "type": "integer", + "default": 5, + "description": "Maximum proposals to generate.", + }, + }, + }, + }, +] + + +class WafIntelligenceTools: + def __init__(self) -> None: + self.workspace_root = _cloudflare_root() + self.repo_root 
= self.workspace_root.parent + self.waf = WAFIntelligence(workspace_path=str(self.workspace_root)) + + def _resolve_path(self, raw: str) -> Path: + path = Path(raw) + if path.is_absolute(): + return path + + candidates = [ + Path.cwd() / path, + self.workspace_root / path, + self.repo_root / path, + ] + for candidate in candidates: + if candidate.exists(): + return candidate + return self.workspace_root / path + + def waf_capabilities(self) -> Dict[str, Any]: + return { + "ok": True, + "summary": "WAF Intelligence capabilities.", + "data": {"capabilities": self.waf.capabilities}, + "truncated": False, + "next_steps": [ + "Call waf_analyze(file=..., limit=...) to analyze config.", + "Call waf_assess(include_threat_intel=true) for a broader assessment.", + ], + } + + def waf_analyze( + self, + *, + file: Optional[str] = None, + files: Optional[List[str]] = None, + limit: int = 3, + min_severity: str = "warning", + ) -> Dict[str, Any]: + paths: List[str] = [] + if files: + for pattern in files: + paths.extend(glob.glob(pattern)) + if file: + paths.append(file) + + seen = set() + unique_paths: List[str] = [] + for p in paths: + if p not in seen: + seen.add(p) + unique_paths.append(p) + + if not unique_paths: + return { + "ok": False, + "summary": "Provide 'file' or 'files' to analyze.", + "truncated": False, + "next_steps": ["Call waf_analyze(file='terraform/waf.tf')"], + } + + results: List[Dict[str, Any]] = [] + for p in unique_paths: + path = self._resolve_path(p) + if not path.exists(): + results.append( + { + "file": str(path), + "ok": False, + "summary": "File not found.", + } + ) + continue + + insights = self.waf.analyze_and_recommend( + str(path), + limit=limit, + min_severity=min_severity, + ) + results.append( + { + "file": str(path), + "ok": True, + "insights": [_insight_to_dict(i) for i in insights], + } + ) + + ok = all(r.get("ok") for r in results) + return { + "ok": ok, + "summary": f"Analyzed {len(results)} file(s).", + "data": {"results": results}, + "truncated": False, + "next_steps": [ + "Raise/lower min_severity or limit to tune output size.", + ], + } + + def waf_assess( + self, + *, + waf_config_path: Optional[str] = None, + include_threat_intel: bool = False, + ) -> Dict[str, Any]: + waf_config_path_resolved = ( + str(self._resolve_path(waf_config_path)) if waf_config_path else None + ) + assessment = self.waf.full_assessment( + waf_config_path=waf_config_path_resolved, + include_threat_intel=include_threat_intel, + ) + return { + "ok": True, + "summary": "WAF assessment complete.", + "data": _assessment_to_dict(assessment), + "truncated": False, + "next_steps": [ + "Call waf_generate_gitops_proposals(...) 
to draft Terraform rule proposals (best-effort).", + ], + } + + def waf_generate_gitops_proposals( + self, + *, + waf_config_path: Optional[str] = None, + include_threat_intel: bool = True, + max_proposals: int = 5, + ) -> Dict[str, Any]: + waf_config_path_resolved = ( + str(self._resolve_path(waf_config_path)) if waf_config_path else None + ) + assessment = self.waf.full_assessment( + waf_config_path=waf_config_path_resolved, + include_threat_intel=include_threat_intel, + ) + proposals = self.waf.generate_gitops_proposals( + threat_report=assessment.threat_report, + max_proposals=max_proposals, + ) + return { + "ok": True, + "summary": f"Generated {len(proposals)} proposal(s).", + "data": { + "assessment": _assessment_to_dict(assessment), + "proposals": proposals, + }, + "truncated": False, + "next_steps": [ + "If proposals are empty, enable threat intel and ensure required credentials/log sources exist.", + ], + } + + +class StdioJsonRpc: + def __init__(self) -> None: + self._in = sys.stdin.buffer + self._out = sys.stdout.buffer + self._mode: str | None = None # "headers" | "line" + + def read_message(self) -> Optional[Dict[str, Any]]: + while True: + if self._mode == "line": + line = self._in.readline() + if not line: + return None + raw = line.decode("utf-8", "replace").strip() + if not raw: + continue + try: + msg = json.loads(raw) + except Exception: + continue + if isinstance(msg, dict): + return msg + continue + + first = self._in.readline() + if not first: + return None + + if first in (b"\r\n", b"\n"): + continue + + # Auto-detect newline-delimited JSON framing. + if self._mode is None and first.lstrip().startswith(b"{"): + try: + msg = json.loads(first.decode("utf-8", "replace")) + except Exception: + msg = None + if isinstance(msg, dict): + self._mode = "line" + return msg + + headers: Dict[str, str] = {} + try: + text = first.decode("utf-8", "replace").strip() + except Exception: + continue + if ":" not in text: + continue + k, v = text.split(":", 1) + headers[k.lower().strip()] = v.strip() + + while True: + line = self._in.readline() + if not line: + return None + if line in (b"\r\n", b"\n"): + break + try: + text = line.decode("utf-8", "replace").strip() + except Exception: + continue + if ":" not in text: + continue + k, v = text.split(":", 1) + headers[k.lower().strip()] = v.strip() + + if "content-length" not in headers: + return None + try: + length = int(headers["content-length"]) + except ValueError: + return None + body = self._in.read(length) + if not body: + return None + self._mode = "headers" + msg = json.loads(body.decode("utf-8", "replace")) + if isinstance(msg, dict): + return msg + return None + + def write_message(self, message: Dict[str, Any]) -> None: + if self._mode == "line": + payload = json.dumps( + message, ensure_ascii=False, separators=(",", ":"), default=str + ).encode("utf-8") + self._out.write(payload + b"\n") + self._out.flush() + return + + body = json.dumps( + message, ensure_ascii=False, separators=(",", ":"), default=str + ).encode("utf-8") + header = f"Content-Length: {len(body)}\r\n\r\n".encode("utf-8") + self._out.write(header) + self._out.write(body) + self._out.flush() + + +def main() -> None: + tools = WafIntelligenceTools() + rpc = StdioJsonRpc() + + handlers: Dict[str, Callable[[Dict[str, Any]], Dict[str, Any]]] = { + "waf_capabilities": lambda a: tools.waf_capabilities(), + "analyze_waf": lambda a: tools.waf_analyze( + file=a.get("file"), + files=a.get("files"), + limit=int(a.get("limit", 3)), + 
min_severity=str(a.get("severity_threshold", "warning")), + ), + "waf_analyze": lambda a: tools.waf_analyze(**a), + "waf_assess": lambda a: tools.waf_assess(**a), + "waf_generate_gitops_proposals": lambda a: tools.waf_generate_gitops_proposals( + **a + ), + } + + while True: + msg = rpc.read_message() + if msg is None: + return + + method = msg.get("method") + msg_id = msg.get("id") + params = msg.get("params") or {} + + try: + if method == "initialize": + result = { + "protocolVersion": "2024-11-05", + "serverInfo": {"name": "waf_intelligence", "version": "0.1.0"}, + "capabilities": {"tools": {}}, + } + rpc.write_message({"jsonrpc": "2.0", "id": msg_id, "result": result}) + continue + + if method == "tools/list": + rpc.write_message( + {"jsonrpc": "2.0", "id": msg_id, "result": {"tools": TOOLS}} + ) + continue + + if method == "tools/call": + tool_name = str(params.get("name") or "") + args = params.get("arguments") or {} + + routing_action, shadow = layer0_entry( + _shadow_query_repr(tool_name, args) + ) + if routing_action != "HANDOFF_TO_LAYER1": + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + _layer0_payload(routing_action, shadow), is_error=True + ), + } + ) + continue + + handler = handlers.get(tool_name) + if not handler: + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + { + "ok": False, + "summary": f"Unknown tool: {tool_name}", + "data": {"known_tools": sorted(handlers.keys())}, + "truncated": False, + "next_steps": ["Call tools/list"], + }, + is_error=True, + ), + } + ) + continue + + payload = handler(args) + is_error = ( + not bool(payload.get("ok", True)) + if isinstance(payload, dict) + else False + ) + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result(payload, is_error=is_error), + } + ) + continue + + # Ignore notifications. + if msg_id is None: + continue + + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + {"ok": False, "summary": f"Unsupported method: {method}"}, + is_error=True, + ), + } + ) + except Exception as e: # noqa: BLE001 + if msg_id is not None: + rpc.write_message( + { + "jsonrpc": "2.0", + "id": msg_id, + "result": _mcp_text_result( + {"ok": False, "summary": f"fatal error: {e}"}, + is_error=True, + ), + } + ) + + +def _shadow_query_repr(tool_name: str, tool_args: Dict[str, Any]) -> str: + if tool_name == "waf_capabilities": + return "List WAF Intelligence capabilities." + try: + return f"{tool_name}: {json.dumps(tool_args, sort_keys=True, default=str)}" + except Exception: + return f"{tool_name}: {str(tool_args)}" + + +def _layer0_payload(routing_action: str, shadow: ShadowEvalResult) -> Dict[str, Any]: + if routing_action == "FAIL_CLOSED": + return {"ok": False, "summary": "Layer 0: cannot comply with this request."} + if routing_action == "HANDOFF_TO_GUARDRAILS": + reason = shadow.reason or "governance_violation" + return { + "ok": False, + "summary": f"Layer 0: governance violation detected ({reason}).", + } + if routing_action == "PROMPT_FOR_CLARIFICATION": + return { + "ok": False, + "summary": "Layer 0: request is ambiguous. 
Please clarify and retry.", + } + return {"ok": False, "summary": "Layer 0: unrecognized routing action; refusing."} + + +if __name__ == "__main__": + main() diff --git a/mcp/waf_intelligence/orchestrator.py b/mcp/waf_intelligence/orchestrator.py index cac7e28..9bb210f 100644 --- a/mcp/waf_intelligence/orchestrator.py +++ b/mcp/waf_intelligence/orchestrator.py @@ -6,27 +6,26 @@ from datetime import datetime from pathlib import Path from typing import Any, Dict, List, Optional -from mcp.waf_intelligence.analyzer import AnalysisResult, RuleViolation, WAFRuleAnalyzer -from mcp.waf_intelligence.compliance import ComplianceMapper, FrameworkMapping -from mcp.waf_intelligence.generator import GeneratedRule, WAFRuleGenerator +from .analyzer import AnalysisResult, RuleViolation, WAFRuleAnalyzer +from .compliance import ComplianceMapper, FrameworkMapping +from .generator import GeneratedRule, WAFRuleGenerator # Optional advanced modules (Phase 7) try: - from mcp.waf_intelligence.threat_intel import ( + from .threat_intel import ( ThreatIntelCollector, ThreatIntelReport, ThreatIndicator, ) + _HAS_THREAT_INTEL = True except ImportError: _HAS_THREAT_INTEL = False ThreatIntelCollector = None try: - from mcp.waf_intelligence.classifier import ( - ThreatClassifier, - ClassificationResult, - ) + from .classifier import ThreatClassifier + _HAS_CLASSIFIER = True except ImportError: _HAS_CLASSIFIER = False @@ -45,14 +44,14 @@ class WAFInsight: @dataclass class ThreatAssessment: """Phase 7: Comprehensive threat assessment result.""" - + analysis_result: Optional[AnalysisResult] = None threat_report: Optional[Any] = None # ThreatIntelReport when available classification_summary: Dict[str, int] = field(default_factory=dict) risk_score: float = 0.0 recommended_actions: List[str] = field(default_factory=list) generated_at: datetime = field(default_factory=datetime.utcnow) - + @property def risk_level(self) -> str: if self.risk_score >= 0.8: @@ -81,22 +80,22 @@ class WAFIntelligence: enable_ml_classifier: bool = True, ) -> None: self.workspace = Path(workspace_path) if workspace_path else Path.cwd() - + # Core components self.analyzer = WAFRuleAnalyzer() self.generator = WAFRuleGenerator() self.mapper = ComplianceMapper() - + # Phase 7 components (optional) self.threat_intel: Optional[Any] = None self.classifier: Optional[Any] = None - + if enable_threat_intel and _HAS_THREAT_INTEL: try: self.threat_intel = ThreatIntelCollector() except Exception: pass - + if enable_ml_classifier and _HAS_CLASSIFIER: try: self.classifier = ThreatClassifier() @@ -149,24 +148,24 @@ class WAFIntelligence: ) -> Optional[Any]: """ Collect threat intelligence from logs and external feeds. - + Args: log_paths: Paths to Cloudflare log files max_indicators: Maximum indicators to collect - + Returns: ThreatIntelReport or None if unavailable """ if not self.threat_intel: return None - + # Default log paths if log_paths is None: log_paths = [ str(self.workspace / "logs"), "/var/log/cloudflare", ] - + return self.threat_intel.collect( log_paths=log_paths, max_indicators=max_indicators, @@ -175,16 +174,16 @@ class WAFIntelligence: def classify_threat(self, payload: str) -> Optional[Any]: """ Classify a payload using ML classifier. - + Args: payload: Request payload to classify - + Returns: ClassificationResult or None """ if not self.classifier: return None - + return self.classifier.classify(payload) def full_assessment( @@ -195,51 +194,52 @@ class WAFIntelligence: ) -> ThreatAssessment: """ Phase 7: Perform comprehensive threat assessment. 
- + Combines: - WAF configuration analysis - Threat intelligence collection - ML classification summary - Risk scoring - + Args: waf_config_path: Path to WAF Terraform file log_paths: Paths to log files include_threat_intel: Whether to collect threat intel - + Returns: ThreatAssessment with full analysis results """ assessment = ThreatAssessment() risk_factors: List[float] = [] recommendations: List[str] = [] - + # 1. Analyze WAF configuration if waf_config_path is None: waf_config_path = str(self.workspace / "terraform" / "waf.tf") - + if Path(waf_config_path).exists(): assessment.analysis_result = self.analyzer.analyze_file( waf_config_path, min_severity="info", ) - + # Calculate risk from violations severity_weights = {"error": 0.8, "warning": 0.5, "info": 0.2} for violation in assessment.analysis_result.violations: weight = severity_weights.get(violation.severity, 0.3) risk_factors.append(weight) - + # Generate recommendations critical_count = sum( - 1 for v in assessment.analysis_result.violations + 1 + for v in assessment.analysis_result.violations if v.severity == "error" ) if critical_count > 0: recommendations.append( f"🔴 Fix {critical_count} critical WAF configuration issues" ) - + # 2. Collect threat intelligence if include_threat_intel and self.threat_intel: try: @@ -247,52 +247,55 @@ class WAFIntelligence: log_paths=log_paths, max_indicators=50, ) - + if assessment.threat_report: indicators = assessment.threat_report.indicators - + # Count by severity severity_counts = {"critical": 0, "high": 0, "medium": 0, "low": 0} for ind in indicators: sev = getattr(ind, "severity", "low") severity_counts[sev] = severity_counts.get(sev, 0) + 1 - + # Add to classification summary - assessment.classification_summary["threat_indicators"] = len(indicators) + assessment.classification_summary["threat_indicators"] = len( + indicators + ) assessment.classification_summary.update(severity_counts) - + # Calculate threat intel risk if indicators: critical_ratio = severity_counts["critical"] / len(indicators) high_ratio = severity_counts["high"] / len(indicators) risk_factors.append(critical_ratio * 0.9 + high_ratio * 0.7) - + if severity_counts["critical"] > 0: recommendations.append( f"🚨 Block {severity_counts['critical']} critical threat IPs immediately" ) except Exception: pass - + # 3. ML classification summary (from any collected data) if self.classifier and assessment.threat_report: try: attack_types = {"sqli": 0, "xss": 0, "rce": 0, "clean": 0, "unknown": 0} - + indicators = assessment.threat_report.indicators pattern_indicators = [ - i for i in indicators + i + for i in indicators if getattr(i, "indicator_type", "") == "pattern" ] - + for ind in pattern_indicators[:20]: # Sample first 20 result = self.classifier.classify(ind.value) if result: label = result.label attack_types[label] = attack_types.get(label, 0) + 1 - + assessment.classification_summary["ml_classifications"] = attack_types - + # Add ML risk factor dangerous = attack_types.get("sqli", 0) + attack_types.get("rce", 0) if dangerous > 5: @@ -302,15 +305,17 @@ class WAFIntelligence: ) except Exception: pass - + # 4. 
Calculate final risk score if risk_factors: - assessment.risk_score = min(1.0, sum(risk_factors) / max(len(risk_factors), 1)) + assessment.risk_score = min( + 1.0, sum(risk_factors) / max(len(risk_factors), 1) + ) else: assessment.risk_score = 0.3 # Baseline risk - + assessment.recommended_actions = recommendations - + return assessment def generate_gitops_proposals( @@ -320,42 +325,44 @@ class WAFIntelligence: ) -> List[Dict[str, Any]]: """ Generate GitOps-ready rule proposals. - + Args: threat_report: ThreatIntelReport to use max_proposals: Maximum proposals to generate - + Returns: List of proposal dicts ready for MR creation """ proposals: List[Dict[str, Any]] = [] - + if not threat_report: return proposals - + try: # Import proposer dynamically from gitops.waf_rule_proposer import WAFRuleProposer - + proposer = WAFRuleProposer(workspace_path=str(self.workspace)) batch = proposer.generate_proposals( threat_report=threat_report, max_proposals=max_proposals, ) - + for proposal in batch.proposals: - proposals.append({ - "name": proposal.rule_name, - "type": proposal.rule_type, - "severity": proposal.severity, - "confidence": proposal.confidence, - "terraform": proposal.terraform_code, - "justification": proposal.justification, - "auto_deploy": proposal.auto_deploy_eligible, - }) + proposals.append( + { + "name": proposal.rule_name, + "type": proposal.rule_type, + "severity": proposal.severity, + "confidence": proposal.confidence, + "terraform": proposal.terraform_code, + "justification": proposal.justification, + "auto_deploy": proposal.auto_deploy_eligible, + } + ) except ImportError: pass - + return proposals @property diff --git a/mcp/waf_intelligence/server.py b/mcp/waf_intelligence/server.py old mode 100755 new mode 100644 index 9edbba8..b1ce232 --- a/mcp/waf_intelligence/server.py +++ b/mcp/waf_intelligence/server.py @@ -1,326 +1,14 @@ #!/usr/bin/env python3 -""" -WAF Intelligence MCP Server for VS Code Copilot. +from __future__ import annotations -This implements the Model Context Protocol (MCP) stdio interface -so VS Code can communicate with your WAF Intelligence system. +""" +Deprecated entrypoint kept for older editor configs. + +Use `python3 -m mcp.waf_intelligence.mcp_server` (or `waf_intel_mcp.py`) instead. 
""" -import json -import sys -from typing import Any - -# Add parent to path for imports -sys.path.insert(0, '/Users/sovereign/Desktop/CLOUDFLARE') - -from mcp.waf_intelligence.orchestrator import WAFIntelligence -from mcp.waf_intelligence.analyzer import WAFRuleAnalyzer -from layer0 import layer0_entry -from layer0.shadow_classifier import ShadowEvalResult - - -class WAFIntelligenceMCPServer: - """MCP Server wrapper for WAF Intelligence.""" - - def __init__(self): - self.waf = WAFIntelligence() - self.analyzer = WAFRuleAnalyzer() - - def get_capabilities(self) -> dict: - """Return server capabilities.""" - return { - "tools": [ - { - "name": "waf_analyze", - "description": "Analyze WAF logs and detect attack patterns", - "inputSchema": { - "type": "object", - "properties": { - "log_file": { - "type": "string", - "description": "Path to WAF log file (optional)" - }, - "zone_id": { - "type": "string", - "description": "Cloudflare zone ID (optional)" - } - } - } - }, - { - "name": "waf_assess", - "description": "Run full security assessment with threat intel and ML classification", - "inputSchema": { - "type": "object", - "properties": { - "zone_id": { - "type": "string", - "description": "Cloudflare zone ID" - } - }, - "required": ["zone_id"] - } - }, - { - "name": "waf_generate_rules", - "description": "Generate Terraform WAF rules from threat intelligence", - "inputSchema": { - "type": "object", - "properties": { - "zone_id": { - "type": "string", - "description": "Cloudflare zone ID" - }, - "min_confidence": { - "type": "number", - "description": "Minimum confidence threshold (0-1)", - "default": 0.7 - } - }, - "required": ["zone_id"] - } - }, - { - "name": "waf_capabilities", - "description": "List available WAF Intelligence capabilities", - "inputSchema": { - "type": "object", - "properties": {} - } - } - ] - } - - def handle_tool_call(self, name: str, arguments: dict) -> dict: - """Handle a tool invocation.""" - try: - if name == "waf_capabilities": - return { - "content": [ - { - "type": "text", - "text": json.dumps({ - "capabilities": self.waf.capabilities, - "status": "operational" - }, indent=2) - } - ] - } - - elif name == "waf_analyze": - log_file = arguments.get("log_file") - zone_id = arguments.get("zone_id") - - if log_file: - result = self.analyzer.analyze_log_file(log_file) - else: - result = { - "message": "No log file provided. 
Use zone_id for live analysis.", - "capabilities": self.waf.capabilities - } - - return { - "content": [ - {"type": "text", "text": json.dumps(result, indent=2, default=str)} - ] - } - - elif name == "waf_assess": - zone_id = arguments.get("zone_id") - # full_assessment uses workspace paths, not zone_id - assessment = self.waf.full_assessment( - include_threat_intel=True - ) - # Build result from ThreatAssessment dataclass - result = { - "zone_id": zone_id, - "risk_score": assessment.risk_score, - "risk_level": assessment.risk_level, - "classification_summary": assessment.classification_summary, - "recommended_actions": assessment.recommended_actions[:10], # Top 10 - "has_analysis": assessment.analysis_result is not None, - "has_threat_intel": assessment.threat_report is not None, - "generated_at": str(assessment.generated_at) - } - - return { - "content": [ - {"type": "text", "text": json.dumps(result, indent=2, default=str)} - ] - } - - elif name == "waf_generate_rules": - zone_id = arguments.get("zone_id") - min_confidence = arguments.get("min_confidence", 0.7) - - # Generate proposals (doesn't use zone_id directly) - proposals = self.waf.generate_gitops_proposals( - max_proposals=5 - ) - - result = { - "zone_id": zone_id, - "min_confidence": min_confidence, - "proposals_count": len(proposals), - "proposals": proposals - } - - return { - "content": [ - {"type": "text", "text": json.dumps(result, indent=2, default=str) if proposals else "No rules generated (no threat data available)"} - ] - } - - else: - return { - "content": [ - {"type": "text", "text": f"Unknown tool: {name}"} - ], - "isError": True - } - - except Exception as e: - return { - "content": [ - {"type": "text", "text": f"Error: {str(e)}"} - ], - "isError": True - } - - def run(self): - """Run the MCP server (stdio mode).""" - # Send server info - server_info = { - "jsonrpc": "2.0", - "method": "initialized", - "params": { - "serverInfo": { - "name": "waf-intelligence", - "version": "1.0.0" - }, - "capabilities": self.get_capabilities() - } - } - - # Main loop - read JSON-RPC messages from stdin - for line in sys.stdin: - try: - message = json.loads(line.strip()) - - if message.get("method") == "initialize": - response = { - "jsonrpc": "2.0", - "id": message.get("id"), - "result": { - "protocolVersion": "2024-11-05", - "serverInfo": { - "name": "waf-intelligence", - "version": "1.0.0" - }, - "capabilities": { - "tools": {} - } - } - } - print(json.dumps(response), flush=True) - - elif message.get("method") == "tools/list": - response = { - "jsonrpc": "2.0", - "id": message.get("id"), - "result": self.get_capabilities() - } - print(json.dumps(response), flush=True) - - elif message.get("method") == "tools/call": - params = message.get("params", {}) - tool_name = params.get("name") - tool_args = params.get("arguments", {}) - - # Layer 0: pre-boot Shadow Eval gate before handling tool calls. 
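# Illustrative smoke-test client for the replacement stdio server added earlier in
# this patch (mcp_server.py): its StdioJsonRpc reader auto-detects newline-delimited
# JSON-RPC when the first stdin line starts with "{", so no Content-Length framing
# is needed. This is a sketch, not part of the patch; the launch command assumes the
# module resolves as `mcp.waf_intelligence.mcp_server` (per the deprecation note in
# server.py); adjust PYTHONPATH if the package is nested differently.
import json
import subprocess

proc = subprocess.Popen(
    ["python3", "-m", "mcp.waf_intelligence.mcp_server"],
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    text=True,
)

def call(method, msg_id, params=None):
    # One JSON-RPC request per line; the server answers one line per request.
    request = {"jsonrpc": "2.0", "id": msg_id, "method": method, "params": params or {}}
    proc.stdin.write(json.dumps(request) + "\n")
    proc.stdin.flush()
    return json.loads(proc.stdout.readline())

print(call("initialize", 1)["result"]["serverInfo"])
print([tool["name"] for tool in call("tools/list", 2)["result"]["tools"]])
proc.terminate()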
- routing_action, shadow = layer0_entry(_shadow_query_repr(tool_name, tool_args)) - if routing_action != "HANDOFF_TO_LAYER1": - response = _layer0_mcp_response(routing_action, shadow, message.get("id")) - print(json.dumps(response), flush=True) - continue - - result = self.handle_tool_call(tool_name, tool_args) - - response = { - "jsonrpc": "2.0", - "id": message.get("id"), - "result": result - } - print(json.dumps(response), flush=True) - - elif message.get("method") == "notifications/initialized": - # Client acknowledged initialization - pass - - else: - # Unknown method - response = { - "jsonrpc": "2.0", - "id": message.get("id"), - "error": { - "code": -32601, - "message": f"Method not found: {message.get('method')}" - } - } - print(json.dumps(response), flush=True) - - except json.JSONDecodeError: - continue - except Exception as e: - error_response = { - "jsonrpc": "2.0", - "id": None, - "error": { - "code": -32603, - "message": str(e) - } - } - print(json.dumps(error_response), flush=True) - +from .mcp_server import main if __name__ == "__main__": - server = WAFIntelligenceMCPServer() - server.run() + main() - -def _shadow_query_repr(tool_name: str, tool_args: dict) -> str: - """Build a textual representation of the tool call for Layer 0 classification.""" - try: - return f"{tool_name}: {json.dumps(tool_args, sort_keys=True)}" - except TypeError: - return f"{tool_name}: {str(tool_args)}" - - -def _layer0_mcp_response(routing_action: str, shadow: ShadowEvalResult, msg_id: Any) -> dict: - """ - Map Layer 0 outcomes to MCP responses. - Catastrophic/forbidden/ambiguous short-circuit with minimal disclosure. - """ - base = {"jsonrpc": "2.0", "id": msg_id} - - if routing_action == "FAIL_CLOSED": - base["error"] = {"code": -32000, "message": "Layer 0: cannot comply with this request."} - return base - - if routing_action == "HANDOFF_TO_GUARDRAILS": - reason = shadow.reason or "governance_violation" - base["error"] = { - "code": -32001, - "message": f"Layer 0: governance violation detected ({reason}).", - } - return base - - if routing_action == "PROMPT_FOR_CLARIFICATION": - base["error"] = { - "code": -32002, - "message": "Layer 0: request is ambiguous. 
Please clarify and retry.", - } - return base - - base["error"] = {"code": -32099, "message": "Layer 0: unrecognized routing action; refusing."} - return base diff --git a/opencode.jsonc b/opencode.jsonc index faac715..03ae9ff 100644 --- a/opencode.jsonc +++ b/opencode.jsonc @@ -2,92 +2,92 @@ "$schema": "https://opencode.ai/config.json", "mcp": { // Popular open-source MCP servers - + // File system operations "filesystem": { "type": "local", - "command": ["npx", "-y", "@modelcontextprotocol/server-filesystem"], + "command": ["npx", "-y", "@modelcontextprotocol/server-filesystem", "."], "environment": { - "HOME": "{env:HOME}" + "HOME": "{env:HOME}", }, - "enabled": true + "enabled": true, }, - + // Git operations "git": { "type": "local", "command": ["npx", "-y", "@modelcontextprotocol/server-git"], - "enabled": true + "enabled": false, }, - + // GitHub integration "github": { "type": "local", "command": ["npx", "-y", "@modelcontextprotocol/server-github"], "environment": { - "GITHUB_PERSONAL_ACCESS_TOKEN": "{env:GITHUB_TOKEN}" + "GITHUB_PERSONAL_ACCESS_TOKEN": "{env:GITHUB_TOKEN}", }, - "enabled": true + "enabled": true, }, - + // Postgres database "postgres": { "type": "local", "command": ["npx", "-y", "@modelcontextprotocol/server-postgres"], "environment": { - "DATABASE_URL": "{env:DATABASE_URL}" + "DATABASE_URL": "{env:DATABASE_URL}", }, - "enabled": false + "enabled": false, }, - + // SQLite database "sqlite": { "type": "local", "command": ["npx", "-y", "@modelcontextprotocol/server-sqlite"], - "enabled": false + "enabled": false, }, - + // Docker integration "docker": { "type": "local", "command": ["npx", "-y", "@modelcontextprotocol/server-docker"], - "enabled": false + "enabled": false, }, - + // Web scraping "web-scraper": { "type": "local", "command": ["npx", "-y", "web-scraper-mcp"], - "enabled": false + "enabled": false, }, - + // Google Maps integration "googlemaps": { "type": "local", "command": ["npx", "-y", "@modelcontextprotocol/server-google-maps"], "environment": { - "GOOGLE_MAPS_API_KEY": "{env:GOOGLE_MAPS_API_KEY}" + "GOOGLE_MAPS_API_KEY": "{env:GOOGLE_MAPS_API_KEY}", }, - "enabled": false + "enabled": false, }, - + // Slack integration "slack": { "type": "local", "command": ["npx", "-y", "@modelcontextprotocol/server-slack"], "environment": { - "SLACK_BOT_TOKEN": "{env:SLACK_BOT_TOKEN}" + "SLACK_BOT_TOKEN": "{env:SLACK_BOT_TOKEN}", }, - "enabled": false + "enabled": false, }, - + // Memory/knowledge base "memory": { "type": "local", "command": ["npx", "-y", "@modelcontextprotocol/server-memory"], - "enabled": false + "enabled": false, }, - + // AWS integration "aws": { "type": "local", @@ -95,73 +95,80 @@ "environment": { "AWS_ACCESS_KEY_ID": "{env:AWS_ACCESS_KEY_ID}", "AWS_SECRET_ACCESS_KEY": "{env:AWS_SECRET_ACCESS_KEY}", - "AWS_REGION": "{env:AWS_REGION}" + "AWS_REGION": "{env:AWS_REGION}", }, - "enabled": false + "enabled": false, }, - + // Linear integration "linear": { "type": "local", "command": ["npx", "-y", "@modelcontextprotocol/server-linear"], "environment": { - "LINEAR_API_KEY": "{env:LINEAR_API_KEY}" + "LINEAR_API_KEY": "{env:LINEAR_API_KEY}", }, - "enabled": false + "enabled": false, }, - + // Knowledge search via Context7 "context7": { "type": "remote", "url": "https://mcp.context7.com/mcp", "headers": { - "CONTEXT7_API_KEY": "{env:CONTEXT7_API_KEY}" + "CONTEXT7_API_KEY": "{env:CONTEXT7_API_KEY}", }, - "enabled": false + "enabled": false, }, - + // GitHub code search via Grep "gh_grep": { "type": "remote", "url": "https://mcp.grep.app", - "enabled": 
true + "enabled": true, }, // WAF intelligence orchestrator "waf_intel": { "type": "local", - "command": ["python3", "waf_intel_mcp.py"], + "command": ["/bin/bash", "/Users/sovereign/work-core/.secret/mcp/waf_intelligence.sh"], "enabled": true, - "timeout": 300000 + "timeout": 300000, }, - + // GitLab integration "gitlab": { "type": "local", - "command": ["npx", "-y", "@modelcontextprotocol/server-gitlab"], - "environment": { - "GITLAB_TOKEN": "{env:GITLAB_TOKEN}", - "GITLAB_URL": "{env:GITLAB_URL:https://gitlab.com}" - }, - "enabled": false + "command": ["/opt/homebrew/bin/python3", "-u", "/Users/sovereign/work-core/.secret/gitlab_mcp_opencode_proxy.py"], + "enabled": true, }, - + // Cloudflare API integration "cloudflare": { "type": "local", - "command": ["npx", "-y", "@modelcontextprotocol/server-cloudflare"], + "command": ["/bin/bash", "/Users/sovereign/work-core/.secret/mcp_cloudflare_safe.sh"], "environment": { "CLOUDFLARE_API_TOKEN": "{env:CLOUDFLARE_API_TOKEN}", - "CLOUDFLARE_ACCOUNT_ID": "{env:CLOUDFLARE_ACCOUNT_ID}" + "CLOUDFLARE_ACCOUNT_ID": "{env:CLOUDFLARE_ACCOUNT_ID}", }, - "enabled": false + "enabled": true, }, - + + // Akash docs + SDL helpers (read-only; no wallet/key handling) + "akash_docs": { + "type": "local", + "command": ["python3", "-m", "cloudflare.mcp.akash_docs"], + "environment": { + "PYTHONPATH": "/Users/sovereign/work-core" + }, + "enabled": false, + "timeout": 300000, + }, + // Test server (remove in production) "test_everything": { "type": "local", "command": ["npx", "-y", "@modelcontextprotocol/server-everything"], - "enabled": false - } - } + "enabled": false, + }, + }, } diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..cb2b4f9 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1 @@ +pytest>=8.0.0,<9 diff --git a/scripts/deploy_infrastructure.sh b/scripts/deploy_infrastructure.sh new file mode 100644 index 0000000..bd14079 --- /dev/null +++ b/scripts/deploy_infrastructure.sh @@ -0,0 +1,308 @@ +#!/bin/bash + +# Cloudflare Infrastructure Deployment Automation +# Automated Terraform deployment with safety checks and rollback capabilities + +set -e + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Configuration +TERRAFORM_DIR="terraform" +BACKUP_DIR="terraform_backups" +STATE_FILE="terraform.tfstate" +PLAN_FILE="deployment_plan.tfplan" +LOG_FILE="deployment_$(date +%Y%m%d_%H%M%S).log" + +# Function to log messages +log() { + echo -e "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1" | tee -a "$LOG_FILE" +} + +# Function to log success +success() { + echo -e "${GREEN}✅ $1${NC}" | tee -a "$LOG_FILE" +} + +# Function to log warning +warning() { + echo -e "${YELLOW}⚠️ $1${NC}" | tee -a "$LOG_FILE" +} + +# Function to log error +error() { + echo -e "${RED}❌ $1${NC}" | tee -a "$LOG_FILE" + exit 1 +} + +# Function to check prerequisites +check_prerequisites() { + log "Checking prerequisites..." + + # Check if .env file exists + if [[ ! -f "../.env" ]]; then + error "Missing .env file. Run setup_credentials.sh first." + fi + + # Source environment variables + source "../.env" + + # Check required variables + if [[ -z "$CLOUDFLARE_API_TOKEN" ]]; then + error "CLOUDFLARE_API_TOKEN not set in .env" + fi + + if [[ -z "$CLOUDFLARE_ACCOUNT_ID" ]]; then + error "CLOUDFLARE_ACCOUNT_ID not set in .env" + fi + + # Check Terraform installation + if ! command -v terraform &> /dev/null; then + error "Terraform not found. Please install Terraform first." 
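# Sketch only, not part of the original script: show_plan_summary below pipes
# `terraform show -json` through jq, so a jq presence check at this point in
# check_prerequisites would fail fast instead of mid-deployment. The snippet reuses
# the error() helper defined above.
if ! command -v jq &> /dev/null; then
    error "jq not found. Please install jq (used to render plan summaries)."
fi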
+ fi + + # Check Terraform version + TF_VERSION=$(terraform version | head -n1 | awk '{print $2}' | sed 's/v//') + log "Terraform version: $TF_VERSION" + + success "Prerequisites check passed" +} + +# Function to backup current state +backup_state() { + log "Creating backup of current state..." + + # Create backup directory + mkdir -p "$BACKUP_DIR" + + # Backup state file if it exists + if [[ -f "$STATE_FILE" ]]; then + BACKUP_NAME="${BACKUP_DIR}/state_backup_$(date +%Y%m%d_%H%M%S).tfstate" + cp "$STATE_FILE" "$BACKUP_NAME" + success "State backed up to: $BACKUP_NAME" + else + warning "No existing state file found" + fi + + # Backup terraform.tfvars + if [[ -f "terraform.tfvars" ]]; then + cp "terraform.tfvars" "${BACKUP_DIR}/terraform.tfvars.backup" + fi +} + +# Function to prepare terraform.tfvars +prepare_config() { + log "Preparing Terraform configuration..." + + # Update terraform.tfvars with actual credentials + cat > terraform.tfvars << EOF +cloudflare_api_token = "$CLOUDFLARE_API_TOKEN" +cloudflare_account_id = "$CLOUDFLARE_ACCOUNT_ID" +cloudflare_account_name = "" # Use account_id from .env +EOF + + # Add optional Zone ID if set + if [[ -n "$CLOUDFLARE_ZONE_ID" ]]; then + echo "cloudflare_zone_id = \"$CLOUDFLARE_ZONE_ID\"" >> terraform.tfvars + fi + + success "Configuration prepared" +} + +# Function to initialize Terraform +init_terraform() { + log "Initializing Terraform..." + + if terraform init -upgrade; then + success "Terraform initialized successfully" + else + error "Terraform initialization failed" + fi +} + +# Function to validate Terraform configuration +validate_config() { + log "Validating Terraform configuration..." + + if terraform validate; then + success "Configuration validation passed" + else + error "Configuration validation failed" + fi +} + +# Function to create deployment plan +create_plan() { + log "Creating deployment plan..." + + if terraform plan -out="$PLAN_FILE" -detailed-exitcode; then + case $? in + 0) + success "No changes needed" + return 0 + ;; + 2) + success "Plan created successfully" + return 2 + ;; + *) + error "Plan creation failed" + ;; + esac + else + error "Plan creation failed" + fi +} + +# Function to show plan summary +show_plan_summary() { + log "Plan Summary:" + terraform show -json "$PLAN_FILE" | jq -r ' + .resource_changes[] | + select(.change.actions != ["no-op"]) | + "\(.change.actions | join(",")) \(.type).\(.name)" + ' | sort | tee -a "$LOG_FILE" +} + +# Function to confirm deployment +confirm_deployment() { + echo + echo "==================================================" + echo "🚀 DEPLOYMENT CONFIRMATION" + echo "==================================================" + echo + echo "The following changes will be applied:" + show_plan_summary + echo + echo "Log file: $LOG_FILE" + echo "Backup directory: $BACKUP_DIR" + echo + read -p "Do you want to proceed with deployment? (y/n): " -n 1 -r + echo + + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + log "Deployment cancelled by user" + exit 0 + fi +} + +# Function to apply deployment +apply_deployment() { + log "Applying deployment..." + + if terraform apply "$PLAN_FILE"; then + success "Deployment applied successfully" + else + error "Deployment failed" + fi +} + +# Function to verify deployment +verify_deployment() { + log "Verifying deployment..." 
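# Sketch, not part of the original script: `terraform plan -detailed-exitcode`
# exits 0 for "no changes", 1 on error and 2 for "changes present". In create_plan
# above, `if terraform plan ...; then case $? in` only enters the then-branch on
# exit 0, so the `2)` arm never fires and a plan that does contain changes aborts
# via error() instead of prompting for confirmation; the `case $?` in main() is
# likewise always 0 inside its then-branch. One self-contained variant (the name
# create_plan_detailed and variable plan_rc are illustrative; log/success/error
# and $PLAN_FILE come from the script above):
create_plan_detailed() {
    log "Creating deployment plan..."
    local plan_rc=0
    # The `|| plan_rc=$?` capture keeps `set -e` and the ERR trap from treating
    # exit code 2 (changes present) as a failure.
    terraform plan -out="$PLAN_FILE" -detailed-exitcode || plan_rc=$?
    case $plan_rc in
        0) success "No changes needed"; return 0 ;;
        2) success "Plan created successfully"; return 2 ;;
        *) error "Plan creation failed" ;;
    esac
}
# Callers would capture the return the same way, e.g.:
#   plan_rc=0; create_plan_detailed || plan_rc=$?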
+ + # Check if resources were created successfully + OUTPUTS=$(terraform output -json) + + if [[ -n "$OUTPUTS" ]]; then + success "Deployment verification passed" + echo "Outputs:" + terraform output + else + warning "No outputs generated - manual verification required" + fi +} + +# Function to cleanup temporary files +cleanup() { + log "Cleaning up temporary files..." + + if [[ -f "$PLAN_FILE" ]]; then + rm "$PLAN_FILE" + success "Plan file removed" + fi +} + +# Function to show deployment summary +deployment_summary() { + echo + echo "==================================================" + echo "🎉 DEPLOYMENT SUMMARY" + echo "==================================================" + echo + echo "✅ Infrastructure deployed successfully" + echo "📋 Log file: $LOG_FILE" + echo "💾 Backups: $BACKUP_DIR" + echo "🌐 Resources deployed:" + terraform state list + echo + echo "Next steps:" + echo "1. Check Cloudflare dashboard for deployed resources" + echo "2. Test DNS resolution for your domains" + echo "3. Verify WAF rules are active" + echo "4. Test tunnel connectivity" + echo +} + +# Function to handle rollback +rollback() { + error "Deployment failed - rolling back..." + + # Check if we have a backup + LATEST_BACKUP=$(ls -t "${BACKUP_DIR}/state_backup_*.tfstate" 2>/dev/null | head -n1) + + if [[ -n "$LATEST_BACKUP" ]]; then + log "Restoring from backup: $LATEST_BACKUP" + cp "$LATEST_BACKUP" "$STATE_FILE" + warning "State restored from backup. Manual verification required." + else + error "No backup available for rollback" + fi +} + +# Main deployment function +main() { + echo "🚀 Cloudflare Infrastructure Deployment" + echo "==================================================" + echo + + # Change to Terraform directory + cd "$TERRAFORM_DIR" || error "Terraform directory not found" + + # Set trap for cleanup on exit + trap cleanup EXIT + + # Execute deployment steps + check_prerequisites + backup_state + prepare_config + init_terraform + validate_config + + # Create plan and check if changes are needed + if create_plan; then + case $? 
in + 0) + success "No changes needed - infrastructure is up to date" + exit 0 + ;; + 2) + confirm_deployment + apply_deployment + verify_deployment + deployment_summary + ;; + esac + fi +} + +# Handle errors +trap 'rollback' ERR + +# Run main function +main "$@" \ No newline at end of file diff --git a/scripts/incident_response_playbooks.py b/scripts/incident_response_playbooks.py new file mode 100644 index 0000000..3be34f8 --- /dev/null +++ b/scripts/incident_response_playbooks.py @@ -0,0 +1,421 @@ +#!/usr/bin/env python3 +""" +Cloudflare Incident Response Playbooks +Standardized procedures for common infrastructure incidents +""" + +from enum import Enum +from typing import Dict, List, Optional +from dataclasses import dataclass +from datetime import datetime + + +class IncidentSeverity(str, Enum): + """Incident severity levels""" + + LOW = "low" + MEDIUM = "medium" + HIGH = "high" + CRITICAL = "critical" + + +class IncidentType(str, Enum): + """Types of infrastructure incidents""" + + DNS_OUTAGE = "dns_outage" + WAF_BYPASS = "waf_bypass" + TUNNEL_FAILURE = "tunnel_failure" + SECURITY_BREACH = "security_breach" + CONFIGURATION_ERROR = "configuration_error" + PERFORMANCE_DEGRADATION = "performance_degradation" + + +@dataclass +class IncidentResponse: + """Incident response procedure""" + + incident_type: IncidentType + severity: IncidentSeverity + immediate_actions: List[str] + investigation_steps: List[str] + recovery_procedures: List[str] + prevention_measures: List[str] + escalation_path: List[str] + time_to_resolve: str + + +class IncidentResponsePlaybook: + """Collection of incident response playbooks""" + + def __init__(self): + self.playbooks = self._initialize_playbooks() + + def _initialize_playbooks(self) -> Dict[IncidentType, IncidentResponse]: + """Initialize all incident response playbooks""" + return { + IncidentType.DNS_OUTAGE: IncidentResponse( + incident_type=IncidentType.DNS_OUTAGE, + severity=IncidentSeverity.HIGH, + immediate_actions=[ + "Verify DNS resolution using external tools (dig, nslookup)", + "Check Cloudflare DNS dashboard for zone status", + "Review recent DNS changes in version control", + "Verify origin server connectivity", + "Check Cloudflare status page for service issues", + ], + investigation_steps=[ + "Examine DNS record changes in Git history", + "Check Terraform state for unexpected modifications", + "Review Cloudflare audit logs for recent changes", + "Verify DNS propagation using multiple geographic locations", + "Check for DNSSEC configuration issues", + ], + recovery_procedures=[ + "Rollback recent DNS changes using Terraform", + "Manually restore critical DNS records if needed", + "Update TTL values for faster propagation", + "Contact Cloudflare support if service-related", + "Implement traffic rerouting if necessary", + ], + prevention_measures=[ + "Implement DNS change approval workflows", + "Use Terraform plan/apply with peer review", + "Monitor DNS resolution from multiple locations", + "Implement automated DNS health checks", + "Maintain backup DNS configurations", + ], + escalation_path=[ + "Primary DNS Administrator", + "Infrastructure Team Lead", + "Cloudflare Support", + "Security Team", + ], + time_to_resolve="1-4 hours", + ), + IncidentType.WAF_BYPASS: IncidentResponse( + incident_type=IncidentType.WAF_BYPASS, + severity=IncidentSeverity.CRITICAL, + immediate_actions=[ + "Immediately review WAF event logs for suspicious activity", + "Check for recent WAF rule modifications", + "Verify WAF rule package status and mode", + "Temporarily 
block suspicious IP addresses", + "Enable challenge mode for suspicious traffic patterns", + ], + investigation_steps=[ + "Analyze WAF rule changes in version control", + "Review Cloudflare firewall event logs", + "Check for anomalous traffic patterns", + "Verify WAF rule effectiveness using test payloads", + "Examine rate limiting and threat score thresholds", + ], + recovery_procedures=[ + "Rollback WAF rule changes to known good state", + "Implement emergency WAF rules to block attack patterns", + "Update threat intelligence feeds", + "Increase security level for affected zones", + "Deploy additional security measures (Bot Fight Mode, etc.)", + ], + prevention_measures=[ + "Implement WAF change approval workflows", + "Regular security testing of WAF rules", + "Monitor WAF event logs for anomalies", + "Implement automated WAF rule validation", + "Regular security awareness training", + ], + escalation_path=[ + "Security Incident Response Team", + "WAF Administrator", + "Infrastructure Security Lead", + "CISO/Management", + ], + time_to_resolve="2-6 hours", + ), + IncidentType.TUNNEL_FAILURE: IncidentResponse( + incident_type=IncidentType.TUNNEL_FAILURE, + severity=IncidentSeverity.MEDIUM, + immediate_actions=[ + "Check Cloudflare Tunnel status and connectivity", + "Verify origin server availability and configuration", + "Check tunnel connector logs for errors", + "Restart tunnel connector service if needed", + "Verify DNS records point to correct tunnel endpoints", + ], + investigation_steps=[ + "Review recent tunnel configuration changes", + "Check network connectivity between connector and Cloudflare", + "Examine tunnel connector resource usage", + "Verify certificate validity and renewal status", + "Check for firewall/network policy changes", + ], + recovery_procedures=[ + "Restart tunnel connector with updated configuration", + "Rollback recent tunnel configuration changes", + "Recreate tunnel connector if necessary", + "Update DNS records to alternative endpoints", + "Implement traffic failover mechanisms", + ], + prevention_measures=[ + "Implement tunnel health monitoring", + "Use redundant tunnel configurations", + "Regular tunnel connector updates and maintenance", + "Monitor certificate expiration dates", + "Implement automated tunnel failover", + ], + escalation_path=[ + "Network Administrator", + "Infrastructure Team", + "Cloudflare Support", + "Security Team", + ], + time_to_resolve="1-3 hours", + ), + IncidentType.SECURITY_BREACH: IncidentResponse( + incident_type=IncidentType.SECURITY_BREACH, + severity=IncidentSeverity.CRITICAL, + immediate_actions=[ + "Isolate affected systems and services immediately", + "Preserve logs and evidence for forensic analysis", + "Change all relevant credentials and API tokens", + "Notify security incident response team", + "Implement emergency security controls", + ], + investigation_steps=[ + "Conduct forensic analysis of compromised systems", + "Review Cloudflare audit logs for unauthorized access", + "Check for API token misuse or unauthorized changes", + "Examine DNS/WAF/Tunnel configuration changes", + "Coordinate with legal and compliance teams", + ], + recovery_procedures=[ + "Rotate all Cloudflare API tokens and credentials", + "Restore configurations from verified backups", + "Implement enhanced security monitoring", + "Conduct post-incident security assessment", + "Update incident response procedures based on lessons learned", + ], + prevention_measures=[ + "Implement multi-factor authentication", + "Regular security audits and 
penetration testing", + "Monitor for suspicious API activity", + "Implement least privilege access controls", + "Regular security awareness training", + ], + escalation_path=[ + "Security Incident Response Team", + "CISO/Management", + "Legal Department", + "External Security Consultants", + ], + time_to_resolve="4-24 hours", + ), + IncidentType.CONFIGURATION_ERROR: IncidentResponse( + incident_type=IncidentType.CONFIGURATION_ERROR, + severity=IncidentSeverity.MEDIUM, + immediate_actions=[ + "Identify the specific configuration error", + "Assess impact on services and users", + "Check version control for recent changes", + "Verify Terraform plan output for unexpected changes", + "Communicate status to stakeholders", + ], + investigation_steps=[ + "Review Git commit history for configuration changes", + "Examine Terraform state differences", + "Check Cloudflare configuration against documented standards", + "Verify configuration consistency across environments", + "Identify root cause of configuration error", + ], + recovery_procedures=[ + "Rollback configuration using Terraform", + "Apply corrected configuration changes", + "Verify service restoration and functionality", + "Update configuration documentation", + "Implement configuration validation checks", + ], + prevention_measures=[ + "Implement configuration change approval workflows", + "Use infrastructure as code with peer review", + "Implement automated configuration validation", + "Regular configuration audits", + "Maintain configuration documentation", + ], + escalation_path=[ + "Configuration Administrator", + "Infrastructure Team Lead", + "Quality Assurance Team", + "Management", + ], + time_to_resolve="1-4 hours", + ), + IncidentType.PERFORMANCE_DEGRADATION: IncidentResponse( + incident_type=IncidentType.PERFORMANCE_DEGRADATION, + severity=IncidentSeverity.LOW, + immediate_actions=[ + "Monitor performance metrics and identify bottlenecks", + "Check Cloudflare analytics for traffic patterns", + "Verify origin server performance and resource usage", + "Review recent configuration changes", + "Implement temporary performance optimizations", + ], + investigation_steps=[ + "Analyze performance metrics over time", + "Check for DDoS attacks or abnormal traffic patterns", + "Review caching configuration and hit rates", + "Examine origin server response times", + "Identify specific performance bottlenecks", + ], + recovery_procedures=[ + "Optimize caching configuration", + "Adjust performance settings (Polish, Mirage, etc.)", + "Implement rate limiting if under attack", + "Scale origin server resources if needed", + "Update CDN configuration for better performance", + ], + prevention_measures=[ + "Implement performance monitoring and alerting", + "Regular performance testing and optimization", + "Capacity planning and resource forecasting", + "Implement automated scaling mechanisms", + "Regular performance reviews and optimizations", + ], + escalation_path=[ + "Performance Monitoring Team", + "Infrastructure Team", + "Application Development Team", + "Management", + ], + time_to_resolve="2-8 hours", + ), + } + + def get_playbook(self, incident_type: IncidentType) -> Optional[IncidentResponse]: + """Get the playbook for a specific incident type""" + return self.playbooks.get(incident_type) + + def list_playbooks(self) -> List[IncidentType]: + """List all available playbooks""" + return list(self.playbooks.keys()) + + def execute_playbook( + self, incident_type: IncidentType, custom_context: Optional[Dict] = None + ) -> Dict: + """Execute 
a specific incident response playbook""" + playbook = self.get_playbook(incident_type) + + if not playbook: + return {"error": f"No playbook found for incident type: {incident_type}"} + + execution_log = { + "incident_type": incident_type.value, + "severity": playbook.severity.value, + "start_time": datetime.now().isoformat(), + "steps_completed": [], + "custom_context": custom_context or {}, + } + + # Simulate execution (in real implementation, this would trigger actual actions) + execution_log["steps_completed"].extend( + [ + f"Initiated {incident_type.value} response procedure", + f"Severity level: {playbook.severity.value}", + "Notified escalation path contacts", + ] + ) + + execution_log["estimated_resolution_time"] = playbook.time_to_resolve + execution_log["completion_status"] = "in_progress" + + return execution_log + + +def main(): + """Command-line interface for incident response playbooks""" + import argparse + + parser = argparse.ArgumentParser( + description="Cloudflare Incident Response Playbooks" + ) + parser.add_argument( + "action", choices=["list", "show", "execute"], help="Action to perform" + ) + parser.add_argument( + "--type", choices=[t.value for t in IncidentType], help="Incident type" + ) + + args = parser.parse_args() + + playbook_manager = IncidentResponsePlaybook() + + if args.action == "list": + print("📋 Available Incident Response Playbooks:") + print("-" * 50) + for incident_type in playbook_manager.list_playbooks(): + playbook = playbook_manager.get_playbook(incident_type) + if not playbook: + continue + + print(f"🔸 {incident_type.value}") + print(f" Severity: {playbook.severity.value}") + print(f" Resolution Time: {playbook.time_to_resolve}") + print() + + elif args.action == "show": + if not args.type: + print("❌ Error: --type argument required") + return + + try: + incident_type = IncidentType(args.type) + except ValueError: + print(f"❌ Error: Invalid incident type: {args.type}") + return + + playbook = playbook_manager.get_playbook(incident_type) + if not playbook: + print(f"❌ Error: No playbook found for {args.type}") + return + + print(f"🔍 Incident Response Playbook: {incident_type.value}") + print("=" * 60) + print(f"Severity: {playbook.severity.value}") + print(f"Estimated Resolution: {playbook.time_to_resolve}") + + print("\n🚨 Immediate Actions:") + for i, action in enumerate(playbook.immediate_actions, 1): + print(f" {i}. {action}") + + print("\n🔍 Investigation Steps:") + for i, step in enumerate(playbook.investigation_steps, 1): + print(f" {i}. {step}") + + print("\n🔄 Recovery Procedures:") + for i, procedure in enumerate(playbook.recovery_procedures, 1): + print(f" {i}. {procedure}") + + print("\n🛡️ Prevention Measures:") + for i, measure in enumerate(playbook.prevention_measures, 1): + print(f" {i}. {measure}") + + print("\n📞 Escalation Path:") + for i, contact in enumerate(playbook.escalation_path, 1): + print(f" {i}. 
{contact}") + + elif args.action == "execute": + if not args.type: + print("❌ Error: --type argument required") + return + + try: + incident_type = IncidentType(args.type) + except ValueError: + print(f"❌ Error: Invalid incident type: {args.type}") + return + + result = playbook_manager.execute_playbook(incident_type) + print(f"🚀 Executing {incident_type.value} Incident Response") + print(f"📊 Result: {result}") + + +if __name__ == "__main__": + main() diff --git a/scripts/monitoring_dashboard.py b/scripts/monitoring_dashboard.py new file mode 100644 index 0000000..98d95ee --- /dev/null +++ b/scripts/monitoring_dashboard.py @@ -0,0 +1,260 @@ +#!/usr/bin/env python3 +""" +Cloudflare Infrastructure Monitoring Dashboard +Provides real-time monitoring of Cloudflare resources and services +""" + +import os +import json +import time +import requests +from datetime import datetime, timedelta +from typing import Dict, List, Any + + +class CloudflareMonitor: + def __init__(self): + self.base_url = "https://api.cloudflare.com/client/v4" + self.headers = { + "Authorization": f"Bearer {os.getenv('CLOUDFLARE_API_TOKEN')}", + "Content-Type": "application/json", + } + self.account_id = os.getenv("CLOUDFLARE_ACCOUNT_ID") + + if not self.account_id or not os.getenv("CLOUDFLARE_API_TOKEN"): + raise ValueError("Missing Cloudflare credentials in environment") + + def make_request(self, endpoint: str) -> Dict[str, Any]: + """Make API request with error handling""" + url = f"{self.base_url}{endpoint}" + try: + response = requests.get(url, headers=self.headers, timeout=10) + response.raise_for_status() + return response.json() + except requests.RequestException as e: + return {"success": False, "errors": [str(e)]} + + def get_account_info(self) -> Dict[str, Any]: + """Get account information""" + return self.make_request(f"/accounts/{self.account_id}") + + def get_zones(self) -> List[Dict[str, Any]]: + """Get all zones""" + result = self.make_request(f"/zones?account.id={self.account_id}&per_page=50") + return result.get("result", []) if result.get("success") else [] + + def get_zone_analytics(self, zone_id: str) -> Dict[str, Any]: + """Get zone analytics for the last hour""" + since = (datetime.now() - timedelta(hours=1)).isoformat() + return self.make_request(f"/zones/{zone_id}/analytics/dashboard?since={since}") + + def get_waf_rules(self, zone_id: str) -> List[Dict[str, Any]]: + """Get WAF rules for a zone""" + result = self.make_request(f"/zones/{zone_id}/firewall/waf/packages") + if result.get("success"): + packages = result.get("result", []) + rules = [] + for package in packages: + rules_result = self.make_request( + f"/zones/{zone_id}/firewall/waf/packages/{package['id']}/rules" + ) + if rules_result.get("success"): + rules.extend(rules_result.get("result", [])) + return rules + return [] + + def get_tunnels(self) -> List[Dict[str, Any]]: + """Get Cloudflare Tunnels""" + result = self.make_request(f"/accounts/{self.account_id}/cfd_tunnel") + return result.get("result", []) if result.get("success") else [] + + def get_dns_records(self, zone_id: str) -> List[Dict[str, Any]]: + """Get DNS records for a zone""" + result = self.make_request(f"/zones/{zone_id}/dns_records?per_page=100") + return result.get("result", []) if result.get("success") else [] + + def get_health_status(self) -> Dict[str, Any]: + """Get overall health status""" + status = "healthy" + issues = [] + + # Check zones + zones = self.get_zones() + if not zones: + issues.append("No zones found") + status = "warning" + + # Check account access 
+ account_info = self.get_account_info() + if not account_info.get("success"): + issues.append("Account access failed") + status = "critical" + + return {"status": status, "issues": issues} + + +def format_table(data: List[Dict[str, Any]], headers: List[str]) -> str: + """Format data as a table""" + if not data: + return "No data available" + + # Calculate column widths + col_widths = [len(header) for header in headers] + for row in data: + for i, header in enumerate(headers): + value = str(row.get(header, "")) + col_widths[i] = max(col_widths[i], len(value)) + + # Create header row + header_row = " | ".join( + header.ljust(col_widths[i]) for i, header in enumerate(headers) + ) + separator = "-" * len(header_row) + + # Create data rows + rows = [header_row, separator] + for row in data: + row_data = [ + str(row.get(header, "")).ljust(col_widths[i]) + for i, header in enumerate(headers) + ] + rows.append(" | ".join(row_data)) + + return "\n".join(rows) + + +def main(): + print("🌐 Cloudflare Infrastructure Monitoring Dashboard") + print("=" * 60) + print(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") + print() + + try: + monitor = CloudflareMonitor() + + # Health check + print("🔍 Health Status") + print("-" * 30) + health = monitor.get_health_status() + status_emoji = {"healthy": "✅", "warning": "⚠️", "critical": "❌"} + print( + f"Status: {status_emoji.get(health['status'], '❓')} {health['status'].upper()}" + ) + if health["issues"]: + for issue in health["issues"]: + print(f" - {issue}") + print() + + # Account information + print("🏢 Account Information") + print("-" * 30) + account_info = monitor.get_account_info() + if account_info.get("success"): + account = account_info["result"] + print(f"Name: {account.get('name', 'N/A')}") + print(f"Type: {account.get('type', 'N/A')}") + print(f"Created: {account.get('created_on', 'N/A')}") + else: + print("Failed to retrieve account information") + print() + + # Zones overview + print("🌐 Zones Overview") + print("-" * 30) + zones = monitor.get_zones() + zone_data = [] + for zone in zones[:10]: # Limit to first 10 zones + zone_data.append( + { + "Name": zone.get("name", "N/A"), + "Status": zone.get("status", "N/A"), + "Plan": zone.get("plan", {}).get("name", "N/A"), + "Development": zone.get("development_mode", "N/A"), + } + ) + + print(format_table(zone_data, ["Name", "Status", "Plan", "Development"])) + print(f"Total zones: {len(zones)}") + print() + + # DNS Records (for first zone) + dns_records = [] + waf_rules = [] + + if zones: + first_zone = zones[0] + print("📋 DNS Records (First Zone)") + print("-" * 30) + dns_records = monitor.get_dns_records(first_zone["id"]) + dns_data = [] + for record in dns_records[:15]: # Limit to first 15 records + dns_data.append( + { + "Type": record.get("type", "N/A"), + "Name": record.get("name", "N/A"), + "Content": record.get("content", "N/A")[:40] + "..." 
+ if len(record.get("content", "")) > 40 + else record.get("content", "N/A"), + } + ) + + print(format_table(dns_data, ["Type", "Name", "Content"])) + print(f"Total DNS records: {len(dns_records)}") + print() + + # Tunnels + print("🔗 Cloudflare Tunnels") + print("-" * 30) + tunnels = monitor.get_tunnels() + tunnel_data = [] + for tunnel in tunnels: + tunnel_data.append( + { + "Name": tunnel.get("name", "N/A"), + "Status": tunnel.get("status", "N/A"), + "Connections": len(tunnel.get("connections", [])), + } + ) + + print(format_table(tunnel_data, ["Name", "Status", "Connections"])) + print(f"Total tunnels: {len(tunnels)}") + print() + + # WAF Rules (for first zone) + if zones: + first_zone = zones[0] + print("🛡️ WAF Rules (First Zone)") + print("-" * 30) + waf_rules = monitor.get_waf_rules(first_zone["id"]) + waf_data = [] + for rule in waf_rules[:10]: # Limit to first 10 rules + waf_data.append( + { + "ID": rule.get("id", "N/A"), + "Description": rule.get("description", "N/A")[:50] + "..." + if len(rule.get("description", "")) > 50 + else rule.get("description", "N/A"), + "Mode": rule.get("mode", "N/A"), + } + ) + + print(format_table(waf_data, ["ID", "Description", "Mode"])) + print(f"Total WAF rules: {len(waf_rules)}") + print() + + # Summary + print("📊 Summary") + print("-" * 30) + print(f"Zones: {len(zones)}") + print(f"Tunnels: {len(tunnels)}") + if zones: + print(f"DNS Records (first zone): {len(dns_records)}") + print(f"WAF Rules (first zone): {len(waf_rules)}") + + except Exception as e: + print(f"❌ Error: {e}") + print("Please ensure your Cloudflare credentials are properly configured.") + + +if __name__ == "__main__": + main() diff --git a/scripts/setup_credentials.py b/scripts/setup_credentials.py new file mode 100644 index 0000000..947a0e8 --- /dev/null +++ b/scripts/setup_credentials.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 +""" +Cloudflare Credential Setup Wizard +Interactive script to guide users through configuring Cloudflare API credentials +""" + +import os +import sys +import re +from pathlib import Path + + +def validate_api_token(token): + """Validate Cloudflare API token format""" + # Cloudflare API tokens are typically 40+ characters + return len(token.strip()) >= 40 + + +def validate_account_id(account_id): + """Validate Cloudflare Account ID format""" + # Account IDs are typically 32-character hex strings + return re.match(r"^[a-f0-9]{32}$", account_id.strip(), re.IGNORECASE) is not None + + +def validate_zone_id(zone_id): + """Validate Cloudflare Zone ID format""" + # Zone IDs are also 32-character hex strings + return re.match(r"^[a-f0-9]{32}$", zone_id.strip(), re.IGNORECASE) is not None + + +def get_input(prompt, validation_func=None, secret=False): + """Get validated user input""" + while True: + try: + if secret: + import getpass + + value = getpass.getpass(prompt) + else: + value = input(prompt) + + if validation_func: + if validation_func(value): + return value + else: + print("❌ Invalid format. 
Please try again.") + else: + return value + except KeyboardInterrupt: + print("\n\nSetup cancelled.") + sys.exit(1) + + +def create_env_file(env_vars): + """Create or update .env file with credentials""" + env_path = Path(".env") + + # Read existing .env if it exists + existing_vars = {} + if env_path.exists(): + with open(env_path, "r") as f: + for line in f: + if line.strip() and not line.startswith("#") and "=" in line: + key, value = line.strip().split("=", 1) + existing_vars[key] = value + + # Update with new values + existing_vars.update(env_vars) + + # Write back + with open(env_path, "w") as f: + f.write("# OpenCode Environment Variables\n") + f.write("# Generated by setup_credentials.py\n") + f.write("# IMPORTANT: Never commit this file to git\n\n") + + # Write Cloudflare section + f.write( + "# ============================================================================\n" + ) + f.write("# CLOUDFLARE API CONFIGURATION\n") + f.write( + "# ============================================================================\n" + ) + + for key, value in env_vars.items(): + f.write(f'{key}="{value}"\n') + + f.write("\n") + + # Preserve other sections if they exist + sections = { + "GITHUB": [k for k in existing_vars.keys() if k.startswith("GITHUB")], + "GITLAB": [k for k in existing_vars.keys() if k.startswith("GITLAB")], + "OTHER": [ + k + for k in existing_vars.keys() + if k not in env_vars and not k.startswith(("GITHUB", "GITLAB")) + ], + } + + for section_name, keys in sections.items(): + if keys: + f.write( + f"# ============================================================================\n" + ) + f.write(f"# {section_name} CONFIGURATION\n") + f.write( + f"# ============================================================================\n" + ) + for key in keys: + f.write(f'{key}="{existing_vars[key]}"\n') + f.write("\n") + + return env_path + + +def main(): + print("🚀 Cloudflare Credential Setup Wizard") + print("=" * 50) + print() + + print("This wizard will help you configure your Cloudflare API credentials.") + print("You'll need:") + print("1. Cloudflare API Token (with appropriate permissions)") + print("2. Cloudflare Account ID") + print("3. Optional: Zone ID for specific domain management") + print() + + # Check if we're in the right directory + current_dir = Path.cwd() + if "cloudflare" not in str(current_dir): + print("⚠️ Warning: This script should be run from the cloudflare directory") + print(f" Current directory: {current_dir}") + proceed = get_input("Continue anyway? (y/n): ") + if proceed.lower() != "y": + print( + "Please navigate to the cloudflare directory and run this script again." 
+ ) + return + + # Collect credentials + print("\n🔐 Cloudflare API Configuration") + print("-" * 30) + + # API Token + print("\n📋 Step 1: Cloudflare API Token") + print("Get your token from: https://dash.cloudflare.com/profile/api-tokens") + print("Required permissions: Zone:DNS:Edit, Zone:Page Rules:Edit, Account:Read") + api_token = get_input( + "API Token: ", validation_func=validate_api_token, secret=True + ) + + # Account ID + print("\n🏢 Step 2: Cloudflare Account ID") + print("Find your Account ID in the Cloudflare dashboard sidebar") + print("Format: 32-character hex string (e.g., 1a2b3c4d5e6f7g8h9i0j1k2l3m4n5o6p)") + account_id = get_input("Account ID: ", validation_func=validate_account_id) + + # Zone ID (optional) + print("\n🌐 Step 3: Zone ID (Optional)") + print("If you want to manage a specific domain, provide its Zone ID") + print("Leave blank to skip") + zone_id = get_input( + "Zone ID (optional): ", + validation_func=lambda x: x.strip() == "" or validate_zone_id(x), + ) + + # Prepare environment variables + env_vars = {"CLOUDFLARE_API_TOKEN": api_token, "CLOUDFLARE_ACCOUNT_ID": account_id} + + if zone_id.strip(): + env_vars["CLOUDFLARE_ZONE_ID"] = zone_id + + # Create .env file + print("\n💾 Saving credentials...") + env_path = create_env_file(env_vars) + + # Set file permissions + env_path.chmod(0o600) # Only user read/write + + print(f"✅ Credentials saved to: {env_path}") + print("🔒 File permissions set to 600 (owner read/write only)") + + # Test configuration (basic validation only - no external dependencies) + print("\n🧪 Validating credentials...") + + # Basic format validation + if validate_api_token(api_token) and validate_account_id(account_id): + print("✅ Credential formats are valid") + print("⚠️ Note: Full API connectivity test requires 'requests' module") + print(" Install with: pip install requests") + else: + print("❌ Credential validation failed") + print(" Please check your inputs and try again") + + # Final instructions + print("\n🎉 Setup Complete!") + print("=" * 50) + print("\nNext steps:") + print("1. Source the environment file:") + print(" source .env") + print("\n2. Test Terraform configuration:") + print(" cd terraform && terraform init && terraform plan") + print("\n3. Deploy infrastructure:") + print(" terraform apply") + print("\n4. Start MCP servers:") + print(" Check MCP_GUIDE.md for server startup instructions") + print("\n📚 Documentation:") + print("- USAGE_GUIDE.md - Complete usage instructions") + print("- DEPLOYMENT_GUIDE.md - Deployment procedures") + print("- MCP_GUIDE.md - MCP server management") + + # Security reminder + print("\n🔐 Security Reminder:") + print("- Never commit .env to version control") + print("- Use .gitignore to exclude .env files") + print("- Consider using environment-specific .env files (.env.production, etc.)") + + +if __name__ == "__main__": + main() diff --git a/scripts/setup_credentials.sh b/scripts/setup_credentials.sh new file mode 100644 index 0000000..de2eed4 --- /dev/null +++ b/scripts/setup_credentials.sh @@ -0,0 +1,190 @@ +#!/bin/bash + +# Cloudflare Credential Setup Script +# Interactive script to configure Cloudflare API credentials + +set -e + +echo "🚀 Cloudflare Credential Setup Wizard" +echo "==================================================" +echo + +echo "This script will help you configure your Cloudflare API credentials." +echo "You'll need:" +echo "1. Cloudflare API Token (with appropriate permissions)" +echo "2. Cloudflare Account ID" +echo "3. 
Optional: Zone ID for specific domain management" +echo + +# Check if we're in the right directory +if [[ ! "$PWD" =~ "cloudflare" ]]; then + echo "⚠️ Warning: This script should be run from the cloudflare directory" + echo " Current directory: $PWD" + read -p "Continue anyway? (y/n): " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Please navigate to the cloudflare directory and run this script again." + exit 1 + fi +fi + +# Function to validate API token format +validate_api_token() { + local token="$1" + # Cloudflare API tokens are typically 40+ characters + [[ ${#token} -ge 40 ]] +} + +# Function to validate Account ID format +validate_account_id() { + local account_id="$1" + # Account IDs are 32-character hex strings + [[ "$account_id" =~ ^[a-f0-9]{32}$ ]] +} + +# Function to validate Zone ID format +validate_zone_id() { + local zone_id="$1" + # Zone IDs are 32-character hex strings + [[ "$zone_id" =~ ^[a-f0-9]{32}$ ]] +} + +# Function to validate an optional Zone ID (blank is allowed) +validate_zone_id_optional() { + local zone_id="$1" + [[ -z "$zone_id" ]] || validate_zone_id "$zone_id" +} + +# Function to get validated input +get_validated_input() { + local prompt="$1" + local validation_func="$2" + local secret="$3" + + while true; do + if [[ "$secret" == "true" ]]; then + read -s -p "$prompt" value + # Send the newline to stderr so it is not captured by command substitution + echo >&2 + else + read -p "$prompt" value + fi + + if [[ -n "$validation_func" ]]; then + if $validation_func "$value"; then + echo "$value" + return + else + # Error goes to stderr; only the validated value is echoed to stdout + echo "❌ Invalid format. Please try again." >&2 + fi + else + echo "$value" + return + fi + done +} + +# Collect credentials +echo "🔐 Cloudflare API Configuration" +echo "------------------------------" +echo + +# API Token +echo "📋 Step 1: Cloudflare API Token" +echo "Get your token from: https://dash.cloudflare.com/profile/api-tokens" +echo "Required permissions: Zone:DNS:Edit, Zone:Page Rules:Edit, Account:Read" +API_TOKEN=$(get_validated_input "API Token: " validate_api_token true) + +# Account ID +echo +echo "🏢 Step 2: Cloudflare Account ID" +echo "Find your Account ID in the Cloudflare dashboard sidebar" +echo "Format: 32-character hex string (e.g., 1a2b3c4d5e6f7g8h9i0j1k2l3m4n5o6p)" +ACCOUNT_ID=$(get_validated_input "Account ID: " validate_account_id false) + +# Zone ID (optional) +echo +echo "🌐 Step 3: Zone ID (Optional)" +echo "If you want to manage a specific domain, provide its Zone ID" +echo "Leave blank to skip" +ZONE_ID=$(get_validated_input "Zone ID (optional): " validate_zone_id_optional false) + +# Create .env file +echo +echo "💾 Saving credentials..." + +# Read existing .env if it exists +ENV_CONTENT="" +if [[ -f ".env" ]]; then + # Preserve existing non-Cloudflare variables + while IFS= read -r line; do + if [[ ! "$line" =~ ^CLOUDFLARE_ ]] && [[ !
"$line" =~ ^#.*CLOUDFLARE ]]; then + ENV_CONTENT="$ENV_CONTENT$line\n" + fi + done < ".env" +fi + +# Create new .env content +cat > .env << EOF +# OpenCode Environment Variables +# Generated by setup_credentials.sh +# IMPORTANT: Never commit this file to git + +# ============================================================================ +# CLOUDFLARE API CONFIGURATION +# ============================================================================ +CLOUDFLARE_API_TOKEN="$API_TOKEN" +CLOUDFLARE_ACCOUNT_ID="$ACCOUNT_ID" +EOF + +# Add Zone ID if provided +if [[ -n "$ZONE_ID" ]]; then + echo "CLOUDFLARE_ZONE_ID=\"$ZONE_ID\"" >> .env +fi + +# Add preserved content +if [[ -n "$ENV_CONTENT" ]]; then + echo >> .env + echo "$ENV_CONTENT" >> .env +fi + +# Set secure permissions +chmod 600 .env + +echo "✅ Credentials saved to: .env" +echo "🔒 File permissions set to 600 (owner read/write only)" + +# Basic validation +echo +echo "🧪 Validating credentials..." +if validate_api_token "$API_TOKEN" && validate_account_id "$ACCOUNT_ID"; then + echo "✅ Credential formats are valid" + echo "⚠️ Note: Full API connectivity test requires curl or python requests" +else + echo "❌ Credential validation failed" + echo " Please check your inputs and try again" +fi + +# Final instructions +echo +echo "🎉 Setup Complete!" +echo "==================================================" +echo +echo "Next steps:" +echo "1. Source the environment file:" +echo " source .env" +echo +echo "2. Test Terraform configuration:" +echo " cd terraform && terraform init && terraform plan" +echo +echo "3. Deploy infrastructure:" +echo " terraform apply" +echo +echo "4. Start MCP servers:" +echo " Check MCP_GUIDE.md for server startup instructions" +echo +echo "📚 Documentation:" +echo "- USAGE_GUIDE.md - Complete usage instructions" +echo "- DEPLOYMENT_GUIDE.md - Deployment procedures" +echo "- MCP_GUIDE.md - MCP server management" +echo +echo "🔐 Security Reminder:" +echo "- Never commit .env to version control" +echo "- Use .gitignore to exclude .env files" +echo "- Consider using environment-specific .env files (.env.production, etc.)" + +# Make script executable +chmod +x "$0" \ No newline at end of file diff --git a/scripts/terraform_state_manager.py b/scripts/terraform_state_manager.py new file mode 100644 index 0000000..bf3301d --- /dev/null +++ b/scripts/terraform_state_manager.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +""" +Terraform State Backup and Recovery Manager +Automated state management with versioning and rollback capabilities +""" + +import os +import json +import shutil +import hashlib +from datetime import datetime, timedelta +from pathlib import Path +from typing import Dict, List, Optional +import argparse + + +class TerraformStateManager: + """Manage Terraform state backups and recovery""" + + def __init__( + self, terraform_dir: str = "terraform", backup_dir: str = "terraform_backups" + ): + self.terraform_dir = Path(terraform_dir) + self.backup_dir = Path(backup_dir) + self.state_file = self.terraform_dir / "terraform.tfstate" + self.backup_dir.mkdir(exist_ok=True) + + def create_backup(self, description: str = "", auto_backup: bool = True) -> str: + """Create a backup of the current Terraform state""" + if not self.state_file.exists(): + return "No state file found to backup" + + # Generate backup filename with timestamp + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + backup_filename = f"state_backup_{timestamp}.tfstate" + backup_path = self.backup_dir / backup_filename + + # Copy state file + 
shutil.copy2(self.state_file, backup_path) + + # Create metadata file + metadata = { + "timestamp": timestamp, + "description": description, + "auto_backup": auto_backup, + "file_size": os.path.getsize(backup_path), + "file_hash": self._calculate_file_hash(backup_path), + } + + metadata_path = backup_path.with_suffix(".json") + with open(metadata_path, "w") as f: + json.dump(metadata, f, indent=2) + + return f"Backup created: {backup_filename}" + + def list_backups(self) -> List[Dict]: + """List all available backups""" + backups = [] + + for file in self.backup_dir.glob("state_backup_*.tfstate"): + metadata_file = file.with_suffix(".json") + + backup_info = { + "filename": file.name, + "path": str(file), + "size": file.stat().st_size, + "modified": datetime.fromtimestamp(file.stat().st_mtime), + } + + if metadata_file.exists(): + with open(metadata_file, "r") as f: + backup_info.update(json.load(f)) + + backups.append(backup_info) + + # Sort by modification time (newest first) + backups.sort(key=lambda x: x["modified"], reverse=True) + return backups + + def restore_backup(self, backup_filename: str, dry_run: bool = False) -> str: + """Restore a specific backup""" + backup_path = self.backup_dir / backup_filename + + if not backup_path.exists(): + return f"Backup file not found: {backup_filename}" + + # Create backup of current state before restore + if self.state_file.exists() and not dry_run: + self.create_backup("Pre-restore backup", auto_backup=True) + + if dry_run: + return f"Dry run: Would restore {backup_filename}" + + # Perform restore + shutil.copy2(backup_path, self.state_file) + + return f"State restored from: {backup_filename}" + + def cleanup_old_backups( + self, keep_days: int = 30, keep_count: int = 10 + ) -> List[str]: + """Clean up old backups based on age and count""" + backups = self.list_backups() + + if not backups: + return ["No backups found to clean up"] + + cutoff_date = datetime.now() - timedelta(days=keep_days) + backups_to_delete = [] + + # Delete backups older than keep_days + for backup in backups: + if backup["modified"] < cutoff_date: + backups_to_delete.append(backup) + + # If we have more than keep_count backups, delete the oldest ones + if len(backups) > keep_count: + # Keep the newest keep_count backups + backups_to_keep = backups[:keep_count] + backups_to_delete.extend([b for b in backups if b not in backups_to_keep]) + + # Remove duplicates + backups_to_delete = list({b["filename"]: b for b in backups_to_delete}.values()) + + deleted_files = [] + for backup in backups_to_delete: + try: + # Delete state file + state_file = Path(backup["path"]) + if state_file.exists(): + state_file.unlink() + deleted_files.append(state_file.name) + + # Delete metadata file + metadata_file = state_file.with_suffix(".json") + if metadata_file.exists(): + metadata_file.unlink() + deleted_files.append(metadata_file.name) + + except Exception as e: + print(f"Error deleting {backup['filename']}: {e}") + + return deleted_files + + def verify_backup_integrity(self, backup_filename: str) -> Dict[str, bool]: + """Verify the integrity of a backup""" + backup_path = self.backup_dir / backup_filename + metadata_path = backup_path.with_suffix(".json") + + if not backup_path.exists(): + return {"exists": False, "metadata_exists": False, "integrity": False} + + if not metadata_path.exists(): + return {"exists": True, "metadata_exists": False, "integrity": False} + + # Check file size and hash + with open(metadata_path, "r") as f: + metadata = json.load(f) + + current_size = 
backup_path.stat().st_size + current_hash = self._calculate_file_hash(backup_path) + + size_matches = current_size == metadata.get("file_size", 0) + hash_matches = current_hash == metadata.get("file_hash", "") + + return { + "exists": True, + "metadata_exists": True, + "size_matches": size_matches, + "hash_matches": hash_matches, + "integrity": size_matches and hash_matches, + } + + def get_state_statistics(self) -> Dict: + """Get statistics about current state and backups""" + backups = self.list_backups() + + stats = { + "current_state_exists": self.state_file.exists(), + "current_state_size": self.state_file.stat().st_size + if self.state_file.exists() + else 0, + "backup_count": len(backups), + "oldest_backup": min([b["modified"] for b in backups]) if backups else None, + "newest_backup": max([b["modified"] for b in backups]) if backups else None, + "total_backup_size": sum(b["size"] for b in backups), + "backups_with_issues": [], + } + + # Check backup integrity + for backup in backups: + integrity = self.verify_backup_integrity(backup["filename"]) + if not integrity["integrity"]: + stats["backups_with_issues"].append( + {"filename": backup["filename"], "integrity": integrity} + ) + + return stats + + def _calculate_file_hash(self, file_path: Path) -> str: + """Calculate SHA256 hash of a file""" + hasher = hashlib.sha256() + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(4096), b""): + hasher.update(chunk) + return hasher.hexdigest() + + +def main(): + """Command-line interface for Terraform state management""" + parser = argparse.ArgumentParser( + description="Terraform State Backup and Recovery Manager" + ) + parser.add_argument( + "action", + choices=["backup", "list", "restore", "cleanup", "stats", "verify"], + help="Action to perform", + ) + parser.add_argument("--filename", help="Backup filename for restore/verify") + parser.add_argument("--description", help="Description for backup") + parser.add_argument("--dry-run", action="store_true", help="Dry run mode") + parser.add_argument( + "--keep-days", type=int, default=30, help="Days to keep backups" + ) + parser.add_argument( + "--keep-count", type=int, default=10, help="Number of backups to keep" + ) + parser.add_argument( + "--terraform-dir", default="terraform", help="Terraform directory" + ) + parser.add_argument( + "--backup-dir", default="terraform_backups", help="Backup directory" + ) + + args = parser.parse_args() + + manager = TerraformStateManager(args.terraform_dir, args.backup_dir) + + if args.action == "backup": + result = manager.create_backup( + args.description or "Manual backup", auto_backup=False + ) + print(f"✅ {result}") + + elif args.action == "list": + backups = manager.list_backups() + print("📋 Available Backups:") + print("-" * 80) + for backup in backups: + print(f"📁 {backup['filename']}") + print(f" Size: {backup['size']:,} bytes") + print(f" Modified: {backup['modified'].strftime('%Y-%m-%d %H:%M:%S')}") + if "description" in backup: + print(f" Description: {backup['description']}") + print() + + elif args.action == "restore": + if not args.filename: + print("❌ Error: --filename argument required for restore") + return + + result = manager.restore_backup(args.filename, args.dry_run) + print(f"🔁 {result}") + + elif args.action == "cleanup": + deleted = manager.cleanup_old_backups(args.keep_days, args.keep_count) + if deleted: + print("🗑️ Cleaned up backups:") + for filename in deleted: + print(f" - {filename}") + else: + print("✅ No backups needed cleanup") + + elif args.action == 
"stats": + stats = manager.get_state_statistics() + print("📊 Terraform State Statistics") + print("-" * 40) + print( + f"Current state exists: {'✅' if stats['current_state_exists'] else '❌'}" + ) + print(f"Current state size: {stats['current_state_size']:,} bytes") + print(f"Backup count: {stats['backup_count']}") + if stats["oldest_backup"]: + print(f"Oldest backup: {stats['oldest_backup'].strftime('%Y-%m-%d')}") + print(f"Newest backup: {stats['newest_backup'].strftime('%Y-%m-%d')}") + print(f"Total backup size: {stats['total_backup_size']:,} bytes") + + if stats["backups_with_issues"]: + print(f"\n⚠️ Backups with issues: {len(stats['backups_with_issues'])}") + for issue in stats["backups_with_issues"]: + print(f" - {issue['filename']}") + + elif args.action == "verify": + if not args.filename: + print("❌ Error: --filename argument required for verify") + return + + integrity = manager.verify_backup_integrity(args.filename) + print(f"🔍 Integrity check for {args.filename}") + print(f" File exists: {'✅' if integrity['exists'] else '❌'}") + print(f" Metadata exists: {'✅' if integrity['metadata_exists'] else '❌'}") + if integrity["metadata_exists"]: + print(f" Size matches: {'✅' if integrity['size_matches'] else '❌'}") + print(f" Hash matches: {'✅' if integrity['hash_matches'] else '❌'}") + print(f" Overall integrity: {'✅' if integrity['integrity'] else '❌'}") + + +if __name__ == "__main__": + main() diff --git a/scripts/waf-and-plan-invariants.sh b/scripts/waf-and-plan-invariants.sh new file mode 100644 index 0000000..d78d173 --- /dev/null +++ b/scripts/waf-and-plan-invariants.sh @@ -0,0 +1,393 @@ +#!/usr/bin/env bash +# ============================================================================ +# WAF + PLAN INVARIANTS CHECKER +# ============================================================================ +# Enforces security+plan gating invariants for VaultMesh Cloudflare IaC. +# Run from repo root: bash scripts/waf-and-plan-invariants.sh +# +# Exit codes: +# 0 = All invariants pass +# 1 = One or more invariants violated +# +# Governed by: RED-BOOK.md +# ============================================================================ + +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "$REPO_ROOT" + +# Colors +RED='\033[0;31m' +GREEN='\033[0;32m' +NC='\033[0m' + +echo "============================================" +echo " VaultMesh WAF + Plan Invariants Check" +echo "============================================" +echo "" + +FAILED=0 + +echo "── 0. Toolchain Versions ──" +terraform version || true +python3 --version || true +python3 -m pip --version || true +python3 -m pytest --version || true +python3 -m mcp.waf_intelligence --version || true + +echo "" + +echo "── 1. WAF Intel Analyzer Regression ──" +if python3 -m pytest -q tests/test_waf_intelligence_analyzer.py; then + echo -e "${GREEN}✓${NC} 1.1 Analyzer regression test passed" +else + echo -e "${RED}✗${NC} 1.1 Analyzer regression test failed" + FAILED=1 +fi + +echo "" +echo "── 2. 
WAF Intel CLI Contract ──" + +TMP_DIR="${TMPDIR:-/tmp}" +WAF_JSON_FILE="$(mktemp -p "$TMP_DIR" waf-intel.XXXXXX.json)" +if python3 -m mcp.waf_intelligence --file terraform/waf.tf --format json --limit 5 >"$WAF_JSON_FILE"; then + if python3 - "$WAF_JSON_FILE" <<'PY' +import json +import sys + +path = sys.argv[1] +with open(path, "r", encoding="utf-8") as f: + payload = json.load(f) + +insights = payload.get("insights") +if not isinstance(insights, list): + raise SystemExit("waf_intel: insights is not a list") + +if insights: + raise SystemExit(f"waf_intel: expected 0 insights, got {len(insights)}") + +print("ok") +PY + then + echo -e "${GREEN}✓${NC} 2.1 WAF Intel JSON parses and insights are empty" + else + echo -e "${RED}✗${NC} 2.1 WAF Intel JSON contract violated" + cat "$WAF_JSON_FILE" + FAILED=1 + fi +else + echo -e "${RED}✗${NC} 2.1 WAF Intel CLI failed" + FAILED=1 +fi +rm -f "$WAF_JSON_FILE" + +echo "" +echo "── 3. Terraform Format + Validate + Plan Gates ──" + +cd terraform + +if terraform fmt -check -recursive >/dev/null 2>&1; then + echo -e "${GREEN}✓${NC} 3.1 Terraform formatting OK" +else + echo -e "${RED}✗${NC} 3.1 Terraform formatting required" + echo " Run: cd terraform && terraform fmt -recursive" + FAILED=1 +fi + +terraform init -backend=false -input=false >/dev/null 2>&1 +if terraform validate -no-color >/dev/null 2>&1; then + echo -e "${GREEN}✓${NC} 3.2 Terraform validate OK" +else + echo -e "${RED}✗${NC} 3.2 Terraform validate failed" + terraform validate -no-color + FAILED=1 +fi + +PLAN_FREE_OUT="$(mktemp -p "$TMP_DIR" tf-plan-free.XXXXXX.out)" +PLAN_PRO_OUT="$(mktemp -p "$TMP_DIR" tf-plan-pro.XXXXXX.out)" +PLAN_FREE_JSON="$(mktemp -p "$TMP_DIR" tf-plan-free.XXXXXX.json)" +PLAN_PRO_JSON="$(mktemp -p "$TMP_DIR" tf-plan-pro.XXXXXX.json)" +rm -f "$PLAN_FREE_OUT" "$PLAN_PRO_OUT" + +if terraform plan -no-color -input=false -lock=false -refresh=false -out="$PLAN_FREE_OUT" -var-file=assurance_free.tfvars >/dev/null; then + if terraform show -json "$PLAN_FREE_OUT" >"$PLAN_FREE_JSON"; then + if output="$( + python3 - "$PLAN_FREE_JSON" <<'PY' +import json +import sys + +path = sys.argv[1] +try: + with open(path, "r", encoding="utf-8") as f: + payload = json.load(f) +except json.JSONDecodeError as e: + print(f"json parse error: {e}") + raise SystemExit(2) + +resource_changes = payload.get("resource_changes") +planned_values = payload.get("planned_values") + +if not isinstance(resource_changes, list) or not isinstance(planned_values, dict): + print("invalid plan json: missing resource_changes[] and/or planned_values{}") + raise SystemExit(2) + +addresses = [ + rc.get("address", "") + for rc in resource_changes + if isinstance(rc, dict) and isinstance(rc.get("address"), str) +] + +managed_waf = sum(1 for a in addresses if a.startswith("cloudflare_ruleset.managed_waf[")) +bot_mgmt = sum(1 for a in addresses if a.startswith("cloudflare_bot_management.domains[")) + +if managed_waf != 0 or bot_mgmt != 0: + print(f"expected managed_waf=0 bot_management=0, got managed_waf={managed_waf} bot_management={bot_mgmt}") + for addr in sorted( + a + for a in addresses + if a.startswith("cloudflare_ruleset.managed_waf[") or a.startswith("cloudflare_bot_management.domains[") + ): + print(f"- {addr}") + raise SystemExit(2) +PY + )"; then + echo -e "${GREEN}✓${NC} 3.3 Free-plan gate OK (managed_waf=0 bot_management=0)" + else + echo -e "${RED}✗${NC} 3.3 Free-plan gate violated" + if [[ -n "${output:-}" ]]; then + echo "$output" | sed 's/^/ /' + fi + FAILED=1 + fi + else + echo -e "${RED}✗${NC} 3.3 
terraform show -json failed (free)" + FAILED=1 + fi +else + echo -e "${RED}✗${NC} 3.3 Terraform plan failed (free)" + terraform show -no-color "$PLAN_FREE_OUT" 2>/dev/null || true + FAILED=1 +fi + +if terraform plan -no-color -input=false -lock=false -refresh=false -out="$PLAN_PRO_OUT" -var-file=assurance_pro.tfvars >/dev/null; then + if terraform show -json "$PLAN_PRO_OUT" >"$PLAN_PRO_JSON"; then + if output="$( + python3 - "$PLAN_PRO_JSON" <<'PY' +import json +import sys + +path = sys.argv[1] +try: + with open(path, "r", encoding="utf-8") as f: + payload = json.load(f) +except json.JSONDecodeError as e: + print(f"json parse error: {e}") + raise SystemExit(2) + +resource_changes = payload.get("resource_changes") +planned_values = payload.get("planned_values") + +if not isinstance(resource_changes, list) or not isinstance(planned_values, dict): + print("invalid plan json: missing resource_changes[] and/or planned_values{}") + raise SystemExit(2) + +addresses = [ + rc.get("address", "") + for rc in resource_changes + if isinstance(rc, dict) and isinstance(rc.get("address"), str) +] + +managed_waf = sum(1 for a in addresses if a.startswith("cloudflare_ruleset.managed_waf[")) +bot_mgmt = sum(1 for a in addresses if a.startswith("cloudflare_bot_management.domains[")) + +if managed_waf != 1 or bot_mgmt != 1: + print("expected managed_waf=1 bot_management=1") + print(f"got managed_waf={managed_waf} bot_management={bot_mgmt}") + print("observed:") + for addr in sorted( + a + for a in addresses + if a.startswith("cloudflare_ruleset.managed_waf[") or a.startswith("cloudflare_bot_management.domains[") + ): + print(f"- {addr}") + raise SystemExit(2) +PY + )"; then + echo -e "${GREEN}✓${NC} 3.4 Paid-plan gate OK (managed_waf=1 bot_management=1)" + else + echo -e "${RED}✗${NC} 3.4 Paid-plan gate violated" + if [[ -n "${output:-}" ]]; then + echo "$output" | sed 's/^/ /' + fi + FAILED=1 + fi + else + echo -e "${RED}✗${NC} 3.4 terraform show -json failed (pro)" + FAILED=1 + fi +else + echo -e "${RED}✗${NC} 3.4 Terraform plan failed (pro)" + terraform show -no-color "$PLAN_PRO_OUT" 2>/dev/null || true + FAILED=1 +fi + +PLAN_NEG_FREE_OUT="$(mktemp -p "$TMP_DIR" tf-plan-neg-free.XXXXXX.out)" +PLAN_NEG_PRO_OUT="$(mktemp -p "$TMP_DIR" tf-plan-neg-pro.XXXXXX.out)" +PLAN_NEG_FREE_JSON="$(mktemp -p "$TMP_DIR" tf-plan-neg-free.XXXXXX.json)" +PLAN_NEG_PRO_JSON="$(mktemp -p "$TMP_DIR" tf-plan-neg-pro.XXXXXX.json)" +rm -f "$PLAN_NEG_FREE_OUT" "$PLAN_NEG_PRO_OUT" + +echo "" +echo "── 4. 
Negative Controls (Prove the gate bites) ──" + +if terraform plan -no-color -input=false -lock=false -refresh=false -out="$PLAN_NEG_FREE_OUT" -var-file=assurance_negative_free_should_fail.tfvars >/dev/null; then + if terraform show -json "$PLAN_NEG_FREE_OUT" >"$PLAN_NEG_FREE_JSON"; then + if output="$( + python3 - "$PLAN_NEG_FREE_JSON" <<'PY' +import json +import sys + +path = sys.argv[1] +try: + with open(path, "r", encoding="utf-8") as f: + payload = json.load(f) +except json.JSONDecodeError as e: + print(f"json parse error: {e}") + raise SystemExit(2) + +resource_changes = payload.get("resource_changes") +planned_values = payload.get("planned_values") + +if not isinstance(resource_changes, list) or not isinstance(planned_values, dict): + print("invalid plan json: missing resource_changes[] and/or planned_values{}") + raise SystemExit(2) + +addresses = [ + rc.get("address", "") + for rc in resource_changes + if isinstance(rc, dict) and isinstance(rc.get("address"), str) +] + +managed_waf = sum(1 for a in addresses if a.startswith("cloudflare_ruleset.managed_waf[")) +bot_mgmt = sum(1 for a in addresses if a.startswith("cloudflare_bot_management.domains[")) + +if managed_waf != 0 or bot_mgmt != 0: + print(f"expected managed_waf=0 bot_management=0, got managed_waf={managed_waf} bot_management={bot_mgmt}") + for addr in sorted( + a + for a in addresses + if a.startswith("cloudflare_ruleset.managed_waf[") or a.startswith("cloudflare_bot_management.domains[") + ): + print(f"- {addr}") + raise SystemExit(2) + +print("ok") +PY + )"; then + echo -e "${RED}✗${NC} 4.1 Negative free-plan control unexpectedly passed" + FAILED=1 + else + if [[ "${output:-}" == *"expected managed_waf=0 bot_management=0"* ]]; then + echo -e "${GREEN}✓${NC} 4.1 Negative free-plan control failed as expected" + else + echo -e "${RED}✗${NC} 4.1 Negative free-plan control failed (unexpected error)" + if [[ -n "${output:-}" ]]; then + echo "$output" | sed 's/^/ /' + fi + FAILED=1 + fi + fi + else + echo -e "${RED}✗${NC} 4.1 terraform show -json failed (negative free)" + FAILED=1 + fi +else + echo -e "${RED}✗${NC} 4.1 Terraform plan failed (negative free)" + FAILED=1 +fi + +if terraform plan -no-color -input=false -lock=false -refresh=false -out="$PLAN_NEG_PRO_OUT" -var-file=assurance_negative_pro_should_fail.tfvars >/dev/null; then + if terraform show -json "$PLAN_NEG_PRO_OUT" >"$PLAN_NEG_PRO_JSON"; then + if output="$( + python3 - "$PLAN_NEG_PRO_JSON" <<'PY' +import json +import sys + +path = sys.argv[1] +try: + with open(path, "r", encoding="utf-8") as f: + payload = json.load(f) +except json.JSONDecodeError as e: + print(f"json parse error: {e}") + raise SystemExit(2) + +resource_changes = payload.get("resource_changes") +planned_values = payload.get("planned_values") + +if not isinstance(resource_changes, list) or not isinstance(planned_values, dict): + print("invalid plan json: missing resource_changes[] and/or planned_values{}") + raise SystemExit(2) + +addresses = [ + rc.get("address", "") + for rc in resource_changes + if isinstance(rc, dict) and isinstance(rc.get("address"), str) +] + +managed_waf = sum(1 for a in addresses if a.startswith("cloudflare_ruleset.managed_waf[")) +bot_mgmt = sum(1 for a in addresses if a.startswith("cloudflare_bot_management.domains[")) + +if managed_waf != 1 or bot_mgmt != 1: + print("expected managed_waf=1 bot_management=1") + print(f"got managed_waf={managed_waf} bot_management={bot_mgmt}") + print("observed:") + for addr in sorted( + a + for a in addresses + if 
a.startswith("cloudflare_ruleset.managed_waf[") or a.startswith("cloudflare_bot_management.domains[") + ): + print(f"- {addr}") + raise SystemExit(2) + +print("ok") +PY + )"; then + echo -e "${RED}✗${NC} 4.2 Negative paid-plan control unexpectedly passed" + FAILED=1 + else + if [[ "${output:-}" == *"expected managed_waf=1 bot_management=1"* ]]; then + echo -e "${GREEN}✓${NC} 4.2 Negative paid-plan control failed as expected" + else + echo -e "${RED}✗${NC} 4.2 Negative paid-plan control failed (unexpected error)" + if [[ -n "${output:-}" ]]; then + echo "$output" | sed 's/^/ /' + fi + FAILED=1 + fi + fi + else + echo -e "${RED}✗${NC} 4.2 terraform show -json failed (negative pro)" + FAILED=1 + fi +else + echo -e "${RED}✗${NC} 4.2 Terraform plan failed (negative pro)" + FAILED=1 +fi + +rm -f "$PLAN_FREE_OUT" "$PLAN_PRO_OUT" "$PLAN_FREE_JSON" "$PLAN_PRO_JSON" "$PLAN_NEG_FREE_OUT" "$PLAN_NEG_PRO_OUT" "$PLAN_NEG_FREE_JSON" "$PLAN_NEG_PRO_JSON" + +cd "$REPO_ROOT" + +echo "" +echo "============================================" +echo " Summary" +echo "============================================" + +if [[ $FAILED -gt 0 ]]; then + echo -e "${RED}WAF + plan invariants violated. Fix before merging.${NC}" + exit 1 +fi + +echo -e "${GREEN}All WAF + plan invariants pass. ✓${NC}" +exit 0 diff --git a/terraform/README.md b/terraform/README.md index 373f15b..6274afb 100644 --- a/terraform/README.md +++ b/terraform/README.md @@ -38,6 +38,8 @@ cloudflare_account_name = "your-account-name" tunnel_secret_vaultmesh = "base64-encoded-secret" tunnel_secret_offsec = "base64-encoded-secret" admin_emails = ["admin@vaultmesh.org"] +enable_managed_waf = true +enable_bot_management = false EOF # Plan @@ -47,6 +49,31 @@ terraform plan terraform apply ``` +## Plan-Aware Security Features + +- `enable_managed_waf` applies the managed WAF ruleset only when the zone `plan` is not `"free"`. +- `enable_bot_management` applies bot management settings only when the zone `plan` is not `"free"`. + +This lets `terraform apply` succeed on Free-plan zones (DNS, tunnels, Access, settings) while keeping the security posture ready for plan upgrades. 
+ +### WAF Truth Table + +| Zone plan (`var.domains[*].plan`) | `enable_managed_waf` | `enable_bot_management` | Expected resources | +| --- | --- | --- | --- | +| `free` | any | any | `cloudflare_ruleset.security_rules` only | +| not `free` | `false` | any | `cloudflare_ruleset.security_rules` only | +| not `free` | `true` | `false` | `cloudflare_ruleset.security_rules`, `cloudflare_ruleset.managed_waf` | +| not `free` | `true` | `true` | `cloudflare_ruleset.security_rules`, `cloudflare_ruleset.managed_waf`, `cloudflare_bot_management.domains` | + +### Assurance Varfiles + +For deterministic, token-format-safe gating checks (no apply), use: + +```bash +terraform plan -refresh=false -var-file=assurance_free.tfvars +terraform plan -refresh=false -var-file=assurance_pro.tfvars +``` + ## Generate Tunnel Secrets ```bash diff --git a/terraform/assurance_free.tfvars b/terraform/assurance_free.tfvars new file mode 100644 index 0000000..ebc80f5 --- /dev/null +++ b/terraform/assurance_free.tfvars @@ -0,0 +1,35 @@ +cloudflare_api_token = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" # Placeholder (format-valid) +cloudflare_account_id = "00000000000000000000000000000000" # Placeholder (format-valid) +cloudflare_account_name = "" + +# Exercise empty-list safety +trusted_admin_ips = [] +blocked_countries = [] + +# Even when flags are true, free-plan zones must gate these resources off +enable_managed_waf = true +enable_bot_management = true + +# Keep the full set of expected zones so hard-coded references stay valid +domains = { + "offsec.global" = { + plan = "free" + jump_start = false + } + "offsecglobal.com" = { + plan = "free" + jump_start = false + } + "offsecagent.com" = { + plan = "free" + jump_start = false + } + "offsecshield.com" = { + plan = "free" + jump_start = false + } + "vaultmesh.org" = { + plan = "free" + jump_start = false + } +} diff --git a/terraform/assurance_negative_free_should_fail.tfvars b/terraform/assurance_negative_free_should_fail.tfvars new file mode 100644 index 0000000..c8fd719 --- /dev/null +++ b/terraform/assurance_negative_free_should_fail.tfvars @@ -0,0 +1,34 @@ +cloudflare_api_token = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" # Placeholder (format-valid) +cloudflare_account_id = "00000000000000000000000000000000" # Placeholder (format-valid) +cloudflare_account_name = "" + +trusted_admin_ips = [] +blocked_countries = [] + +enable_managed_waf = true +enable_bot_management = true + +# Intentionally violates the "free plan must gate managed WAF + bot mgmt off". +# Used by scripts/waf-and-plan-invariants.sh negative-control check. 
+domains = { + "offsec.global" = { + plan = "free" + jump_start = false + } + "offsecglobal.com" = { + plan = "free" + jump_start = false + } + "offsecagent.com" = { + plan = "free" + jump_start = false + } + "offsecshield.com" = { + plan = "free" + jump_start = false + } + "vaultmesh.org" = { + plan = "pro" + jump_start = false + } +} diff --git a/terraform/assurance_negative_pro_should_fail.tfvars b/terraform/assurance_negative_pro_should_fail.tfvars new file mode 100644 index 0000000..f92d0f3 --- /dev/null +++ b/terraform/assurance_negative_pro_should_fail.tfvars @@ -0,0 +1,34 @@ +cloudflare_api_token = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" # Placeholder (format-valid) +cloudflare_account_id = "00000000000000000000000000000000" # Placeholder (format-valid) +cloudflare_account_name = "" + +trusted_admin_ips = [] +blocked_countries = [] + +enable_managed_waf = true +enable_bot_management = false + +# Intentionally violates the "pro plan must create exactly 1 managed_waf + 1 bot_management" invariant. +# Used by scripts/waf-and-plan-invariants.sh negative-control check. +domains = { + "offsec.global" = { + plan = "free" + jump_start = false + } + "offsecglobal.com" = { + plan = "free" + jump_start = false + } + "offsecagent.com" = { + plan = "free" + jump_start = false + } + "offsecshield.com" = { + plan = "free" + jump_start = false + } + "vaultmesh.org" = { + plan = "pro" + jump_start = false + } +} diff --git a/terraform/assurance_pro.tfvars b/terraform/assurance_pro.tfvars new file mode 100644 index 0000000..3e78858 --- /dev/null +++ b/terraform/assurance_pro.tfvars @@ -0,0 +1,34 @@ +cloudflare_api_token = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" # Placeholder (format-valid) +cloudflare_account_id = "00000000000000000000000000000000" # Placeholder (format-valid) +cloudflare_account_name = "" + +# Exercise empty-list safety +trusted_admin_ips = [] +blocked_countries = [] + +enable_managed_waf = true +enable_bot_management = true + +# Mark at least one zone as non-free so plan includes managed WAF + bot mgmt resources. +domains = { + "offsec.global" = { + plan = "free" + jump_start = false + } + "offsecglobal.com" = { + plan = "free" + jump_start = false + } + "offsecagent.com" = { + plan = "free" + jump_start = false + } + "offsecshield.com" = { + plan = "free" + jump_start = false + } + "vaultmesh.org" = { + plan = "pro" + jump_start = false + } +} diff --git a/terraform/main.tf b/terraform/main.tf index 84d5476..b5d2bcf 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -20,10 +20,7 @@ data "cloudflare_accounts" "main" { } locals { - # Use account ID from data source if available, otherwise use variable - account_id = ( - var.cloudflare_account_name != "" && length(data.cloudflare_accounts.main) > 0 && length(data.cloudflare_accounts.main[0].accounts) > 0 - ? data.cloudflare_accounts.main[0].accounts[0].id - : var.cloudflare_account_id - ) + # Use account ID from data source if available, otherwise fall back to variable. + # `try()` avoids invalid index errors when the data source count is 0 or no accounts match. 
+ account_id = try(data.cloudflare_accounts.main[0].accounts[0].id, var.cloudflare_account_id) } diff --git a/terraform/terraform.tfvars b/terraform/terraform.tfvars index e5cd35e..7289e1b 100644 --- a/terraform/terraform.tfvars +++ b/terraform/terraform.tfvars @@ -1,3 +1,3 @@ -cloudflare_api_token = "placeholder-token" -cloudflare_account_id = "placeholder-account-id" -cloudflare_account_name = "" # Leave empty to use hardcoded account_id +cloudflare_api_token = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" # Placeholder (format-valid, not a real token) +cloudflare_account_id = "00000000000000000000000000000000" # Placeholder (format-valid, not a real account ID) +cloudflare_account_name = "" # Leave empty to use cloudflare_account_id diff --git a/terraform/variables.tf b/terraform/variables.tf index 5fc41ff..2e20134 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -64,3 +64,15 @@ variable "blocked_countries" { type = list(string) default = ["CN", "RU", "KP", "IR"] } + +variable "enable_managed_waf" { + description = "Enable Cloudflare managed WAF rulesets (requires WAF entitlement; typically not available on Free plan)." + type = bool + default = true +} + +variable "enable_bot_management" { + description = "Enable Cloudflare Bot Management settings (requires Bot Management entitlement)." + type = bool + default = false +} diff --git a/terraform/waf.tf b/terraform/waf.tf index 728a52a..93da5f4 100644 --- a/terraform/waf.tf +++ b/terraform/waf.tf @@ -11,7 +11,7 @@ resource "cloudflare_ruleset" "security_rules" { # Rule 1: Block requests to /admin from non-trusted IPs rules { action = "block" - expression = "(http.request.uri.path contains \"/admin\") and not (ip.src in {${join(" ", var.trusted_admin_ips)}})" + expression = length(var.trusted_admin_ips) > 0 ? "(http.request.uri.path contains \"/admin\") and not (ip.src in {${join(" ", var.trusted_admin_ips)}})" : "false" description = "Block admin access from untrusted IPs" enabled = length(var.trusted_admin_ips) > 0 } @@ -19,9 +19,9 @@ resource "cloudflare_ruleset" "security_rules" { # Rule 2: Challenge suspicious countries rules { action = "managed_challenge" - expression = "(ip.src.country in {\"${join("\" \"", var.blocked_countries)}\"})" + expression = length(var.blocked_countries) > 0 ? 
format("(ip.src.country in {%s})", join(" ", [for c in var.blocked_countries : format("\"%s\"", c)])) : "false" description = "Challenge traffic from high-risk countries" - enabled = true + enabled = length(var.blocked_countries) > 0 } # Rule 3: Block known bad user agents @@ -49,11 +49,14 @@ resource "cloudflare_ruleset" "security_rules" { # Enable Cloudflare Managed WAF Ruleset resource "cloudflare_ruleset" "managed_waf" { - for_each = cloudflare_zone.domains - zone_id = each.value.id - name = "Managed WAF" - kind = "zone" - phase = "http_request_firewall_managed" + for_each = { + for domain, zone in cloudflare_zone.domains : domain => zone + if var.enable_managed_waf && var.domains[domain].plan != "free" + } + zone_id = each.value.id + name = "Managed WAF" + kind = "zone" + phase = "http_request_firewall_managed" # Cloudflare Managed Ruleset rules { @@ -80,7 +83,10 @@ resource "cloudflare_ruleset" "managed_waf" { # Bot Management (if available on plan) resource "cloudflare_bot_management" "domains" { - for_each = cloudflare_zone.domains + for_each = { + for domain, zone in cloudflare_zone.domains : domain => zone + if var.enable_bot_management && var.domains[domain].plan != "free" + } zone_id = each.value.id enable_js = true fight_mode = true diff --git a/tests/test_mcp_cloudflare_safe_ingress.py b/tests/test_mcp_cloudflare_safe_ingress.py new file mode 100644 index 0000000..c30b3af --- /dev/null +++ b/tests/test_mcp_cloudflare_safe_ingress.py @@ -0,0 +1,22 @@ +from mcp.cloudflare_safe.cloudflare_api import parse_cloudflared_config_ingress + + +def test_parse_cloudflared_config_ingress_extracts_hostnames_and_services(): + sample = """\ +tunnel: 00000000-0000-0000-0000-000000000000 +credentials-file: /etc/cloudflared/0000.json + +ingress: + - hostname: "api.example.com" + service: http://127.0.0.1:8080 + - hostname: app.example.com + service: "http://127.0.0.1:3000" + - service: http_status:404 +""" + + rules = parse_cloudflared_config_ingress(sample) + + assert rules == [ + {"hostname": "api.example.com", "service": "http://127.0.0.1:8080"}, + {"hostname": "app.example.com", "service": "http://127.0.0.1:3000"}, + ] diff --git a/tests/test_waf_intelligence_analyzer.py b/tests/test_waf_intelligence_analyzer.py new file mode 100644 index 0000000..f2fb059 --- /dev/null +++ b/tests/test_waf_intelligence_analyzer.py @@ -0,0 +1,43 @@ +from mcp.waf_intelligence.analyzer import WAFRuleAnalyzer + + +def test_analyzer_detects_managed_waf_ruleset(): + analyzer = WAFRuleAnalyzer() + + tf = """ +resource "cloudflare_ruleset" "managed_waf" { + name = "Managed WAF" + kind = "zone" + phase = "http_request_firewall_managed" + + rules { + action = "execute" + action_parameters { + id = "efb7b8c949ac4650a09736fc376e9aee" + } + expression = "true" + description = "Execute Cloudflare Managed Ruleset" + enabled = true + } +} +""" + + result = analyzer.analyze_terraform_text("snippet.tf", tf, min_severity="warning") + assert result.violations == [] + + +def test_analyzer_warns_when_managed_waf_missing(): + analyzer = WAFRuleAnalyzer() + + tf = """ +resource "cloudflare_ruleset" "security_rules" { + name = "Security Rules" + kind = "zone" + phase = "http_request_firewall_custom" +} +""" + + result = analyzer.analyze_terraform_text("snippet.tf", tf, min_severity="warning") + assert [v.message for v in result.violations] == [ + "No managed WAF rules detected in this snippet." 
+ ] diff --git a/validate_registry.sh b/validate_registry.sh new file mode 100755 index 0000000..2090ba7 --- /dev/null +++ b/validate_registry.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Local Registry Validation Script +# Run this before commits to ensure registry integrity + +echo "🔍 Local Registry Validation" +echo "============================" + +# Set Python path for MCP servers +export PYTHONPATH="/Users/sovereign/work-core" + +cd /Users/sovereign/work-core/cloudflare + +# Generate fresh registry +echo "📝 Generating fresh capability registry..." +python3 generate_capability_registry_v2.py + +# Check tool name parity +echo "🔧 Checking tool name parity..." +python3 ci_check_tool_names.py + +# Check entrypoint sanity +echo "🚀 Checking entrypoint sanity..." +python3 ci_check_entrypoints.py + +# Validate registry format +echo "📊 Validating registry format..." +python3 -c " +import json +with open('capability_registry_v2.json', 'r') as f: + registry = json.load(f) + +# Required sections +required_sections = ['mcp_servers', 'terraform_resources', 'gitops_tools', 'security_framework', 'operational_tools'] +for section in required_sections: + assert section in registry, f'Missing section: {section}' + +# MCP server validation +for server_name, server_info in registry['mcp_servers'].items(): + assert 'entrypoint' in server_info, f'Missing entrypoint for {server_name}' + assert 'tools' in server_info, f'Missing tools for {server_name}' + assert 'auth_env' in server_info, f'Missing auth_env for {server_name}' + assert 'side_effects' in server_info, f'Missing side_effects for {server_name}' + assert 'outputs' in server_info, f'Missing outputs for {server_name}' + +print('✅ Registry format validation passed') +" + +# Check for changes from original +echo "📈 Checking for registry changes..." +if git diff --quiet capability_registry_v2.json; then + echo "✅ Registry is stable - no changes detected" +else + echo "⚠️ Registry changed during validation" + git diff capability_registry_v2.json + echo "💡 Consider committing these changes" +fi + +echo "" +echo "🎉 Registry validation completed successfully!" +echo "💡 Run this script before committing Cloudflare changes" \ No newline at end of file diff --git a/waf_intel_mcp.py b/waf_intel_mcp.py index 73b184f..f3e8de0 100755 --- a/waf_intel_mcp.py +++ b/waf_intel_mcp.py @@ -1,110 +1,15 @@ #!/usr/bin/env python3 from __future__ import annotations -import glob -from dataclasses import asdict -from typing import Any, Dict, List +""" +WAF Intelligence MCP Server entrypoint. -from modelcontextprotocol.python import Server -from mcp.waf_intelligence.orchestrator import WAFInsight, WAFIntelligence -from layer0 import layer0_entry -from layer0.shadow_classifier import ShadowEvalResult +This wrapper intentionally avoids third-party MCP SDK dependencies and delegates to the +in-repo stdio JSON-RPC implementation at `mcp.waf_intelligence.mcp_server`. +""" -server = Server("waf_intel") - - -def _insight_to_dict(insight: WAFInsight) -> Dict[str, Any]: - """Convert a WAFInsight dataclass into a plain dict.""" - return asdict(insight) - - -@server.tool() -async def analyze_waf( - file: str | None = None, - files: List[str] | None = None, - limit: int = 3, - severity_threshold: str = "warning", -) -> Dict[str, Any]: - """ - Analyze one or more Terraform WAF files and return curated insights. - - Args: - file: Single file path (e.g. "terraform/waf.tf"). - files: Optional list of file paths or glob patterns (e.g. ["terraform/waf*.tf"]). 
- limit: Max number of high-priority insights to return. - severity_threshold: Minimum severity to include ("info", "warning", "error"). - - Returns: - { - "results": [ - { - "file": "...", - "insights": [ ... ] - }, - ... - ] - } - """ - routing_action, shadow = layer0_entry(_shadow_repr(file, files, limit, severity_threshold)) - if routing_action != "HANDOFF_TO_LAYER1": - _raise_layer0(routing_action, shadow) - - paths: List[str] = [] - - if files: - for pattern in files: - for matched in glob.glob(pattern): - paths.append(matched) - - if file: - paths.append(file) - - seen = set() - unique_paths: List[str] = [] - for p in paths: - if p not in seen: - seen.add(p) - unique_paths.append(p) - - if not unique_paths: - raise ValueError("Please provide 'file' or 'files' to analyze.") - - intel = WAFIntelligence() - results: List[Dict[str, Any]] = [] - - for path in unique_paths: - insights: List[WAFInsight] = intel.analyze_and_recommend( - path, - limit=limit, - min_severity=severity_threshold, - ) - results.append( - { - "file": path, - "insights": [_insight_to_dict(insight) for insight in insights], - } - ) - - return {"results": results} +from cloudflare.mcp.waf_intelligence.mcp_server import main if __name__ == "__main__": - server.run() - - -def _shadow_repr(file: str | None, files: List[str] | None, limit: int, severity: str) -> str: - try: - return f"analyze_waf: file={file}, files={files}, limit={limit}, severity={severity}" - except Exception: - return "analyze_waf" - - -def _raise_layer0(routing_action: str, shadow: ShadowEvalResult) -> None: - if routing_action == "FAIL_CLOSED": - raise ValueError("Layer 0: cannot comply with this request.") - if routing_action == "HANDOFF_TO_GUARDRAILS": - reason = shadow.reason or "governance_violation" - raise ValueError(f"Layer 0: governance violation detected ({reason}).") - if routing_action == "PROMPT_FOR_CLARIFICATION": - raise ValueError("Layer 0: request is ambiguous. Please clarify and retry.") - raise ValueError("Layer 0: unrecognized routing action; refusing request.") + main()
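
For a quick check that the slimmed-down entrypoint still honors the CLI contract exercised in section 2 of scripts/waf-and-plan-invariants.sh, the same call can be replayed from Python. A minimal sketch, assuming it is run from the repo root and that a clean terraform/waf.tf yields zero insights:

```python
# Hedged smoke test: replay the WAF Intel CLI contract check from scripts/waf-and-plan-invariants.sh.
import json
import subprocess
import sys

# Invoke the in-repo CLI exactly as the invariants script does.
proc = subprocess.run(
    [sys.executable, "-m", "mcp.waf_intelligence",
     "--file", "terraform/waf.tf", "--format", "json", "--limit", "5"],
    capture_output=True, text=True, check=True,
)

payload = json.loads(proc.stdout)
insights = payload.get("insights")
assert isinstance(insights, list), "waf_intel: insights is not a list"
assert not insights, f"waf_intel: expected 0 insights, got {len(insights)}"
print("ok")
```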