Initialize repository snapshot

Vault Sovereign
2025-12-27 00:10:32 +00:00
commit 110d644e10
281 changed files with 40331 additions and 0 deletions

ledger/__init__.py Normal file

@@ -0,0 +1,33 @@
from __future__ import annotations
from ledger.db import (
LedgerEvent,
ShadowReceiptRow,
connect,
default_db_path,
ensure_migrated,
get_shadow_receipts_by_trace,
get_shadow_receipts_recent,
insert_shadow_receipt,
log_mcp_call,
log_proof_artifact,
log_tool_invocation,
new_id,
new_trace_id,
)
__all__ = [
"LedgerEvent",
"ShadowReceiptRow",
"connect",
"default_db_path",
"ensure_migrated",
"get_shadow_receipts_by_trace",
"get_shadow_receipts_recent",
"insert_shadow_receipt",
"log_mcp_call",
"log_proof_artifact",
"log_tool_invocation",
"new_id",
"new_trace_id",
]
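# Usage sketch (illustrative, not executed here): downstream code imports the ledger API
# from the package root rather than from ledger.db directly.
#
#     from ledger import connect, ensure_migrated, log_tool_invocation
#     conn = connect()          # creates the default .state/ledger.sqlite on first use
#     ensure_migrated(conn)     # applies any pending schema migrations
#     conn.close()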

ledger/db.py Normal file

@@ -0,0 +1,426 @@
from __future__ import annotations
import json
import os
import sqlite3
import time
import uuid
from contextlib import contextmanager
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Iterator, Mapping
from ledger.migrate import migrate
from ledger.redact import redact_json_for_storage
THIS_FILE = Path(__file__).resolve()
PKG_DIR = THIS_FILE.parent
REPO_ROOT = PKG_DIR.parent
def default_db_path() -> Path:
configured = os.environ.get("LEDGER_DB_PATH") or os.environ.get(
"VAULTMESH_LEDGER_DB"
)
if configured:
return Path(configured).expanduser().resolve()
vaultmesh_root = os.environ.get("VAULTMESH_ROOT")
if vaultmesh_root:
return (
Path(vaultmesh_root).expanduser().resolve() / ".state" / "ledger.sqlite"
).resolve()
return (REPO_ROOT / ".state" / "ledger.sqlite").resolve()
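# Resolution order sketch (illustrative): LEDGER_DB_PATH / VAULTMESH_LEDGER_DB win,
# then VAULTMESH_ROOT/.state/ledger.sqlite, then <repo root>/.state/ledger.sqlite.
#
#     LEDGER_DB_PATH=/tmp/ledger.sqlite   -> /tmp/ledger.sqlite
#     VAULTMESH_ROOT=~/vaultmesh (only)   -> ~/vaultmesh/.state/ledger.sqlite
#     (nothing set)                       -> <repo>/.state/ledger.sqlite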
def new_id() -> str:
return str(uuid.uuid4())
def new_trace_id() -> str:
return str(uuid.uuid4())
def _apply_pragmas(conn: sqlite3.Connection) -> None:
conn.execute("PRAGMA journal_mode=WAL;")
conn.execute("PRAGMA synchronous=NORMAL;")
conn.execute("PRAGMA foreign_keys=ON;")
conn.execute("PRAGMA busy_timeout=5000;")
conn.execute("PRAGMA temp_store=MEMORY;")
def connect(db_path: Path | str | None = None) -> sqlite3.Connection:
path = Path(db_path) if db_path is not None else default_db_path()
path.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(path), isolation_level=None)
conn.row_factory = sqlite3.Row
_apply_pragmas(conn)
return conn
@contextmanager
def open_db(db_path: Path | str | None = None) -> Iterator[sqlite3.Connection]:
conn = connect(db_path)
try:
yield conn
finally:
conn.close()
@contextmanager
def txn(conn: sqlite3.Connection) -> Iterator[sqlite3.Connection]:
conn.execute("BEGIN;")
try:
yield conn
conn.execute("COMMIT;")
except Exception:
conn.execute("ROLLBACK;")
raise
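# Usage sketch (illustrative, not part of this module): open a connection, make sure the
# schema is current, then group writes so they commit or roll back together.
#
#     with open_db() as conn:
#         ensure_migrated(conn)
#         with txn(conn):
#             conn.execute(...)  # any writes inside this block share one transaction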
def ensure_migrated(conn: sqlite3.Connection) -> None:
migrate(conn)
def _utc_now_iso_z() -> str:
return (
datetime.now(timezone.utc)
.replace(microsecond=0)
.isoformat()
.replace("+00:00", "Z")
)
def _json_dumps(value: Any) -> str:
return json.dumps(value, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
def _normalize_action(value: str | None) -> str | None:
if value is None:
return None
value = value.strip()
return value or None
def _sha256_hex(data: bytes) -> str:
import hashlib
return hashlib.sha256(data).hexdigest()
def _blake3_hex(data: bytes) -> str | None:
try:
import blake3 # type: ignore
except Exception:
return None
return blake3.blake3(data).hexdigest()
@dataclass(frozen=True)
class LedgerEvent:
id: str
ts: str
kind: str
status: str
label: str
duration_ms: int | None
trace_id: str | None
error_text: str | None
@dataclass(frozen=True)
class ShadowReceiptRow:
id: str
ts: str
horizon_id: str
counterfactual_hash: str
entropy_delta: float | None
reason_unrealized: str
observer_signature: str | None
trace_id: str | None
meta_json: str | None
def log_tool_invocation(
*,
tool_name: str,
action: str | None = None,
status: str,
duration_ms: int | None = None,
input_payload: Any | None = None,
output_payload: Any | None = None,
error_text: str | None = None,
trace_id: str | None = None,
actor: str | None = None,
db_path: Path | str | None = None,
) -> str:
invocation_id = new_id()
redacted_input, input_meta = redact_json_for_storage(input_payload)
redacted_output, output_meta = redact_json_for_storage(output_payload)
input_meta_json = _json_dumps(input_meta) if input_meta else None
output_meta_json = _json_dumps(output_meta) if output_meta else None
with open_db(db_path) as conn:
ensure_migrated(conn)
with txn(conn):
conn.execute(
"""
INSERT INTO tool_invocations (
id, ts, tool_name, action, status, duration_ms,
input_json, output_json, error_text, trace_id, actor,
input_meta_json, output_meta_json
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
""",
(
invocation_id,
_utc_now_iso_z(),
tool_name,
_normalize_action(action),
status,
duration_ms,
redacted_input,
redacted_output,
error_text,
trace_id,
actor,
input_meta_json,
output_meta_json,
),
)
return invocation_id
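# Usage sketch (illustrative values): record one tool run and keep the returned id for
# correlation. Payloads pass through redact_json_for_storage before they reach SQLite.
#
#     trace = new_trace_id()
#     inv_id = log_tool_invocation(
#         tool_name="example_tool",
#         action="render",
#         status="ok",
#         duration_ms=42,
#         input_payload={"target": "demo", "api_key": "hunter2"},  # api_key is redacted
#         trace_id=trace,
#     )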
def log_mcp_call(
*,
server_name: str,
method: str,
tool_name: str | None = None,
status: str,
duration_ms: int | None = None,
request_payload: Any | None = None,
response_payload: Any | None = None,
error_text: str | None = None,
trace_id: str | None = None,
client_id: str | None = None,
db_path: Path | str | None = None,
) -> str:
call_id = new_id()
redacted_request, request_meta = redact_json_for_storage(request_payload)
redacted_response, response_meta = redact_json_for_storage(response_payload)
request_meta_json = _json_dumps(request_meta) if request_meta else None
response_meta_json = _json_dumps(response_meta) if response_meta else None
with open_db(db_path) as conn:
ensure_migrated(conn)
with txn(conn):
conn.execute(
"""
INSERT INTO mcp_calls (
id, ts, server_name, method, tool_name, status, duration_ms,
request_json, response_json, error_text, trace_id, client_id,
request_meta_json, response_meta_json
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
""",
(
call_id,
_utc_now_iso_z(),
server_name,
method,
_normalize_action(tool_name),
status,
duration_ms,
redacted_request,
redacted_response,
error_text,
trace_id,
client_id,
request_meta_json,
response_meta_json,
),
)
return call_id
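# Usage sketch (illustrative values): an MCP call logged with the same trace_id as the
# tool invocation sketch above, so both rows can later be joined on trace_id.
#
#     log_mcp_call(
#         server_name="example-mcp-server",
#         method="tools/call",
#         tool_name="status",
#         status="ok",
#         duration_ms=17,
#         trace_id=trace,
#     )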
def log_proof_artifact(
*,
kind: str,
path: str | Path | None = None,
meta: Mapping[str, Any] | None = None,
trace_id: str | None = None,
db_path: Path | str | None = None,
) -> str:
artifact_id = new_id()
rel_path: str | None = None
sha256_hex: str | None = None
blake3_hex: str | None = None
size_bytes: int | None = None
if path is not None:
p = Path(path)
try:
rel_path = str(p.resolve().relative_to(REPO_ROOT))
except Exception:
rel_path = str(p)
if p.exists() and p.is_file():
data = p.read_bytes()
sha256_hex = _sha256_hex(data)
blake3_hex = _blake3_hex(data)
size_bytes = len(data)
meta_json_redacted, _ = redact_json_for_storage(meta)
meta_json = meta_json_redacted
with open_db(db_path) as conn:
ensure_migrated(conn)
with txn(conn):
conn.execute(
"""
INSERT INTO proof_artifacts (
id, ts, kind, path, sha256_hex, blake3_hex, size_bytes, meta_json, trace_id
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);
""",
(
artifact_id,
_utc_now_iso_z(),
kind,
rel_path,
sha256_hex,
blake3_hex,
size_bytes,
meta_json,
trace_id,
),
)
return artifact_id
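# Usage sketch (illustrative, hypothetical path): register a generated file; its sha256
# (and blake3, when that optional dependency is installed) and size are captured here.
#
#     log_proof_artifact(
#         kind="receipt",
#         path="receipts/2025-12-27/run.json",  # hypothetical path
#         meta={"generator": "example"},
#         trace_id=trace,
#     )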
@contextmanager
def timed_operation() -> Iterator[dict[str, Any]]:
start = time.perf_counter()
info: dict[str, Any] = {}
try:
yield info
finally:
info["duration_ms"] = int((time.perf_counter() - start) * 1000)
def insert_shadow_receipt(
*,
horizon_id: str,
counterfactual_hash: str,
reason_unrealized: str,
entropy_delta: float | None = None,
observer_signature: str | None = None,
trace_id: str | None = None,
meta: Mapping[str, Any] | None = None,
db_path: Path | str | None = None,
) -> str:
"""
Insert a ShadowReceipt (proof of restraint / unrealized futures) into the local SQLite ledger.
Notes:
- `meta` is redacted via `redact_json_for_storage` before storage.
- Callers should pass `trace_id` to correlate with tool_invocations/mcp_calls/proof_artifacts.
"""
receipt_id = new_id()
meta_json_redacted, _ = redact_json_for_storage(meta)
with open_db(db_path) as conn:
ensure_migrated(conn)
with txn(conn):
conn.execute(
"""
INSERT INTO shadow_receipts (
id, ts, horizon_id, counterfactual_hash, entropy_delta,
reason_unrealized, observer_signature, trace_id, meta_json
)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?);
""",
(
receipt_id,
_utc_now_iso_z(),
horizon_id,
counterfactual_hash,
entropy_delta,
reason_unrealized,
observer_signature,
trace_id,
meta_json_redacted,
),
)
return receipt_id
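# Usage sketch (illustrative values): record that a candidate action was considered but
# deliberately not taken.
#
#     insert_shadow_receipt(
#         horizon_id="deploy-2025-12-27",
#         counterfactual_hash=_sha256_hex(b"example-normalized-payload"),
#         reason_unrealized="operator_abort",
#         trace_id=trace,
#         meta={"note": "example only"},
#     )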
def get_shadow_receipts_by_trace(
trace_id: str, *, db_path: Path | str | None = None
) -> list[ShadowReceiptRow]:
with open_db(db_path) as conn:
ensure_migrated(conn)
rows = conn.execute(
"""
SELECT id, ts, horizon_id, counterfactual_hash, entropy_delta,
reason_unrealized, observer_signature, trace_id, meta_json
FROM shadow_receipts
WHERE trace_id = ?
ORDER BY datetime(ts) ASC, id ASC;
""",
(trace_id,),
).fetchall()
out: list[ShadowReceiptRow] = []
for r in rows:
out.append(
ShadowReceiptRow(
id=r["id"],
ts=r["ts"],
horizon_id=r["horizon_id"],
counterfactual_hash=r["counterfactual_hash"],
entropy_delta=r["entropy_delta"],
reason_unrealized=r["reason_unrealized"],
observer_signature=r["observer_signature"],
trace_id=r["trace_id"],
meta_json=r["meta_json"],
)
)
return out
def get_shadow_receipts_recent(
n: int = 50, *, db_path: Path | str | None = None
) -> list[ShadowReceiptRow]:
with open_db(db_path) as conn:
ensure_migrated(conn)
rows = conn.execute(
"""
SELECT id, ts, horizon_id, counterfactual_hash, entropy_delta,
reason_unrealized, observer_signature, trace_id, meta_json
FROM shadow_receipts
ORDER BY datetime(ts) DESC, id DESC
LIMIT ?;
""",
(int(n),),
).fetchall()
out: list[ShadowReceiptRow] = []
for r in rows:
out.append(
ShadowReceiptRow(
id=r["id"],
ts=r["ts"],
horizon_id=r["horizon_id"],
counterfactual_hash=r["counterfactual_hash"],
entropy_delta=r["entropy_delta"],
reason_unrealized=r["reason_unrealized"],
observer_signature=r["observer_signature"],
trace_id=r["trace_id"],
meta_json=r["meta_json"],
)
)
return out
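# Query sketch (illustrative): fetch receipts for one trace, or the latest N overall.
#
#     for row in get_shadow_receipts_by_trace(trace):
#         print(row.ts, row.reason_unrealized)
#     latest = get_shadow_receipts_recent(10)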

ledger/migrate.py Normal file

@@ -0,0 +1,52 @@
from __future__ import annotations
import sqlite3
from pathlib import Path
THIS_FILE = Path(__file__).resolve()
PKG_DIR = THIS_FILE.parent
SCHEMA_DIR = PKG_DIR / "schema"
def _ensure_migrations_table(conn: sqlite3.Connection) -> None:
conn.execute(
"""
CREATE TABLE IF NOT EXISTS migrations (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
);
"""
)
def _applied_migrations(conn: sqlite3.Connection) -> set[str]:
_ensure_migrations_table(conn)
rows = conn.execute("SELECT name FROM migrations;").fetchall()
names: set[str] = set()
for row in rows:
try:
names.add(row["name"])
except Exception:
names.add(row[0])
return names
def _migration_files() -> list[Path]:
if not SCHEMA_DIR.exists():
return []
files = [p for p in SCHEMA_DIR.iterdir() if p.is_file() and p.suffix == ".sql"]
files.sort(key=lambda p: p.name)
return files
def migrate(conn: sqlite3.Connection) -> None:
applied = _applied_migrations(conn)
for path in _migration_files():
name = path.name
if name in applied:
continue
sql = path.read_text(encoding="utf-8")
conn.executescript(sql)
conn.execute("INSERT INTO migrations (name) VALUES (?);", (name,))

ledger/redact.py Normal file

@@ -0,0 +1,199 @@
from __future__ import annotations
import hashlib
import json
from dataclasses import dataclass
from typing import Any
DEFAULT_DENY_SUBSTRINGS = (
"token",
"access_token",
"api_key",
"authorization",
"cookie",
"set-cookie",
"secret",
"password",
"passphrase",
"private_key",
"seed",
"mnemonic",
"github_token",
"cloudflare_api_token",
"openai",
"aws_",
)
def _sha256_hex(data: bytes) -> str:
return hashlib.sha256(data).hexdigest()
def _is_sensitive_key(key: str) -> bool:
lowered = key.strip().lower()
if not lowered:
return False
return any(s in lowered for s in DEFAULT_DENY_SUBSTRINGS)
def _safe_default(obj: Any) -> Any:
if isinstance(obj, (bytes, bytearray)):
b = bytes(obj)
return {"__type__": "bytes", "len": len(b), "sha256": _sha256_hex(b)}
return repr(obj)
@dataclass(frozen=True)
class RedactionMeta:
raw_sha256: str
raw_size_bytes: int
redacted_paths: int
truncated_strings: int
saw_sensitive: bool
def _stable_json_bytes(payload: Any) -> bytes:
if payload is None:
return b"null"
if isinstance(payload, (bytes, bytearray)):
return bytes(payload)
if isinstance(payload, str):
return payload.encode("utf-8", errors="replace")
try:
return json.dumps(
payload,
ensure_ascii=False,
sort_keys=True,
separators=(",", ":"),
default=_safe_default,
).encode("utf-8", errors="replace")
except Exception:
return repr(payload).encode("utf-8", errors="replace")
def _redact(
value: Any,
*,
max_depth: int,
max_items: int,
max_string_len: int,
_depth: int = 0,
_meta: dict[str, int] | None = None,
) -> tuple[Any, dict[str, int]]:
meta = (
_meta
if _meta is not None
else {"redacted_paths": 0, "truncated_strings": 0, "saw_sensitive": 0}
)
if _depth > max_depth:
meta["redacted_paths"] += 1
return "[REDACTED_DEPTH]", meta
if isinstance(value, dict):
out: dict[str, Any] = {}
items = list(value.items())
if len(items) > max_items:
items = items[:max_items]
out["__truncated__"] = f"dict truncated to {max_items} items"
for k, v in items:
key = str(k)
if _is_sensitive_key(key):
meta["saw_sensitive"] += 1
meta["redacted_paths"] += 1
out[key] = "[REDACTED]"
continue
out[key], meta = _redact(
v,
max_depth=max_depth,
max_items=max_items,
max_string_len=max_string_len,
_depth=_depth + 1,
_meta=meta,
)
return out, meta
if isinstance(value, list):
out_list: list[Any] = []
items = value
truncated = False
if len(items) > max_items:
items = items[:max_items]
truncated = True
for item in items:
redacted_item, meta = _redact(
item,
max_depth=max_depth,
max_items=max_items,
max_string_len=max_string_len,
_depth=_depth + 1,
_meta=meta,
)
out_list.append(redacted_item)
if truncated:
out_list.append(f"__truncated__: list truncated to {max_items} items")
return out_list, meta
if isinstance(value, tuple):
return _redact(
list(value),
max_depth=max_depth,
max_items=max_items,
max_string_len=max_string_len,
_depth=_depth,
_meta=meta,
)
if isinstance(value, str):
if len(value) <= max_string_len:
return value, meta
meta["truncated_strings"] += 1
return value[
:max_string_len
] + f" [TRUNCATED {len(value) - max_string_len} chars]", meta
if isinstance(value, (int, float, bool)) or value is None:
return value, meta
return _safe_default(value), meta
def redact_json_for_storage(payload: Any) -> tuple[str | None, dict[str, Any] | None]:
if payload is None:
return None, None
raw_bytes = _stable_json_bytes(payload)
redacted_value, counters = _redact(
payload, max_depth=20, max_items=200, max_string_len=2048
)
meta = RedactionMeta(
raw_sha256=_sha256_hex(raw_bytes),
raw_size_bytes=len(raw_bytes),
redacted_paths=counters.get("redacted_paths", 0),
truncated_strings=counters.get("truncated_strings", 0),
saw_sensitive=counters.get("saw_sensitive", 0) > 0,
)
try:
redacted_json = json.dumps(
redacted_value,
ensure_ascii=False,
sort_keys=True,
separators=(",", ":"),
default=_safe_default,
)
except Exception:
redacted_json = json.dumps(repr(redacted_value), ensure_ascii=False)
meta_dict: dict[str, Any] = {
"raw_sha256": meta.raw_sha256,
"raw_size_bytes": meta.raw_size_bytes,
"redacted_paths": meta.redacted_paths,
"truncated_strings": meta.truncated_strings,
}
if meta.saw_sensitive:
meta_dict["saw_sensitive"] = True
return redacted_json, meta_dict
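# Behaviour sketch (illustrative payload): sensitive keys are replaced, and the meta dict
# carries a hash of the raw payload so the original can still be verified out of band.
#
#     redacted, meta = redact_json_for_storage({"user": "ana", "api_key": "hunter2"})
#     # redacted -> '{"api_key":"[REDACTED]","user":"ana"}'
#     # meta     -> {"raw_sha256": "...", "raw_size_bytes": ..., "redacted_paths": 1,
#     #              "truncated_strings": 0, "saw_sensitive": True}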


@@ -0,0 +1,50 @@
CREATE TABLE IF NOT EXISTS migrations (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL UNIQUE,
applied_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS tool_invocations (
id TEXT PRIMARY KEY,
ts TEXT NOT NULL DEFAULT (datetime('now')),
tool_name TEXT NOT NULL,
action TEXT,
status TEXT NOT NULL,
duration_ms INTEGER,
input_json TEXT,
output_json TEXT,
error_text TEXT,
trace_id TEXT,
actor TEXT,
input_meta_json TEXT,
output_meta_json TEXT
);
CREATE TABLE IF NOT EXISTS mcp_calls (
id TEXT PRIMARY KEY,
ts TEXT NOT NULL DEFAULT (datetime('now')),
server_name TEXT NOT NULL,
method TEXT NOT NULL,
tool_name TEXT,
status TEXT NOT NULL,
duration_ms INTEGER,
request_json TEXT,
response_json TEXT,
error_text TEXT,
trace_id TEXT,
client_id TEXT,
request_meta_json TEXT,
response_meta_json TEXT
);
CREATE TABLE IF NOT EXISTS proof_artifacts (
id TEXT PRIMARY KEY,
ts TEXT NOT NULL DEFAULT (datetime('now')),
kind TEXT NOT NULL,
path TEXT,
sha256_hex TEXT,
blake3_hex TEXT,
size_bytes INTEGER,
meta_json TEXT,
trace_id TEXT
);


@@ -0,0 +1,10 @@
CREATE INDEX IF NOT EXISTS idx_tool_invocations_ts ON tool_invocations(ts);
CREATE INDEX IF NOT EXISTS idx_tool_invocations_tool_ts ON tool_invocations(tool_name, ts);
CREATE INDEX IF NOT EXISTS idx_tool_invocations_trace_id ON tool_invocations(trace_id);
CREATE INDEX IF NOT EXISTS idx_mcp_calls_ts ON mcp_calls(ts);
CREATE INDEX IF NOT EXISTS idx_mcp_calls_server_ts ON mcp_calls(server_name, ts);
CREATE INDEX IF NOT EXISTS idx_mcp_calls_trace_id ON mcp_calls(trace_id);
CREATE INDEX IF NOT EXISTS idx_proof_artifacts_ts ON proof_artifacts(ts);
CREATE INDEX IF NOT EXISTS idx_proof_artifacts_trace_id ON proof_artifacts(trace_id);

ledger/schema/0003_shadow_receipts.sql Normal file

@@ -0,0 +1,18 @@
-- 0003_shadow_receipts.sql
CREATE TABLE IF NOT EXISTS shadow_receipts (
id TEXT PRIMARY KEY, -- uuid
ts TEXT NOT NULL DEFAULT (datetime('now')),
horizon_id TEXT NOT NULL, -- grouping key for “unrealized futures”
counterfactual_hash TEXT NOT NULL, -- hash of normalized counterfactual payload
entropy_delta REAL, -- optional numeric signal (can be NULL)
reason_unrealized TEXT NOT NULL, -- short enum-like string (e.g. "blocked", "operator_abort")
observer_signature TEXT, -- optional (future: Ed25519 signature)
trace_id TEXT, -- correlate to invocation chain
meta_json TEXT -- redacted metadata
);
-- indexes for common lookups (ts, horizon, trace, reason)
CREATE INDEX IF NOT EXISTS idx_shadow_receipts_ts ON shadow_receipts(ts);
CREATE INDEX IF NOT EXISTS idx_shadow_receipts_horizon ON shadow_receipts(horizon_id);
CREATE INDEX IF NOT EXISTS idx_shadow_receipts_trace ON shadow_receipts(trace_id);
CREATE INDEX IF NOT EXISTS idx_shadow_receipts_reason ON shadow_receipts(reason_unrealized);
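-- Query sketch (illustrative): correlate restraint receipts with the tool calls that
-- produced them via the shared trace_id.
--
--   SELECT sr.ts, sr.reason_unrealized, ti.tool_name, ti.status
--   FROM shadow_receipts sr
--   LEFT JOIN tool_invocations ti ON ti.trace_id = sr.trace_id
--   ORDER BY datetime(sr.ts) DESC
--   LIMIT 20;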