Files
vm-core/cli/ledger.py
2025-12-27 00:10:32 +00:00

668 lines
21 KiB
Python

from __future__ import annotations
import argparse
import hashlib
import json
import sqlite3
import uuid
from contextlib import contextmanager
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any
from ledger.db import (
default_db_path,
ensure_migrated,
log_proof_artifact,
new_trace_id,
open_db,
)
def _parse_since(value: str | None) -> str | None:
if not value:
return None
v = value.strip()
if not v:
return None
parts = v.split()
if len(parts) == 2 and parts[0].isdigit():
n = int(parts[0])
unit = parts[1].lower()
if unit in {"day", "days"}:
dt = datetime.now(timezone.utc) - timedelta(days=n)
return dt.replace(microsecond=0).isoformat().replace("+00:00", "Z")
if unit in {"hour", "hours"}:
dt = datetime.now(timezone.utc) - timedelta(hours=n)
return dt.replace(microsecond=0).isoformat().replace("+00:00", "Z")
if unit in {"minute", "minutes"}:
dt = datetime.now(timezone.utc) - timedelta(minutes=n)
return dt.replace(microsecond=0).isoformat().replace("+00:00", "Z")
# Accept "YYYY-MM-DD HH:MM:SS" or ISO8601
try:
dt = datetime.fromisoformat(v.replace("Z", "+00:00"))
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
return (
dt.astimezone(timezone.utc)
.replace(microsecond=0)
.isoformat()
.replace("+00:00", "Z")
)
except Exception:
return v
def _fmt_ms(ms: int | None) -> str:
return f"{ms}ms" if ms is not None else "-"
def _fmt_ts(ts: str) -> str:
# Stored as ISO Z; keep as-is if it already looks like it.
if ts.endswith("Z") and "T" in ts:
return ts
try:
# sqlite datetime('now') is "YYYY-MM-DD HH:MM:SS"
dt = datetime.strptime(ts, "%Y-%m-%d %H:%M:%S").replace(tzinfo=timezone.utc)
return dt.replace(microsecond=0).isoformat().replace("+00:00", "Z")
except Exception:
return ts
def _one_line(value: str | None, *, max_len: int = 120) -> str | None:
if not value:
return None
line = value.splitlines()[0].strip()
if len(line) <= max_len:
return line
return line[:max_len] + "..."
@contextmanager
def _open(db_path: str | None):
    # Open the ledger DB (default path when db_path is None), apply any
    # pending schema migrations, then hand the live connection to the caller.
    with open_db(Path(db_path) if db_path else None) as conn:
        ensure_migrated(conn)
        yield conn
def cmd_last(args: argparse.Namespace) -> int:
    """Print the last N ledger events across tools, MCP calls and artifacts.

    The three tables are UNION'd into one stream and shown newest first.
    Returns 0 (process exit code).
    """
    with _open(args.db) as conn:
        rows = conn.execute(
            """
            SELECT ts, status, duration_ms, trace_id, kind, label, id, error_text
            FROM (
                SELECT ts, status, duration_ms, trace_id,
                       'tool' AS kind,
                       tool_name || COALESCE('.' || action, '') AS label,
                       id,
                       error_text
                FROM tool_invocations
                UNION ALL
                SELECT ts, status, duration_ms, trace_id,
                       'mcp' AS kind,
                       'mcp:' || server_name || '.' || method AS label,
                       id,
                       error_text
                FROM mcp_calls
                UNION ALL
                SELECT ts, 'ok' AS status, NULL AS duration_ms, trace_id,
                       'artifact' AS kind,
                       'artifact:' || kind || COALESCE(' ' || path, '') AS label,
                       id,
                       NULL AS error_text
                FROM proof_artifacts
            )
            -- datetime() normalizes the two stored formats (ISO "...T...Z" vs
            -- sqlite "YYYY-MM-DD HH:MM:SS") so mixed rows sort chronologically,
            -- matching the seal queries which also order by datetime(ts).
            ORDER BY datetime(ts) DESC
            LIMIT ?;
            """,
            (args.n,),
        ).fetchall()
        for row in rows:
            ts = _fmt_ts(row["ts"])
            status = row["status"]
            label = row["label"]
            duration = _fmt_ms(row["duration_ms"])
            trace_id = row["trace_id"]
            err = _one_line(row["error_text"])
            tail = []
            if trace_id:
                tail.append(f"trace={trace_id}")
            # Error text is only shown for non-ok rows.
            if err and status != "ok":
                tail.append(err)
            tail_s = (" " + " ".join(tail)) if tail else ""
            print(f"{ts} {status:<5} {label:<28} {duration:>6}{tail_s}")
    return 0
def cmd_trace(args: argparse.Namespace) -> int:
    """Print every event recorded under one trace id, oldest first.

    Combines tool invocations, MCP calls and proof artifacts into a single
    timeline.  Returns 0 (process exit code).
    """
    with _open(args.db) as conn:
        rows = conn.execute(
            """
            SELECT ts, status, duration_ms, trace_id, kind, label, id, error_text
            FROM (
                SELECT ts, status, duration_ms, trace_id,
                       'tool' AS kind,
                       tool_name || COALESCE('.' || action, '') AS label,
                       id,
                       error_text
                FROM tool_invocations
                WHERE trace_id = ?
                UNION ALL
                SELECT ts, status, duration_ms, trace_id,
                       'mcp' AS kind,
                       'mcp:' || server_name || '.' || method AS label,
                       id,
                       error_text
                FROM mcp_calls
                WHERE trace_id = ?
                UNION ALL
                SELECT ts, 'ok' AS status, NULL AS duration_ms, trace_id,
                       'artifact' AS kind,
                       'artifact:' || kind || COALESCE(' ' || path, '') AS label,
                       id,
                       NULL AS error_text
                FROM proof_artifacts
                WHERE trace_id = ?
            )
            -- datetime() normalizes the two stored timestamp formats so the
            -- mixed-table timeline sorts chronologically (see seal queries).
            ORDER BY datetime(ts) ASC;
            """,
            (args.trace_id, args.trace_id, args.trace_id),
        ).fetchall()
        for row in rows:
            ts = _fmt_ts(row["ts"])
            status = row["status"]
            label = row["label"]
            duration = _fmt_ms(row["duration_ms"])
            err = _one_line(row["error_text"])
            # Error text is only appended for non-ok rows.
            tail = f" {err}" if err and status != "ok" else ""
            print(f"{ts} {status:<5} {label:<28} {duration:>6}{tail}")
    return 0
def cmd_tool(args: argparse.Namespace) -> int:
    """List invocations of one tool, newest first, optionally since a time.

    --since accepts anything _parse_since understands; --n limits the output.
    Returns 0 (process exit code).
    """
    since = _parse_since(args.since)
    params: list[Any] = [args.tool_name]
    where = "tool_name = ?"
    if since:
        # datetime() on both sides normalizes the two stored timestamp formats
        # (ISO "Z" vs sqlite "YYYY-MM-DD HH:MM:SS"), consistent with the
        # window filtering used by the seal queries.
        where += " AND datetime(ts) >= datetime(?)"
        params.append(since)
    limit_sql = " LIMIT ?" if args.n is not None else ""
    if args.n is not None:
        params.append(args.n)
    with _open(args.db) as conn:
        rows = conn.execute(
            f"""
            SELECT ts, status, duration_ms, trace_id, tool_name, action, error_text
            FROM tool_invocations
            WHERE {where}
            ORDER BY datetime(ts) DESC{limit_sql};
            """,
            tuple(params),
        ).fetchall()
        for row in rows:
            ts = _fmt_ts(row["ts"])
            status = row["status"]
            label = row["tool_name"] + (("." + row["action"]) if row["action"] else "")
            duration = _fmt_ms(row["duration_ms"])
            trace_id = row["trace_id"]
            err = _one_line(row["error_text"])
            tail = []
            if trace_id:
                tail.append(f"trace={trace_id}")
            # Error text is only shown for non-ok rows.
            if err and status != "ok":
                tail.append(err)
            tail_s = (" " + " ".join(tail)) if tail else ""
            print(f"{ts} {status:<5} {label:<28} {duration:>6}{tail_s}")
    return 0
def cmd_errors(args: argparse.Namespace) -> int:
    """Show the most recent non-ok tool invocations, newest first.

    --since optionally restricts the window; --n caps the row count.
    Returns 0 (process exit code).
    """
    since = _parse_since(args.since)
    params: list[Any] = []
    where = "status != 'ok'"
    if since:
        # datetime() on both sides normalizes ISO "Z" vs sqlite
        # "YYYY-MM-DD HH:MM:SS" storage, consistent with the seal queries.
        where += " AND datetime(ts) >= datetime(?)"
        params.append(since)
    with _open(args.db) as conn:
        rows = conn.execute(
            f"""
            SELECT ts, status, duration_ms, trace_id,
                   tool_name || COALESCE('.' || action, '') AS label,
                   error_text
            FROM tool_invocations
            WHERE {where}
            ORDER BY datetime(ts) DESC
            LIMIT ?;
            """,
            (*params, args.n),
        ).fetchall()
        for row in rows:
            ts = _fmt_ts(row["ts"])
            status = row["status"]
            duration = _fmt_ms(row["duration_ms"])
            trace_id = row["trace_id"]
            label = row["label"]
            # Always show something in the error column, "-" when empty.
            err = _one_line(row["error_text"]) or "-"
            tail = f" trace={trace_id}" if trace_id else ""
            print(f"{ts} {status:<5} {label:<28} {duration:>6} {err}{tail}")
    return 0
def cmd_search(args: argparse.Namespace) -> int:
    """Substring-search the stored (redacted) JSON blobs and error text.

    Returns 2 (usage error) for an empty/whitespace-only term, else 0.
    """
    term = args.term.strip()
    if not term:
        return 2
    like = f"%{term}%"
    with _open(args.db) as conn:
        rows = conn.execute(
            """
            SELECT ts, status, duration_ms, trace_id,
                   tool_name || COALESCE('.' || action, '') AS label,
                   id
            FROM tool_invocations
            WHERE input_json LIKE ? OR output_json LIKE ? OR input_meta_json LIKE ? OR output_meta_json LIKE ?
               OR error_text LIKE ?
            ORDER BY datetime(ts) DESC
            LIMIT ?;
            """,
            (like, like, like, like, like, args.n),
        ).fetchall()
        for row in rows:
            ts = _fmt_ts(row["ts"])
            status = row["status"]
            duration = _fmt_ms(row["duration_ms"])
            trace_id = row["trace_id"]
            label = row["label"]
            rec_id = row["id"]
            # Row id always shown so hits can be looked up directly.
            tail = [f"id={rec_id}"]
            if trace_id:
                tail.append(f"trace={trace_id}")
            print(f"{ts} {status:<5} {label:<28} {duration:>6} " + " ".join(tail))
    return 0
def _sha256_hex_text(value: str | None) -> str:
b = (value or "").encode("utf-8")
return hashlib.sha256(b).hexdigest()
def _canon(value: Any) -> str:
if value is None:
return ""
if isinstance(value, (int,)):
return str(value)
if isinstance(value, float):
return format(value, ".17g")
return str(value)
def _digest_update(hasher: "hashlib._Hash", line: str) -> None:
hasher.update(line.encode("utf-8"))
hasher.update(b"\n")
def _seal_query_window(
    args: argparse.Namespace, conn: sqlite3.Connection
) -> tuple[dict[str, Any], str]:
    """Compute a deterministic SHA-256 digest over a window of ledger rows.

    The window is either a time range (--since/--until) or an explicit set of
    trace ids.  Each table's rows are read in a stable order (datetime(ts),
    then id) and folded into one hash as pipe-delimited lines; large text
    columns contribute only their own SHA-256 so the digest stays compact but
    content-sensitive.

    Returns (payload, digest_hex) where payload carries the selection, the
    per-table row counts, the observed timestamp bounds, and the digest.
    """
    since = _parse_since(args.since)
    until = _parse_since(args.until) if getattr(args, "until", None) else None
    trace_ids: list[str] = [t.strip() for t in (args.trace_id or []) if t and t.strip()]
    # Trace ids take precedence for labeling the selection scope.
    scope = "trace_set" if trace_ids else "time_window"

    def where_ts(prefix: str) -> tuple[str, list[Any]]:
        # Build the shared WHERE clause (+ params) for one table; *prefix*
        # would qualify column names (currently always called with "").
        clauses: list[str] = []
        params: list[Any] = []
        if since:
            # datetime() normalizes both stored timestamp formats.
            clauses.append(f"datetime({prefix}ts) >= datetime(?)")
            params.append(since)
        if until:
            clauses.append(f"datetime({prefix}ts) <= datetime(?)")
            params.append(until)
        if trace_ids:
            marks = ",".join(["?"] * len(trace_ids))
            clauses.append(f"{prefix}trace_id IN ({marks})")
            params.extend(trace_ids)
        if not clauses:
            # No filters at all: select everything.
            return ("1=1", [])
        return (" AND ".join(clauses), params)

    hasher = hashlib.sha256()
    counts: dict[str, int] = {}
    bounds_min: str | None = None
    bounds_max: str | None = None

    def note_ts(ts_value: Any) -> None:
        # Track min/max normalized timestamps actually seen in the window.
        nonlocal bounds_min, bounds_max
        ts = _fmt_ts(_canon(ts_value))
        if not ts:
            return
        if bounds_min is None or ts < bounds_min:
            bounds_min = ts
        if bounds_max is None or ts > bounds_max:
            bounds_max = ts

    # tool_invocations: JSON/error bodies are hashed, not embedded.
    where, params = where_ts("")
    rows = conn.execute(
        f"""
        SELECT id, ts, tool_name, action, status, duration_ms, trace_id,
               input_json, output_json, error_text
        FROM tool_invocations
        WHERE {where}
        ORDER BY datetime(ts) ASC, id ASC;
        """,
        tuple(params),
    ).fetchall()
    for r in rows:
        note_ts(r["ts"])
        line = (
            "tool_invocations"
            f"|id={_canon(r['id'])}"
            f"|ts={_fmt_ts(_canon(r['ts']))}"
            f"|tool_name={_canon(r['tool_name'])}"
            f"|action={_canon(r['action'])}"
            f"|status={_canon(r['status'])}"
            f"|duration_ms={_canon(r['duration_ms'])}"
            f"|trace_id={_canon(r['trace_id'])}"
            f"|input_sha256={_sha256_hex_text(r['input_json'])}"
            f"|output_sha256={_sha256_hex_text(r['output_json'])}"
            f"|error_sha256={_sha256_hex_text(r['error_text'])}"
        )
        _digest_update(hasher, line)
    counts["tool_invocations"] = len(rows)

    # mcp_calls: same scheme with request/response payloads hashed.
    where, params = where_ts("")
    rows = conn.execute(
        f"""
        SELECT id, ts, server_name, method, tool_name, status, duration_ms, trace_id,
               request_json, response_json, error_text
        FROM mcp_calls
        WHERE {where}
        ORDER BY datetime(ts) ASC, id ASC;
        """,
        tuple(params),
    ).fetchall()
    for r in rows:
        note_ts(r["ts"])
        line = (
            "mcp_calls"
            f"|id={_canon(r['id'])}"
            f"|ts={_fmt_ts(_canon(r['ts']))}"
            f"|server_name={_canon(r['server_name'])}"
            f"|method={_canon(r['method'])}"
            f"|tool_name={_canon(r['tool_name'])}"
            f"|status={_canon(r['status'])}"
            f"|duration_ms={_canon(r['duration_ms'])}"
            f"|trace_id={_canon(r['trace_id'])}"
            f"|request_sha256={_sha256_hex_text(r['request_json'])}"
            f"|response_sha256={_sha256_hex_text(r['response_json'])}"
            f"|error_sha256={_sha256_hex_text(r['error_text'])}"
        )
        _digest_update(hasher, line)
    counts["mcp_calls"] = len(rows)

    # proof_artifacts: stored hashes are included verbatim; only the free-form
    # meta blob is re-hashed.
    where, params = where_ts("")
    rows = conn.execute(
        f"""
        SELECT id, ts, kind, path, sha256_hex, blake3_hex, size_bytes, trace_id, meta_json
        FROM proof_artifacts
        WHERE {where}
        ORDER BY datetime(ts) ASC, id ASC;
        """,
        tuple(params),
    ).fetchall()
    for r in rows:
        note_ts(r["ts"])
        line = (
            "proof_artifacts"
            f"|id={_canon(r['id'])}"
            f"|ts={_fmt_ts(_canon(r['ts']))}"
            f"|kind={_canon(r['kind'])}"
            f"|path={_canon(r['path'])}"
            f"|sha256_hex={_canon(r['sha256_hex'])}"
            f"|blake3_hex={_canon(r['blake3_hex'])}"
            f"|size_bytes={_canon(r['size_bytes'])}"
            f"|trace_id={_canon(r['trace_id'])}"
            f"|meta_sha256={_sha256_hex_text(r['meta_json'])}"
        )
        _digest_update(hasher, line)
    counts["proof_artifacts"] = len(rows)

    # shadow_receipts (if present): the table may not exist in older schemas,
    # so a missing table contributes zero rows instead of failing the seal.
    try:
        where, params = where_ts("")
        rows = conn.execute(
            f"""
            SELECT id, ts, horizon_id, counterfactual_hash, entropy_delta, reason_unrealized,
                   observer_signature, trace_id, meta_json
            FROM shadow_receipts
            WHERE {where}
            ORDER BY datetime(ts) ASC, id ASC;
            """,
            tuple(params),
        ).fetchall()
        for r in rows:
            note_ts(r["ts"])
            line = (
                "shadow_receipts"
                f"|id={_canon(r['id'])}"
                f"|ts={_fmt_ts(_canon(r['ts']))}"
                f"|horizon_id={_canon(r['horizon_id'])}"
                f"|counterfactual_hash={_canon(r['counterfactual_hash'])}"
                f"|entropy_delta={_canon(r['entropy_delta'])}"
                f"|reason_unrealized={_canon(r['reason_unrealized'])}"
                f"|observer_signature={_canon(r['observer_signature'])}"
                f"|trace_id={_canon(r['trace_id'])}"
                f"|meta_sha256={_sha256_hex_text(r['meta_json'])}"
            )
            _digest_update(hasher, line)
        counts["shadow_receipts"] = len(rows)
    except sqlite3.OperationalError:
        counts["shadow_receipts"] = 0

    selection = {
        "scope": scope,
        "since": since,
        "until": until,
        "trace_ids": trace_ids,
        "kinds": [
            "tool_invocations",
            "mcp_calls",
            "proof_artifacts",
            "shadow_receipts",
        ],
    }
    digest = {"algorithm": "sha256", "hex": hasher.hexdigest()}
    bounds = {"min_ts": bounds_min, "max_ts": bounds_max}
    return (
        {"selection": selection, "counts": counts, "bounds": bounds, "digest": digest},
        digest["hex"],
    )
def _schema_version(conn: sqlite3.Connection) -> tuple[int, str | None]:
"""
Returns (schema_version_int, last_migration_name).
schema_version_int is derived from the leading 4-digit prefix in applied migration filenames.
"""
try:
rows = conn.execute("SELECT name FROM migrations;").fetchall()
except sqlite3.OperationalError:
return (0, None)
max_v = 0
last_name: str | None = None
for r in rows:
name = r["name"] if isinstance(r, sqlite3.Row) else r[0]
if not isinstance(name, str):
continue
if not last_name or name > last_name:
last_name = name
prefix = name.split("_", 1)[0]
if prefix.isdigit():
max_v = max(max_v, int(prefix))
return (max_v, last_name)
def cmd_seal(args: argparse.Namespace) -> int:
    """Create an Ouroboros seal bundle.

    Computes a deterministic digest over the selected ledger window, writes a
    JSON bundle into <db dir>/seals/, records it as a proof artifact, and
    prints the resulting identifiers.  Returns 0.
    """
    resolved_db = Path(args.db).expanduser().resolve() if args.db else default_db_path()
    with _open(str(resolved_db)) as conn:
        payload, digest_hex = _seal_query_window(args, conn)
        schema_version, schema_last_migration = _schema_version(conn)
        sealed_at = datetime.now(timezone.utc).replace(microsecond=0)
        stamp = sealed_at.strftime("%Y%m%dT%H%M%SZ")
        seal_dir = resolved_db.parent / "seals"
        seal_dir.mkdir(parents=True, exist_ok=True)
        bundle_path = (seal_dir / f"ouroboros_seal_{stamp}.json").resolve()
        seal_id = str(uuid.uuid4())
        trace_id = new_trace_id()
        bundle = {
            "format": "vm-ouroboros-seal-v0",
            "schema_version": schema_version,
            "schema_last_migration": schema_last_migration,
            "seal_id": seal_id,
            "sealed_at": sealed_at.isoformat().replace("+00:00", "Z"),
            "digest_algo": payload["digest"]["algorithm"],
            "selection": payload["selection"],
            "digest": payload["digest"],
            "counts": payload["counts"],
            "bounds": payload["bounds"],
            "inputs": {
                "sqlite_db_path": str(resolved_db),
            },
            "trace_id": trace_id,
        }
        # Stable serialization (sorted keys) so identical seals byte-match.
        bundle_path.write_text(
            json.dumps(bundle, ensure_ascii=False, sort_keys=True, indent=2) + "\n",
            encoding="utf-8",
        )
        artifact_id = log_proof_artifact(
            kind="ouroboros_seal_bundle",
            path=bundle_path,
            meta={
                "seal_id": seal_id,
                "schema_version": schema_version,
                "schema_last_migration": schema_last_migration,
                "digest_algo": payload["digest"]["algorithm"],
                "digest": payload["digest"],
                "counts": payload["counts"],
                "bounds": payload["bounds"],
                "selection": payload["selection"],
            },
            trace_id=trace_id,
            db_path=resolved_db,
        )
        print(f"seal_id={seal_id}")
        print(f"trace_id={trace_id}")
        print(f"digest={digest_hex}")
        print(f"bundle={bundle_path}")
        print(f"artifact_id={artifact_id}")
    return 0
def cmd_seals_last(args: argparse.Namespace) -> int:
    """Print the most recent N seal-bundle artifacts, newest first."""
    with _open(args.db) as conn:
        records = conn.execute(
            """
            SELECT ts, id, kind, path, sha256_hex, trace_id
            FROM proof_artifacts
            WHERE kind = 'ouroboros_seal_bundle'
            ORDER BY datetime(ts) DESC, id DESC
            LIMIT ?;
            """,
            (int(args.n),),
        ).fetchall()
        for rec in records:
            when = _fmt_ts(rec["ts"])
            digest = rec["sha256_hex"] or "-"
            trace = rec["trace_id"] or "-"
            location = rec["path"] or ""
            print(f"{when} id={rec['id']} sha256={digest} trace={trace} {location}")
    return 0
def build_parser() -> argparse.ArgumentParser:
    """Build the `ledger` CLI parser: one sub-command per action, each
    bound to its handler via set_defaults(func=...)."""
    parser = argparse.ArgumentParser(
        prog="ledger", description="Local-first SQLite ledger (tool + MCP call log)"
    )
    parser.add_argument("--db", help=f"SQLite path (default: {default_db_path()})")
    commands = parser.add_subparsers(dest="cmd", required=True)

    last_cmd = commands.add_parser("last", help="Show last N events")
    last_cmd.add_argument("--n", type=int, default=50)
    last_cmd.set_defaults(func=cmd_last)

    trace_cmd = commands.add_parser("trace", help="Show an end-to-end trace")
    trace_cmd.add_argument("trace_id")
    trace_cmd.set_defaults(func=cmd_trace)

    tool_cmd = commands.add_parser("tool", help="Filter tool invocations by tool name")
    tool_cmd.add_argument("tool_name")
    tool_cmd.add_argument("--since", help='e.g. "2025-12-17 00:00:00" or "7 days"')
    tool_cmd.add_argument("--n", type=int, default=200)
    tool_cmd.set_defaults(func=cmd_tool)

    errors_cmd = commands.add_parser("errors", help="Show recent errors")
    errors_cmd.add_argument("--since", help='e.g. "7 days"')
    errors_cmd.add_argument("--n", type=int, default=200)
    errors_cmd.set_defaults(func=cmd_errors)

    search_cmd = commands.add_parser("search", help="Search redacted JSON blobs")
    search_cmd.add_argument("term")
    search_cmd.add_argument("--n", type=int, default=200)
    search_cmd.set_defaults(func=cmd_search)

    seal_cmd = commands.add_parser(
        "seal", help="Create an Ouroboros seal bundle (deterministic digest)"
    )
    seal_cmd.add_argument("--since", help='e.g. "7 days" or ISO8601')
    seal_cmd.add_argument("--until", help='e.g. "2025-12-17 00:00:00" or ISO8601')
    seal_cmd.add_argument(
        "--trace-id",
        action="append",
        default=[],
        help="Limit sealing to one or more trace ids (repeatable).",
    )
    seal_cmd.set_defaults(func=cmd_seal)

    # "seals" carries its own nested sub-commands.
    seals_cmd = commands.add_parser("seals", help="Seal bundle utilities")
    seals_nested = seals_cmd.add_subparsers(dest="seals_cmd", required=True)
    seals_last_cmd = seals_nested.add_parser("last", help="Show last N seal bundles")
    seals_last_cmd.add_argument("--n", type=int, default=10)
    seals_last_cmd.set_defaults(func=cmd_seals_last)

    return parser
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: parse argv and dispatch to the chosen sub-command."""
    namespace = build_parser().parse_args(argv)
    return int(namespace.func(namespace))
if __name__ == "__main__":
    # Script entry: propagate the command's exit code to the shell.
    raise SystemExit(main())