Initialize repository snapshot

This commit is contained in:
Vault Sovereign
2025-12-27 00:10:32 +00:00
commit 110d644e10
281 changed files with 40331 additions and 0 deletions

199
ledger/redact.py Normal file
View File

@@ -0,0 +1,199 @@
from __future__ import annotations

import hashlib
import json
from collections.abc import Mapping
from dataclasses import dataclass
from typing import Any
# Lower-case fragments that mark a dictionary key as sensitive.
# _is_sensitive_key() tests each fragment as a *substring* of the lowered key,
# so a short entry such as "token" already covers longer ones such as
# "access_token"; the longer spellings are kept for explicitness.
DEFAULT_DENY_SUBSTRINGS = (
    "token",
    "access_token",
    "api_key",
    "authorization",
    "cookie",
    "set-cookie",
    "secret",
    "password",
    "passphrase",
    "private_key",
    "seed",
    "mnemonic",
    "github_token",
    "cloudflare_api_token",
    "openai",
    # Trailing underscore: matches prefixed names such as "aws_secret_access_key".
    "aws_",
)
def _sha256_hex(data: bytes) -> str:
    """Return the SHA-256 digest of *data* as a lowercase hexadecimal string."""
    hasher = hashlib.sha256()
    hasher.update(data)
    return hasher.hexdigest()
def _is_sensitive_key(key: str) -> bool:
    """Return True when *key* contains any fragment of DEFAULT_DENY_SUBSTRINGS.

    Matching is case-insensitive and ignores surrounding whitespace. An empty
    or whitespace-only key is never sensitive.

    The key is tested in two spellings: as given, and with ``-``, space and
    ``.`` rewritten to ``_``. Most deny entries use underscores ("api_key",
    "private_key"), so without the second spelling a header-style name such as
    ``X-Api-Key`` would not match and its value would be stored unredacted.
    The original spelling is still checked so that entries which themselves
    contain a hyphen (e.g. "set-cookie") keep matching.
    """
    lowered = key.strip().lower()
    if not lowered:
        return False
    normalized = lowered.replace("-", "_").replace(" ", "_").replace(".", "_")
    return any(
        fragment in lowered or fragment in normalized
        for fragment in DEFAULT_DENY_SUBSTRINGS
    )
def _safe_default(obj: Any) -> Any:
    """Convert a value ``json.dumps`` cannot encode into a JSON-friendly stand-in.

    ``bytes`` / ``bytearray`` values are summarised as a dict holding their
    length and SHA-256 digest, so the raw bytes are never emitted. Every other
    object is replaced by its ``repr()``.
    """
    if not isinstance(obj, (bytes, bytearray)):
        return repr(obj)
    raw = bytes(obj)
    return {"__type__": "bytes", "len": len(raw), "sha256": _sha256_hex(raw)}
@dataclass(frozen=True)
class RedactionMeta:
    """Immutable summary of a single redaction pass over one payload."""

    # Hex SHA-256 of the payload's serialized bytes before redaction.
    raw_sha256: str
    # Length in bytes of that same serialized form.
    raw_size_bytes: int
    # Count of values replaced by a redaction placeholder.
    redacted_paths: int
    # Count of strings shortened to the configured maximum length.
    truncated_strings: int
    # True when at least one sensitive key was encountered.
    saw_sensitive: bool
def _stable_json_bytes(payload: Any) -> bytes:
    """Serialize *payload* to bytes in a deterministic way.

    * ``bytes`` / ``bytearray`` are returned as an immutable ``bytes`` copy.
    * ``None`` becomes the JSON literal ``b"null"``.
    * ``str`` is UTF-8 encoded directly (not JSON-quoted).
    * Anything else is dumped as compact JSON with sorted keys; if that fails
      for any reason the ``repr()`` of the payload is encoded instead.

    Characters that cannot be encoded are replaced rather than raising.
    """
    if isinstance(payload, (bytes, bytearray)):
        return bytes(payload)
    if payload is None:
        return b"null"
    if isinstance(payload, str):
        return payload.encode("utf-8", errors="replace")

    try:
        serialized = json.dumps(
            payload,
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ":"),
            default=_safe_default,
        )
        return serialized.encode("utf-8", errors="replace")
    except Exception:
        # Best effort: an unserializable payload still yields stable-ish bytes.
        fallback_text = repr(payload)
        return fallback_text.encode("utf-8", errors="replace")
def _redact(
    value: Any,
    *,
    max_depth: int,
    max_items: int,
    max_string_len: int,
    _depth: int = 0,
    _meta: dict[str, int] | None = None,
) -> tuple[Any, dict[str, int]]:
    """Return a storage-safe copy of *value* together with redaction counters.

    The structure is walked recursively:

    * Mappings: keys are stringified; values under a sensitive key (see
      ``_is_sensitive_key``) become ``"[REDACTED]"``, other values are
      recursed into. Only the first ``max_items`` entries are kept and a
      ``"__truncated__"`` entry records the cut.
    * Lists and tuples: the first ``max_items`` elements are recursed into
      and a marker string is appended when elements were dropped. Tuples come
      back as lists.
    * Strings longer than ``max_string_len`` are cut and suffixed with the
      number of characters removed.
    * ``int`` / ``float`` / ``bool`` / ``None`` pass through unchanged.
    * Anything else goes through ``_safe_default``; a string result is
      subject to the same length cap as ordinary strings.
    * Anything nested deeper than ``max_depth`` becomes ``"[REDACTED_DEPTH]"``.

    Args:
        value: The object to sanitise.
        max_depth: Deepest nesting level that is still expanded.
        max_items: Maximum entries kept per mapping / sequence.
        max_string_len: Maximum characters kept per string.
        _depth: Current nesting level (internal recursion state).
        _meta: Shared counter dict (internal recursion state).

    Returns:
        ``(redacted_value, counters)`` where ``counters`` has the keys
        ``redacted_paths``, ``truncated_strings`` and ``saw_sensitive``.
    """
    meta = (
        _meta
        if _meta is not None
        else {"redacted_paths": 0, "truncated_strings": 0, "saw_sensitive": 0}
    )

    def clip(text: str) -> str:
        # Used for real strings and for repr() fallbacks, so neither can
        # exceed the configured cap.
        if len(text) <= max_string_len:
            return text
        meta["truncated_strings"] += 1
        return (
            text[:max_string_len]
            + f" [TRUNCATED {len(text) - max_string_len} chars]"
        )

    def descend(child: Any) -> Any:
        # `meta` is mutated in place, so only the redacted child is needed.
        redacted_child, _ = _redact(
            child,
            max_depth=max_depth,
            max_items=max_items,
            max_string_len=max_string_len,
            _depth=_depth + 1,
            _meta=meta,
        )
        return redacted_child

    if _depth > max_depth:
        meta["redacted_paths"] += 1
        return "[REDACTED_DEPTH]", meta

    # Any Mapping, not just dict: otherwise UserDict / MappingProxyType /
    # os.environ-style objects would fall through to repr() and be stored
    # with their sensitive values intact.
    if isinstance(value, Mapping):
        out: dict[str, Any] = {}
        items = list(value.items())
        if len(items) > max_items:
            items = items[:max_items]
            out["__truncated__"] = f"dict truncated to {max_items} items"
        for k, v in items:
            key = str(k)
            if _is_sensitive_key(key):
                meta["saw_sensitive"] += 1
                meta["redacted_paths"] += 1
                out[key] = "[REDACTED]"
            else:
                out[key] = descend(v)
        return out, meta

    if isinstance(value, (list, tuple)):
        out_list: list[Any] = [descend(item) for item in value[:max_items]]
        if len(value) > max_items:
            out_list.append(f"__truncated__: list truncated to {max_items} items")
        return out_list, meta

    if isinstance(value, str):
        return clip(value), meta

    if isinstance(value, (int, float, bool)) or value is None:
        return value, meta

    fallback = _safe_default(value)
    if isinstance(fallback, str):
        # repr() of an arbitrary object can be arbitrarily long.
        fallback = clip(fallback)
    return fallback, meta
def redact_json_for_storage(payload: Any) -> tuple[str | None, dict[str, Any] | None]:
    """Produce a redacted JSON string for *payload* plus metadata about it.

    Returns ``(None, None)`` when *payload* is ``None``. Otherwise returns:

    * the payload after ``_redact`` (depth 20, 200 items per container,
      2048 characters per string), dumped as compact JSON with sorted keys;
    * a dict with ``raw_sha256`` and ``raw_size_bytes`` (computed from the
      unredacted serialization), ``redacted_paths`` and ``truncated_strings``.
      The key ``saw_sensitive`` is present, set to ``True``, only when a
      sensitive key was encountered.
    """
    if payload is None:
        return None, None

    original_bytes = _stable_json_bytes(payload)
    sanitized, tally = _redact(
        payload, max_depth=20, max_items=200, max_string_len=2048
    )
    summary = RedactionMeta(
        raw_sha256=_sha256_hex(original_bytes),
        raw_size_bytes=len(original_bytes),
        redacted_paths=tally.get("redacted_paths", 0),
        truncated_strings=tally.get("truncated_strings", 0),
        saw_sensitive=tally.get("saw_sensitive", 0) > 0,
    )

    try:
        text = json.dumps(
            sanitized,
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ":"),
            default=_safe_default,
        )
    except Exception:
        # Last resort: store the repr of the redacted value as a JSON string.
        text = json.dumps(repr(sanitized), ensure_ascii=False)

    always_reported = (
        "raw_sha256",
        "raw_size_bytes",
        "redacted_paths",
        "truncated_strings",
    )
    described: dict[str, Any] = {
        name: getattr(summary, name) for name in always_reported
    }
    if summary.saw_sensitive:
        described["saw_sensitive"] = True
    return text, described