Initialize repository snapshot
This commit is contained in:
199
ledger/redact.py
Normal file
199
ledger/redact.py
Normal file
@@ -0,0 +1,199 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
# Substrings that mark a dict key as sensitive. _is_sensitive_key() strips and
# lowercases the key, then reports a match when ANY of these appears anywhere
# inside it, so entries here must be lowercase.
# NOTE(review): because matching is by substring, "access_token",
# "github_token" and "cloudflare_api_token" are already covered by "token";
# they are kept so the tuple's value stays unchanged for any importer.
DEFAULT_DENY_SUBSTRINGS = (
    "token",
    "access_token",
    "api_key",
    "authorization",
    "cookie",
    "set-cookie",
    "secret",
    "password",
    "passphrase",
    "private_key",
    "seed",
    "mnemonic",
    "github_token",
    "cloudflare_api_token",
    "openai",
    "aws_",
)
|
||||
|
||||
|
||||
def _sha256_hex(data: bytes) -> str:
|
||||
return hashlib.sha256(data).hexdigest()
|
||||
|
||||
|
||||
def _is_sensitive_key(key: str) -> bool:
|
||||
lowered = key.strip().lower()
|
||||
if not lowered:
|
||||
return False
|
||||
return any(s in lowered for s in DEFAULT_DENY_SUBSTRINGS)
|
||||
|
||||
|
||||
def _safe_default(obj: Any) -> Any:
|
||||
if isinstance(obj, (bytes, bytearray)):
|
||||
b = bytes(obj)
|
||||
return {"__type__": "bytes", "len": len(b), "sha256": _sha256_hex(b)}
|
||||
return repr(obj)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class RedactionMeta:
    """Immutable summary of one redaction pass, as built by redact_json_for_storage()."""

    # SHA-256 hex digest of the payload's stable byte serialization.
    raw_sha256: str
    # Length in bytes of that same serialization.
    raw_size_bytes: int
    # Number of values replaced by a placeholder (sensitive key or depth limit).
    redacted_paths: int
    # Number of strings shortened because they exceeded the length limit.
    truncated_strings: int
    # True when at least one sensitive key was encountered.
    saw_sensitive: bool
|
||||
|
||||
|
||||
def _stable_json_bytes(payload: Any) -> bytes:
|
||||
if payload is None:
|
||||
return b"null"
|
||||
if isinstance(payload, (bytes, bytearray)):
|
||||
return bytes(payload)
|
||||
if isinstance(payload, str):
|
||||
return payload.encode("utf-8", errors="replace")
|
||||
try:
|
||||
return json.dumps(
|
||||
payload,
|
||||
ensure_ascii=False,
|
||||
sort_keys=True,
|
||||
separators=(",", ":"),
|
||||
default=_safe_default,
|
||||
).encode("utf-8", errors="replace")
|
||||
except Exception:
|
||||
return repr(payload).encode("utf-8", errors="replace")
|
||||
|
||||
|
||||
def _redact(
|
||||
value: Any,
|
||||
*,
|
||||
max_depth: int,
|
||||
max_items: int,
|
||||
max_string_len: int,
|
||||
_depth: int = 0,
|
||||
_meta: dict[str, int] | None = None,
|
||||
) -> tuple[Any, dict[str, int]]:
|
||||
meta = (
|
||||
_meta
|
||||
if _meta is not None
|
||||
else {"redacted_paths": 0, "truncated_strings": 0, "saw_sensitive": 0}
|
||||
)
|
||||
|
||||
if _depth > max_depth:
|
||||
meta["redacted_paths"] += 1
|
||||
return "[REDACTED_DEPTH]", meta
|
||||
|
||||
if isinstance(value, dict):
|
||||
out: dict[str, Any] = {}
|
||||
items = list(value.items())
|
||||
if len(items) > max_items:
|
||||
items = items[:max_items]
|
||||
out["__truncated__"] = f"dict truncated to {max_items} items"
|
||||
for k, v in items:
|
||||
key = str(k)
|
||||
if _is_sensitive_key(key):
|
||||
meta["saw_sensitive"] += 1
|
||||
meta["redacted_paths"] += 1
|
||||
out[key] = "[REDACTED]"
|
||||
continue
|
||||
|
||||
out[key], meta = _redact(
|
||||
v,
|
||||
max_depth=max_depth,
|
||||
max_items=max_items,
|
||||
max_string_len=max_string_len,
|
||||
_depth=_depth + 1,
|
||||
_meta=meta,
|
||||
)
|
||||
return out, meta
|
||||
|
||||
if isinstance(value, list):
|
||||
out_list: list[Any] = []
|
||||
items = value
|
||||
truncated = False
|
||||
if len(items) > max_items:
|
||||
items = items[:max_items]
|
||||
truncated = True
|
||||
for item in items:
|
||||
redacted_item, meta = _redact(
|
||||
item,
|
||||
max_depth=max_depth,
|
||||
max_items=max_items,
|
||||
max_string_len=max_string_len,
|
||||
_depth=_depth + 1,
|
||||
_meta=meta,
|
||||
)
|
||||
out_list.append(redacted_item)
|
||||
if truncated:
|
||||
out_list.append(f"__truncated__: list truncated to {max_items} items")
|
||||
return out_list, meta
|
||||
|
||||
if isinstance(value, tuple):
|
||||
return _redact(
|
||||
list(value),
|
||||
max_depth=max_depth,
|
||||
max_items=max_items,
|
||||
max_string_len=max_string_len,
|
||||
_depth=_depth,
|
||||
_meta=meta,
|
||||
)
|
||||
|
||||
if isinstance(value, str):
|
||||
if len(value) <= max_string_len:
|
||||
return value, meta
|
||||
meta["truncated_strings"] += 1
|
||||
return value[
|
||||
:max_string_len
|
||||
] + f" [TRUNCATED {len(value) - max_string_len} chars]", meta
|
||||
|
||||
if isinstance(value, (int, float, bool)) or value is None:
|
||||
return value, meta
|
||||
|
||||
return _safe_default(value), meta
|
||||
|
||||
|
||||
def redact_json_for_storage(payload: Any) -> tuple[str | None, dict[str, Any] | None]:
    """Produce a redacted JSON string plus metadata describing the original.

    Returns ``(None, None)`` when *payload* is ``None``. Otherwise returns
    ``(redacted_json, meta)`` where ``redacted_json`` is the compact,
    key-sorted JSON of the redacted payload and ``meta`` contains
    ``raw_sha256`` and ``raw_size_bytes`` of the unredacted stable
    serialization, the ``redacted_paths`` and ``truncated_strings`` counts,
    and ``saw_sensitive: True`` only when a sensitive key was found.
    """
    if payload is None:
        return None, None

    # Hash the unredacted form first so the stored record can be correlated
    # with the original without keeping the original itself.
    raw_bytes = _stable_json_bytes(payload)
    redacted_value, counters = _redact(
        payload,
        max_depth=20,
        max_items=200,
        max_string_len=2048,
    )

    summary = RedactionMeta(
        raw_sha256=_sha256_hex(raw_bytes),
        raw_size_bytes=len(raw_bytes),
        redacted_paths=counters.get("redacted_paths", 0),
        truncated_strings=counters.get("truncated_strings", 0),
        saw_sensitive=counters.get("saw_sensitive", 0) > 0,
    )

    try:
        redacted_json = json.dumps(
            redacted_value,
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ":"),
            default=_safe_default,
        )
    except Exception:
        # Best effort: fall back to a JSON string holding the repr.
        redacted_json = json.dumps(repr(redacted_value), ensure_ascii=False)

    always_present = (
        "raw_sha256",
        "raw_size_bytes",
        "redacted_paths",
        "truncated_strings",
    )
    meta_dict: dict[str, Any] = {
        field_name: getattr(summary, field_name) for field_name in always_present
    }
    if summary.saw_sensitive:
        meta_dict["saw_sensitive"] = True

    return redacted_json, meta_dict
|
||||
Reference in New Issue
Block a user