"""Redact and bound arbitrary payloads before they are stored as JSON.

The public entry point is :func:`redact_json_for_storage`. It returns a compact
JSON string in which values under sensitive-looking keys are replaced, long
strings and large containers are truncated, and deep nesting is cut off, plus
a small metadata dict describing the original payload.
"""

from __future__ import annotations

import hashlib
import json
from collections.abc import Mapping
from dataclasses import dataclass
from typing import Any

# Lower-case substrings; a mapping key that contains any of them (after
# strip + lower) has its value replaced by "[REDACTED]". Several entries are
# already covered by the shorter "token" entry but are kept for explicitness.
DEFAULT_DENY_SUBSTRINGS = (
    "token",
    "access_token",
    "api_key",
    "authorization",
    "cookie",
    "set-cookie",
    "secret",
    "password",
    "passphrase",
    "private_key",
    "seed",
    "mnemonic",
    "github_token",
    "cloudflare_api_token",
    "openai",
    "aws_",
)


def _sha256_hex(data: bytes) -> str:
    """Return the hexadecimal SHA-256 digest of *data*."""
    return hashlib.sha256(data).hexdigest()


def _is_sensitive_key(key: str) -> bool:
    """Return True if *key* contains any deny-listed substring.

    The comparison is done on the stripped, lower-cased key; an empty or
    whitespace-only key is never considered sensitive.
    """
    lowered = key.strip().lower()
    if not lowered:
        return False
    return any(s in lowered for s in DEFAULT_DENY_SUBSTRINGS)


def _safe_default(obj: Any) -> Any:
    """Return a JSON-serialisable stand-in for an otherwise unsupported object.

    Bytes-like objects become a descriptor holding their length and SHA-256
    digest instead of their content; everything else becomes ``repr(obj)``.
    Also used as the ``default=`` hook for ``json.dumps``.
    """
    if isinstance(obj, (bytes, bytearray)):
        b = bytes(obj)
        return {"__type__": "bytes", "len": len(b), "sha256": _sha256_hex(b)}
    return repr(obj)


@dataclass(frozen=True)
class RedactionMeta:
    """Summary of one redaction pass over a payload."""

    # SHA-256 hex digest of the un-redacted payload's stable byte form.
    raw_sha256: str
    # Length in bytes of that stable byte form.
    raw_size_bytes: int
    # Number of values replaced (sensitive keys and depth cut-offs).
    redacted_paths: int
    # Number of strings shortened to the maximum string length.
    truncated_strings: int
    # True if at least one sensitive key was encountered.
    saw_sensitive: bool


def _stable_json_bytes(payload: Any) -> bytes:
    """Return a deterministic byte representation of *payload* for hashing.

    ``None`` maps to ``b"null"``, bytes-like input is returned as bytes, and
    strings are UTF-8 encoded. Anything else is dumped as compact JSON with
    sorted keys, using :func:`_safe_default` for unsupported objects. If the
    dump fails for any reason the ``repr`` of the payload is used instead.
    """
    if payload is None:
        return b"null"
    if isinstance(payload, (bytes, bytearray)):
        return bytes(payload)
    if isinstance(payload, str):
        return payload.encode("utf-8", errors="replace")
    try:
        return json.dumps(
            payload,
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ":"),
            default=_safe_default,
        ).encode("utf-8", errors="replace")
    except Exception:
        # Deliberate best effort: e.g. mixed-type keys cannot be sorted and
        # circular references cannot be dumped, but a digest is still wanted.
        return repr(payload).encode("utf-8", errors="replace")


def _clip_string(text: str, max_string_len: int, meta: dict[str, int]) -> str:
    """Shorten *text* to *max_string_len* characters, recording it in *meta*."""
    if len(text) <= max_string_len:
        return text
    meta["truncated_strings"] += 1
    return text[:max_string_len] + f" [TRUNCATED {len(text) - max_string_len} chars]"


def _redact(
    value: Any,
    *,
    max_depth: int,
    max_items: int,
    max_string_len: int,
    _depth: int = 0,
    _meta: dict[str, int] | None = None,
) -> tuple[Any, dict[str, int]]:
    """Recursively build a redacted, size-bounded copy of *value*.

    Args:
        value: The object to sanitise.
        max_depth: Nesting levels beyond this are replaced by
            ``"[REDACTED_DEPTH]"``.
        max_items: Maximum number of entries kept per mapping or list; a
            truncation marker is added when entries are dropped.
        max_string_len: Maximum number of characters kept per string value.
        _depth: Current nesting level (internal, used by the recursion).
        _meta: Shared counters (internal, used by the recursion).

    Returns:
        A ``(redacted_value, counters)`` tuple, where *counters* holds the
        ``redacted_paths``, ``truncated_strings`` and ``saw_sensitive`` counts.
    """
    meta = (
        _meta
        if _meta is not None
        else {"redacted_paths": 0, "truncated_strings": 0, "saw_sensitive": 0}
    )

    if _depth > max_depth:
        meta["redacted_paths"] += 1
        return "[REDACTED_DEPTH]", meta

    # Any Mapping (not just dict) is walked key by key. Falling back to repr()
    # for e.g. a mappingproxy or os.environ would bypass key-based redaction
    # and emit the secret values verbatim.
    if isinstance(value, Mapping):
        out: dict[str, Any] = {}
        items = list(value.items())
        if len(items) > max_items:
            items = items[:max_items]
            out["__truncated__"] = f"dict truncated to {max_items} items"
        for k, v in items:
            key = str(k)
            if _is_sensitive_key(key):
                meta["saw_sensitive"] += 1
                meta["redacted_paths"] += 1
                out[key] = "[REDACTED]"
                continue
            out[key], meta = _redact(
                v,
                max_depth=max_depth,
                max_items=max_items,
                max_string_len=max_string_len,
                _depth=_depth + 1,
                _meta=meta,
            )
        return out, meta

    if isinstance(value, list):
        out_list: list[Any] = []
        truncated = len(value) > max_items
        kept = value[:max_items] if truncated else value
        for item in kept:
            redacted_item, meta = _redact(
                item,
                max_depth=max_depth,
                max_items=max_items,
                max_string_len=max_string_len,
                _depth=_depth + 1,
                _meta=meta,
            )
            out_list.append(redacted_item)
        if truncated:
            out_list.append(f"__truncated__: list truncated to {max_items} items")
        return out_list, meta

    if isinstance(value, tuple):
        # Tuples are handled as lists at the same depth.
        return _redact(
            list(value),
            max_depth=max_depth,
            max_items=max_items,
            max_string_len=max_string_len,
            _depth=_depth,
            _meta=meta,
        )

    if isinstance(value, str):
        return _clip_string(value, max_string_len, meta), meta

    if value is None or isinstance(value, (bool, int, float)):
        return value, meta

    fallback = _safe_default(value)
    if isinstance(fallback, str):
        # The repr() of an arbitrary object can be arbitrarily long; apply the
        # same length cap as for ordinary strings.
        fallback = _clip_string(fallback, max_string_len, meta)
    return fallback, meta


def redact_json_for_storage(payload: Any) -> tuple[str | None, dict[str, Any] | None]:
    """Return a redacted JSON string for *payload* plus metadata about it.

    Redaction uses a maximum depth of 20, at most 200 items per container and
    at most 2048 characters per string.

    Args:
        payload: Any object; ``None`` means "nothing to store".

    Returns:
        ``(None, None)`` when *payload* is ``None``. Otherwise a tuple of the
        compact, key-sorted JSON text of the redacted payload and a dict with
        ``raw_sha256``, ``raw_size_bytes``, ``redacted_paths`` and
        ``truncated_strings``; ``saw_sensitive`` is present (and ``True``)
        only when a sensitive key was found.
    """
    if payload is None:
        return None, None

    raw_bytes = _stable_json_bytes(payload)
    redacted_value, counters = _redact(
        payload, max_depth=20, max_items=200, max_string_len=2048
    )
    meta = RedactionMeta(
        raw_sha256=_sha256_hex(raw_bytes),
        raw_size_bytes=len(raw_bytes),
        redacted_paths=counters.get("redacted_paths", 0),
        truncated_strings=counters.get("truncated_strings", 0),
        saw_sensitive=counters.get("saw_sensitive", 0) > 0,
    )

    try:
        redacted_json = json.dumps(
            redacted_value,
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ":"),
            default=_safe_default,
        )
    except Exception:
        # Best effort: still hand back valid JSON (a single string) if the
        # structured dump fails.
        redacted_json = json.dumps(repr(redacted_value), ensure_ascii=False)

    meta_dict: dict[str, Any] = {
        "raw_sha256": meta.raw_sha256,
        "raw_size_bytes": meta.raw_size_bytes,
        "redacted_paths": meta.redacted_paths,
        "truncated_strings": meta.truncated_strings,
    }
    if meta.saw_sensitive:
        meta_dict["saw_sensitive"] = True
    return redacted_json, meta_dict