200 lines
5.3 KiB
Python
200 lines
5.3 KiB
Python
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import json
|
|
from dataclasses import dataclass
|
|
from typing import Any
|
|
|
|
# Lower-case fragments that mark a mapping key as sensitive. Keys are matched
# by substring (see `_is_sensitive_key`), so a broad entry such as "token"
# already covers the more specific "*_token" entries; the specific ones are
# kept so the intent stays explicit.
DEFAULT_DENY_SUBSTRINGS = (
    # Generic credentials and auth headers.
    "token",
    "access_token",
    "api_key",
    "authorization",
    # Session state.
    "cookie",
    "set-cookie",
    # Secrets and key material.
    "secret",
    "password",
    "passphrase",
    "private_key",
    "seed",
    "mnemonic",
    # Vendor-specific names.
    "github_token",
    "cloudflare_api_token",
    "openai",
    "aws_",
)
|
|
|
|
|
|
def _sha256_hex(data: bytes) -> str:
|
|
return hashlib.sha256(data).hexdigest()
|
|
|
|
|
|
def _is_sensitive_key(key: str) -> bool:
|
|
lowered = key.strip().lower()
|
|
if not lowered:
|
|
return False
|
|
return any(s in lowered for s in DEFAULT_DENY_SUBSTRINGS)
|
|
|
|
|
|
def _safe_default(obj: Any) -> Any:
|
|
if isinstance(obj, (bytes, bytearray)):
|
|
b = bytes(obj)
|
|
return {"__type__": "bytes", "len": len(b), "sha256": _sha256_hex(b)}
|
|
return repr(obj)
|
|
|
|
|
|
@dataclass(frozen=True)
class RedactionMeta:
    """Immutable summary of one redaction pass over a payload."""

    # SHA-256 hex digest of the payload's un-redacted byte form.
    raw_sha256: str
    # Length in bytes of that un-redacted byte form.
    raw_size_bytes: int
    # Number of values replaced by a redaction placeholder.
    redacted_paths: int
    # Number of strings shortened to the configured maximum length.
    truncated_strings: int
    # Whether at least one sensitive key was encountered.
    saw_sensitive: bool
|
|
|
|
|
|
def _stable_json_bytes(payload: Any) -> bytes:
|
|
if payload is None:
|
|
return b"null"
|
|
if isinstance(payload, (bytes, bytearray)):
|
|
return bytes(payload)
|
|
if isinstance(payload, str):
|
|
return payload.encode("utf-8", errors="replace")
|
|
try:
|
|
return json.dumps(
|
|
payload,
|
|
ensure_ascii=False,
|
|
sort_keys=True,
|
|
separators=(",", ":"),
|
|
default=_safe_default,
|
|
).encode("utf-8", errors="replace")
|
|
except Exception:
|
|
return repr(payload).encode("utf-8", errors="replace")
|
|
|
|
|
|
def _redact(
|
|
value: Any,
|
|
*,
|
|
max_depth: int,
|
|
max_items: int,
|
|
max_string_len: int,
|
|
_depth: int = 0,
|
|
_meta: dict[str, int] | None = None,
|
|
) -> tuple[Any, dict[str, int]]:
|
|
meta = (
|
|
_meta
|
|
if _meta is not None
|
|
else {"redacted_paths": 0, "truncated_strings": 0, "saw_sensitive": 0}
|
|
)
|
|
|
|
if _depth > max_depth:
|
|
meta["redacted_paths"] += 1
|
|
return "[REDACTED_DEPTH]", meta
|
|
|
|
if isinstance(value, dict):
|
|
out: dict[str, Any] = {}
|
|
items = list(value.items())
|
|
if len(items) > max_items:
|
|
items = items[:max_items]
|
|
out["__truncated__"] = f"dict truncated to {max_items} items"
|
|
for k, v in items:
|
|
key = str(k)
|
|
if _is_sensitive_key(key):
|
|
meta["saw_sensitive"] += 1
|
|
meta["redacted_paths"] += 1
|
|
out[key] = "[REDACTED]"
|
|
continue
|
|
|
|
out[key], meta = _redact(
|
|
v,
|
|
max_depth=max_depth,
|
|
max_items=max_items,
|
|
max_string_len=max_string_len,
|
|
_depth=_depth + 1,
|
|
_meta=meta,
|
|
)
|
|
return out, meta
|
|
|
|
if isinstance(value, list):
|
|
out_list: list[Any] = []
|
|
items = value
|
|
truncated = False
|
|
if len(items) > max_items:
|
|
items = items[:max_items]
|
|
truncated = True
|
|
for item in items:
|
|
redacted_item, meta = _redact(
|
|
item,
|
|
max_depth=max_depth,
|
|
max_items=max_items,
|
|
max_string_len=max_string_len,
|
|
_depth=_depth + 1,
|
|
_meta=meta,
|
|
)
|
|
out_list.append(redacted_item)
|
|
if truncated:
|
|
out_list.append(f"__truncated__: list truncated to {max_items} items")
|
|
return out_list, meta
|
|
|
|
if isinstance(value, tuple):
|
|
return _redact(
|
|
list(value),
|
|
max_depth=max_depth,
|
|
max_items=max_items,
|
|
max_string_len=max_string_len,
|
|
_depth=_depth,
|
|
_meta=meta,
|
|
)
|
|
|
|
if isinstance(value, str):
|
|
if len(value) <= max_string_len:
|
|
return value, meta
|
|
meta["truncated_strings"] += 1
|
|
return value[
|
|
:max_string_len
|
|
] + f" [TRUNCATED {len(value) - max_string_len} chars]", meta
|
|
|
|
if isinstance(value, (int, float, bool)) or value is None:
|
|
return value, meta
|
|
|
|
return _safe_default(value), meta
|
|
|
|
|
|
def redact_json_for_storage(payload: Any) -> tuple[str | None, dict[str, Any] | None]:
    """Redact *payload* and serialise it, together with a metadata summary.

    The payload is redacted with a depth limit of 20, at most 200 items per
    container and at most 2048 characters per string.

    Args:
        payload: Value to redact; ``None`` means there is nothing to store.

    Returns:
        ``(None, None)`` when *payload* is ``None``. Otherwise a pair of
        the redacted payload as a compact, key-sorted JSON string and a dict
        with ``raw_sha256``, ``raw_size_bytes``, ``redacted_paths`` and
        ``truncated_strings``. The dict also carries ``saw_sensitive: True``
        when a sensitive key was found; the key is absent otherwise.
    """
    if payload is None:
        return None, None

    # Hash and size describe the payload before any redaction.
    original_bytes = _stable_json_bytes(payload)
    sanitized, counts = _redact(
        payload, max_depth=20, max_items=200, max_string_len=2048
    )

    summary = RedactionMeta(
        raw_sha256=_sha256_hex(original_bytes),
        raw_size_bytes=len(original_bytes),
        redacted_paths=counts.get("redacted_paths", 0),
        truncated_strings=counts.get("truncated_strings", 0),
        saw_sensitive=counts.get("saw_sensitive", 0) > 0,
    )

    try:
        redacted_json = json.dumps(
            sanitized,
            ensure_ascii=False,
            sort_keys=True,
            separators=(",", ":"),
            default=_safe_default,
        )
    except Exception:
        # Best effort: store the repr as a JSON string rather than fail.
        redacted_json = json.dumps(repr(sanitized), ensure_ascii=False)

    always_present = (
        "raw_sha256",
        "raw_size_bytes",
        "redacted_paths",
        "truncated_strings",
    )
    meta_dict: dict[str, Any] = {
        name: getattr(summary, name) for name in always_present
    }
    if summary.saw_sensitive:
        meta_dict["saw_sensitive"] = True

    return redacted_json, meta_dict
|