#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Script: scripts/report_hashes.py — overview of, and gaps in, multi-hash entries
Version: 1.0.0
Date: 2025-09-10

Purpose
-------
Lists, for every note, the entries present in the payload field `hashes`
(keys are colon-separated signatures such as `body:parsed:canonical`) and
reports required keys that are missing. Suitable as a CI check.

Output is one JSON object per note on stdout, followed by a final
`{"summary_missing_total": N}` line.

Options
-------
--prefix TEXT         Collection prefix (CLI overrides ENV)
--require K [K ...]   Additional required keys (always required:
                      body|frontmatter|full:parsed:canonical)
--fail-on-missing     Exit code 2 if missing keys are found

Examples
--------
python3 -m scripts.report_hashes --prefix mindnet
python3 -m scripts.report_hashes --require frontmatter:raw:none --fail-on-missing
"""
from __future__ import annotations

import argparse
import json
import os
from typing import List, Dict, Any

from qdrant_client.http import models as rest

from app.core.qdrant import QdrantConfig, get_client

# Keys every note is expected to carry, regardless of --require.
DEFAULT_REQUIRED_KEYS = frozenset(
    {
        "body:parsed:canonical",
        "frontmatter:parsed:canonical",
        "full:parsed:canonical",
    }
)


def collections(prefix: str) -> tuple[str, str, str]:
    """Return the (notes, chunks, edges) collection names for *prefix*."""
    return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges"


def _scroll_all(client, collection: str) -> List[Any]:
    """Fetch every point of *collection* by paging through ``client.scroll``.

    Points are requested with payload and without vectors, 256 per page.
    Paging stops when a page comes back empty or the returned next-page
    offset is ``None``.

    Returns:
        All points collected across pages, in the order they were returned.
    """
    records: List[Any] = []
    offset = None
    while True:
        batch, offset = client.scroll(
            collection_name=collection,
            with_payload=True,
            with_vectors=False,
            limit=256,
            offset=offset,
        )
        if not batch:
            break
        records.extend(batch)
        if offset is None:
            break
    return records


def _note_report(payload: Dict[str, Any], required: set[str]) -> Dict[str, Any]:
    """Build the per-note report: note id, number of hash keys, missing keys.

    A ``hashes`` value that is absent, empty, or not a dict is treated as
    "no hashes present" so that one malformed payload cannot abort the run.
    """
    hashes = payload.get("hashes")
    present = set(hashes) if isinstance(hashes, dict) else set()
    return {
        "note_id": payload.get("note_id"),
        "present_count": len(present),
        "missing": sorted(required - present),
    }


def main() -> None:
    """Print a JSON line per note listing missing hash keys, then a summary.

    The required key set is ``DEFAULT_REQUIRED_KEYS`` plus anything passed via
    ``--require``. The notes collection name is derived from the configured
    prefix, which ``--prefix`` overrides when it is non-blank.

    Raises:
        SystemExit: with code 2 when ``--fail-on-missing`` is set and at
            least one required key is missing. The summary line is printed
            before exiting.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--prefix", help="Collection-Prefix (überschreibt ENV COLLECTION_PREFIX)")
    ap.add_argument("--require", nargs="+", help="Zusätzliche Soll-Keys ::")
    ap.add_argument("--fail-on-missing", action="store_true", help="Exitcode 2 bei fehlenden Keys")
    args = ap.parse_args()

    cfg = QdrantConfig.from_env()
    # Ignore a blank/whitespace-only --prefix instead of silently querying
    # the collection named "_notes".
    prefix = (args.prefix or "").strip()
    if prefix:
        cfg.prefix = prefix

    client = get_client(cfg)
    notes_col, _, _ = collections(cfg.prefix)
    points = _scroll_all(client, notes_col)

    required = set(args.require or []) | DEFAULT_REQUIRED_KEYS

    missing_total = 0
    for point in points:
        report = _note_report(point.payload or {}, required)
        print(json.dumps(report, ensure_ascii=False))
        missing_total += len(report["missing"])

    # Emit the summary before a possible non-zero exit so that failing CI
    # runs still show the total.
    print(json.dumps({"summary_missing_total": missing_total}))
    if args.fail_on_missing and missing_total > 0:
        raise SystemExit(2)


if __name__ == "__main__":
    main()