From b46e5461a7bd52fe33cd2d112a9cdc370638dbb7 Mon Sep 17 00:00:00 2001 From: Lars Date: Wed, 24 Sep 2025 12:16:05 +0200 Subject: [PATCH] =?UTF-8?q?scripts/report=5Fhashes.py=20hinzugef=C3=BCgt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/report_hashes.py | 92 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 scripts/report_hashes.py diff --git a/scripts/report_hashes.py b/scripts/report_hashes.py new file mode 100644 index 0000000..b9aaa29 --- /dev/null +++ b/scripts/report_hashes.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Script: scripts/report_hashes.py — Übersicht & Lücken bei Mehrfach-Hashes +Version: 1.0.0 +Datum: 2025-09-10 + +Funktion +-------- +Listet je Note die vorhandenen Einträge im Feld `hashes` (Signaturen: ::) +und meldet fehlende Soll-Keys. Eignet sich als CI-Check. + +Optionen +-------- +--prefix TEXT Collection-Prefix (CLI überschreibt ENV) +--require K [K ...] Zusätzliche Soll-Keys (Default: body|frontmatter|full:parsed:canonical) +--fail-on-missing Exitcode 2, wenn fehlende Keys gefunden werden + +Beispiele +--------- + python3 -m scripts.report_hashes --prefix mindnet + python3 -m scripts.report_hashes --require frontmatter:raw:none --fail-on-missing +""" +from __future__ import annotations + +import argparse +import json +import os +from typing import List, Dict, Any + +from qdrant_client.http import models as rest +from app.core.qdrant import QdrantConfig, get_client + +def collections(prefix: str): + return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges" + +def _scroll_all(client, collection: str): + out = [] + nextp = None + while True: + pts, nextp = client.scroll(collection_name=collection, with_payload=True, with_vectors=False, limit=256, offset=nextp) + if not pts: + break + out.extend(pts) + if nextp is None: + break + return out + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--prefix", help="Collection-Prefix (überschreibt ENV COLLECTION_PREFIX)") + ap.add_argument("--require", nargs="+", help="Zusätzliche Soll-Keys ::") + ap.add_argument("--fail-on-missing", action="store_true", help="Exitcode 2 bei fehlenden Keys") + args = ap.parse_args() + + cfg = QdrantConfig.from_env() + if args.prefix: + cfg.prefix = args.prefix.strip() + client = get_client(cfg) + + notes_col, _, _ = collections(cfg.prefix) + pts = _scroll_all(client, notes_col) + + required = set(args.require or []) + required |= { + "body:parsed:canonical", + "frontmatter:parsed:canonical", + "full:parsed:canonical", + } + + missing_total = 0 + for p in pts: + pl = p.payload or {} + nid = pl.get("note_id") + hashes = pl.get("hashes") or {} + present = set(hashes.keys()) + missing = sorted(list(required - present)) + obj = { + "note_id": nid, + "present_count": len(present), + "missing": missing, + } + print(json.dumps(obj, ensure_ascii=False)) + missing_total += len(missing) + + if args.fail_on_missing and missing_total > 0: + raise SystemExit(2) + + print(json.dumps({"summary_missing_total": missing_total})) + +if __name__ == "__main__": + main()