#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ scripts/test_edges_smoke_fast.py — Zählung über /count (sehr schnell) Verwendet Qdrant 'count' API je Note/Kind (anstatt scroll), dadurch sehr schnelle Ausführung. Optionen: --max-notes N : prüft nur die ersten N Notizen """ from __future__ import annotations import argparse, json from typing import Dict, Any, List, Tuple from qdrant_client.http import models as rest from app.core.qdrant import QdrantConfig, get_client KINDS = ["belongs_to", "next", "prev", "references", "backlink"] def collections(prefix: str) -> Tuple[str, str, str]: return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges" def list_note_ids(client, notes_col: str, max_notes: int | None) -> List[Dict[str, Any]]: pts, _ = client.scroll(collection_name=notes_col, with_payload=True, with_vectors=False, limit=max_notes or 1024) out = [] for p in pts or []: pl = p.payload or {} nid = pl.get("note_id") or pl.get("id") if nid: out.append({"note_id": nid, "title": pl.get("title"), "type": pl.get("type")}) return out def count_points(client, col: str, filt: rest.Filter) -> int: res = client.count(collection_name=col, count_filter=filt, exact=True) return int(getattr(res, "count", 0)) def main(): ap = argparse.ArgumentParser() ap.add_argument("--max-notes", type=int) args = ap.parse_args() cfg = QdrantConfig.from_env() client = get_client(cfg) notes_col, chunks_col, edges_col = collections(cfg.prefix) notes = list_note_ids(client, notes_col, args.max_notes) summary = {"notes": 0, "chunks": 0, "edges": 0} for n in notes: nid = n["note_id"] summary["notes"] += 1 filt_note = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=nid))]) chunk_cnt = count_points(client, chunks_col, filt_note) summary["chunks"] += chunk_cnt # counts per kind (edges) by_kind: Dict[str, int] = {} for k in KINDS: f = rest.Filter(must=[ rest.FieldCondition(key="note_id", match=rest.MatchValue(value=nid)), rest.FieldCondition(key="kind", match=rest.MatchValue(value=k)), ]) c = count_points(client, edges_col, f) if c: by_kind[k] = c summary["edges"] += sum(by_kind.values()) line = {"note_id": nid, "title": n.get("title"), "type": n.get("type"), "chunks": chunk_cnt, "edges_by_kind": by_kind} print(json.dumps(line, ensure_ascii=False)) print(json.dumps({"prefix": cfg.prefix, "summary": summary}, ensure_ascii=False)) if __name__ == "__main__": main()