# mindnet/tests/test_edges_smoke_fast.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
scripts/test_edges_smoke_fast.py — Zählung über /count (sehr schnell)

Verwendet Qdrant 'count' API je Note/Kind (anstatt scroll), dadurch sehr schnelle Ausführung.
Optionen:
  --max-notes N : prüft nur die ersten N Notizen
"""

from __future__ import annotations
import argparse, json
from typing import Dict, Any, List, Tuple
from qdrant_client.http import models as rest

from app.core.qdrant import QdrantConfig, get_client

# Edge kinds whose per-note counts are queried and reported by main().
KINDS = ["belongs_to", "next", "prev", "references", "backlink"]

def collections(prefix: str) -> Tuple[str, str, str]:
    """Return the notes, chunks and edges collection names for *prefix*.

    The names are ``<prefix>_notes``, ``<prefix>_chunks`` and
    ``<prefix>_edges``, in that order.
    """
    notes_name = f"{prefix}_notes"
    chunks_name = f"{prefix}_chunks"
    edges_name = f"{prefix}_edges"
    return notes_name, chunks_name, edges_name

def list_note_ids(client, notes_col: str, max_notes: int | None) -> List[Dict[str, Any]]:
    """Collect note descriptors from the notes collection.

    Pages through ``client.scroll`` so that collections larger than a single
    page are read completely instead of being silently cut off after the
    first 1024 points.

    Args:
        client: Object exposing a Qdrant-style ``scroll`` method that returns
            a ``(points, next_page_offset)`` pair.
        notes_col: Name of the collection to read.
        max_notes: Maximum number of points to read from the collection;
            ``None`` or ``0`` reads the whole collection.

    Returns:
        One dict per point whose payload carries a ``note_id`` (or, as a
        fallback, an ``id``) value, with the keys ``note_id``, ``title`` and
        ``type``. Points without such an identifier are skipped.
    """
    page_size = 1024
    # A falsy max_notes (None or 0) means "no cap".
    remaining = max_notes or None
    out: List[Dict[str, Any]] = []
    offset = None
    while True:
        limit = page_size if remaining is None else min(page_size, remaining)
        pts, offset = client.scroll(
            collection_name=notes_col,
            with_payload=True,
            with_vectors=False,
            limit=limit,
            offset=offset,
        )
        pts = pts or []
        for p in pts:
            pl = p.payload or {}
            nid = pl.get("note_id") or pl.get("id")
            if nid:
                out.append({"note_id": nid, "title": pl.get("title"), "type": pl.get("type")})

        if remaining is not None:
            # The cap applies to points read, including ones skipped above.
            remaining -= len(pts)
            if remaining <= 0:
                break
        # No further page, or an empty page: the collection is exhausted.
        if offset is None or not pts:
            break
    return out

def count_points(client, col: str, filt: rest.Filter) -> int:
    """Return the exact number of points in *col* that match *filt*.

    Calls ``client.count`` with ``exact=True`` and reads the ``count``
    attribute of the response; a response without that attribute counts as 0.
    """
    response = client.count(collection_name=col, count_filter=filt, exact=True)
    try:
        total = response.count
    except AttributeError:
        total = 0
    return int(total)

def main():
    """Print per-note chunk and edge counts as JSON lines, then a summary.

    Reads the optional ``--max-notes`` argument, builds the Qdrant client
    from the environment configuration, and for every listed note queries
    the number of chunks and the number of edges of each kind in ``KINDS``.
    One JSON object is printed per note, followed by a final JSON object
    holding the collection prefix and the accumulated totals.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--max-notes", type=int)
    options = parser.parse_args()

    cfg = QdrantConfig.from_env()
    client = get_client(cfg)
    notes_col, chunks_col, edges_col = collections(cfg.prefix)

    def match(field, value):
        # Exact-match condition on a single payload field.
        return rest.FieldCondition(key=field, match=rest.MatchValue(value=value))

    total_notes = total_chunks = total_edges = 0
    for note in list_note_ids(client, notes_col, options.max_notes):
        note_id = note["note_id"]
        total_notes += 1

        chunk_total = count_points(
            client, chunks_col, rest.Filter(must=[match("note_id", note_id)])
        )
        total_chunks += chunk_total

        # One count query per edge kind; kinds with zero hits are left out.
        kind_counts = (
            (
                kind,
                count_points(
                    client,
                    edges_col,
                    rest.Filter(must=[match("note_id", note_id), match("kind", kind)]),
                ),
            )
            for kind in KINDS
        )
        edges_by_kind: Dict[str, int] = {kind: hits for kind, hits in kind_counts if hits}
        total_edges += sum(edges_by_kind.values())

        report = {
            "note_id": note_id,
            "title": note.get("title"),
            "type": note.get("type"),
            "chunks": chunk_total,
            "edges_by_kind": edges_by_kind,
        }
        print(json.dumps(report, ensure_ascii=False))

    summary = {"notes": total_notes, "chunks": total_chunks, "edges": total_edges}
    print(json.dumps({"prefix": cfg.prefix, "summary": summary}, ensure_ascii=False))

# Run the smoke check only when executed as a script, not on import.
if __name__ == "__main__":
    main()