mindnet/tests/test_edges_smoke_fast.py
Lars c501f8d6e6
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
Dateien nach "tests" hochladen
2025-11-11 17:30:36 +01:00

75 lines
2.7 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
scripts/test_edges_smoke_fast.py — counting via /count (very fast)
Uses the Qdrant 'count' API per note/kind (instead of scroll), which makes the run very fast.
Options:
--max-notes N : checks only the first N notes
"""
from __future__ import annotations
import argparse, json
from typing import Dict, Any, List, Tuple
from qdrant_client.http import models as rest
from app.core.qdrant import QdrantConfig, get_client
# Edge kinds whose counts main() reports for each note.
KINDS = ["belongs_to", "next", "prev", "references", "backlink"]
def collections(prefix: str) -> Tuple[str, str, str]:
    """Derive the notes, chunks and edges collection names from ``prefix``.

    Returns:
        A ``(notes, chunks, edges)`` tuple, each name being ``prefix``
        followed by an underscore and the respective suffix.
    """
    notes_name = prefix + "_notes"
    chunks_name = prefix + "_chunks"
    edges_name = prefix + "_edges"
    return (notes_name, chunks_name, edges_name)
def list_note_ids(client, notes_col: str, max_notes: int | None) -> List[Dict[str, Any]]:
    """Return the id, title and type of the notes stored in ``notes_col``.

    The collection is read page by page through ``client.scroll``, following
    the next-page offset it returns, so collections larger than one page are
    no longer cut off after the first 1024 points.

    Args:
        client: Object exposing a Qdrant-style ``scroll`` method that returns
            a ``(points, next_offset)`` pair.
        notes_col: Name of the notes collection.
        max_notes: Upper bound on the number of points read. ``None`` or ``0``
            means no bound (all points are read).

    Returns:
        One dict per point with the keys ``"note_id"``, ``"title"`` and
        ``"type"``. Points whose payload has neither ``note_id`` nor ``id``
        are skipped.
    """
    page_size = 1024
    # A falsy max_notes means "no explicit cap", as with the former `or` fallback.
    cap = max_notes or None
    found: List[Dict[str, Any]] = []
    fetched = 0
    offset = None
    while cap is None or fetched < cap:
        limit = page_size if cap is None else min(page_size, cap - fetched)
        points, offset = client.scroll(
            collection_name=notes_col,
            with_payload=True,
            with_vectors=False,
            limit=limit,
            offset=offset,
        )
        points = points or []
        fetched += len(points)
        for point in points:
            payload = point.payload or {}
            note_id = payload.get("note_id") or payload.get("id")
            if note_id:
                found.append(
                    {
                        "note_id": note_id,
                        "title": payload.get("title"),
                        "type": payload.get("type"),
                    }
                )
        # No further page, or an empty page: stop rather than loop forever.
        if offset is None or not points:
            break
    return found
def count_points(client, col: str, filt: rest.Filter) -> int:
    """Return the exact number of points in ``col`` that match ``filt``.

    Calls ``client.count`` with ``exact=True`` and converts the result's
    ``count`` attribute to ``int``; a result without that attribute counts
    as 0.
    """
    response = client.count(collection_name=col, count_filter=filt, exact=True)
    try:
        total = response.count
    except AttributeError:
        total = 0
    return int(total)
def main():
    """Print per-note chunk and edge counts as JSON lines, then a summary.

    Parses ``--max-notes`` from the command line, builds the client from
    ``QdrantConfig.from_env()`` and lists the notes of the configured prefix.
    For every note it counts the chunks and, for each kind in ``KINDS``, the
    edges carrying that note's ``note_id``. One JSON object is printed per
    note, followed by a final object with the prefix and accumulated totals.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--max-notes", type=int)
    options = parser.parse_args()

    cfg = QdrantConfig.from_env()
    client = get_client(cfg)
    notes_col, chunks_col, edges_col = collections(cfg.prefix)

    def note_filter(note_id, kind=None):
        # Match on note_id, optionally narrowed to a single edge kind.
        conditions = [
            rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id)),
        ]
        if kind is not None:
            conditions.append(
                rest.FieldCondition(key="kind", match=rest.MatchValue(value=kind))
            )
        return rest.Filter(must=conditions)

    total_notes = 0
    total_chunks = 0
    total_edges = 0
    for note in list_note_ids(client, notes_col, options.max_notes):
        note_id = note["note_id"]
        total_notes += 1

        chunk_count = count_points(client, chunks_col, note_filter(note_id))
        total_chunks += chunk_count

        # Only kinds with a non-zero count appear in the per-note output.
        edges_by_kind: Dict[str, int] = {}
        for kind in KINDS:
            edge_count = count_points(client, edges_col, note_filter(note_id, kind))
            if edge_count:
                edges_by_kind[kind] = edge_count
        total_edges += sum(edges_by_kind.values())

        record = {
            "note_id": note_id,
            "title": note.get("title"),
            "type": note.get("type"),
            "chunks": chunk_count,
            "edges_by_kind": edges_by_kind,
        }
        print(json.dumps(record, ensure_ascii=False))

    summary = {"notes": total_notes, "chunks": total_chunks, "edges": total_edges}
    print(json.dumps({"prefix": cfg.prefix, "summary": summary}, ensure_ascii=False))


if __name__ == "__main__":
    main()