All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
75 lines
2.7 KiB
Python
75 lines
2.7 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
"""
scripts/test_edges_smoke_fast.py — counting via /count (very fast)

Uses the Qdrant 'count' API per note/kind (instead of scroll), which makes the run very fast.

Options:
  --max-notes N : only checks the first N notes
"""
|
|
|
|
from __future__ import annotations
|
|
import argparse, json
|
|
from typing import Dict, Any, List, Tuple
|
|
from qdrant_client.http import models as rest
|
|
|
|
from app.core.database.qdrant import QdrantConfig, get_client
|
|
|
|
# Edge kinds that are counted separately for every note (matched against the "kind" payload field).
KINDS = ["belongs_to", "next", "prev", "references", "backlink"]
|
|
|
|
def collections(prefix: str) -> Tuple[str, str, str]:
    """Return the (notes, chunks, edges) collection names derived from ``prefix``.

    Each name is the prefix followed by an underscore and the fixed suffix.
    """
    notes_name, chunks_name, edges_name = (
        f"{prefix}_{suffix}" for suffix in ("notes", "chunks", "edges")
    )
    return notes_name, chunks_name, edges_name
|
|
|
|
def list_note_ids(client, notes_col: str, max_notes: int | None) -> List[Dict[str, Any]]:
    """Collect ``note_id``, ``title`` and ``type`` of the notes stored in ``notes_col``.

    The collection is read page by page via ``client.scroll`` and the returned
    next-page offset is followed until it is ``None``, so collections larger
    than a single page are no longer truncated.

    Args:
        client: Object with a Qdrant-style ``scroll`` method returning
            ``(points, next_page_offset)``.
        notes_col: Name of the notes collection.
        max_notes: Maximum number of notes to return. ``None`` or a
            non-positive value means "no limit".

    Returns:
        One dict per note with the keys ``"note_id"``, ``"title"`` and
        ``"type"``. Points whose payload has neither ``note_id`` nor ``id``
        are skipped and do not count towards ``max_notes``.
    """
    page_size = 1024
    cap = max_notes if max_notes and max_notes > 0 else None

    out: List[Dict[str, Any]] = []
    offset = None
    while True:
        # Never request more points than are still needed to reach the cap.
        limit = page_size if cap is None else min(page_size, cap - len(out))
        pts, offset = client.scroll(
            collection_name=notes_col,
            with_payload=True,
            with_vectors=False,
            limit=limit,
            offset=offset,
        )
        for p in pts or []:
            pl = p.payload or {}
            nid = pl.get("note_id") or pl.get("id")
            if nid:
                out.append({"note_id": nid, "title": pl.get("title"), "type": pl.get("type")})

        if cap is not None and len(out) >= cap:
            break
        # A missing next offset (or an empty page) marks the end of the collection.
        if offset is None or not pts:
            break
    return out
|
|
|
|
def count_points(client, col: str, filt: rest.Filter) -> int:
    """Return the exact number of points in ``col`` that match ``filt``.

    Calls ``client.count`` with ``exact=True`` and reads the ``count``
    attribute of the response; a response without that attribute yields 0.
    """
    response = client.count(collection_name=col, count_filter=filt, exact=True)
    matched = getattr(response, "count", 0)
    return int(matched)
|
|
|
|
def main():
    """Print chunk and per-kind edge counts for every note, then a summary.

    Reads the optional ``--max-notes`` argument, builds the Qdrant client from
    the environment configuration, and emits one JSON line per note followed
    by one JSON line with the collection prefix and the accumulated totals.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--max-notes", type=int)
    args = parser.parse_args()

    cfg = QdrantConfig.from_env()
    client = get_client(cfg)
    notes_col, chunks_col, edges_col = collections(cfg.prefix)

    def build_filter(note_id, kind=None):
        # Always match on the note; additionally match on the edge kind when given.
        conditions = [rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))]
        if kind is not None:
            conditions.append(rest.FieldCondition(key="kind", match=rest.MatchValue(value=kind)))
        return rest.Filter(must=conditions)

    total_notes = 0
    total_chunks = 0
    total_edges = 0

    for note in list_note_ids(client, notes_col, args.max_notes):
        nid = note["note_id"]
        total_notes += 1

        chunk_cnt = count_points(client, chunks_col, build_filter(nid))
        total_chunks += chunk_cnt

        # One count request per edge kind; kinds without edges are left out of the report.
        by_kind: Dict[str, int] = {}
        for kind in KINDS:
            kind_cnt = count_points(client, edges_col, build_filter(nid, kind))
            if kind_cnt:
                by_kind[kind] = kind_cnt
        total_edges += sum(by_kind.values())

        report = {
            "note_id": nid,
            "title": note.get("title"),
            "type": note.get("type"),
            "chunks": chunk_cnt,
            "edges_by_kind": by_kind,
        }
        print(json.dumps(report, ensure_ascii=False))

    summary = {"notes": total_notes, "chunks": total_chunks, "edges": total_edges}
    print(json.dumps({"prefix": cfg.prefix, "summary": summary}, ensure_ascii=False))
|
|
|
|
# Run the smoke check only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
|