Dateien nach "tests" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s

This commit is contained in:
Lars 2025-11-08 17:46:52 +01:00
parent 820e52d869
commit b9316c693d

View File

@ -1,25 +1,18 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
check_types_registry_qdrant.py check_types_registry_qdrant_v2.py
Verifiziert, ob Felder aus der Type-Registry in Qdrant ankommen: - Zeigt Note-Payloads (type, retriever_weight) aus Qdrant
- Note-Payload: type, retriever_weight - Ermittelt eine geeignete note_id (bevorzugt 'note_id', sonst 'id')
- Chunk-Payload: retriever_weight - Lädt dazugehörige Chunks und zeigt retriever_weight sowie Fenstergrößen
- Optional: Chunks pro Note (Filter über note_id)
Ausgabe: JSON-Objekte (einige Stichproben) Aufrufbeispiele:
python3 check_types_registry_qdrant_v2.py
Beispiele: python3 check_types_registry_qdrant_v2.py --limit 10
python3 check_types_registry_qdrant.py python3 check_types_registry_qdrant_v2.py --note-id 20250827-xyz
python3 check_types_registry_qdrant.py --limit 10 COLLECTION_PREFIX=mindnet python3 check_types_registry_qdrant_v2.py
python3 check_types_registry_qdrant.py --note-id 20250827-xyz
COLLECTION_PREFIX=mindnet python3 check_types_registry_qdrant.py
""" """
import os, sys, json, argparse
import os
import sys
import json
import argparse
from typing import Any, Dict, List, Tuple from typing import Any, Dict, List, Tuple
try: try:
@ -37,39 +30,46 @@ def env_default(name: str, fallback: str) -> str:
return v return v
def get_client(host: str, port: int) -> QdrantClient:
return QdrantClient(host=host, port=port)
def collections(prefix: str) -> Tuple[str, str, str]: def collections(prefix: str) -> Tuple[str, str, str]:
return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges" return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges"
def sample_notes(client: QdrantClient, coll: str, limit: int) -> List[Any]: def get_client(host: str, port: int) -> QdrantClient:
return QdrantClient(host=host, port=port)
def sample_notes(client: QdrantClient, coll: str, limit: int):
pts, _ = client.scroll(collection_name=coll, limit=limit, with_payload=True) pts, _ = client.scroll(collection_name=coll, limit=limit, with_payload=True)
return pts return pts
def chunks_for_note(client: QdrantClient, coll: str, note_id: str, limit: int = 100) -> List[Any]: def chunks_for_note(client: QdrantClient, coll: str, note_id: str, limit: int = 100):
flt = Filter(must=[FieldCondition(key="note_id", match=MatchValue(value=note_id))]) flt = Filter(must=[FieldCondition(key="note_id", match=MatchValue(value=note_id))])
pts, _ = client.scroll(collection_name=coll, with_payload=True, limit=limit, filter=flt) pts, _ = client.scroll(collection_name=coll, with_payload=True, limit=limit, filter=flt)
return pts return pts
def main() -> None: def pick_note_id(payload: Dict[str, Any]) -> str | None:
ap = argparse.ArgumentParser(description="Check Type-Registry fields in Qdrant") # bevorzugt 'note_id', sonst 'id'
ap.add_argument("--prefix", default=env_default("COLLECTION_PREFIX", "mindnet"), help="Collection prefix (default: env COLLECTION_PREFIX or 'mindnet')") nid = payload.get("note_id") or payload.get("id")
ap.add_argument("--host", default=env_default("QDRANT_HOST", "127.0.0.1"), help="Qdrant host (default: env QDRANT_HOST or '127.0.0.1')") if isinstance(nid, str) and nid.strip():
ap.add_argument("--port", default=int(env_default("QDRANT_PORT", "6333")), type=int, help="Qdrant port (default: env QDRANT_PORT or 6333)") return nid.strip()
ap.add_argument("--limit", default=5, type=int, help="Sample size for notes (default: 5)") return None
ap.add_argument("--note-id", default=None, help="Optional note_id to inspect chunk payloads")
def main():
ap = argparse.ArgumentParser(description="Check Type-Registry fields in Qdrant (v2)")
ap.add_argument("--prefix", default=env_default("COLLECTION_PREFIX", "mindnet"))
ap.add_argument("--host", default=env_default("QDRANT_HOST", "127.0.0.1"))
ap.add_argument("--port", default=int(env_default("QDRANT_PORT", "6333")), type=int)
ap.add_argument("--limit", default=5, type=int)
ap.add_argument("--note-id", default=None, help="Optional erzwungene note_id")
args = ap.parse_args() args = ap.parse_args()
notes_coll, chunks_coll, edges_coll = collections(args.prefix) notes_coll, chunks_coll, edges_coll = collections(args.prefix)
try: try:
client = get_client(args.host, args.port) client = get_client(args.host, args.port)
# sanity: list collections (ignore errors)
try: try:
cols = client.get_collections().collections cols = client.get_collections().collections
print(json.dumps({"info": "collections", "count": len(cols)}, ensure_ascii=False)) print(json.dumps({"info": "collections", "count": len(cols)}, ensure_ascii=False))
@ -80,6 +80,7 @@ def main() -> None:
sys.exit(2) sys.exit(2)
# 1) Notes sample # 1) Notes sample
notes = []
try: try:
notes = sample_notes(client, notes_coll, args.limit) notes = sample_notes(client, notes_coll, args.limit)
out = [] out = []
@ -87,6 +88,7 @@ def main() -> None:
pl = p.payload or {} pl = p.payload or {}
out.append({ out.append({
"point_id": getattr(p, "id", None), "point_id": getattr(p, "id", None),
"note_id": pl.get("note_id"),
"id": pl.get("id"), "id": pl.get("id"),
"title": pl.get("title"), "title": pl.get("title"),
"type": pl.get("type"), "type": pl.get("type"),
@ -96,13 +98,17 @@ def main() -> None:
except Exception as e: except Exception as e:
print(json.dumps({"error": f"notes scroll failed: {type(e).__name__}: {e}", "collection": notes_coll})) print(json.dumps({"error": f"notes scroll failed: {type(e).__name__}: {e}", "collection": notes_coll}))
# 2) Chunks for first note or specific note-id # 2) Chunks for note
try: try:
target_note_id = None target_note_id = args.note_id
if args.note_id: if not target_note_id and notes:
target_note_id = args.note_id # benutze den ersten Treffer mit verwertbarer note_id
elif notes: for p in notes:
target_note_id = notes[0].payload.get("id") pl = p.payload or {}
candidate = pick_note_id(pl)
if candidate:
target_note_id = candidate
break
if target_note_id: if target_note_id:
cks = chunks_for_note(client, chunks_coll, target_note_id, limit=50) cks = chunks_for_note(client, chunks_coll, target_note_id, limit=50)
@ -121,7 +127,7 @@ def main() -> None:
}) })
print(json.dumps({"chunks_for_note": target_note_id, "sample": out}, ensure_ascii=False)) print(json.dumps({"chunks_for_note": target_note_id, "sample": out}, ensure_ascii=False))
else: else:
print(json.dumps({"warn": "no note_id available to fetch chunks"})) print(json.dumps({"warn": "no usable note_id in sample; pass --note-id explicitly"}))
except Exception as e: except Exception as e:
print(json.dumps({"error": f"chunks scroll failed: {type(e).__name__}: {e}", "collection": chunks_coll})) print(json.dumps({"error": f"chunks scroll failed: {type(e).__name__}: {e}", "collection": chunks_coll}))