Dateien nach "tests" hochladen

2025-11-08 17:43:42 +01:00 · 2025-11-08 17:43:42 +01:00 · 820e52d869
commit 820e52d869
parent 444e8e4206
1 changed files with 130 additions and 0 deletions
--- a/tests/check_types_registry_qdrant.py
+++ b/tests/check_types_registry_qdrant.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+check_types_registry_qdrant.py
+Verifies whether fields from the type registry arrive in Qdrant:
+- Note payload: type, retriever_weight
+- Chunk payload: retriever_weight
+- Optional: chunks per note (filtered by note_id)
+
+Output: JSON objects (a few samples)
+
+Examples:
+  python3 check_types_registry_qdrant.py
+  python3 check_types_registry_qdrant.py --limit 10
+  python3 check_types_registry_qdrant.py --note-id 20250827-xyz
+  COLLECTION_PREFIX=mindnet python3 check_types_registry_qdrant.py
+"""
+
+import os
+import sys
+import json
+import argparse
+from typing import Any, Dict, List, Tuple
+
+# qdrant_client is required by everything below. If the import fails, report
+# the failure as a single JSON object on stdout and exit with status 1.
+try:
+    from qdrant_client import QdrantClient
+    from qdrant_client.models import Filter, FieldCondition, MatchValue
+except Exception as e:
+    print(json.dumps({"error": f"qdrant_client import failed: {type(e).__name__}: {e}"}))
+    sys.exit(1)
+
+
+def env_default(name: str, fallback: str) -> str:
+    """Return the value of environment variable *name*, or *fallback*.
+
+    An unset variable and a variable set to the empty string are treated
+    alike: both yield *fallback*.
+    """
+    value = os.environ.get(name, "")
+    return value if value != "" else fallback
+
+
+def get_client(host: str, port: int) -> QdrantClient:
+    """Create a Qdrant client for the server at *host* and *port*."""
+    connection = QdrantClient(host=host, port=port)
+    return connection
+
+
+def collections(prefix: str) -> Tuple[str, str, str]:
+    """Build the notes, chunks and edges collection names for *prefix*.
+
+    Each name is ``<prefix>_<suffix>``; the result is ordered
+    (notes, chunks, edges).
+    """
+    notes_name, chunks_name, edges_name = (
+        f"{prefix}_{suffix}" for suffix in ("notes", "chunks", "edges")
+    )
+    return notes_name, chunks_name, edges_name
+
+
+def sample_notes(client: QdrantClient, coll: str, limit: int) -> List[Any]:
+    """Fetch up to *limit* points, including payloads, from collection *coll*.
+
+    Only a single scroll call is made; the second element of the scroll
+    result is discarded.
+    """
+    records, _unused = client.scroll(
+        collection_name=coll,
+        limit=limit,
+        with_payload=True,
+    )
+    return records
+
+
+def chunks_for_note(client: QdrantClient, coll: str, note_id: str, limit: int = 100) -> List[Any]:
+    """Return up to *limit* points from *coll* whose payload ``note_id`` equals *note_id*.
+
+    A single scroll call is made with payloads included; the second element
+    of the scroll result is discarded.
+    """
+    flt = Filter(must=[FieldCondition(key="note_id", match=MatchValue(value=note_id))])
+    # QdrantClient.scroll() names its filter parameter ``scroll_filter``; an
+    # unknown keyword such as ``filter`` is rejected by the client, which made
+    # every chunk lookup fail.
+    pts, _ = client.scroll(collection_name=coll, scroll_filter=flt, limit=limit, with_payload=True)
+    return pts
+
+
+def main() -> None:
+    """Sample note and chunk payloads from Qdrant and print them as JSON.
+
+    Connection settings come from the command line; their defaults are read
+    from the environment variables COLLECTION_PREFIX, QDRANT_HOST and
+    QDRANT_PORT. The function then:
+
+    1. prints a sample of note payloads (id, title, type, retriever_weight);
+    2. prints the chunk payloads for ``--note-id`` or, when that option is
+       not given, for the first sampled note.
+
+    Every result, warning and error is printed as one JSON object on stdout.
+    Exits with status 2 if the Qdrant client cannot be created; scroll
+    failures are reported but do not change the exit status.
+    """
+    ap = argparse.ArgumentParser(description="Check Type-Registry fields in Qdrant")
+    ap.add_argument("--prefix", default=env_default("COLLECTION_PREFIX", "mindnet"), help="Collection prefix (default: env COLLECTION_PREFIX or 'mindnet')")
+    ap.add_argument("--host",   default=env_default("QDRANT_HOST", "127.0.0.1"),    help="Qdrant host (default: env QDRANT_HOST or '127.0.0.1')")
+    # Keep the env default as a string: argparse converts string defaults with
+    # type=int only when --port is absent, and reports a malformed QDRANT_PORT
+    # as a usage error instead of a traceback while building the parser.
+    ap.add_argument("--port",   default=env_default("QDRANT_PORT", "6333"),         type=int, help="Qdrant port (default: env QDRANT_PORT or 6333)")
+    ap.add_argument("--limit",  default=5, type=int, help="Sample size for notes (default: 5)")
+    ap.add_argument("--note-id", default=None, help="Optional note_id to inspect chunk payloads")
+    args = ap.parse_args()
+
+    # The edges collection name is not used by this check.
+    notes_coll, chunks_coll, _edges_coll = collections(args.prefix)
+
+    try:
+        client = get_client(args.host, args.port)
+    except Exception as e:
+        print(json.dumps({"error": f"QdrantClient init failed: {type(e).__name__}: {e}"}))
+        sys.exit(2)
+
+    # sanity: list collections (a failure here is only a warning)
+    try:
+        cols = client.get_collections().collections
+        print(json.dumps({"info": "collections", "count": len(cols)}, ensure_ascii=False))
+    except Exception as e:
+        print(json.dumps({"warn": f"get_collections failed: {type(e).__name__}: {e}"}))
+
+    # 1) Notes sample
+    # Bind `notes` up front so step 2 still works (and reports the right
+    # problem) when the notes scroll below fails.
+    notes: List[Any] = []
+    try:
+        notes = sample_notes(client, notes_coll, args.limit)
+        notes_out = []
+        for p in notes:
+            pl = p.payload or {}
+            notes_out.append({
+                "point_id": getattr(p, "id", None),
+                "id": pl.get("id"),
+                "title": pl.get("title"),
+                "type": pl.get("type"),
+                "retriever_weight": pl.get("retriever_weight"),
+            })
+        print(json.dumps({"notes_sample": notes_out}, ensure_ascii=False))
+    except Exception as e:
+        print(json.dumps({"error": f"notes scroll failed: {type(e).__name__}: {e}", "collection": notes_coll}))
+
+    # 2) Chunks for first note or specific note-id
+    try:
+        target_note_id = args.note_id or None
+        if target_note_id is None and notes:
+            # A point may carry no payload at all; treat that as "no id".
+            target_note_id = (notes[0].payload or {}).get("id")
+
+        if target_note_id:
+            cks = chunks_for_note(client, chunks_coll, target_note_id, limit=50)
+            chunks_out = []
+            for p in cks:
+                pl = p.payload or {}
+                text = pl.get("text") or ""
+                window = pl.get("window") or ""
+                chunks_out.append({
+                    "chunk_id": pl.get("chunk_id"),
+                    "note_id": pl.get("note_id"),
+                    "retriever_weight": pl.get("retriever_weight"),
+                    "text_len": len(text),
+                    "window_len": len(window),
+                    "window_minus_text": len(window) - len(text),
+                })
+            print(json.dumps({"chunks_for_note": target_note_id, "sample": chunks_out}, ensure_ascii=False))
+        else:
+            print(json.dumps({"warn": "no note_id available to fetch chunks"}))
+    except Exception as e:
+        print(json.dumps({"error": f"chunks scroll failed: {type(e).__name__}: {e}", "collection": chunks_coll}))
+
+
+# Run the check only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()