#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
check_types_registry_qdrant.py
Verify that fields from the type registry arrive in Qdrant:
- Note payload: type, retriever_weight
- Chunk payload: retriever_weight
- Optional: chunks per note (filtered by note_id)

Output: JSON objects (a few samples)

Examples:
  python3 check_types_registry_qdrant.py
  python3 check_types_registry_qdrant.py --limit 10
  python3 check_types_registry_qdrant.py --note-id 20250827-xyz
  COLLECTION_PREFIX=mindnet python3 check_types_registry_qdrant.py
"""
# Recovered from git patch 820e52d (adds tests/check_types_registry_qdrant.py);
# the patch text had been collapsed onto a few lines and is restored here as
# a plain Python module.

import os
import sys
import json
import argparse
from typing import Any, Dict, List, Tuple

try:
    from qdrant_client import QdrantClient
    from qdrant_client.models import Filter, FieldCondition, MatchValue
except Exception as e:
    # Report the failure as JSON, like every other message of this script.
    print(json.dumps({"error": f"qdrant_client import failed: {type(e).__name__}: {e}"}))
    sys.exit(1)


def env_default(name: str, fallback: str) -> str:
    """Return environment variable *name*, or *fallback* if unset or empty."""
    v = os.environ.get(name)
    if v is None or v == "":
        return fallback
    return v


def get_client(host: str, port: int) -> QdrantClient:
    """Create a Qdrant client for the given host and port."""
    return QdrantClient(host=host, port=port)


def collections(prefix: str) -> Tuple[str, str, str]:
    """Return the (notes, chunks, edges) collection names for *prefix*."""
    return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges"


def sample_notes(client: QdrantClient, coll: str, limit: int) -> List[Any]:
    """Scroll up to *limit* points (with payload) from collection *coll*."""
    pts, _ = client.scroll(collection_name=coll, limit=limit, with_payload=True)
    return pts


def chunks_for_note(client: QdrantClient, coll: str, note_id: str, limit: int = 100) -> List[Any]:
    """Scroll up to *limit* points of *coll* whose payload ``note_id`` matches."""
    flt = Filter(must=[FieldCondition(key="note_id", match=MatchValue(value=note_id))])
    # QdrantClient.scroll takes the filter as ``scroll_filter``; the previous
    # ``filter=`` keyword is not a scroll parameter and made the call fail.
    pts, _ = client.scroll(
        collection_name=coll,
        with_payload=True,
        limit=limit,
        scroll_filter=flt,
    )
    return pts


def _note_summary(point: Any) -> Dict[str, Any]:
    """Extract the registry-relevant fields of one note point."""
    pl = point.payload or {}
    return {
        "point_id": getattr(point, "id", None),
        "id": pl.get("id"),
        "title": pl.get("title"),
        "type": pl.get("type"),
        "retriever_weight": pl.get("retriever_weight"),
    }


def _chunk_summary(point: Any) -> Dict[str, Any]:
    """Extract the registry-relevant fields and text sizes of one chunk point."""
    pl = point.payload or {}
    text = pl.get("text") or ""
    window = pl.get("window") or ""
    return {
        "chunk_id": pl.get("chunk_id"),
        "note_id": pl.get("note_id"),
        "retriever_weight": pl.get("retriever_weight"),
        "text_len": len(text),
        "window_len": len(window),
        "window_minus_text": len(window) - len(text),
    }


def main() -> None:
    """Print JSON samples of note and chunk payloads from Qdrant.

    Exits with status 2 if the Qdrant client cannot be created. Failures of
    the individual scroll requests are reported as JSON ``error`` objects and
    do not abort the run.
    """
    ap = argparse.ArgumentParser(description="Check Type-Registry fields in Qdrant")
    ap.add_argument("--prefix", default=env_default("COLLECTION_PREFIX", "mindnet"), help="Collection prefix (default: env COLLECTION_PREFIX or 'mindnet')")
    ap.add_argument("--host", default=env_default("QDRANT_HOST", "127.0.0.1"), help="Qdrant host (default: env QDRANT_HOST or '127.0.0.1')")
    # The default stays a string: argparse runs string defaults through
    # ``type`` itself, so a malformed QDRANT_PORT becomes a regular usage
    # error (and only when --port is not given) instead of a traceback.
    ap.add_argument("--port", default=env_default("QDRANT_PORT", "6333"), type=int, help="Qdrant port (default: env QDRANT_PORT or 6333)")
    ap.add_argument("--limit", default=5, type=int, help="Sample size for notes (default: 5)")
    ap.add_argument("--note-id", default=None, help="Optional note_id to inspect chunk payloads")
    args = ap.parse_args()

    # The edges collection is not inspected by this script.
    notes_coll, chunks_coll, _ = collections(args.prefix)

    try:
        client = get_client(args.host, args.port)
        # sanity: list collections (ignore errors)
        try:
            cols = client.get_collections().collections
            print(json.dumps({"info": "collections", "count": len(cols)}, ensure_ascii=False))
        except Exception as e:
            print(json.dumps({"warn": f"get_collections failed: {type(e).__name__}: {e}"}))
    except Exception as e:
        print(json.dumps({"error": f"QdrantClient init failed: {type(e).__name__}: {e}"}))
        sys.exit(2)

    # 1) Notes sample
    # Bound up front so step 2 still works (with --note-id) or degrades to
    # its "no note_id" warning when the scroll below fails.
    notes: List[Any] = []
    try:
        notes = sample_notes(client, notes_coll, args.limit)
        out = [_note_summary(p) for p in notes]
        print(json.dumps({"notes_sample": out}, ensure_ascii=False))
    except Exception as e:
        print(json.dumps({"error": f"notes scroll failed: {type(e).__name__}: {e}", "collection": notes_coll}))

    # 2) Chunks for first note or specific note-id
    try:
        target_note_id = None
        if args.note_id:
            target_note_id = args.note_id
        elif notes:
            # The first sampled point may carry no payload at all.
            target_note_id = (notes[0].payload or {}).get("id")

        if target_note_id:
            cks = chunks_for_note(client, chunks_coll, target_note_id, limit=50)
            out = [_chunk_summary(p) for p in cks]
            print(json.dumps({"chunks_for_note": target_note_id, "sample": out}, ensure_ascii=False))
        else:
            print(json.dumps({"warn": "no note_id available to fetch chunks"}))
    except Exception as e:
        print(json.dumps({"error": f"chunks scroll failed: {type(e).__name__}: {e}", "collection": chunks_coll}))


if __name__ == "__main__":
    main()