diff --git a/tests/check_types_registry_qdrant.py b/tests/check_types_registry_qdrant.py index eff8ad1..54f9215 100644 --- a/tests/check_types_registry_qdrant.py +++ b/tests/check_types_registry_qdrant.py @@ -1,25 +1,18 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- """ -check_types_registry_qdrant.py -Verifiziert, ob Felder aus der Type-Registry in Qdrant ankommen: -- Note-Payload: type, retriever_weight -- Chunk-Payload: retriever_weight -- Optional: Chunks pro Note (Filter über note_id) +check_types_registry_qdrant_v2.py +- Zeigt Note-Payloads (type, retriever_weight) aus Qdrant +- Ermittelt eine geeignete note_id (bevorzugt 'note_id', sonst 'id') +- Lädt dazugehörige Chunks und zeigt retriever_weight sowie Fenstergrößen -Ausgabe: JSON-Objekte (einige Stichproben) - -Beispiele: - python3 check_types_registry_qdrant.py - python3 check_types_registry_qdrant.py --limit 10 - python3 check_types_registry_qdrant.py --note-id 20250827-xyz - COLLECTION_PREFIX=mindnet python3 check_types_registry_qdrant.py +Aufrufbeispiele: + python3 check_types_registry_qdrant_v2.py + python3 check_types_registry_qdrant_v2.py --limit 10 + python3 check_types_registry_qdrant_v2.py --note-id 20250827-xyz + COLLECTION_PREFIX=mindnet python3 check_types_registry_qdrant_v2.py """ - -import os -import sys -import json -import argparse +import os, sys, json, argparse from typing import Any, Dict, List, Tuple try: @@ -37,39 +30,46 @@ def env_default(name: str, fallback: str) -> str: return v -def get_client(host: str, port: int) -> QdrantClient: - return QdrantClient(host=host, port=port) - - def collections(prefix: str) -> Tuple[str, str, str]: return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges" -def sample_notes(client: QdrantClient, coll: str, limit: int) -> List[Any]: +def get_client(host: str, port: int) -> QdrantClient: + return QdrantClient(host=host, port=port) + + +def sample_notes(client: QdrantClient, coll: str, limit: int): pts, _ = client.scroll(collection_name=coll, limit=limit, with_payload=True) return pts -def chunks_for_note(client: QdrantClient, coll: str, note_id: str, limit: int = 100) -> List[Any]: +def chunks_for_note(client: QdrantClient, coll: str, note_id: str, limit: int = 100): flt = Filter(must=[FieldCondition(key="note_id", match=MatchValue(value=note_id))]) pts, _ = client.scroll(collection_name=coll, with_payload=True, limit=limit, filter=flt) return pts -def main() -> None: - ap = argparse.ArgumentParser(description="Check Type-Registry fields in Qdrant") - ap.add_argument("--prefix", default=env_default("COLLECTION_PREFIX", "mindnet"), help="Collection prefix (default: env COLLECTION_PREFIX or 'mindnet')") - ap.add_argument("--host", default=env_default("QDRANT_HOST", "127.0.0.1"), help="Qdrant host (default: env QDRANT_HOST or '127.0.0.1')") - ap.add_argument("--port", default=int(env_default("QDRANT_PORT", "6333")), type=int, help="Qdrant port (default: env QDRANT_PORT or 6333)") - ap.add_argument("--limit", default=5, type=int, help="Sample size for notes (default: 5)") - ap.add_argument("--note-id", default=None, help="Optional note_id to inspect chunk payloads") +def pick_note_id(payload: Dict[str, Any]) -> str | None: + # bevorzugt 'note_id', sonst 'id' + nid = payload.get("note_id") or payload.get("id") + if isinstance(nid, str) and nid.strip(): + return nid.strip() + return None + + +def main(): + ap = argparse.ArgumentParser(description="Check Type-Registry fields in Qdrant (v2)") + ap.add_argument("--prefix", default=env_default("COLLECTION_PREFIX", "mindnet")) + ap.add_argument("--host", default=env_default("QDRANT_HOST", "127.0.0.1")) + ap.add_argument("--port", default=int(env_default("QDRANT_PORT", "6333")), type=int) + ap.add_argument("--limit", default=5, type=int) + ap.add_argument("--note-id", default=None, help="Optional erzwungene note_id") args = ap.parse_args() notes_coll, chunks_coll, edges_coll = collections(args.prefix) try: client = get_client(args.host, args.port) - # sanity: list collections (ignore errors) try: cols = client.get_collections().collections print(json.dumps({"info": "collections", "count": len(cols)}, ensure_ascii=False)) @@ -80,6 +80,7 @@ def main() -> None: sys.exit(2) # 1) Notes sample + notes = [] try: notes = sample_notes(client, notes_coll, args.limit) out = [] @@ -87,6 +88,7 @@ def main() -> None: pl = p.payload or {} out.append({ "point_id": getattr(p, "id", None), + "note_id": pl.get("note_id"), "id": pl.get("id"), "title": pl.get("title"), "type": pl.get("type"), @@ -96,13 +98,17 @@ def main() -> None: except Exception as e: print(json.dumps({"error": f"notes scroll failed: {type(e).__name__}: {e}", "collection": notes_coll})) - # 2) Chunks for first note or specific note-id + # 2) Chunks for note try: - target_note_id = None - if args.note_id: - target_note_id = args.note_id - elif notes: - target_note_id = notes[0].payload.get("id") + target_note_id = args.note_id + if not target_note_id and notes: + # benutze den ersten Treffer mit verwertbarer note_id + for p in notes: + pl = p.payload or {} + candidate = pick_note_id(pl) + if candidate: + target_note_id = candidate + break if target_note_id: cks = chunks_for_note(client, chunks_coll, target_note_id, limit=50) @@ -121,7 +127,7 @@ def main() -> None: }) print(json.dumps({"chunks_for_note": target_note_id, "sample": out}, ensure_ascii=False)) else: - print(json.dumps({"warn": "no note_id available to fetch chunks"})) + print(json.dumps({"warn": "no usable note_id in sample; pass --note-id explicitly"})) except Exception as e: print(json.dumps({"error": f"chunks scroll failed: {type(e).__name__}: {e}", "collection": chunks_coll}))