Dateien nach "tests" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 2s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 2s
This commit is contained in:
parent
444e8e4206
commit
820e52d869
130
tests/check_types_registry_qdrant.py
Normal file
130
tests/check_types_registry_qdrant.py
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
check_types_registry_qdrant.py

Verifies that fields from the type registry arrive in Qdrant:
- Note payload: type, retriever_weight
- Chunk payload: retriever_weight
- Optional: chunks per note (filtered by note_id)

Output: JSON objects (a few samples)

Examples:
  python3 check_types_registry_qdrant.py
  python3 check_types_registry_qdrant.py --limit 10
  python3 check_types_registry_qdrant.py --note-id 20250827-xyz
  COLLECTION_PREFIX=mindnet python3 check_types_registry_qdrant.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
try:
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import Filter, FieldCondition, MatchValue
|
||||
except Exception as e:
|
||||
print(json.dumps({"error": f"qdrant_client import failed: {type(e).__name__}: {e}"}))
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def env_default(name: str, fallback: str) -> str:
    """Return the value of environment variable ``name``, or ``fallback``.

    An unset variable and a variable set to the empty string are treated the
    same way: both yield ``fallback``.
    """
    # Environment values are always str, so the only falsy results of the
    # lookup are None (unset) and "" (set but empty).
    return os.environ.get(name) or fallback
|
||||
|
||||
|
||||
def get_client(host: str, port: int) -> QdrantClient:
    """Construct a ``QdrantClient`` for the given ``host`` and ``port``."""
    client = QdrantClient(host=host, port=port)
    return client
|
||||
|
||||
|
||||
def collections(prefix: str) -> Tuple[str, str, str]:
    """Derive the notes, chunks and edges collection names from ``prefix``.

    Returns:
        ``(<prefix>_notes, <prefix>_chunks, <prefix>_edges)`` in that order.
    """
    notes, chunks, edges = (
        f"{prefix}_{suffix}" for suffix in ("notes", "chunks", "edges")
    )
    return notes, chunks, edges
|
||||
|
||||
|
||||
def sample_notes(client: QdrantClient, coll: str, limit: int) -> List[Any]:
    """Fetch one scroll page of at most ``limit`` points, with payloads, from ``coll``.

    Only the points of the scroll result are returned; the second element of
    the result is discarded.
    """
    points, _unused = client.scroll(
        with_payload=True,
        limit=limit,
        collection_name=coll,
    )
    return points
|
||||
|
||||
|
||||
def chunks_for_note(client: QdrantClient, coll: str, note_id: str, limit: int = 100) -> List[Any]:
    """Return points from ``coll`` whose payload field ``note_id`` matches.

    Args:
        client: Qdrant client used for the scroll request.
        coll: Name of the collection to scroll.
        note_id: Value the ``note_id`` payload field must equal.
        limit: Maximum number of points to fetch (one scroll page).

    Returns:
        The points of the first scroll page, with payloads attached.
    """
    flt = Filter(must=[FieldCondition(key="note_id", match=MatchValue(value=note_id))])
    # QdrantClient.scroll names its filter parameter ``scroll_filter``; the
    # previous ``filter=`` keyword is not a parameter of that method and made
    # every call fail before any request was sent.
    pts, _ = client.scroll(
        collection_name=coll,
        scroll_filter=flt,
        limit=limit,
        with_payload=True,
    )
    return pts
|
||||
|
||||
|
||||
def main() -> None:
    """Print JSON samples of type-registry fields found in Qdrant payloads.

    Steps:
      1. Parse CLI arguments (defaults come from COLLECTION_PREFIX,
         QDRANT_HOST and QDRANT_PORT when set and non-empty).
      2. Create the client; exit with status 2 if that fails. Listing the
         collections is a best-effort sanity check that only emits a warning.
      3. Print a sample of note payloads (id, title, type, retriever_weight).
      4. Print chunk payload info for ``--note-id`` or, if not given, for the
         first sampled note.

    Every result, warning and error is written to stdout as one JSON object
    per line. Scroll failures are reported but do not change the exit status.
    """
    ap = argparse.ArgumentParser(description="Check Type-Registry fields in Qdrant")
    ap.add_argument("--prefix", default=env_default("COLLECTION_PREFIX", "mindnet"), help="Collection prefix (default: env COLLECTION_PREFIX or 'mindnet')")
    ap.add_argument("--host", default=env_default("QDRANT_HOST", "127.0.0.1"), help="Qdrant host (default: env QDRANT_HOST or '127.0.0.1')")
    # The default stays a string on purpose: argparse runs ``type=int`` on
    # string defaults, so a malformed QDRANT_PORT yields a regular usage error
    # instead of an uncaught ValueError while the parser is being built.
    ap.add_argument("--port", default=env_default("QDRANT_PORT", "6333"), type=int, help="Qdrant port (default: env QDRANT_PORT or 6333)")
    ap.add_argument("--limit", default=5, type=int, help="Sample size for notes (default: 5)")
    ap.add_argument("--note-id", default=None, help="Optional note_id to inspect chunk payloads")
    args = ap.parse_args()

    # The edges collection name is derived as well but not inspected here.
    notes_coll, chunks_coll, _edges_coll = collections(args.prefix)

    try:
        client = get_client(args.host, args.port)
    except Exception as e:
        print(json.dumps({"error": f"QdrantClient init failed: {type(e).__name__}: {e}"}))
        sys.exit(2)

    # Sanity check: list collections. A failure here is only a warning.
    try:
        cols = client.get_collections().collections
        print(json.dumps({"info": "collections", "count": len(cols)}, ensure_ascii=False))
    except Exception as e:
        print(json.dumps({"warn": f"get_collections failed: {type(e).__name__}: {e}"}))

    # 1) Notes sample.
    # Bound up front so that a failed scroll leaves an empty sample for step 2
    # instead of an unbound name that would be misreported as a chunk error.
    notes: List[Any] = []
    try:
        notes = sample_notes(client, notes_coll, args.limit)
        out = []
        for p in notes:
            pl = p.payload or {}
            out.append({
                "point_id": getattr(p, "id", None),
                "id": pl.get("id"),
                "title": pl.get("title"),
                "type": pl.get("type"),
                "retriever_weight": pl.get("retriever_weight"),
            })
        print(json.dumps({"notes_sample": out}, ensure_ascii=False))
    except Exception as e:
        print(json.dumps({"error": f"notes scroll failed: {type(e).__name__}: {e}", "collection": notes_coll}))

    # 2) Chunks for the requested note-id, else for the first sampled note.
    try:
        target_note_id = None
        if args.note_id:
            target_note_id = args.note_id
        elif notes:
            # A point may carry no payload at all; treat that as "no id".
            target_note_id = (notes[0].payload or {}).get("id")

        if target_note_id:
            cks = chunks_for_note(client, chunks_coll, target_note_id, limit=50)
            out = []
            for p in cks:
                pl = p.payload or {}
                text = pl.get("text") or ""
                window = pl.get("window") or ""
                out.append({
                    "chunk_id": pl.get("chunk_id"),
                    "note_id": pl.get("note_id"),
                    "retriever_weight": pl.get("retriever_weight"),
                    "text_len": len(text),
                    "window_len": len(window),
                    "window_minus_text": len(window) - len(text),
                })
            print(json.dumps({"chunks_for_note": target_note_id, "sample": out}, ensure_ascii=False))
        else:
            print(json.dumps({"warn": "no note_id available to fetch chunks"}))
    except Exception as e:
        print(json.dumps({"error": f"chunks scroll failed: {type(e).__name__}: {e}", "collection": chunks_coll}))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Reference in New Issue
Block a user