Dateien nach "tests" hochladen
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
b9316c693d
commit
315a1c0945
|
|
@ -1,136 +1,93 @@
|
||||||
|
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""
|
import os, json, argparse, random
|
||||||
check_types_registry_qdrant_v2.py
|
from typing import Any, Dict, List, Tuple, Optional
|
||||||
- Zeigt Note-Payloads (type, retriever_weight) aus Qdrant
|
from qdrant_client import QdrantClient
|
||||||
- Ermittelt eine geeignete note_id (bevorzugt 'note_id', sonst 'id')
|
|
||||||
- Lädt dazugehörige Chunks und zeigt retriever_weight sowie Fenstergrößen
|
|
||||||
|
|
||||||
Aufrufbeispiele:
|
def _get_prefix(explicit: Optional[str]) -> str:
|
||||||
python3 check_types_registry_qdrant_v2.py
|
return (explicit or os.environ.get("COLLECTION_PREFIX") or os.environ.get("MINDNET_PREFIX") or "mindnet").strip()
|
||||||
python3 check_types_registry_qdrant_v2.py --limit 10
|
|
||||||
python3 check_types_registry_qdrant_v2.py --note-id 20250827-xyz
|
|
||||||
COLLECTION_PREFIX=mindnet python3 check_types_registry_qdrant_v2.py
|
|
||||||
"""
|
|
||||||
import os, sys, json, argparse
|
|
||||||
from typing import Any, Dict, List, Tuple
|
|
||||||
|
|
||||||
try:
|
def _names(prefix: str) -> Dict[str,str]:
|
||||||
from qdrant_client import QdrantClient
|
return {
|
||||||
from qdrant_client.models import Filter, FieldCondition, MatchValue
|
"notes": f"{prefix}_notes",
|
||||||
except Exception as e:
|
"chunks": f"{prefix}_chunks",
|
||||||
print(json.dumps({"error": f"qdrant_client import failed: {type(e).__name__}: {e}"}))
|
"edges": f"{prefix}_edges",
|
||||||
sys.exit(1)
|
}
|
||||||
|
|
||||||
|
|
||||||
def env_default(name: str, fallback: str) -> str:
    """Return the value of environment variable ``name``, or ``fallback``.

    An unset variable and a variable set to the empty string are treated
    alike: both yield ``fallback``.
    """
    # Environment values are strings, so None (unset) and "" are the only
    # falsy results of the lookup.
    return os.environ.get(name) or fallback
|
|
||||||
|
|
||||||
|
|
||||||
def collections(prefix: str) -> Tuple[str, str, str]:
    """Build the collection names derived from ``prefix``.

    Returns a 3-tuple in the fixed order (notes, chunks, edges); each name
    has the form ``<prefix>_<kind>``.
    """
    notes_name, chunks_name, edges_name = (
        f"{prefix}_{kind}" for kind in ("notes", "chunks", "edges")
    )
    return notes_name, chunks_name, edges_name
|
|
||||||
|
|
||||||
|
|
||||||
def get_client(host: str, port: int) -> QdrantClient:
    """Create a ``QdrantClient`` for the given ``host`` and ``port``."""
    connection = {"host": host, "port": port}
    return QdrantClient(**connection)
|
|
||||||
|
|
||||||
|
|
||||||
def sample_notes(client: QdrantClient, coll: str, limit: int):
    """Scroll collection ``coll`` once and return the points of that page.

    ``limit`` is forwarded to ``client.scroll`` and payloads are requested.
    The second element of the scroll result (the next-page offset) is
    discarded, so only a single page is ever read.
    """
    page = client.scroll(collection_name=coll, limit=limit, with_payload=True)
    points, _next_offset = page
    return points
|
|
||||||
|
|
||||||
|
|
||||||
def chunks_for_note(client: QdrantClient, coll: str, note_id: str, limit: int = 100):
    """Return points from ``coll`` whose ``note_id`` payload field matches.

    Args:
        client: Qdrant client used for the scroll request.
        coll: Name of the collection to read from.
        note_id: Value the ``note_id`` payload field must equal.
        limit: Page size forwarded to ``client.scroll``.

    Returns:
        The points of the first scroll page; the next-page offset is
        discarded.
    """
    flt = Filter(must=[FieldCondition(key="note_id", match=MatchValue(value=note_id))])
    # QdrantClient.scroll names its filter parameter ``scroll_filter``;
    # passing ``filter=`` is rejected as an unknown keyword argument.
    pts, _ = client.scroll(
        collection_name=coll,
        scroll_filter=flt,
        limit=limit,
        with_payload=True,
    )
    return pts
|
|
||||||
|
|
||||||
|
|
||||||
def pick_note_id(payload: Dict[str, Any]) -> str | None:
|
|
||||||
# bevorzugt 'note_id', sonst 'id'
|
|
||||||
nid = payload.get("note_id") or payload.get("id")
|
|
||||||
if isinstance(nid, str) and nid.strip():
|
|
||||||
return nid.strip()
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
def _scroll(client: QdrantClient, collection: str, *, limit=5, with_payload=True):
    """Scroll one page of points from ``collection`` without a filter.

    Args:
        client: Qdrant client used for the request.
        collection: Name of the collection to read from.
        limit: Page size forwarded to ``client.scroll``.
        with_payload: Forwarded to ``client.scroll`` unchanged.

    Returns:
        Whatever ``client.scroll`` returns; the caller unpacks it as
        ``(points, next_offset)``.
    """
    # No filter is passed, so the ``filter`` / ``scroll_filter`` keyword
    # difference between SDK versions does not apply here. A retry with the
    # identical arguments could only fail the same way, so the call is made
    # exactly once and any error propagates to the caller.
    return client.scroll(
        collection_name=collection,
        limit=limit,
        with_payload=with_payload,
    )
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
ap = argparse.ArgumentParser(description="Check Type-Registry fields in Qdrant (v2)")
|
ap = argparse.ArgumentParser()
|
||||||
ap.add_argument("--prefix", default=env_default("COLLECTION_PREFIX", "mindnet"))
|
ap.add_argument("--host", default=os.environ.get("QDRANT_HOST", "127.0.0.1"))
|
||||||
ap.add_argument("--host", default=env_default("QDRANT_HOST", "127.0.0.1"))
|
ap.add_argument("--port", default=int(os.environ.get("QDRANT_PORT", "6333")))
|
||||||
ap.add_argument("--port", default=int(env_default("QDRANT_PORT", "6333")), type=int)
|
ap.add_argument("--prefix", default=None)
|
||||||
ap.add_argument("--limit", default=5, type=int)
|
ap.add_argument("--limit", type=int, default=5)
|
||||||
ap.add_argument("--note-id", default=None, help="Optional erzwungene note_id")
|
ap.add_argument("--note-id", default=None)
|
||||||
args = ap.parse_args()
|
args = ap.parse_args()
|
||||||
|
|
||||||
notes_coll, chunks_coll, edges_coll = collections(args.prefix)
|
prefix = _get_prefix(args.prefix)
|
||||||
|
names = _names(prefix)
|
||||||
|
client = QdrantClient(host=args.host, port=args.port, prefer_grpc=False)
|
||||||
|
|
||||||
|
# Notes sample
|
||||||
try:
|
try:
|
||||||
client = get_client(args.host, args.port)
|
pts, _ = _scroll(client, names["notes"], limit=args.limit, with_payload=True)
|
||||||
try:
|
|
||||||
cols = client.get_collections().collections
|
|
||||||
print(json.dumps({"info": "collections", "count": len(cols)}, ensure_ascii=False))
|
|
||||||
except Exception as e:
|
|
||||||
print(json.dumps({"warn": f"get_collections failed: {type(e).__name__}: {e}"}))
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(json.dumps({"error": f"QdrantClient init failed: {type(e).__name__}: {e}"}))
|
print(json.dumps({"error": f"notes scroll failed: {type(e).__name__}: {e}", "collection": names["notes"]}))
|
||||||
sys.exit(2)
|
return
|
||||||
|
|
||||||
# 1) Notes sample
|
sample = []
|
||||||
notes = []
|
for p in pts:
|
||||||
|
pl = (p.payload or {})
|
||||||
|
sample.append({
|
||||||
|
"point_id": str(p.id),
|
||||||
|
"note_id": pl.get("note_id"),
|
||||||
|
"id": pl.get("id"),
|
||||||
|
"title": pl.get("title"),
|
||||||
|
"type": pl.get("type"),
|
||||||
|
"retriever_weight": pl.get("retriever_weight"),
|
||||||
|
"chunk_profile": pl.get("chunk_profile"),
|
||||||
|
})
|
||||||
|
print(json.dumps({"notes_sample": sample}, ensure_ascii=False))
|
||||||
|
|
||||||
|
# Pick a note to fetch chunks
|
||||||
|
target_note_id = args.note_id or None
|
||||||
|
if not target_note_id:
|
||||||
|
for s in sample:
|
||||||
|
if s.get("note_id"):
|
||||||
|
target_note_id = s["note_id"]
|
||||||
|
break
|
||||||
|
|
||||||
|
if not target_note_id:
|
||||||
|
print(json.dumps({"warn": "no note_id available to fetch chunks"}))
|
||||||
|
return
|
||||||
|
|
||||||
|
# Chunks for target note
|
||||||
try:
|
try:
|
||||||
notes = sample_notes(client, notes_coll, args.limit)
|
# pull a few chunks and filter client-side by note_id
|
||||||
out = []
|
pts, _ = _scroll(client, names["chunks"], limit=50, with_payload=True)
|
||||||
for p in notes:
|
|
||||||
pl = p.payload or {}
|
|
||||||
out.append({
|
|
||||||
"point_id": getattr(p, "id", None),
|
|
||||||
"note_id": pl.get("note_id"),
|
|
||||||
"id": pl.get("id"),
|
|
||||||
"title": pl.get("title"),
|
|
||||||
"type": pl.get("type"),
|
|
||||||
"retriever_weight": pl.get("retriever_weight"),
|
|
||||||
})
|
|
||||||
print(json.dumps({"notes_sample": out}, ensure_ascii=False))
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(json.dumps({"error": f"notes scroll failed: {type(e).__name__}: {e}", "collection": notes_coll}))
|
print(json.dumps({"error": f"chunks scroll failed: {type(e).__name__}: {e}", "collection": names["chunks"]}))
|
||||||
|
return
|
||||||
|
|
||||||
# 2) Chunks for note
|
chunks = [{
|
||||||
try:
|
"id": str(p.id),
|
||||||
target_note_id = args.note_id
|
"chunk_id": (p.payload or {}).get("chunk_id"),
|
||||||
if not target_note_id and notes:
|
"note_id": (p.payload or {}).get("note_id"),
|
||||||
# benutze den ersten Treffer mit verwertbarer note_id
|
"type": (p.payload or {}).get("type"),
|
||||||
for p in notes:
|
"retriever_weight": (p.payload or {}).get("retriever_weight"),
|
||||||
pl = p.payload or {}
|
"window_len": len((p.payload or {}).get("window") or ""),
|
||||||
candidate = pick_note_id(pl)
|
"text_len": len((p.payload or {}).get("text") or ""),
|
||||||
if candidate:
|
} for p in pts if (p.payload or {}).get("note_id") == target_note_id][:5]
|
||||||
target_note_id = candidate
|
|
||||||
break
|
|
||||||
|
|
||||||
if target_note_id:
|
|
||||||
cks = chunks_for_note(client, chunks_coll, target_note_id, limit=50)
|
|
||||||
out = []
|
|
||||||
for p in cks:
|
|
||||||
pl = p.payload or {}
|
|
||||||
text = pl.get("text") or ""
|
|
||||||
window = pl.get("window") or ""
|
|
||||||
out.append({
|
|
||||||
"chunk_id": pl.get("chunk_id"),
|
|
||||||
"note_id": pl.get("note_id"),
|
|
||||||
"retriever_weight": pl.get("retriever_weight"),
|
|
||||||
"text_len": len(text),
|
|
||||||
"window_len": len(window),
|
|
||||||
"window_minus_text": len(window) - len(text),
|
|
||||||
})
|
|
||||||
print(json.dumps({"chunks_for_note": target_note_id, "sample": out}, ensure_ascii=False))
|
|
||||||
else:
|
|
||||||
print(json.dumps({"warn": "no usable note_id in sample; pass --note-id explicitly"}))
|
|
||||||
except Exception as e:
|
|
||||||
print(json.dumps({"error": f"chunks scroll failed: {type(e).__name__}: {e}", "collection": chunks_coll}))
|
|
||||||
|
|
||||||
|
print(json.dumps({"target_note_id": target_note_id, "chunk_samples": chunks}, ensure_ascii=False))
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user