#!/usr/bin/env python3
"""
Set up the Qdrant collections for a Mindnet project (idempotent).

With the default options this creates:
  - mindnet_chunks (size=384, distance=Cosine) -> semantic search over text chunks
  - mindnet_notes  (size=384, distance=Cosine) -> note level / faceting
  - mindnet_edges  (size=1,   distance=Cosine) -> explicit links
                                                  (dummy vector; filtering via payload)

The collection prefix, embedding dimension and distance metric can be
overridden on the command line. Payload indexes (keyword/text) are created
for the fields used for filtering.
"""

import os
import sys
import json
import argparse
import requests

QDRANT_URL = os.environ.get("QDRANT_URL", "http://127.0.0.1:6333")

# Default per-request timeout in seconds.
DEFAULT_TIMEOUT = 15


def _url(path: str) -> str:
    """Join the configured Qdrant base URL with an absolute API *path*."""
    return QDRANT_URL.rstrip("/") + path


def api(method: str, path: str, **kwargs) -> requests.Response:
    """Send a request to the Qdrant REST API and return the response.

    Args:
        method: HTTP method, e.g. ``"GET"`` or ``"PUT"``.
        path: API path starting with ``/``; appended to ``QDRANT_URL``.
        **kwargs: Passed through to ``requests.request``. A ``timeout`` of
            ``DEFAULT_TIMEOUT`` seconds is applied unless the caller sets one.

    Raises:
        RuntimeError: If the response status is not a success (``r.ok`` is
            false). The message contains method, URL, status code and body.
    """
    url = _url(path)
    # setdefault instead of a fixed keyword so callers may override the
    # timeout without triggering a duplicate-keyword TypeError.
    kwargs.setdefault("timeout", DEFAULT_TIMEOUT)
    r = requests.request(method, url, **kwargs)
    if not r.ok:
        raise RuntimeError(f"{method} {url} -> {r.status_code} {r.text}")
    return r


def exists(collection: str) -> bool:
    """Return whether *collection* exists on the Qdrant server.

    A 404 response means the collection is missing and yields ``False``.
    Any successful response yields ``True`` regardless of the collection's
    reported status, so an existing collection that is not "green" is not
    mistaken for a missing one.

    Raises:
        RuntimeError: For any non-success response other than 404.
    """
    url = _url(f"/collections/{collection}")
    # Not routed through api(): api() raises on every non-2xx status, which
    # would turn the expected "not found" answer into a fatal error.
    r = requests.request("GET", url, timeout=DEFAULT_TIMEOUT)
    if r.status_code == 404:
        return False
    if not r.ok:
        raise RuntimeError(f"GET {url} -> {r.status_code} {r.text}")
    return True


def create_collection(collection: str, size: int, distance: str) -> None:
    """Create *collection* with the given vector config unless it exists.

    Args:
        collection: Name of the collection.
        size: Vector dimension.
        distance: Distance metric name sent to Qdrant (e.g. ``"Cosine"``).
    """
    if exists(collection):
        print(f"[=] {collection} existiert bereits.")
        return
    payload = {"vectors": {"size": size, "distance": distance}}
    api("PUT", f"/collections/{collection}", json=payload)
    print(f"[+] Collection {collection} angelegt (size={size}, distance={distance}).")


def keyword_index(collection: str, field: str) -> None:
    """Create a ``keyword`` payload index on *field* of *collection*."""
    api(
        "PUT",
        f"/collections/{collection}/index",
        json={"field_name": field, "field_schema": "keyword"},
    )
    print(f"[+] keyword-Index: {collection}.{field}")


def text_index(collection: str, field: str = "text") -> None:
    """Create a ``text`` payload index on *field* of *collection*."""
    api(
        "PUT",
        f"/collections/{collection}/index",
        json={"field_name": field, "field_schema": {"type": "text"}},
    )
    print(f"[+] text-Index: {collection}.{field}")


def main() -> None:
    """Parse the command line, then create collections and payload indexes."""
    # The global statement must precede every use of the name inside this
    # function; QDRANT_URL is read below as the argparse default.
    global QDRANT_URL

    p = argparse.ArgumentParser()
    p.add_argument("--qdrant-url", default=QDRANT_URL, help="z.B. http://127.0.0.1:6333")
    p.add_argument("--prefix", default="mindnet", help="Präfix für Collections")
    p.add_argument("--dim", type=int, default=384, help="Embedding-Dimension (MiniLM: 384)")
    p.add_argument(
        "--distance",
        default="Cosine",
        choices=["Cosine", "Euclid", "Dot"],
        help="Distanzmetrik",
    )
    args = p.parse_args()

    # All helpers read the module-level URL, so publish the CLI value there.
    QDRANT_URL = args.qdrant_url

    chunks = f"{args.prefix}_chunks"
    notes = f"{args.prefix}_notes"
    edges = f"{args.prefix}_edges"

    # 1) Collections
    create_collection(chunks, args.dim, args.distance)
    create_collection(notes, args.dim, args.distance)
    create_collection(edges, 1, args.distance)  # dummy vector

    # 2) Payload indexes
    # <prefix>_chunks
    for f in ["note_id", "Status", "Typ", "title", "path"]:
        keyword_index(chunks, f)
    for f in ["tags", "Rolle", "links"]:
        keyword_index(chunks, f)
    text_index(chunks, "text")

    # <prefix>_notes
    for f in ["note_id", "title", "path", "Typ", "Status"]:
        keyword_index(notes, f)
    for f in ["tags", "Rolle"]:
        keyword_index(notes, f)

    # <prefix>_edges
    for f in ["src_note_id", "dst_note_id", "src_chunk_id", "dst_chunk_id", "link_text", "relation"]:
        keyword_index(edges, f)

    # 3) Overview of the collections now present on the server
    coll = api("GET", "/collections").json().get("result", {}).get("collections", [])
    print("\n[Info] Collections vorhanden:")
    print(json.dumps(coll, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Top-level boundary: report any failure on stderr and exit non-zero.
        print(f"[ERROR] {e}", file=sys.stderr)
        sys.exit(1)