#!/usr/bin/env python3
# Provenance: extracted from git patch a5260a2aad49ae11ae2ef6fe8593f6579d05689d
# (author: Lars, Mon, 1 Sep 2025 14:50:26 +0200), subject
# "scripts/setup_mindnet_collections.py hinzugefügt", which adds this file as
# scripts/setup_mindnet_collections.py.
"""
Set up the Qdrant collections for the Mindnet project (idempotent).

Creates (names shown for the default ``--prefix mindnet``):
- mindnet_chunks (size=384, distance=Cosine) -> semantic search over text chunks
- mindnet_notes  (size=384, distance=Cosine) -> note level / faceting
- mindnet_edges  (size=1,   distance=Cosine) -> explicit links (dummy vector;
  filtering happens via the payload)

Also creates payload indexes (keyword/text) on the fields used for filtering.
"""

import os
import sys
import json
import argparse
import requests

# Base URL of the Qdrant REST API; main() overrides it from --qdrant-url.
QDRANT_URL = os.environ.get("QDRANT_URL", "http://127.0.0.1:6333")

# Per-request timeout in seconds, applied unless the caller passes its own.
REQUEST_TIMEOUT = 15


def _send(method: str, path: str, **kwargs):
    """Send one HTTP request to Qdrant and return ``(url, response)``.

    Does not inspect the status code; callers decide which statuses are
    acceptable.
    """
    url = QDRANT_URL.rstrip("/") + path
    kwargs.setdefault("timeout", REQUEST_TIMEOUT)
    return url, requests.request(method, url, **kwargs)


def _http_error(method: str, url: str, response) -> RuntimeError:
    """Build the RuntimeError used for every failed Qdrant request."""
    return RuntimeError(f"{method} {url} -> {response.status_code} {response.text}")


def api(method: str, path: str, **kwargs) -> requests.Response:
    """Call the Qdrant REST API and return the response.

    Args:
        method: HTTP method, e.g. ``"GET"`` or ``"PUT"``.
        path: Path appended to ``QDRANT_URL`` (must start with ``/``).
        **kwargs: Passed through to ``requests.request`` (e.g. ``json=``).

    Raises:
        RuntimeError: If the response status is not a success status.
    """
    url, r = _send(method, path, **kwargs)
    if not r.ok:
        raise _http_error(method, url, r)
    return r


def exists(collection: str) -> bool:
    """Return True if the collection exists, False if Qdrant answers 404.

    The request deliberately bypasses ``api()``: a 404 is the expected answer
    for a collection that has not been created yet and must not abort the
    script. Any successful response counts as "exists" regardless of the
    reported status colour, because a non-green collection still cannot be
    created a second time.

    Raises:
        RuntimeError: For any failure status other than 404.
    """
    url, r = _send("GET", f"/collections/{collection}")
    if r.status_code == 404:
        return False
    if not r.ok:
        raise _http_error("GET", url, r)
    return True


def create_collection(collection: str, size: int, distance: str) -> None:
    """Create the collection with a single vector config unless it exists."""
    if exists(collection):
        print(f"[=] {collection} existiert bereits.")
        return
    payload = {"vectors": {"size": size, "distance": distance}}
    api("PUT", f"/collections/{collection}", json=payload)
    print(f"[+] Collection {collection} angelegt (size={size}, distance={distance}).")


def keyword_index(collection: str, field: str) -> None:
    """Create a ``keyword`` payload index on ``field`` of ``collection``."""
    api("PUT", f"/collections/{collection}/index",
        json={"field_name": field, "field_schema": "keyword"})
    print(f"[+] keyword-Index: {collection}.{field}")


def text_index(collection: str, field: str = "text") -> None:
    """Create a full-text payload index on ``field`` of ``collection``."""
    api("PUT", f"/collections/{collection}/index",
        json={"field_name": field, "field_schema": {"type": "text"}})
    print(f"[+] text-Index: {collection}.{field}")


def main() -> None:
    """Parse CLI arguments, create the collections and their payload indexes."""
    # Must precede every use of QDRANT_URL in this function: reading the name
    # before the global statement is a SyntaxError.
    global QDRANT_URL

    p = argparse.ArgumentParser()
    p.add_argument("--qdrant-url", default=QDRANT_URL, help="z.B. http://127.0.0.1:6333")
    p.add_argument("--prefix", default="mindnet", help="Präfix für Collections")
    p.add_argument("--dim", type=int, default=384, help="Embedding-Dimension (MiniLM: 384)")
    p.add_argument("--distance", default="Cosine", choices=["Cosine", "Euclid", "Dot"],
                   help="Distanzmetrik")
    args = p.parse_args()

    # api()/exists() read the module-level URL, so publish the CLI value there.
    QDRANT_URL = args.qdrant_url

    chunks = f"{args.prefix}_chunks"
    notes = f"{args.prefix}_notes"
    edges = f"{args.prefix}_edges"

    # 1) Collections
    create_collection(chunks, args.dim, args.distance)
    create_collection(notes, args.dim, args.distance)
    create_collection(edges, 1, args.distance)  # dummy vector

    # 2) Payload indexes
    # chunks collection
    for field in ["note_id", "Status", "Typ", "title", "path"]:
        keyword_index(chunks, field)
    for field in ["tags", "Rolle", "links"]:
        keyword_index(chunks, field)
    text_index(chunks, "text")

    # notes collection
    for field in ["note_id", "title", "path", "Typ", "Status"]:
        keyword_index(notes, field)
    for field in ["tags", "Rolle"]:
        keyword_index(notes, field)

    # edges collection
    for field in ["src_note_id", "dst_note_id", "src_chunk_id", "dst_chunk_id",
                  "link_text", "relation"]:
        keyword_index(edges, field)

    # 3) Overview of all collections now present on the server
    coll = api("GET", "/collections").json().get("result", {}).get("collections", [])
    print("\n[Info] Collections vorhanden:")
    print(json.dumps(coll, indent=2, ensure_ascii=False))


if __name__ == "__main__":
    # Top-level boundary: report any failure on stderr and exit non-zero.
    try:
        main()
    except Exception as e:
        print(f"[ERROR] {e}", file=sys.stderr)
        sys.exit(1)