All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
187 lines
6.3 KiB
Python
187 lines
6.3 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
FILE: scripts/setup_mindnet_collections.py
|
||
VERSION: 2.1.0 (2025-12-15)
|
||
STATUS: Active
|
||
COMPATIBILITY: v2.9.1 (Post-WP14/WP-15b)
|
||
|
||
Zweck:
|
||
-------
|
||
Richtet die Qdrant-Collections für das mindnet-Projekt ein.
|
||
Legt Collections und Payload-Indizes an (idempotent).
|
||
|
||
Funktionsweise:
|
||
---------------
|
||
1. Prüft Qdrant-Verfügbarkeit (optional /ready Endpoint)
|
||
2. Legt drei Collections an:
|
||
- {prefix}_chunks: Semantische Suche über Text-Chunks (Vektor: dim/Cosine)
|
||
- {prefix}_notes: Metadaten pro Notiz (Vektor: dim/Cosine)
|
||
- {prefix}_edges: Link-Kanten (Dummy-Vektor size=1, Filter über Payload)
|
||
3. Richtet Payload-Indizes ein:
|
||
- Keyword-Indizes für häufige Filter-Felder
|
||
- Text-Index für Volltextsuche (chunks.text)
|
||
|
||
Ergebnis-Interpretation:
|
||
------------------------
|
||
- Ausgabe: Status pro Collection ([+] angelegt, [=] existiert bereits)
|
||
- Abschluss: JSON-Liste aller vorhandenen Collections
|
||
- Exit-Code 0: Erfolgreich
|
||
- Exit-Code 1: Fehler (z.B. Qdrant nicht erreichbar)
|
||
|
||
Verwendung:
|
||
-----------
|
||
- Initial-Setup einer neuen mindnet-Instanz
|
||
- Nach Qdrant-Reset oder Migration
|
||
- Validierung der Collection-Struktur
|
||
|
||
Hinweise:
|
||
---------
|
||
- Idempotent: Überspringt existierende Collections
|
||
- Nutzt HTTP-API direkt (kein qdrant-client)
|
||
- Legt nur Basis-Indizes an (erweiterte Indizes via ensure_payload_indexes)
|
||
|
||
Aufruf:
|
||
-------
|
||
python3 -m scripts.setup_mindnet_collections --qdrant-url http://127.0.0.1:6333 --prefix mindnet --dim 768
|
||
python3 -m scripts.setup_mindnet_collections --prefix mindnet_dev --dim 768 --distance Cosine
|
||
|
||
Parameter:
|
||
----------
|
||
--qdrant-url URL Qdrant-URL (Default: http://127.0.0.1:6333)
|
||
--prefix TEXT Collection-Präfix (Default: mindnet)
|
||
--dim INT Vektor-Dimension (Default: 384, empfohlen: 768 für nomic)
|
||
--distance METRIC Cosine | Euclid | Dot (Default: Cosine)
|
||
|
||
Änderungen:
|
||
-----------
|
||
v2.1.0 (2025-12-15): Kompatibilität mit WP-14 Modularisierung
|
||
- Dokumentation aktualisiert
|
||
v1.0.0: Initial Release
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import sys
|
||
from dataclasses import dataclass
|
||
from typing import Any, Dict
|
||
|
||
import requests
|
||
|
||
|
||
@dataclass
class QdrantHTTP:
    """Minimal Qdrant REST client built directly on ``requests``.

    Attributes:
        base_url: Root URL of the Qdrant instance; a trailing slash is tolerated.
    """

    base_url: str

    def _url(self, path: str) -> str:
        """Return ``base_url`` (without trailing slashes) concatenated with ``path``."""
        return self.base_url.rstrip("/") + path

    def rq(
        self,
        method: str,
        path: str,
        *,
        allow_status: tuple[int, ...] = (),
        **kwargs,
    ) -> requests.Response:
        """Send one HTTP request to Qdrant and return the response.

        Args:
            method: HTTP verb, e.g. ``"GET"`` or ``"PUT"``.
            path: Path appended to ``base_url``.
            allow_status: Non-2xx status codes that should be returned to the
                caller instead of raising (empty by default, i.e. every
                non-OK response raises as before).
            **kwargs: Forwarded unchanged to ``requests.request``.

        Raises:
            RuntimeError: If the response is not OK and its status code is
                not listed in ``allow_status``.
        """
        url = self._url(path)
        r = requests.request(method, url, timeout=20, **kwargs)
        if not r.ok and r.status_code not in allow_status:
            raise RuntimeError(f"{method} {url} -> {r.status_code} {r.text}")
        return r

    def collection_exists(self, name: str) -> bool:
        """Return True if the collection ``name`` exists, False if Qdrant reports 404.

        Existence is decided by the HTTP status alone: a collection whose
        health status is not "green" (e.g. still optimizing) exists just as
        much, and trying to re-create it would fail. Any other error status
        still raises ``RuntimeError`` via :meth:`rq`.
        """
        r = self.rq("GET", f"/collections/{name}", allow_status=(404,))
        return r.status_code != 404

    def create_collection(self, name: str, size: int, distance: str = "Cosine") -> None:
        """Create collection ``name`` with a single vector config; skip it if present.

        Args:
            name: Collection name.
            size: Vector dimension.
            distance: Distance metric name sent to Qdrant.
        """
        if self.collection_exists(name):
            print(f"[=] Collection '{name}' existiert bereits – überspringe Anlage.")
            return
        payload = {"vectors": {"size": size, "distance": distance}}
        self.rq("PUT", f"/collections/{name}", json=payload)
        print(f"[+] Collection '{name}' angelegt (size={size}, distance={distance}).")

    def create_keyword_index(self, collection: str, field: str) -> None:
        """Request a ``keyword`` payload index on ``collection.field``."""
        payload = {"field_name": field, "field_schema": "keyword"}
        self.rq("PUT", f"/collections/{collection}/index", json=payload)
        print(f"[+] Index keyword on {collection}.{field}")

    def create_text_index(self, collection: str, field: str = "text") -> None:
        """Request a full-text payload index on ``collection.field``."""
        payload = {"field_name": field, "field_schema": {"type": "text"}}
        self.rq("PUT", f"/collections/{collection}/index", json=payload)
        print(f"[+] Index text on {collection}.{field}")

    def list_collections(self) -> list[Dict[str, Any]]:
        """Return the ``result.collections`` list from ``GET /collections``.

        Falls back to an empty list when the response lacks those keys.
        """
        r = self.rq("GET", "/collections")
        return r.json().get("result", {}).get("collections", [])
|
||
|
||
|
||
def setup_mindnet_collections(q: QdrantHTTP, prefix: str, dim: int, distance: str) -> None:
    """Create the ``{prefix}_chunks/_notes/_edges`` collections and their payload indexes.

    Args:
        q: Client used for every collection and index call.
        prefix: Name prefix shared by the three collections.
        dim: Vector size for the chunks and notes collections.
        distance: Distance metric passed to all three collections.
    """
    chunks, notes, edges = (f"{prefix}_{suffix}" for suffix in ("chunks", "notes", "edges"))

    # 1) Collections: edges only carry a 1-dimensional dummy vector.
    for coll_name, vec_size in ((chunks, dim), (notes, dim), (edges, 1)):
        q.create_collection(coll_name, size=vec_size, distance=distance)

    # 2) Payload indexes.
    # chunks: frequent filter fields plus a full-text index on the text field.
    chunk_keywords = ("note_id", "Status", "Typ", "title", "path", "tags", "Rolle", "links")
    for field_name in chunk_keywords:
        q.create_keyword_index(chunks, field_name)
    q.create_text_index(chunks, "text")

    # notes: per-note metadata.
    note_keywords = ("note_id", "title", "path", "Typ", "Status", "tags", "Rolle")
    for field_name in note_keywords:
        q.create_keyword_index(notes, field_name)

    # edges: graph links, filter-only.
    edge_keywords = (
        "src_note_id",
        "dst_note_id",
        "src_chunk_id",
        "dst_chunk_id",
        "link_text",
        "relation",
    )
    for field_name in edge_keywords:
        q.create_keyword_index(edges, field_name)
|
||
|
||
|
||
def parse_args() -> argparse.Namespace:
    """Parse the command-line options from ``sys.argv``.

    Returns:
        Namespace with ``qdrant_url``, ``prefix``, ``dim`` (int) and
        ``distance`` (one of Cosine, Euclid, Dot).
    """
    parser = argparse.ArgumentParser()
    # One (flag, add_argument keyword arguments) entry per supported option.
    option_table = (
        ("--qdrant-url", {"default": "http://127.0.0.1:6333", "help": "z.B. http://127.0.0.1:6333"}),
        ("--prefix", {"default": "mindnet", "help": "Collection-Präfix (default: mindnet)"}),
        ("--dim", {"type": int, "default": 384, "help": "Embedding-Dimension (z.B. 384 für MiniLM)"}),
        (
            "--distance",
            {"default": "Cosine", "choices": ["Cosine", "Euclid", "Dot"], "help": "Distanzmetrik"},
        ),
    )
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
|
||
|
||
|
||
def main() -> int:
    """Run the collection setup and report the result.

    Returns:
        0 when setup and the final collection listing succeed, 1 when any
        step other than the optional readiness probe raises.
    """
    opts = parse_args()
    client = QdrantHTTP(opts.qdrant_url)

    try:
        # Readiness probe is best-effort: a failure is only reported as a warning.
        try:
            ready_text = client.rq("GET", "/ready").text.strip()
            if ready_text:
                print(f"[ready] {ready_text}")
        except Exception as probe_err:
            print(f"[warn] /ready nicht erreichbar oder kein Text: {probe_err}")

        setup_mindnet_collections(
            client,
            prefix=opts.prefix,
            dim=opts.dim,
            distance=opts.distance,
        )

        existing = client.list_collections()
        print("\n[Info] Collections vorhanden:")
        print(json.dumps(existing, indent=2, ensure_ascii=False))
    except Exception as err:
        # Top-level boundary: report the error on stderr and signal failure.
        print(f"[ERROR] {err}", file=sys.stderr)
        return 1
    return 0
|
||
|
||
|
||
# Script entry point: the return value of main() becomes the process exit code.
if __name__ == "__main__":
    sys.exit(main())
|