scripts/setup_mindnet_collections.py hinzugefügt
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 2s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 2s
This commit is contained in:
parent
29682d870b
commit
a5260a2aad
100
scripts/setup_mindnet_collections.py
Normal file
100
scripts/setup_mindnet_collections.py
Normal file
|
|
@ -0,0 +1,100 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Richtet Qdrant-Collections für dein Mindnet-Projekt ein (idempotent).
|
||||||
|
|
||||||
|
Erzeugt:
|
||||||
|
- mindnet_chunks (size=384, distance=Cosine) -> semantische Suche über Text-Chunks
|
||||||
|
- mindnet_notes (size=384, distance=Cosine) -> Notizebene / Facettierung
|
||||||
|
- mindnet_edges (size=1, distance=Cosine) -> explizite Links (Dummy-Vektor; Filter via Payload)
|
||||||
|
|
||||||
|
Legt sinnvolle Payload-Indizes an (keyword/text).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import argparse
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# Base URL of the Qdrant REST API; the QDRANT_URL environment variable
# overrides the local default.
QDRANT_URL = os.getenv("QDRANT_URL", "http://127.0.0.1:6333")
|
||||||
|
|
||||||
|
def api(method: str, path: str, **kwargs) -> requests.Response:
    """Send one HTTP request to the Qdrant REST API and return the response.

    Args:
        method: HTTP verb, e.g. ``"GET"`` or ``"PUT"``.
        path: Path appended to ``QDRANT_URL`` (expected to start with ``/``).
        **kwargs: Forwarded unchanged to ``requests.request`` (e.g. ``json=``).

    Returns:
        The response object of a successful request.

    Raises:
        RuntimeError: If the response status is not OK; the message carries
            the method, URL, status code and response body.
    """
    base = QDRANT_URL.rstrip("/")
    url = f"{base}{path}"
    response = requests.request(method, url, timeout=15, **kwargs)
    if response.ok:
        return response
    raise RuntimeError(f"{method} {url} -> {response.status_code} {response.text}")
|
||||||
|
|
||||||
|
def exists(collection: str) -> bool:
    """Report whether a collection with the given name exists in Qdrant.

    The request is issued directly instead of through a helper that raises on
    every non-2xx answer: Qdrant replies with 404 for an unknown collection,
    and that is the normal "does not exist yet" case on a first run, not an
    error. Any successful answer counts as "exists", regardless of the
    collection's health status, so an existing collection is never re-created.

    Args:
        collection: Name of the collection to look up.

    Returns:
        True if the collection exists, False if Qdrant answers 404.

    Raises:
        RuntimeError: For any other non-successful HTTP status.
    """
    url = QDRANT_URL.rstrip("/") + f"/collections/{collection}"
    r = requests.get(url, timeout=15)
    if r.status_code == 404:
        return False
    if not r.ok:
        raise RuntimeError(f"GET {url} -> {r.status_code} {r.text}")
    return True
|
||||||
|
|
||||||
|
def create_collection(collection: str, size: int, distance: str) -> None:
    """Create a collection unless ``exists()`` reports it as already present.

    Args:
        collection: Name of the collection.
        size: Vector dimension sent as ``vectors.size``.
        distance: Distance metric sent as ``vectors.distance``.
    """
    if not exists(collection):
        vector_config = {"size": size, "distance": distance}
        api("PUT", f"/collections/{collection}", json={"vectors": vector_config})
        print(f"[+] Collection {collection} angelegt (size={size}, distance={distance}).")
        return
    print(f"[=] {collection} existiert bereits.")
|
||||||
|
|
||||||
|
def keyword_index(collection: str, field: str) -> None:
    """Request a ``keyword`` payload index on one field of a collection.

    Args:
        collection: Name of the collection.
        field: Payload field to index.
    """
    endpoint = f"/collections/{collection}/index"
    body = {"field_name": field, "field_schema": "keyword"}
    api("PUT", endpoint, json=body)
    print(f"[+] keyword-Index: {collection}.{field}")
|
||||||
|
|
||||||
|
def text_index(collection: str, field: str = "text") -> None:
    """Request a full-text (``text``) payload index on one field of a collection.

    Args:
        collection: Name of the collection.
        field: Payload field to index; defaults to ``"text"``.
    """
    endpoint = f"/collections/{collection}/index"
    body = {"field_name": field, "field_schema": {"type": "text"}}
    api("PUT", endpoint, json=body)
    print(f"[+] text-Index: {collection}.{field}")
|
||||||
|
|
||||||
|
def main():
    """Create the chunks, notes and edges collections plus their payload indexes.

    Reads ``--qdrant-url``, ``--prefix``, ``--dim`` and ``--distance`` from the
    command line, stores the chosen URL in the module-level ``QDRANT_URL``,
    creates ``<prefix>_chunks``, ``<prefix>_notes`` and ``<prefix>_edges``,
    requests the payload indexes and finally prints the list of collections
    reported by the server.

    Raises:
        RuntimeError: Propagated from the HTTP helpers when a request fails.
    """
    # Must come before the first use of QDRANT_URL in this function: Python
    # rejects a ``global`` declaration that follows a reference to the name
    # ("name ... is used prior to global declaration").
    global QDRANT_URL

    p = argparse.ArgumentParser()
    p.add_argument("--qdrant-url", default=QDRANT_URL, help="z.B. http://127.0.0.1:6333")
    p.add_argument("--prefix", default="mindnet", help="Präfix für Collections")
    p.add_argument("--dim", type=int, default=384, help="Embedding-Dimension (MiniLM: 384)")
    p.add_argument("--distance", default="Cosine", choices=["Cosine", "Euclid", "Dot"],
                   help="Distanzmetrik")
    args = p.parse_args()

    # The request helpers read the module-level URL, so the CLI value is
    # written back to it.
    QDRANT_URL = args.qdrant_url

    chunks = f"{args.prefix}_chunks"
    notes = f"{args.prefix}_notes"
    edges = f"{args.prefix}_edges"

    # 1) Collections
    create_collection(chunks, args.dim, args.distance)
    create_collection(notes, args.dim, args.distance)
    create_collection(edges, 1, args.distance)  # dummy vector of size 1

    # 2) Payload indexes (field order kept as in the original two-loop form)
    # chunks collection
    for f in ["note_id", "Status", "Typ", "title", "path", "tags", "Rolle", "links"]:
        keyword_index(chunks, f)
    text_index(chunks, "text")

    # notes collection
    for f in ["note_id", "title", "path", "Typ", "Status", "tags", "Rolle"]:
        keyword_index(notes, f)

    # edges collection
    for f in ["src_note_id", "dst_note_id", "src_chunk_id", "dst_chunk_id", "link_text", "relation"]:
        keyword_index(edges, f)

    # 3) Overview of the collections the server now reports
    coll = api("GET", "/collections").json().get("result", {}).get("collections", [])
    print("\n[Info] Collections vorhanden:")
    print(json.dumps(coll, indent=2, ensure_ascii=False))
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: report any failure on stderr and exit with status 1.
    try:
        main()
    except Exception as err:
        sys.stderr.write(f"[ERROR] {err}\n")
        raise SystemExit(1)
|
||||||
Loading…
Reference in New Issue
Block a user