app/core/qdrant.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s

This commit is contained in:
Lars 2025-11-08 11:06:57 +01:00
parent 987b3c1770
commit 3d44de9d87

View File

@ -2,42 +2,49 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
""" """
Name: app/core/qdrant.py Name: app/core/qdrant.py
Version: v1.6.0 (2025-11-08) Version: v1.7.0 (2025-11-08)
Kurzbeschreibung: Kurzbeschreibung
Qdrant-Client & Collection-Setup für mindnet. Qdrant-Client & Collection-Setup für mindnet.
- Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren. - Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren.
- Edges-Collection nutzt 1D Dummy-Vektor. - Edges-Collection nutzt 1D Dummy-Vektor (kein Such-Usecase).
- ensure_payload_indexes(...) legt sinnvolle Payload-Indizes an. - Legt sinnvolle Payload-Indizes an.
- Liefert zähl-/list-/fetch-Helfer, die von Importer/Exporter/Tests genutzt werden.
NEU / Änderungen: Änderungsverlauf (Relevantes)
v1.5.0: v1.5.0:
* ensure_collections_for_prefix(...) Wrapper für legacy-Importer * ensure_collections_for_prefix(...) Wrapper für legacy-Importer
* count_points(client, collection) stabile Zählfunktion (mit Fallback) * count_points(client, collection) stabile Zählfunktion (mit Fallback)
* get_counts_for_prefix(...) Summary über alle drei Collections * get_counts_for_prefix(...) Summary über alle drei Collections
* truncate_collections(...) löscht *alle Punkte* in den Collections * truncate_collections(...) alle Punkte löschen (Collections bleiben)
v1.6.0: v1.6.0:
* list_note_ids(client, notes_collection) liefert alle payload.note_id-Werte * list_note_ids(client, notes_collection) alle payload.note_id (unique)
(wird von import_markdown.py v3.9.0 erwartet) v1.7.0:
* fetch_one_note(client, notes_collection, note_id, with_vectors=False)
von import_markdown v3.9.0 erwartet; liefert (point_id, payload, vector?)
Aufruf: Öffentliche API
from app.core.qdrant import ( from app.core.qdrant import (
QdrantConfig, get_client, QdrantConfig, get_client,
ensure_collections, ensure_payload_indexes, ensure_collections, ensure_payload_indexes,
ensure_collections_for_prefix, count_points, ensure_collections_for_prefix, collection_names,
collection_names, get_counts_for_prefix, truncate_collections, count_points, get_counts_for_prefix, truncate_collections,
list_note_ids, list_note_ids, fetch_one_note,
) )
""" """
from __future__ import annotations from __future__ import annotations
import os import os
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Tuple, Dict, List from typing import Optional, Tuple, Dict, List, Any
from qdrant_client import QdrantClient from qdrant_client import QdrantClient
from qdrant_client.http import models as rest from qdrant_client.http import models as rest
# ---------------------------------------------------------
# Konfiguration
# ---------------------------------------------------------
@dataclass @dataclass
class QdrantConfig: class QdrantConfig:
url: str url: str
@ -62,9 +69,9 @@ def get_client(cfg: QdrantConfig) -> QdrantClient:
return QdrantClient(url=cfg.url, api_key=cfg.api_key) return QdrantClient(url=cfg.url, api_key=cfg.api_key)
# ------------------------------- # ---------------------------------------------------------
# Collection-Erstellung # Collection-Erstellung
# ------------------------------- # ---------------------------------------------------------
def _create_notes(client: QdrantClient, name: str, dim: int) -> None: def _create_notes(client: QdrantClient, name: str, dim: int) -> None:
if not client.collection_exists(name): if not client.collection_exists(name):
client.create_collection( client.create_collection(
@ -85,7 +92,7 @@ def _create_edges(client: QdrantClient, name: str) -> None:
if not client.collection_exists(name): if not client.collection_exists(name):
client.create_collection( client.create_collection(
collection_name=name, collection_name=name,
vectors_config=rest.VectorParams(size=1, distance=rest.Distance.DOT), # 1D-Dummy vectors_config=rest.VectorParams(size=1, distance=rest.Distance.DOT), # 1D Dummy
) )
@ -98,6 +105,7 @@ def ensure_collections(client: QdrantClient, prefix: str, dim: int, destructive:
_create_chunks(client, chunks, dim) _create_chunks(client, chunks, dim)
if client.collection_exists(edges): if client.collection_exists(edges):
# Robustheit: Prüfen, ob eine VectorConfig existiert; falls nicht → optional neu erstellen
try: try:
info = client.get_collection(edges) info = client.get_collection(edges)
vectors_cfg = getattr(getattr(info.result, "config", None), "params", None) vectors_cfg = getattr(getattr(info.result, "config", None), "params", None)
@ -118,18 +126,14 @@ def collection_names(prefix: str) -> Tuple[str, str, str]:
return (f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges") return (f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges")
# ------------------------------- # ---------------------------------------------------------
# Payload-Indexing # Payload-Indizes
# ------------------------------- # ---------------------------------------------------------
def _safe_create_index(client: QdrantClient, col: str, field: str, schema: rest.PayloadSchemaType): def _safe_create_index(client: QdrantClient, col: str, field: str, schema: rest.PayloadSchemaType) -> None:
try: try:
client.create_payload_index( client.create_payload_index(collection_name=col, field_name=field, field_schema=schema)
collection_name=col,
field_name=field,
field_schema=schema,
)
except Exception: except Exception:
# bereits vorhanden oder nicht unterstütztes Schema → ignorieren # bereits vorhanden oder Schema nicht unterstützt → ignorieren
pass pass
@ -146,13 +150,14 @@ def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None:
_safe_create_index(client, edges, f, rest.PayloadSchemaType.KEYWORD) _safe_create_index(client, edges, f, rest.PayloadSchemaType.KEYWORD)
# ------------------------------- # ---------------------------------------------------------
# NEU: Abwärtskompatible Helfer # Zähl-/Listen-/Maintenance-Helfer
# ------------------------------- # ---------------------------------------------------------
def ensure_collections_for_prefix(client: QdrantClient, prefix: str, dim: int, destructive: bool = False) -> Tuple[str, str, str]: def ensure_collections_for_prefix(
client: QdrantClient, prefix: str, dim: int, destructive: bool = False
) -> Tuple[str, str, str]:
""" """
Legacy-Wrapper, damit ältere Skripte (Importer bis v3.7.x) funktionieren. Legacy-Wrapper (Kompatibilität zu älteren Skripten).
Gibt die Collection-Namen zurück.
""" """
ensure_collections(client, prefix, dim, destructive=destructive) ensure_collections(client, prefix, dim, destructive=destructive)
ensure_payload_indexes(client, prefix) ensure_payload_indexes(client, prefix)
@ -161,9 +166,9 @@ def ensure_collections_for_prefix(client: QdrantClient, prefix: str, dim: int, d
def count_points(client: QdrantClient, collection: str) -> int: def count_points(client: QdrantClient, collection: str) -> int:
""" """
Zähle Punkte in einer Collection robust: Zähle Punkte robust:
1) bevorzugt client.count(..., exact=True) 1) bevorzugt count(exact=True)
2) Fallback: Scrollen ohne Filter und mitzählen 2) Fallback via Scroll
""" """
try: try:
res = client.count(collection_name=collection, count_filter=None, exact=True) res = client.count(collection_name=collection, count_filter=None, exact=True)
@ -175,7 +180,6 @@ def count_points(client: QdrantClient, collection: str) -> int:
except Exception: except Exception:
pass pass
# Fallback via Scroll
total = 0 total = 0
next_page = None next_page = None
while True: while True:
@ -203,52 +207,31 @@ def get_counts_for_prefix(client: QdrantClient, prefix: str) -> Dict[str, int]:
def truncate_collections(client: QdrantClient, prefix: str) -> None: def truncate_collections(client: QdrantClient, prefix: str) -> None:
""" """
Löscht *alle Punkte* (nicht die Collections selber) für {prefix}. Löscht alle Punkte (Collections bleiben bestehen).
Entspricht funktional einem "truncate" in deinem Reset-Skript.
""" """
for col in collection_names(prefix): for col in collection_names(prefix):
try: try:
client.delete( client.delete(
collection_name=col, collection_name=col,
points_selector=rest.FilterSelector( points_selector=rest.FilterSelector(filter=rest.Filter(must=[])),
filter=rest.Filter(must=[]) # leeres Filter => alle Punkte
),
wait=True, wait=True,
) )
except Exception: except Exception:
# Fallback: Collection ggf. leer/nicht vorhanden → ignorieren
pass pass
# -------------------------------
# NEU v1.6.0: list_note_ids
# -------------------------------
def list_note_ids(client: QdrantClient, notes_collection: str, limit: int = 100000) -> List[str]: def list_note_ids(client: QdrantClient, notes_collection: str, limit: int = 100000) -> List[str]:
""" """
Liefert alle payload.note_id aus der angegebenen Notes-Collection. Liste aller payload.note_id (unique) aus der Notes-Collection.
- Wird von import_markdown.py (>= v3.9.0) verwendet, z.B. für Baseline-/Idempotenz-Checks.
- Robust gegen fehlende Felder: ignoriert Punkte ohne 'note_id'.
Args:
client: QdrantClient
notes_collection: Name der Notes-Collection (z.B. 'mindnet_notes')
limit: harte Obergrenze für die Anzahl der zurückzugebenden IDs
Returns:
Liste der note_id-Strings (ohne Duplikate, Reihenfolge nicht garantiert).
""" """
out: List[str] = [] out: List[str] = []
seen = set() seen = set()
next_page = None next_page = None
fetched = 0 fetched = 0
flt = None # kein Filter → alle Punkte
while True: while True:
# scroll_filter in neueren Clients; ältere akzeptieren 'scroll_filter' oder 'filter'
points, next_page = client.scroll( points, next_page = client.scroll(
collection_name=notes_collection, collection_name=notes_collection,
scroll_filter=flt, scroll_filter=None,
limit=min(512, max(1, limit - fetched)), limit=min(512, max(1, limit - fetched)),
with_payload=True, with_payload=True,
with_vectors=False, with_vectors=False,
@ -256,7 +239,6 @@ def list_note_ids(client: QdrantClient, notes_collection: str, limit: int = 1000
) )
if not points: if not points:
break break
for p in points: for p in points:
pl = p.payload or {} pl = p.payload or {}
nid = pl.get("note_id") nid = pl.get("note_id")
@ -266,8 +248,58 @@ def list_note_ids(client: QdrantClient, notes_collection: str, limit: int = 1000
fetched += 1 fetched += 1
if fetched >= limit: if fetched >= limit:
return out return out
if next_page is None: if next_page is None:
break break
return out return out
# ---------------------------------------------------------
# Fetch-Helfer (NEU für Importer v3.9.0)
# ---------------------------------------------------------
def _match_value(value: Any):
    """
    Build a ``rest.MatchValue`` condition for ``value``.

    The keyword form ``MatchValue(value=...)`` is tried first. If that call
    raises ``TypeError``, the positional form ``MatchValue(value)`` is used
    instead (kept for constructors that do not accept the keyword).

    Args:
        value: The payload value the condition should match.

    Returns:
        The constructed ``rest.MatchValue`` instance.
    """
    try:
        matcher = rest.MatchValue(value=value)
    except TypeError:
        # Older signature: positional argument only.
        matcher = rest.MatchValue(value)
    return matcher
def fetch_one_note(
    client: QdrantClient,
    notes_collection: str,
    note_id: str,
    with_vectors: bool = False,
) -> Optional[Tuple[str, Dict[str, Any], Optional[Any]]]:
    """
    Fetch a single note point by its ``payload.note_id``.

    Issues one filtered scroll request (``limit=1``) against
    ``notes_collection`` and returns the first hit.

    Args:
        client: Qdrant client used for the scroll request.
        notes_collection: Name of the notes collection to query.
        note_id: Value that the ``note_id`` payload field must match.
        with_vectors: Forwarded to the scroll call; when true, the vector of
            the hit is looked up and returned as the third tuple element.

    Returns:
        ``None`` when the scroll yields no points. Otherwise a tuple
        ``(point_id, payload, vector)`` where ``point_id`` is the stringified
        point id (empty string if the point carries no id), ``payload`` is
        the point payload (empty dict if missing), and ``vector`` is ``None``
        whenever ``with_vectors`` is false or no vector could be found.
    """
    note_filter = rest.Filter(
        must=[rest.FieldCondition(key="note_id", match=_match_value(note_id))]
    )
    hits, _next_offset = client.scroll(
        collection_name=notes_collection,
        scroll_filter=note_filter,
        limit=1,
        with_payload=True,
        with_vectors=with_vectors,
    )
    if not hits:
        return None

    point = hits[0]
    raw_id = getattr(point, "id", None)
    point_id = "" if raw_id is None else str(raw_id)
    payload = point.payload or {}

    if not with_vectors:
        return (point_id, payload, None)

    # Read the vector defensively: the point object may not expose a
    # ``vector`` attribute (the original author notes the layout differs
    # slightly between client versions).
    vector = getattr(point, "vector", None)
    if vector is None:
        # Rare fallback: a vector mirrored into the payload under "_vector".
        vector = payload.get("_vector")
    return (point_id, payload, vector)