app/core/qdrant.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
987b3c1770
commit
3d44de9d87
|
|
@ -2,42 +2,49 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""
|
"""
|
||||||
Name: app/core/qdrant.py
|
Name: app/core/qdrant.py
|
||||||
Version: v1.6.0 (2025-11-08)
|
Version: v1.7.0 (2025-11-08)
|
||||||
|
|
||||||
Kurzbeschreibung:
|
Kurzbeschreibung
|
||||||
Qdrant-Client & Collection-Setup für mindnet.
|
Qdrant-Client & Collection-Setup für mindnet.
|
||||||
- Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren.
|
- Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren.
|
||||||
- Edges-Collection nutzt 1D Dummy-Vektor.
|
- Edges-Collection nutzt 1D Dummy-Vektor (kein Such-Usecase).
|
||||||
- ensure_payload_indexes(...) legt sinnvolle Payload-Indizes an.
|
- Legt sinnvolle Payload-Indizes an.
|
||||||
|
- Liefert zähl-/list-/fetch-Helfer, die von Importer/Exporter/Tests genutzt werden.
|
||||||
|
|
||||||
NEU / Änderungen:
|
Änderungsverlauf (Relevantes)
|
||||||
v1.5.0:
|
v1.5.0:
|
||||||
* ensure_collections_for_prefix(...) → Wrapper für legacy-Importer
|
* ensure_collections_for_prefix(...) → Wrapper für legacy-Importer
|
||||||
* count_points(client, collection) → stabile Zählfunktion (mit Fallback)
|
* count_points(client, collection) → stabile Zählfunktion (mit Fallback)
|
||||||
* get_counts_for_prefix(...) → Summary über alle drei Collections
|
* get_counts_for_prefix(...) → Summary über alle drei Collections
|
||||||
* truncate_collections(...) → löscht *alle Punkte* in den Collections
|
* truncate_collections(...) → alle Punkte löschen (Collections bleiben)
|
||||||
v1.6.0:
|
v1.6.0:
|
||||||
* list_note_ids(client, notes_collection) → liefert alle payload.note_id-Werte
|
* list_note_ids(client, notes_collection) → alle payload.note_id (unique)
|
||||||
(wird von import_markdown.py v3.9.0 erwartet)
|
v1.7.0:
|
||||||
|
* fetch_one_note(client, notes_collection, note_id, with_vectors=False)
|
||||||
|
→ von import_markdown v3.9.0 erwartet; liefert (point_id, payload, vector?)
|
||||||
|
|
||||||
Aufruf:
|
Öffentliche API
|
||||||
from app.core.qdrant import (
|
from app.core.qdrant import (
|
||||||
QdrantConfig, get_client,
|
QdrantConfig, get_client,
|
||||||
ensure_collections, ensure_payload_indexes,
|
ensure_collections, ensure_payload_indexes,
|
||||||
ensure_collections_for_prefix, count_points,
|
ensure_collections_for_prefix, collection_names,
|
||||||
collection_names, get_counts_for_prefix, truncate_collections,
|
count_points, get_counts_for_prefix, truncate_collections,
|
||||||
list_note_ids,
|
list_note_ids, fetch_one_note,
|
||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Optional, Tuple, Dict, List
|
from typing import Optional, Tuple, Dict, List, Any
|
||||||
|
|
||||||
from qdrant_client import QdrantClient
|
from qdrant_client import QdrantClient
|
||||||
from qdrant_client.http import models as rest
|
from qdrant_client.http import models as rest
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Konfiguration
|
||||||
|
# ---------------------------------------------------------
|
||||||
@dataclass
|
@dataclass
|
||||||
class QdrantConfig:
|
class QdrantConfig:
|
||||||
url: str
|
url: str
|
||||||
|
|
@ -62,9 +69,9 @@ def get_client(cfg: QdrantConfig) -> QdrantClient:
|
||||||
return QdrantClient(url=cfg.url, api_key=cfg.api_key)
|
return QdrantClient(url=cfg.url, api_key=cfg.api_key)
|
||||||
|
|
||||||
|
|
||||||
# -------------------------------
|
# ---------------------------------------------------------
|
||||||
# Collection-Erstellung
|
# Collection-Erstellung
|
||||||
# -------------------------------
|
# ---------------------------------------------------------
|
||||||
def _create_notes(client: QdrantClient, name: str, dim: int) -> None:
|
def _create_notes(client: QdrantClient, name: str, dim: int) -> None:
|
||||||
if not client.collection_exists(name):
|
if not client.collection_exists(name):
|
||||||
client.create_collection(
|
client.create_collection(
|
||||||
|
|
@ -85,7 +92,7 @@ def _create_edges(client: QdrantClient, name: str) -> None:
|
||||||
if not client.collection_exists(name):
|
if not client.collection_exists(name):
|
||||||
client.create_collection(
|
client.create_collection(
|
||||||
collection_name=name,
|
collection_name=name,
|
||||||
vectors_config=rest.VectorParams(size=1, distance=rest.Distance.DOT), # 1D-Dummy
|
vectors_config=rest.VectorParams(size=1, distance=rest.Distance.DOT), # 1D Dummy
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -98,6 +105,7 @@ def ensure_collections(client: QdrantClient, prefix: str, dim: int, destructive:
|
||||||
_create_chunks(client, chunks, dim)
|
_create_chunks(client, chunks, dim)
|
||||||
|
|
||||||
if client.collection_exists(edges):
|
if client.collection_exists(edges):
|
||||||
|
# Robustheit: Prüfen, ob eine VectorConfig existiert; falls nicht → optional neu erstellen
|
||||||
try:
|
try:
|
||||||
info = client.get_collection(edges)
|
info = client.get_collection(edges)
|
||||||
vectors_cfg = getattr(getattr(info.result, "config", None), "params", None)
|
vectors_cfg = getattr(getattr(info.result, "config", None), "params", None)
|
||||||
|
|
@ -118,18 +126,14 @@ def collection_names(prefix: str) -> Tuple[str, str, str]:
|
||||||
return (f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges")
|
return (f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges")
|
||||||
|
|
||||||
|
|
||||||
# -------------------------------
|
# ---------------------------------------------------------
|
||||||
# Payload-Indexing
|
# Payload-Indizes
|
||||||
# -------------------------------
|
# ---------------------------------------------------------
|
||||||
def _safe_create_index(client: QdrantClient, col: str, field: str, schema: rest.PayloadSchemaType) -> None:
    """Create a payload index on ``field`` of collection ``col``, best effort.

    Args:
        client: Qdrant client used to issue the request.
        col: Name of the collection that receives the index.
        field: Payload field name to index.
        schema: Payload schema type passed through as ``field_schema``.

    Any exception raised by ``create_payload_index`` is swallowed on purpose
    (e.g. the index already exists or the schema is not supported).
    """
    index_args = {
        "collection_name": col,
        "field_name": field,
        "field_schema": schema,
    }
    try:
        client.create_payload_index(**index_args)
    except Exception:
        # Already present or unsupported schema -> deliberately ignored.
        return
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -146,13 +150,14 @@ def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None:
|
||||||
_safe_create_index(client, edges, f, rest.PayloadSchemaType.KEYWORD)
|
_safe_create_index(client, edges, f, rest.PayloadSchemaType.KEYWORD)
|
||||||
|
|
||||||
|
|
||||||
# -------------------------------
|
# ---------------------------------------------------------
|
||||||
# NEU: Abwärtskompatible Helfer
|
# Zähl-/Listen-/Maintenance-Helfer
|
||||||
# -------------------------------
|
# ---------------------------------------------------------
|
||||||
def ensure_collections_for_prefix(client: QdrantClient, prefix: str, dim: int, destructive: bool = False) -> Tuple[str, str, str]:
|
def ensure_collections_for_prefix(
|
||||||
|
client: QdrantClient, prefix: str, dim: int, destructive: bool = False
|
||||||
|
) -> Tuple[str, str, str]:
|
||||||
"""
|
"""
|
||||||
Legacy-Wrapper, damit ältere Skripte (Importer bis v3.7.x) funktionieren.
|
Legacy-Wrapper (Kompatibilität zu älteren Skripten).
|
||||||
Gibt die Collection-Namen zurück.
|
|
||||||
"""
|
"""
|
||||||
ensure_collections(client, prefix, dim, destructive=destructive)
|
ensure_collections(client, prefix, dim, destructive=destructive)
|
||||||
ensure_payload_indexes(client, prefix)
|
ensure_payload_indexes(client, prefix)
|
||||||
|
|
@ -161,9 +166,9 @@ def ensure_collections_for_prefix(client: QdrantClient, prefix: str, dim: int, d
|
||||||
|
|
||||||
def count_points(client: QdrantClient, collection: str) -> int:
|
def count_points(client: QdrantClient, collection: str) -> int:
|
||||||
"""
|
"""
|
||||||
Zähle Punkte in einer Collection robust:
|
Zähle Punkte robust:
|
||||||
1) bevorzugt client.count(..., exact=True)
|
1) bevorzugt count(exact=True)
|
||||||
2) Fallback: Scrollen ohne Filter und mitzählen
|
2) Fallback via Scroll
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
res = client.count(collection_name=collection, count_filter=None, exact=True)
|
res = client.count(collection_name=collection, count_filter=None, exact=True)
|
||||||
|
|
@ -175,7 +180,6 @@ def count_points(client: QdrantClient, collection: str) -> int:
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Fallback via Scroll
|
|
||||||
total = 0
|
total = 0
|
||||||
next_page = None
|
next_page = None
|
||||||
while True:
|
while True:
|
||||||
|
|
@ -203,52 +207,31 @@ def get_counts_for_prefix(client: QdrantClient, prefix: str) -> Dict[str, int]:
|
||||||
|
|
||||||
def truncate_collections(client: QdrantClient, prefix: str) -> None:
    """Delete all points from the collections of ``prefix``; keep the collections.

    Iterates over the names returned by ``collection_names(prefix)`` and
    issues one filtered delete per collection with an empty ``must`` filter.

    Args:
        client: Qdrant client used to issue the delete requests.
        prefix: Collection prefix passed to ``collection_names``.

    The operation stays best effort: a failure on one collection does not
    abort the loop and is never raised to the caller. Unlike a silent
    ``pass``, each failure is logged so that real errors are not mistaken
    for a successful truncate.
    """
    import logging  # local import: the block stays self-contained

    log = logging.getLogger(__name__)

    for col in collection_names(prefix):
        try:
            client.delete(
                collection_name=col,
                # Filter with an empty ``must`` list, used here to select every point.
                points_selector=rest.FilterSelector(filter=rest.Filter(must=[])),
                wait=True,
            )
        except Exception:
            # Keep going with the remaining collections, but leave a trace.
            log.warning(
                "truncate_collections: could not delete points in %r",
                col,
                exc_info=True,
            )
|
||||||
|
|
||||||
|
|
||||||
# -------------------------------
|
|
||||||
# NEU v1.6.0: list_note_ids
|
|
||||||
# -------------------------------
|
|
||||||
def list_note_ids(client: QdrantClient, notes_collection: str, limit: int = 100000) -> List[str]:
|
def list_note_ids(client: QdrantClient, notes_collection: str, limit: int = 100000) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Liefert alle payload.note_id aus der angegebenen Notes-Collection.
|
Liste aller payload.note_id (unique) aus der Notes-Collection.
|
||||||
- Wird von import_markdown.py (>= v3.9.0) verwendet, z.B. für Baseline-/Idempotenz-Checks.
|
|
||||||
- Robust gegen fehlende Felder: ignoriert Punkte ohne 'note_id'.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
client: QdrantClient
|
|
||||||
notes_collection: Name der Notes-Collection (z.B. 'mindnet_notes')
|
|
||||||
limit: harte Obergrenze für die Anzahl der zurückzugebenden IDs
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Liste der note_id-Strings (ohne Duplikate, Reihenfolge nicht garantiert).
|
|
||||||
"""
|
"""
|
||||||
out: List[str] = []
|
out: List[str] = []
|
||||||
seen = set()
|
seen = set()
|
||||||
next_page = None
|
next_page = None
|
||||||
fetched = 0
|
fetched = 0
|
||||||
|
|
||||||
flt = None # kein Filter → alle Punkte
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
# scroll_filter in neueren Clients; ältere akzeptieren 'scroll_filter' oder 'filter'
|
|
||||||
points, next_page = client.scroll(
|
points, next_page = client.scroll(
|
||||||
collection_name=notes_collection,
|
collection_name=notes_collection,
|
||||||
scroll_filter=flt,
|
scroll_filter=None,
|
||||||
limit=min(512, max(1, limit - fetched)),
|
limit=min(512, max(1, limit - fetched)),
|
||||||
with_payload=True,
|
with_payload=True,
|
||||||
with_vectors=False,
|
with_vectors=False,
|
||||||
|
|
@ -256,7 +239,6 @@ def list_note_ids(client: QdrantClient, notes_collection: str, limit: int = 1000
|
||||||
)
|
)
|
||||||
if not points:
|
if not points:
|
||||||
break
|
break
|
||||||
|
|
||||||
for p in points:
|
for p in points:
|
||||||
pl = p.payload or {}
|
pl = p.payload or {}
|
||||||
nid = pl.get("note_id")
|
nid = pl.get("note_id")
|
||||||
|
|
@ -266,8 +248,58 @@ def list_note_ids(client: QdrantClient, notes_collection: str, limit: int = 1000
|
||||||
fetched += 1
|
fetched += 1
|
||||||
if fetched >= limit:
|
if fetched >= limit:
|
||||||
return out
|
return out
|
||||||
|
|
||||||
if next_page is None:
|
if next_page is None:
|
||||||
break
|
break
|
||||||
|
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Fetch-Helfer (NEU für Importer v3.9.0)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _match_value(value: Any):
    """Build a ``rest.MatchValue`` condition for ``value``.

    The keyword form ``MatchValue(value=...)`` is tried first. If that call
    raises ``TypeError``, the positional form ``MatchValue(value)`` is used
    instead (intended as a fallback for older constructor signatures).
    """
    try:
        matcher = rest.MatchValue(value=value)
    except TypeError:
        # Older signature: positional argument.
        matcher = rest.MatchValue(value)
    return matcher
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_one_note(
    client: QdrantClient,
    notes_collection: str,
    note_id: str,
    with_vectors: bool = False,
) -> Optional[Tuple[str, Dict[str, Any], Optional[Any]]]:
    """Fetch a single note point by its ``payload.note_id``.

    Args:
        client: Qdrant client used for the scroll request.
        notes_collection: Name of the notes collection to search.
        note_id: Value the payload field ``note_id`` must match.
        with_vectors: When true, the point's vector is requested and returned.

    Returns:
        ``(point_id, payload, vector)`` for the first point returned by the
        scroll, or ``None`` when no point matches. ``point_id`` is the
        stringified point id (``""`` if the point carries no id), ``payload``
        is the point payload (``{}`` if empty), and ``vector`` is ``None``
        unless ``with_vectors`` is set.
    """
    note_filter = rest.Filter(
        must=[rest.FieldCondition(key="note_id", match=_match_value(note_id))]
    )

    hits, _next_offset = client.scroll(
        collection_name=notes_collection,
        scroll_filter=note_filter,
        limit=1,
        with_payload=True,
        with_vectors=with_vectors,
    )
    if not hits:
        return None

    hit = hits[0]
    raw_id = getattr(hit, "id", None)
    point_id = "" if raw_id is None else str(raw_id)
    payload = hit.payload or {}

    if not with_vectors:
        return (point_id, payload, None)

    # The vector attribute may be absent; fall back to a "_vector" payload key.
    vector = getattr(hit, "vector", None)
    if vector is None:
        vector = payload.get("_vector")
    return (point_id, payload, vector)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user