app/core/qdrant.py aktualisiert
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
This commit is contained in:
parent
25bc6544c4
commit
987b3c1770
|
|
@ -2,31 +2,37 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
"""
|
"""
|
||||||
Name: app/core/qdrant.py
|
Name: app/core/qdrant.py
|
||||||
Version: v1.5.0 (2025-11-08)
|
Version: v1.6.0 (2025-11-08)
|
||||||
|
|
||||||
Kurzbeschreibung:
|
Kurzbeschreibung:
|
||||||
Qdrant-Client & Collection-Setup für mindnet.
|
Qdrant-Client & Collection-Setup für mindnet.
|
||||||
- Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren.
|
- Stellt sicher, dass {prefix}_notes / {prefix}_chunks / {prefix}_edges existieren.
|
||||||
- Edges-Collection nutzt 1D Dummy-Vektor.
|
- Edges-Collection nutzt 1D Dummy-Vektor.
|
||||||
- ensure_payload_indexes(...) legt sinnvolle Payload-Indizes an.
|
- ensure_payload_indexes(...) legt sinnvolle Payload-Indizes an.
|
||||||
- **NEU (v1.5.0):** Abwärtskompatible Helfer:
|
|
||||||
* ensure_collections_for_prefix(...) → Wrapper für legacy-Importer
|
NEU / Änderungen:
|
||||||
* count_points(client, collection) → stabile Zählfunktion (mit Fallback)
|
v1.5.0:
|
||||||
* get_counts_for_prefix(...) → Summary über alle drei Collections
|
* ensure_collections_for_prefix(...) → Wrapper für legacy-Importer
|
||||||
* truncate_collections(...) → löscht *alle Punkte* in den Collections
|
* count_points(client, collection) → stabile Zählfunktion (mit Fallback)
|
||||||
|
* get_counts_for_prefix(...) → Summary über alle drei Collections
|
||||||
|
* truncate_collections(...) → löscht *alle Punkte* in den Collections
|
||||||
|
v1.6.0:
|
||||||
|
* list_note_ids(client, notes_collection) → liefert alle payload.note_id-Werte
|
||||||
|
(wird von import_markdown.py v3.9.0 erwartet)
|
||||||
|
|
||||||
Aufruf:
|
Aufruf:
|
||||||
from app.core.qdrant import (
|
from app.core.qdrant import (
|
||||||
QdrantConfig, get_client,
|
QdrantConfig, get_client,
|
||||||
ensure_collections, ensure_payload_indexes,
|
ensure_collections, ensure_payload_indexes,
|
||||||
ensure_collections_for_prefix, count_points,
|
ensure_collections_for_prefix, count_points,
|
||||||
collection_names, get_counts_for_prefix, truncate_collections
|
collection_names, get_counts_for_prefix, truncate_collections,
|
||||||
|
list_note_ids,
|
||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
import os
|
import os
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Optional, Tuple, Dict
|
from typing import Optional, Tuple, Dict, List
|
||||||
|
|
||||||
from qdrant_client import QdrantClient
|
from qdrant_client import QdrantClient
|
||||||
from qdrant_client.http import models as rest
|
from qdrant_client.http import models as rest
|
||||||
|
|
@ -66,6 +72,7 @@ def _create_notes(client: QdrantClient, name: str, dim: int) -> None:
|
||||||
vectors_config=rest.VectorParams(size=dim, distance=rest.Distance.COSINE),
|
vectors_config=rest.VectorParams(size=dim, distance=rest.Distance.COSINE),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _create_chunks(client: QdrantClient, name: str, dim: int) -> None:
|
def _create_chunks(client: QdrantClient, name: str, dim: int) -> None:
|
||||||
if not client.collection_exists(name):
|
if not client.collection_exists(name):
|
||||||
client.create_collection(
|
client.create_collection(
|
||||||
|
|
@ -73,6 +80,7 @@ def _create_chunks(client: QdrantClient, name: str, dim: int) -> None:
|
||||||
vectors_config=rest.VectorParams(size=dim, distance=rest.Distance.COSINE),
|
vectors_config=rest.VectorParams(size=dim, distance=rest.Distance.COSINE),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def _create_edges(client: QdrantClient, name: str) -> None:
|
def _create_edges(client: QdrantClient, name: str) -> None:
|
||||||
if not client.collection_exists(name):
|
if not client.collection_exists(name):
|
||||||
client.create_collection(
|
client.create_collection(
|
||||||
|
|
@ -124,6 +132,7 @@ def _safe_create_index(client: QdrantClient, col: str, field: str, schema: rest.
|
||||||
# bereits vorhanden oder nicht unterstütztes Schema → ignorieren
|
# bereits vorhanden oder nicht unterstütztes Schema → ignorieren
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None:
|
def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None:
|
||||||
notes, chunks, edges = collection_names(prefix)
|
notes, chunks, edges = collection_names(prefix)
|
||||||
# Notes
|
# Notes
|
||||||
|
|
@ -149,6 +158,7 @@ def ensure_collections_for_prefix(client: QdrantClient, prefix: str, dim: int, d
|
||||||
ensure_payload_indexes(client, prefix)
|
ensure_payload_indexes(client, prefix)
|
||||||
return collection_names(prefix)
|
return collection_names(prefix)
|
||||||
|
|
||||||
|
|
||||||
def count_points(client: QdrantClient, collection: str) -> int:
|
def count_points(client: QdrantClient, collection: str) -> int:
|
||||||
"""
|
"""
|
||||||
Zähle Punkte in einer Collection robust:
|
Zähle Punkte in einer Collection robust:
|
||||||
|
|
@ -157,11 +167,9 @@ def count_points(client: QdrantClient, collection: str) -> int:
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
res = client.count(collection_name=collection, count_filter=None, exact=True)
|
res = client.count(collection_name=collection, count_filter=None, exact=True)
|
||||||
# qdrant_client >=1.7: res.count
|
|
||||||
cnt = getattr(res, "count", None)
|
cnt = getattr(res, "count", None)
|
||||||
if isinstance(cnt, int):
|
if isinstance(cnt, int):
|
||||||
return cnt
|
return cnt
|
||||||
# ältere Clients liefern evtl. ein Dict
|
|
||||||
if isinstance(res, dict) and "count" in res:
|
if isinstance(res, dict) and "count" in res:
|
||||||
return int(res["count"])
|
return int(res["count"])
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|
@ -183,6 +191,7 @@ def count_points(client: QdrantClient, collection: str) -> int:
|
||||||
break
|
break
|
||||||
return total
|
return total
|
||||||
|
|
||||||
|
|
||||||
def get_counts_for_prefix(client: QdrantClient, prefix: str) -> Dict[str, int]:
|
def get_counts_for_prefix(client: QdrantClient, prefix: str) -> Dict[str, int]:
|
||||||
notes, chunks, edges = collection_names(prefix)
|
notes, chunks, edges = collection_names(prefix)
|
||||||
return {
|
return {
|
||||||
|
|
@ -191,6 +200,7 @@ def get_counts_for_prefix(client: QdrantClient, prefix: str) -> Dict[str, int]:
|
||||||
"edges": count_points(client, edges),
|
"edges": count_points(client, edges),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def truncate_collections(client: QdrantClient, prefix: str) -> None:
|
def truncate_collections(client: QdrantClient, prefix: str) -> None:
|
||||||
"""
|
"""
|
||||||
Löscht *alle Punkte* (nicht die Collections selber) für {prefix}.
|
Löscht *alle Punkte* (nicht die Collections selber) für {prefix}.
|
||||||
|
|
@ -208,3 +218,56 @@ def truncate_collections(client: QdrantClient, prefix: str) -> None:
|
||||||
except Exception:
|
except Exception:
|
||||||
# Fallback: Collection ggf. leer/nicht vorhanden → ignorieren
|
# Fallback: Collection ggf. leer/nicht vorhanden → ignorieren
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# -------------------------------
|
||||||
|
# NEU v1.6.0: list_note_ids
|
||||||
|
# -------------------------------
|
||||||
|
def list_note_ids(client: QdrantClient, notes_collection: str, limit: int = 100000) -> List[str]:
    """
    Return the distinct ``payload.note_id`` values stored in a notes collection.

    The collection is read page by page via ``client.scroll`` without a filter,
    so every point is visited until the collection is exhausted or ``limit``
    distinct IDs have been collected. Points that have no payload, no
    ``note_id`` key, or a non-string ``note_id`` are ignored.

    According to the module changelog this helper is expected by
    import_markdown.py (>= v3.9.0), e.g. for baseline/idempotency checks.

    Args:
        client: Qdrant client (or any object exposing a compatible ``scroll``
            method returning ``(points, next_offset)``).
        notes_collection: Name of the notes collection (e.g. 'mindnet_notes').
        limit: Hard upper bound on the number of IDs returned. A value of zero
            or less yields an empty list without contacting the server.

    Returns:
        List of unique note_id strings, in the order they were first seen while
        scrolling. Callers should not rely on that order.
    """
    # A non-positive limit means "nothing requested"; without this guard the
    # first page would still be fetched and one ID could slip through.
    if limit <= 0:
        return []

    page_size = 512  # upper bound on points requested per scroll call
    note_ids: List[str] = []
    seen = set()
    offset = None  # None → start at the beginning of the collection

    while True:
        points, offset = client.scroll(
            collection_name=notes_collection,
            scroll_filter=None,  # no filter → all points
            # Never ask for more points than IDs still needed.
            limit=min(page_size, limit - len(note_ids)),
            with_payload=True,
            with_vectors=False,
            offset=offset,
        )

        for point in points or ():
            note_id = (point.payload or {}).get("note_id")
            if not isinstance(note_id, str) or note_id in seen:
                continue
            seen.add(note_id)
            note_ids.append(note_id)
            if len(note_ids) >= limit:
                return note_ids

        # An empty page or a missing continuation offset both mean "done".
        if not points or offset is None:
            return note_ids
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user