From ced3b0fa72e47387988df236898fc9e523c61ae6 Mon Sep 17 00:00:00 2001
From: Lars
Date: Thu, 4 Sep 2025 08:08:33 +0200
Subject: [PATCH] app/core/qdrant_points.py aktualisiert

---
 app/core/qdrant_points.py | 71 +++++++++++++++++++++++----------------
 1 file changed, 42 insertions(+), 29 deletions(-)

diff --git a/app/core/qdrant_points.py b/app/core/qdrant_points.py
index 6be8fd0..9f0b9c8 100644
--- a/app/core/qdrant_points.py
+++ b/app/core/qdrant_points.py
@@ -1,37 +1,50 @@
 from __future__ import annotations
-import os, datetime
-from typing import List, Dict, Tuple
-from qdrant_client import QdrantClient
+import os
+from typing import List, Tuple
 from qdrant_client.http import models as rest
-from .qdrant import QdrantConfig, get_client, _collection_names
 
-def ts_iso() -> str:
-    return datetime.datetime.utcnow().replace(microsecond=0).isoformat() + "Z"
 
-def points_for_chunks(prefix: str, chunk_payloads: List[Dict], vectors: List[List[float]]) -> Tuple[str, List[rest.PointStruct]]:
-    assert len(chunk_payloads) == len(vectors)
-    _, chunks, _ = _collection_names(prefix)
-    pts = []
+def _names(prefix: str) -> Tuple[str, str, str]:
+    return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges"
+
+
+def points_for_note(prefix: str, note_payload: dict, note_vec: List[float] | None, dim: int) -> Tuple[str, List[rest.PointStruct]]:
+    """
+    Liefert (collection_name, [PointStruct]) für die Notes-Collection.
+    Falls kein Note-Embedding übergeben wurde, wird ein Nullvektor der Länge `dim` verwendet.
+    Hintergrund: Die Notes-Collection ist in ensure_collections mit Vektor-Dimension angelegt.
+    """
+    notes_col, _, _ = _names(prefix)
+    vector = note_vec if note_vec is not None else [0.0] * int(dim)
+    pt = rest.PointStruct(id=note_payload["note_id"], vector=vector, payload=note_payload)
+    return notes_col, [pt]
+
+
+def points_for_chunks(prefix: str, chunk_payloads: List[dict], vectors: List[List[float]]) -> Tuple[str, List[rest.PointStruct]]:
+    """
+    Liefert (collection_name, [PointStruct]) für die Chunks-Collection.
+    Erwartet für jeden Chunk einen Embedding-Vektor (oder Nullvektor, wenn --skip-embed).
+    """
+    _, chunks_col, _ = _names(prefix)
+    points: List[rest.PointStruct] = []
     for pl, vec in zip(chunk_payloads, vectors):
-        pts.append(rest.PointStruct(id=pl["id"], vector=vec, payload=pl))
-    return chunks, pts
+        points.append(rest.PointStruct(id=pl["chunk_id"], vector=vec, payload=pl))
+    return chunks_col, points
 
-def points_for_note(prefix: str, note_payload: Dict, vector: List[float] | None) -> Tuple[str, List[rest.PointStruct]]:
-    notes, _, _ = _collection_names(prefix)
-    if vector is None:
-        # Für Notizen erlauben wir auch Payload-only Upserts (Vektor leer)
-        return notes, [rest.PointStruct(id=note_payload["note_id"], payload=note_payload)]
-    return notes, [rest.PointStruct(id=note_payload["note_id"], vector=vector, payload=note_payload)]
 
-def points_for_edges(prefix: str, edges: List[Dict]) -> Tuple[str, List[rest.PointStruct]]:
-    _, _, edges_col = _collection_names(prefix)
-    pts = []
-    for e in edges:
-        pid = f"{e['src_id']}~{e['edge_type']}~{e['dst_id']}~{e.get('scope','note')}"
-        # 1-dim Dummy-Vektor, weil Collection einen Vektor erwartet
-        pts.append(rest.PointStruct(id=pid, vector=[0.0], payload=e))
-    return edges_col, pts
+def points_for_edges(prefix: str, edge_payloads: List[dict]) -> Tuple[str, List[rest.PointStruct]]:
+    """
+    Liefert (collection_name, [PointStruct]) für die Edges-Collection.
+    Edges-Collection ist VEKTORENLOS angelegt → nur Payload.
+    """
+    _, _, edges_col = _names(prefix)
+    points: List[rest.PointStruct] = []
+    for pl in edge_payloads:
+        points.append(rest.PointStruct(id=pl["edge_id"], payload=pl))
+    return edges_col, points
 
-def upsert_batch(client: QdrantClient, collection: str, points: List[rest.PointStruct]):
-    if not points: return
-    client.upsert(collection, points=points, wait=True)
+
+def upsert_batch(client, collection: str, points: List[rest.PointStruct]) -> None:
+    if not points:
+        return
+    client.upsert(collection_name=collection, points=points, wait=True)