diff --git a/app/core/qdrant_points.py b/app/core/qdrant_points.py index 70877d4..1116540 100644 --- a/app/core/qdrant_points.py +++ b/app/core/qdrant_points.py @@ -1,3 +1,29 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Name: app/core/qdrant_points.py +Version: v1.2.0 (2025-09-05) + +Kurzbeschreibung: + Erzeugt Qdrant-Points für Notes, Chunks, Edges. + - Edges: 1D-Dummy-Vektor (Workaround). + - **NEU**: Edge-ID berücksichtigt optional 'occ' (Occurrence-Index), + um Mehrfach-Vorkommen von Wikilinks eindeutig zu machen. + +Aufruf: + from app.core.qdrant_points import ( + points_for_note, points_for_chunks, points_for_edges, upsert_batch + ) + +Kompatibilität: + - Bestehende Edge-IDs bleiben gültig; neue Edges können ein '@{occ}'-Suffix in der ID haben. + - Idempotenz weiterhin über UUIDv5 auf stabilem Key. + +Changelog: + v1.2.0: Edge-ID-Logik erweitert: "{kind}:{src}->{tgt}#{seq}@{occ}" wenn 'occ' vorhanden. + v1.1.x: Stabilisierung Note-/Chunk-Upsert. +""" + from __future__ import annotations import uuid from typing import List, Tuple @@ -22,7 +48,6 @@ def points_for_note( """Notes-Collection: falls kein Note-Embedding -> Nullvektor der Länge dim.""" notes_col, _, _ = _names(prefix) vector = note_vec if note_vec is not None else [0.0] * int(dim) - # Qdrant-Point-ID MUSS int/UUID sein raw_note_id = note_payload.get("note_id") or note_payload.get("id") or "missing-note-id" point_id = _to_uuid(raw_note_id) pt = rest.PointStruct(id=point_id, vector=vector, payload=note_payload) @@ -47,7 +72,7 @@ def points_for_chunks( if not chunk_id: note_id = pl.get("note_id") or pl.get("parent_note_id") or "missing-note" chunk_id = f"{note_id}#{i}" - pl["chunk_id"] = chunk_id # persistenter Fallback in Payload + pl["chunk_id"] = chunk_id point_id = _to_uuid(chunk_id) points.append(rest.PointStruct(id=point_id, vector=vec, payload=pl)) return chunks_col, points @@ -56,8 +81,9 @@ def points_for_chunks( def points_for_edges(prefix: str, edge_payloads: List[dict]) -> Tuple[str, List[rest.PointStruct]]: """ Edges-Collection mit 1D-Dummy-Vektor. - - Fehlt 'edge_id', konstruieren wir eine stabile ID aus (kind, source_id, target_id, seq). - - vector=[0.0] erfüllt die Client-Validierung. + ID-Bildung: + - Basis: "{kind}:{src}->{tgt}#{seq}" + - **Neu**: Falls 'occ' in Payload, erweitern zu "{...}@{occ}" """ _, _, edges_col = _names(prefix) points: List[rest.PointStruct] = [] @@ -68,15 +94,15 @@ def points_for_edges(prefix: str, edge_payloads: List[dict]) -> Tuple[str, List[ s = pl.get("source_id", "unknown-src") t = pl.get("target_id", "unknown-tgt") seq = pl.get("seq") or pl.get("order") or "" - edge_id = f"{kind}:{s}->{t}#{seq}" + occ = pl.get("occ") # optionaler Occurrence-Index + base = f"{kind}:{s}->{t}#{seq}" + edge_id = f"{base}@{occ}" if occ is not None else base pl["edge_id"] = edge_id point_id = _to_uuid(edge_id) points.append(rest.PointStruct(id=point_id, vector=[0.0], payload=pl)) return edges_col, points - - def upsert_batch(client, collection: str, points: List[rest.PointStruct]) -> None: if not points: return