From c3815afcd46f423f3e69206c1c0ff3d00b0de89d Mon Sep 17 00:00:00 2001 From: Lars Date: Sun, 9 Nov 2025 10:39:16 +0100 Subject: [PATCH] =?UTF-8?q?app/core/edges=5Fwriter.py=20hinzugef=C3=BCgt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/core/edges_writer.py | 94 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 app/core/edges_writer.py diff --git a/app/core/edges_writer.py b/app/core/edges_writer.py new file mode 100644 index 0000000..066e44a --- /dev/null +++ b/app/core/edges_writer.py @@ -0,0 +1,94 @@ +# app/core/edges_writer.py +from __future__ import annotations +import hashlib +from typing import Dict, List, Iterable, Tuple + +try: + # Dein Modul mit der Schemadefinition und der Builder-Funktion + from app.core.edges import build_edges_for_note # noqa: F401 +except Exception as e: + raise RuntimeError("Konnte app.core.edges nicht importieren. " + "Bitte sicherstellen, dass app/core/edges.py vorhanden ist.") from e + +def _edge_uid(kind: str, source_id: str, target_id: str, scope: str) -> str: + """ + Deterministische, kurze ID für eine Edge. + Kollisionen sind praktisch ausgeschlossen (BLAKE2s über den Kanonischen Schlüssel). + """ + key = f"{kind}|{source_id}|{target_id}|{scope}" + return hashlib.blake2s(key.encode("utf-8"), digest_size=12).hexdigest() + +def ensure_edges_collection(qdrant_client, collection: str) -> None: + """ + Legt die Edge-Collection an, falls sie nicht existiert. + Minimal: 1D-Vector (Dummy), Cosine. Payload-only-Collections sind je nach Qdrant-Version heikel. + """ + from qdrant_client.http import models as qm + + existing = [c.name for c in qdrant_client.get_collections().collections] + if collection in existing: + return + + qdrant_client.recreate_collection( + collection_name=collection, + vectors_config=qm.VectorParams(size=1, distance=qm.Distance.COSINE), + on_disk_payload=True, + ) + +def edges_from_note( + note_id: str, + chunk_payloads: List[Dict], + note_level_refs: Iterable[str] | None, + *, + include_note_scope_refs: bool = False, +) -> List[Dict]: + """ + Ruft deinen Edge-Builder auf und gibt die (deduplizierten) Edge-Payloads zurück. + Keine Schemaänderung – exakt das aus app/core/edges.py. + """ + return build_edges_for_note( + note_id=note_id, + chunk_payloads=chunk_payloads, + note_level_refs=list(note_level_refs or []), + include_note_scope_refs=include_note_scope_refs, + ) + +def upsert_edges( + qdrant_client, + collection: str, + edge_payloads: List[Dict], +) -> Tuple[int, int]: + """ + Schreibt Edges als Points in Qdrant. + - id: deterministisch aus (kind, source_id, target_id, scope) + - vector: [0.0] Dummy + - payload: Edge-Dict (unverändert, siehe Schema in app/core/edges.py) + Gibt (anzahl_points, anzahl_unique_keys) zurück. + """ + from qdrant_client.models import PointStruct + + if not edge_payloads: + return 0, 0 + + points = [] + seen = set() + for e in edge_payloads: + key = (e.get("kind"), e.get("source_id"), e.get("target_id"), e.get("scope")) + if key in seen: + continue + seen.add(key) + eid = _edge_uid(*key) + points.append( + PointStruct( + id=eid, + vector=[0.0], + payload=e, + ) + ) + + if not points: + return 0, 0 + + ensure_edges_collection(qdrant_client, collection) + qdrant_client.upsert(collection_name=collection, points=points) + return len(points), len(seen)