From da2bbf72a6b1cf804e6d062bfcc684726c4755aa Mon Sep 17 00:00:00 2001 From: Lars Date: Fri, 5 Sep 2025 09:47:09 +0200 Subject: [PATCH] app/core/derive_edges.py aktualisiert --- app/core/derive_edges.py | 43 +++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/app/core/derive_edges.py b/app/core/derive_edges.py index 5187ea4..f2fd0f9 100644 --- a/app/core/derive_edges.py +++ b/app/core/derive_edges.py @@ -1,4 +1,25 @@ -# app/core/derive_edges.py +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Name: app/core/derive_edges.py +Version: v1.1.0 (2025-09-05) + +Kurzbeschreibung: + Leitet Edges aus Wikilinks ([[...]]) ab. + - Unterstützt Auflösung über note_id, Title-Slug, File-Slug. + - Erzeugt Edges: + * "references" (Note->Note) + "backlink" (inverse) + * "references_at" (Chunk->Note) mit seq = 1-basierte Position des Chunks in chunks_payloads + - **NEU**: pro Match ein Occurrence-Zähler 'occ' für eindeutige Edge-IDs. + +Kompatibilität: + - Vorherige Payload-Felder bleiben erhalten. + - Zusätzliche Felder: 'seq' für Volltext ("body"), 'occ' überall. + +Changelog: + v1.1.0: 'occ' eingeführt; 'references' und 'backlink' erhalten jetzt seq="body". 
+""" + from __future__ import annotations import re import unicodedata @@ -54,42 +75,46 @@ def derive_wikilink_edges(note_payload: dict, chunks_payloads: List[dict], note_ edges: List[dict] = [] source_note_id = note_payload["note_id"] - def _make_edge(kind: str, src: str, tgt: str, seq=None, extra: dict|None=None): + def _make_edge(kind: str, src: str, tgt: str, seq=None, occ=None, extra: dict|None=None): e = {"edge_id": None, "kind": kind, "source_id": src, "target_id": tgt} if seq is not None: e["seq"] = seq + if occ is not None: + e["occ"] = occ if extra: e.update(extra) return e - # Links im Volltext (falls vorhanden) + # Links im Volltext (gesamter Body) fulltext = note_payload.get("fulltext") or note_payload.get("body") or "" if fulltext: - for m in WIKILINK_RE.finditer(fulltext): + for k, m in enumerate(WIKILINK_RE.finditer(fulltext), start=1): raw_target, heading, alias = m.groups() target_id, how = resolve_target(raw_target, note_index) extra = {"raw": raw_target, "alias": alias, "heading": heading, "resolution": how} if target_id: - edges.append(_make_edge("references", source_note_id, target_id, extra=extra)) - edges.append(_make_edge("backlink", target_id, source_note_id, extra=extra)) + edges.append(_make_edge("references", source_note_id, target_id, seq="body", occ=k, extra=extra)) + edges.append(_make_edge("backlink", target_id, source_note_id, seq="body", occ=k, extra=extra)) else: extra["status"] = "unresolved" extra["target_label"] = raw_target - edges.append(_make_edge("references", source_note_id, raw_target, extra=extra)) + edges.append(_make_edge("references", source_note_id, raw_target, seq="body", occ=k, extra=extra)) # Links in Chunks (wenn übergeben) for i, ch in enumerate(chunks_payloads, start=1): txt = ch.get("text") or ch.get("content") or "" if not txt: continue + occ = 0 for m in WIKILINK_RE.finditer(txt): + occ += 1 raw_target, heading, alias = m.groups() target_id, how = resolve_target(raw_target, note_index) extra = {"raw": 
raw_target, "alias": alias, "heading": heading, "resolution": how} if target_id: - edges.append(_make_edge("references_at", ch["chunk_id"], target_id, seq=i, extra=extra)) + edges.append(_make_edge("references_at", ch["chunk_id"], target_id, seq=i, occ=occ, extra=extra)) else: extra["status"] = "unresolved" extra["target_label"] = raw_target - edges.append(_make_edge("references_at", ch["chunk_id"], raw_target, seq=i, extra=extra)) + edges.append(_make_edge("references_at", ch["chunk_id"], raw_target, seq=i, occ=occ, extra=extra)) return edges