From bfdd8ac6be39a54695485cf84de0fe5c03768159 Mon Sep 17 00:00:00 2001
From: Lars
Date: Mon, 17 Nov 2025 21:53:49 +0100
Subject: [PATCH] app/core/derive_edges.py aktualisiert

---
Review notes (not part of the commit message):
 * Line breaks and indentation in this patch were reconstructed from a
   whitespace-collapsed copy. Context lines may need re-indenting (or
   `git apply --ignore-whitespace`) before the patch applies.
   TODO: confirm against the target file.
 * The regex group names <kind>, <target> and <targets> were restored; the
   collapsed copy showed "(?P[a-z_]+)", which does not compile, while the
   callbacks read m.group("kind") / m.group("target") / m.group("targets").
 * The hunks that turned "}))" into "})))" and '...{rel}",' into
   '...{rel}"),' were dropped: both leave unbalanced brackets and make the
   module a SyntaxError.
 * _REL_TEXT keeps its name and now accepts one or more targets. Links after
   the first must be separated by spaces/tabs only, so a wikilink on the next
   line is not claimed as a typed target.
 * The substitution order stays as before the patch ([[rel:...]] forms first),
   so existing notes keep producing the same pairs.
 * _WIKILINKS_IN_LINE stays where it is defined (callout section); it is
   resolved at call time, so no move is needed.
 * The post-image blob id in the "index" line predates these review changes.

 app/core/derive_edges.py | 33 ++++++++++++++++++++++++---------
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/app/core/derive_edges.py b/app/core/derive_edges.py
index 6a89950..5a93f18 100644
--- a/app/core/derive_edges.py
+++ b/app/core/derive_edges.py
@@ -8,7 +8,8 @@ Zweck:
 - Unterstützt "typed inline relations":
     * [[rel:KIND | Target]]
     * [[rel:KIND Target]]
-    * rel: KIND [[Target]]
+    * rel: KIND [[Target]]            (one target)
+    * rel: KIND [[T1]] [[T2]] ...     (new: several targets on one line)
 - Unterstützt Obsidian-Callouts:
     * > [!edge] KIND: [[Target]] [[Target2]] ...
 Kompatibilität:
@@ -142,29 +143,43 @@ _WIKILINK_RE = re.compile(r"\[\[(?:[^\|\]]+\|)?([a-zA-Z0-9_\-#:. ]+)\]\]")
 # [[rel:KIND Target]]
 _REL_PIPE = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s*\|\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
 _REL_SPACE = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s+(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
-# rel: KIND [[Target]] (reines Textmuster)
-_REL_TEXT = re.compile(r"rel\s*:\s*(?P<kind>[a-z_]+)\s*\[\[\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
+# rel: KIND [[T1]] [[T2]] ...   (plain-text form, one or more targets)
+# Follow-up links may only be separated by spaces/tabs: a wikilink on a later
+# line stays a generic reference instead of being claimed as a typed target.
+_REL_TEXT = re.compile(r"rel\s*:\s*(?P<kind>[a-z_]+)\s*(?P<targets>\[\[[^\]]+\]\](?:[ \t]*\[\[[^\]]+\]\])*)", re.IGNORECASE)
 
 def _extract_typed_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
     """
-    Gibt Liste (kind, target) zurück und den Text mit entfernten getypten Relation-Links,
-    damit die generische Wikilink-Erkennung sie nicht doppelt zählt.
-    Unterstützt drei Varianten:
+    Return the (kind, target) pairs found in *text* plus the text with those
+    typed relation links removed, so that the generic wikilink detection does
+    not count them a second time.
+    Supported forms:
       - [[rel:KIND | Target]]
       - [[rel:KIND Target]]
       - rel: KIND [[Target]]
+      - rel: KIND [[T1]] [[T2]] ...   (several targets on one line)
     """
     pairs: List[Tuple[str,str]] = []
     def _collect(m):
         k = (m.group("kind") or "").strip().lower()
         t = (m.group("target") or "").strip()
         if k and t:
             pairs.append((k, t))
         return "" # Link entfernen
 
+    def _collect_multi(m):
+        # Text form: emit one (kind, target) pair per wikilink in the matched run.
+        k = (m.group("kind") or "").strip().lower()
+        if k:
+            for t in _WIKILINKS_IN_LINE.findall(m.group("targets") or ""):
+                t = t.strip()
+                if t:
+                    pairs.append((k, t))
+        return ""
+
     text = _REL_PIPE.sub(_collect, text)
     text = _REL_SPACE.sub(_collect, text)
-    text = _REL_TEXT.sub(_collect, text)
+    text = _REL_TEXT.sub(_collect_multi, text)
     return pairs, text
 
 # Obsidian Callout Parser
@@ -249,7 +264,7 @@ def build_edges_for_note(
     - belongs_to: für jeden Chunk (chunk -> note)
     - next / prev: zwischen aufeinanderfolgenden Chunks
     - references: pro Chunk aus window/text (via Wikilinks)
-    - typed inline relations: [[rel:KIND | Target]] / [[rel:KIND Target]] / rel: KIND [[Target]]
+    - typed inline relations: [[rel:KIND | Target]] / [[rel:KIND Target]] / rel: KIND [[Target]] / rel: KIND [[T1]] [[T2]] ...
     - Obsidian Callouts: > [!edge] KIND: [[Target]] [[Target2]]
     - optional note-scope references/backlinks: dedupliziert über alle Chunk-Funde + note_level_references
     - typenbasierte Default-Kanten (edge_defaults) je gefundener Referenz
@@ -307,7 +322,7 @@ def build_edges_for_note(
             continue
         raw = _chunk_text_for_refs(ch)
 
-        # 3a) typed inline relations
+        # 3a) typed inline relations (incl. the multi-target text form)
         typed, remainder = _extract_typed_relations(raw)
         for kind, target in typed:
             kind = kind.strip().lower()