#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Module: app/core/derive_edges.py
Version: 1.5.0 (Mindnet V2)
Status: Stable

Goals
-----
1) Keep the proven edge derivation:
   - belongs_to (chunk -> note)
   - next / prev (chunk chain)
   - references (chunk scope) from Chunk.window/text via `extract_wikilinks`
2) Addition: type-based, derived edges from `config/types.yaml`:
   - For every reference found, additional relations are created from the
     `edge_defaults` of the note type (e.g. "depends_on", "related_to").
   - Optionally symmetric relations (e.g. "related_to", "similar_to").
   - Dedupe stays compatible (key: kind, source_id, target_id, scope).

Notes
-----
- Markdown links are not re-parsed; the existing parser logic
  (`extract_wikilinks`) is kept to preserve compatibility.
- `edge_defaults` are applied to chunk-scope references and, if
  `include_note_scope_refs=True`, to note-scope references as well.
"""

from __future__ import annotations

import logging
import os
from typing import Dict, Iterable, List, Optional, Set

import yaml

# Keep the wikilinks parser (compatibility!)
from app.core.parser import extract_wikilinks

logger = logging.getLogger(__name__)


# ---------------------------- Utilities ------------------------------------

def _get(d: dict, *keys, default=None):
    """Return the value of the first key in *keys* whose value in *d* is not None.

    Falls back to *default* when no key is present with a non-None value.
    """
    for k in keys:
        value = d.get(k)
        if value is not None:
            return value
    return default


def _chunk_text_for_refs(chunk: dict) -> str:
    """Return the chunk text that is scanned for references.

    The first truthy value among 'window', 'text', 'content', 'raw' wins;
    an empty string is returned when none of them is set.
    """
    for key in ("window", "text", "content", "raw"):
        value = _get(chunk, key)
        if value:
            return value
    return ""


def _dedupe(seq: Iterable[str]) -> List[str]:
    """Return the items of *seq* without duplicates, keeping first-seen order."""
    # dict keys keep insertion order, so this is an order-preserving dedupe.
    return list(dict.fromkeys(seq))


def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str,
          extra: Optional[dict] = None) -> dict:
    """Build one edge payload.

    Args:
        kind: Relation name (e.g. "references", "next").
        scope: "chunk" or "note".
        source_id: Id of the edge source.
        target_id: Id of the edge target.
        note_id: Carrier/origin of the edge (the current note).
        extra: Optional additional payload fields merged into the result.

    Returns:
        A new dict; *extra* is copied into it and not modified.
    """
    payload = {
        "kind": kind,
        "scope": scope,  # "chunk" | "note"
        "source_id": source_id,
        "target_id": target_id,
        "note_id": note_id,  # carrier/origin of the edge (current note)
    }
    if extra:
        payload.update(extra)
    return payload


# ---------------------- Type registry (types.yaml) -------------------------

# Symmetric relation types: a derived edge of this kind is also emitted in the
# reverse direction.
SYM_REL: Set[str] = {"related_to", "similar_to"}

# Confidence value attached to every edge derived from `edge_defaults`.
_EDGE_DEFAULT_CONFIDENCE = 0.7


def _env(n: str, default: Optional[str] = None) -> str:
    """Return environment variable *n*, or *default* (or "") when it is unset."""
    v = os.getenv(n)
    return v if v is not None else (default or "")


def _load_types_registry() -> dict:
    """Load the YAML registry from MINDNET_TYPES_FILE or ./config/types.yaml.

    Loading is best-effort: a missing, unreadable, malformed or non-mapping
    file yields an empty dict (and a log message) instead of an exception.

    Returns:
        The parsed registry; always a dict.
    """
    path = _env("MINDNET_TYPES_FILE", "./config/types.yaml")
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        # A missing registry simply means that no edge_defaults are applied.
        logger.debug("Types registry %r not found; no edge_defaults applied.", path)
        return {}
    except (OSError, ValueError, yaml.YAMLError) as exc:
        # ValueError covers UnicodeDecodeError raised while decoding the file.
        logger.warning("Could not load types registry %r: %s", path, exc)
        return {}

    if data is None:
        return {}
    if not isinstance(data, dict):
        # Callers use dict methods on the registry; a list/scalar would crash.
        logger.warning(
            "Types registry %r ignored: top level is %s, expected a mapping.",
            path, type(data).__name__,
        )
        return {}
    return data


def _get_types_map(reg: dict) -> dict:
    """Return the mapping of note type -> type config from the registry.

    Uses reg['types'] when it is a dict, otherwise the registry itself;
    a non-dict registry yields an empty dict.
    """
    if not isinstance(reg, dict):
        return {}
    types_map = reg.get("types")
    return types_map if isinstance(types_map, dict) else reg


def _edge_defaults_for(note_type: Optional[str], reg: dict) -> List[str]:
    """Return the `edge_defaults` list for the given note type.

    Fallback order:
      1) reg['types'][note_type]['edge_defaults']
      2) reg['defaults']['edge_defaults'] (or 'default' / 'global')
      3) []

    Non-string entries of an `edge_defaults` list are dropped.
    """
    if not isinstance(reg, dict):
        return []

    def _as_relations(cfg) -> Optional[List[str]]:
        # Only a dict carrying a list under 'edge_defaults' counts as a hit.
        if isinstance(cfg, dict) and isinstance(cfg.get("edge_defaults"), list):
            return [x for x in cfg["edge_defaults"] if isinstance(x, str)]
        return None

    # 1) exact type
    if note_type:
        found = _as_relations(_get_types_map(reg).get(note_type))
        if found is not None:
            return found

    # 2) fallback sections
    for key in ("defaults", "default", "global"):
        found = _as_relations(reg.get(key))
        if found is not None:
            return found

    # 3) nothing configured
    return []


def _derived_edges(relations: Iterable[str], scope: str, source_id: str,
                   target_id: str, note_id: str, note_type: Optional[str],
                   chunk_id: Optional[str] = None) -> List[dict]:
    """Build the `edge_defaults`-derived edges for one reference.

    For every relation a forward edge (source -> target) is created; relations
    listed in SYM_REL additionally get the reverse edge (target -> source).
    `chunk_id` is added to the payload only when given (chunk scope).
    """
    out: List[dict] = []
    for rel in relations:
        extra: dict = {}
        if chunk_id is not None:
            extra["chunk_id"] = chunk_id
        extra["rule_id"] = f"edge_defaults:{note_type}:{rel}"
        extra["confidence"] = _EDGE_DEFAULT_CONFIDENCE
        out.append(_edge(rel, scope, source_id, target_id, note_id, extra))
        if rel in SYM_REL:
            out.append(_edge(rel, scope, target_id, source_id, note_id, extra))
    return out


# --------------------------- Main function ---------------------------------

def build_edges_for_note(
    note_id: str,
    chunks: List[dict],
    note_level_references: Optional[List[str]] = None,
    include_note_scope_refs: bool = False,
) -> List[dict]:
    """Create the edges for one note.

    - belongs_to: for every chunk (chunk -> note)
    - next / prev: between consecutive chunks
    - references: per chunk from window/text (via `extract_wikilinks`)
    - optional note-scope references/backlinks: deduplicated over all chunk
      findings plus `note_level_references`
    - type-based derived edges (`edge_defaults`) for every reference found

    Args:
        note_id: Id of the note the chunks belong to.
        chunks: Chunk payloads in document order; the id is read from
            'chunk_id' or 'id', the note type from the first chunk's 'type'.
        note_level_references: Extra reference targets on note level; only
            used when `include_note_scope_refs` is true.
        include_note_scope_refs: Also emit note-scope references, backlinks
            and derived edges.

    Returns:
        Edge payloads, deduplicated by (kind, source_id, target_id, scope);
        for duplicate keys the last edge built wins.
    """
    chunks = list(chunks or [])
    edges: List[dict] = []

    # --- 0) determine the note type (expected on the first chunk) ---
    note_type = _get(chunks[0], "type") if chunks else None

    # --- 1) belongs_to ---
    for ch in chunks:
        cid = _get(ch, "chunk_id", "id")
        if cid:
            edges.append(_edge("belongs_to", "chunk", cid, note_id, note_id, {"chunk_id": cid}))

    # --- 2) next/prev ---
    for a, b in zip(chunks, chunks[1:]):
        a_id = _get(a, "chunk_id", "id")
        b_id = _get(b, "chunk_id", "id")
        if not a_id or not b_id:
            continue
        edges.append(_edge("next", "chunk", a_id, b_id, note_id, {"chunk_id": a_id}))
        edges.append(_edge("prev", "chunk", b_id, a_id, note_id, {"chunk_id": b_id}))

    # --- 3) references (chunk scope) + derived relations per reference ---
    reg = _load_types_registry()
    # "references" is always emitted explicitly, so it is skipped as a derived
    # relation; duplicates in the config would only be dropped by the final
    # dedupe anyway.
    derived = [rel for rel in _dedupe(_edge_defaults_for(note_type, reg)) if rel != "references"]

    refs_all: List[str] = []
    for ch in chunks:
        cid = _get(ch, "chunk_id", "id")
        if not cid:
            continue
        # Parser logic is unchanged; materialise the result because it is
        # consumed twice (edge creation and the note-scope collection).
        refs = list(extract_wikilinks(_chunk_text_for_refs(ch)) or ())
        for r in refs:
            # the actual reference
            edges.append(_edge("references", "chunk", cid, r, note_id,
                               {"chunk_id": cid, "ref_text": r}))
            # derived edges per default relation
            edges.extend(_derived_edges(derived, "chunk", cid, r, note_id, note_type, chunk_id=cid))
        refs_all.extend(refs)

    # --- 4) optional: note-scope references/backlinks (+ defaults) ---
    if include_note_scope_refs:
        refs_note = list(refs_all)
        if note_level_references:
            refs_note.extend(r for r in note_level_references if isinstance(r, str) and r)
        for r in _dedupe(refs_note):
            # real note-scope reference and backlink
            edges.append(_edge("references", "note", note_id, r, note_id))
            edges.append(_edge("backlink", "note", r, note_id, note_id))
            # plus the default relations on note scope
            edges.extend(_derived_edges(derived, "note", note_id, r, note_id, note_type))

    # --- 5) dedupe (key unchanged for compatibility; last edge wins) ---
    dedup: Dict[tuple, dict] = {}
    for e in edges:
        dedup[(e["kind"], e["source_id"], e["target_id"], e.get("scope", ""))] = e
    return list(dedup.values())