# app/core/derive_edges.py # -*- coding: utf-8 -*- """ Mindnet V2 — Edge derivation Features preserved & extended: - Structure edges: belongs_to, next, prev (deterministic order by index/ord/chunk_id) - Explicit references from wikilinks [[Title]] -> kind="references", rule_id="explicit:wikilink", confidence=1.0 - Inline relations: A) [[rel: ]] -> kind from link, rule_id="inline:rel", confidence=0.95 B) rel: [[Target1]] [[Target2]] ... -> supports multiple wikilinks on a single line - Callout relations: > [!edge] : [[Target1]] [[Target2]] -> rule_id="callout:edge", confidence=0.9 - Type-based defaults (from types.yaml): For each note type's `edge_defaults`, derive additional edges for every explicit/inline/callout target. rule_id="edge_defaults::", confidence=0.7 Symmetric kinds ("related_to","similar_to") also add reversed edges. - De-duplication across all sources by (kind, scope, source_id, target_id, rule_id) - Backward-compatible function signature Return: list[dict] with at least: kind, scope, source_id, target_id, note_id, chunk_id, rule_id, confidence """ from __future__ import annotations import re from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple # Kanten, die symmetrisch interpretiert werden (Rückkante wird erzeugt) SYMMETRIC_KINDS = {"related_to", "similar_to"} # ----------------------------------------------------------------------------- # Utilities # ----------------------------------------------------------------------------- def _get(d: Dict[str, Any], *keys: str, default: Any = None) -> Any: for k in keys: if isinstance(d, dict) and k in d: return d[k] return default def _safe_int(x: Any, default: int = 10**9) -> int: try: return int(x) except Exception: return default def _sort_chunks(chs: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]: """Sort chunks primarily by index, then ord, then chunk_id to guarantee stable next/prev.""" return sorted(chs, key=lambda ch: ( _get(ch, "index") is None, _safe_int(_get(ch, "index")), _get(ch, "ord") is None, _safe_int(_get(ch, "ord")), str(_get(ch, "chunk_id") or _get(ch, "id") or "") )) def _make_edge(kind: str, source_id: str, target_id: str, note_id: str, chunk_id: str, rule_id: str, confidence: float, scope: str = "chunk") -> Dict[str, Any]: return { "kind": kind, "scope": scope, "source_id": source_id, "target_id": target_id, "note_id": note_id, "chunk_id": chunk_id, "rule_id": rule_id, "confidence": float(confidence), } def _dedupe_key(e: Dict[str, Any]) -> Tuple[Any, ...]: return ( e.get("kind"), e.get("scope"), e.get("source_id"), e.get("target_id"), e.get("rule_id"), ) # ----------------------------------------------------------------------------- # Parsing helpers # ----------------------------------------------------------------------------- RE_WIKILINK = re.compile(r"\[\[([^\]]+?)\]\]") RE_INLINE_BRACKET = re.compile( r"\[\[\s*rel\s*:\s*(?P[a-z_][a-z0-9_]*)\s+(?P[^\]]+?)\s*\]\]", flags=re.IGNORECASE, ) RE_INLINE_PREFIX = re.compile( r"(?m)\brel\s*:\s*(?P[a-z_][a-z0-9_]*)\s+(?P(?:\[\[[^\]]+\]\]\s*)+)", flags=re.IGNORECASE, ) RE_CALLOUT = re.compile( r"(?m)^\s*>\s*\[!edge\]\s*(?P[a-z_][a-z0-9_]*)\s*:\s*(?P.+)$", flags=re.IGNORECASE, ) def _extract_wikilinks(text: str) -> List[str]: """All [[Title]] except those starting with 'rel:' (reserved for inline relations).""" targets: List[str] = [] for m in RE_WIKILINK.finditer(text or ""): label = m.group(1).strip() if label.lower().startswith("rel:"): # handled by explicit inline relation parser continue if label: targets.append(label) return targets def _extract_inline_relations(text: str) -> List[Tuple[str, str]]: """Find inline relations from: A) [[rel:kind Target]] B) rel: kind [[Target1]] [[Target2]] (supports multiple targets on one line) """ out: List[Tuple[str, str]] = [] # A) Bracket form for m in RE_INLINE_BRACKET.finditer(text or ""): kind = m.group("kind").strip().lower() target = m.group("target").strip() if kind and target: out.append((kind, target)) # B) Prefix form with multiple wikilinks for m in RE_INLINE_PREFIX.finditer(text or ""): kind = m.group("kind").strip().lower() links = m.group("links") for lm in RE_WIKILINK.finditer(links or ""): target = lm.group(1).strip() if target: out.append((kind, target)) return out def _extract_callout_relations(text: str) -> List[Tuple[str, str]]: """Find callout > [!edge] kind: [[Target1]] [[Target2]] | Target, Target2""" out: List[Tuple[str, str]] = [] for m in RE_CALLOUT.finditer(text or ""): kind = (m.group("kind") or "").strip().lower() body = m.group("body") or "" # Prefer wikilinks; fallback: split by comma wl = [mm.group(1).strip() for mm in RE_WIKILINK.finditer(body)] if wl: out.extend((kind, t) for t in wl if t) else: for raw in body.split(","): t = raw.strip() if t: out.append((kind, t)) return out # ----------------------------------------------------------------------------- # Edge builder # ----------------------------------------------------------------------------- def _edge_defaults_for_type(types_cfg: Optional[Dict[str, Any]], note_type: Optional[str]) -> List[str]: """Read defaults from types.yaml structure: {'types': {: {'edge_defaults': [..]}}}""" if not types_cfg or not note_type: return [] t = types_cfg.get("types", {}).get(note_type, {}) vals = t.get("edge_defaults", []) or [] return [str(v) for v in vals if isinstance(v, (str,))] def _append_with_dedupe(edges: List[Dict[str, Any]], new_edges: Iterable[Dict[str, Any]]) -> None: seen = { _dedupe_key(e) for e in edges } for e in new_edges: k = _dedupe_key(e) if k in seen: continue edges.append(e) seen.add(k) def _structure_edges(note_id: str, title: str, chunks: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]: edges: List[Dict[str, Any]] = [] ordered = _sort_chunks(chunks) # belongs_to for ch in ordered: cid = _get(ch, "chunk_id", "id") if not cid: continue edges.append(_make_edge("belongs_to", cid, note_id, note_id, cid, "structure:belongs_to", 1.0)) # next/prev for i in range(len(ordered) - 1): a, b = ordered[i], ordered[i + 1] a_id = _get(a, "chunk_id", "id"); b_id = _get(b, "chunk_id", "id") if not a_id or not b_id: continue edges.append(_make_edge("next", a_id, b_id, note_id, a_id, "structure:next", 1.0)) edges.append(_make_edge("prev", b_id, a_id, note_id, b_id, "structure:prev", 1.0)) return edges def _explicit_and_inline_edges(note_id: str, title: str, note_type: Optional[str], chunks: Sequence[Dict[str, Any]], types_cfg: Optional[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], List[str]]: """Return (edges, referenced_targets)""" edges: List[Dict[str, Any]] = [] referenced: List[str] = [] defaults_kinds = set(_edge_defaults_for_type(types_cfg, note_type)) for ch in _sort_chunks(chunks): cid = _get(ch, "chunk_id", "id") text = _get(ch, "text", "window", default="") or "" # --- Explicit wikilinks -> references for target in _extract_wikilinks(text): referenced.append(target) edges.append(_make_edge("references", cid, target, note_id, cid, "explicit:wikilink", 1.0)) # --- Inline relations (both forms) for kind, target in _extract_inline_relations(text): referenced.append(target) # forward edges.append(_make_edge(kind, cid, target, note_id, cid, "inline:rel", 0.95)) # symmetric reverse if kind in SYMMETRIC_KINDS: edges.append(_make_edge(kind, target, cid, note_id, cid, "inline:rel", 0.95)) # --- Callout relations for kind, target in _extract_callout_relations(text): referenced.append(target) edges.append(_make_edge(kind, cid, target, note_id, cid, "callout:edge", 0.90)) if kind in SYMMETRIC_KINDS: edges.append(_make_edge(kind, target, cid, note_id, cid, "callout:edge", 0.90)) # --- Type-based defaults: apply to the union of referenced targets if defaults_kinds and referenced: uniq_targets = list(dict.fromkeys(referenced)) # preserve order, drop dups for ch in _sort_chunks(chunks): cid = _get(ch, "chunk_id", "id") if not cid: continue for kind in defaults_kinds: for target in uniq_targets: edges.append(_make_edge(kind, cid, target, note_id, cid, f"edge_defaults:{note_type}:{kind}", 0.70)) if kind in SYMMETRIC_KINDS: edges.append(_make_edge(kind, target, cid, note_id, cid, f"edge_defaults:{note_type}:{kind}", 0.70)) return edges, referenced # ----------------------------------------------------------------------------- # Public API (backward-compatible) # ----------------------------------------------------------------------------- def build_edges_for_note(*args, **kwargs) -> List[Dict[str, Any]]: """ Backward-compatible entry point. Supported call styles: 1) build_edges_for_note(note_id, title, note_type, chunks, types_cfg) 2) build_edges_for_note(note_payload=dict, chunks=[...], types_cfg=dict) where note_payload contains: note_id/id, title, type """ # Detect signature form if args and isinstance(args[0], dict) and ("title" in args[0] or "type" in args[0] or "note_id" in args[0] or "id" in args[0]): note = args[0] chunks = args[1] if len(args) > 1 else kwargs.get("chunks", []) types_cfg = args[2] if len(args) > 2 else kwargs.get("types_cfg") note_id = _get(note, "note_id", "id") title = _get(note, "title") or "" note_type = _get(note, "type") else: # Positional legacy form note_id = args[0] if len(args) > 0 else kwargs.get("note_id") title = args[1] if len(args) > 1 else kwargs.get("title", "") note_type = args[2] if len(args) > 2 else kwargs.get("note_type") chunks = args[3] if len(args) > 3 else kwargs.get("chunks", []) types_cfg = args[4] if len(args) > 4 else kwargs.get("types_cfg") chunks = list(chunks or []) # Structure edges edges = _structure_edges(note_id, title, chunks) # Explicit / inline / callout / defaults more, _ = _explicit_and_inline_edges(note_id, title, note_type, chunks, types_cfg) _append_with_dedupe(edges, more) return edges