All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
287 lines
11 KiB
Python
287 lines
11 KiB
Python
# app/core/derive_edges.py
# -*- coding: utf-8 -*-
"""
Mindnet V2 — Edge derivation

Features preserved & extended:
- Structure edges: belongs_to, next, prev (deterministic order by index/ord/chunk_id)
- Explicit references from wikilinks [[Title]] -> kind="references", rule_id="explicit:wikilink", confidence=1.0
- Inline relations:
    A) [[rel:<kind> <Target>]] -> kind from link, rule_id="inline:rel", confidence=0.95
    B) rel: <kind> [[Target1]] [[Target2]] ... -> supports multiple wikilinks on a single line
- Callout relations:
    > [!edge] <kind>: [[Target1]] [[Target2]] -> rule_id="callout:edge", confidence=0.9
- Type-based defaults (from types.yaml):
    For each note type's `edge_defaults`, derive additional edges for every explicit/inline/callout target.
    rule_id="edge_defaults:<note_type>:<kind>", confidence=0.7
    Symmetric kinds ("related_to","similar_to") also add reversed edges.
- De-duplication across all sources by (kind, scope, source_id, target_id, rule_id)
- Backward-compatible function signature

Return: list[dict] with at least:
    kind, scope, source_id, target_id, note_id, chunk_id, rule_id, confidence
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
|
|
|
|
# Edge kinds that are interpreted symmetrically (a reverse edge is generated as well)
|
|
SYMMETRIC_KINDS = {"related_to", "similar_to"}
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Utilities
|
|
# -----------------------------------------------------------------------------
|
|
|
|
def _get(d: Dict[str, Any], *keys: str, default: Any = None) -> Any:
|
|
for k in keys:
|
|
if isinstance(d, dict) and k in d:
|
|
return d[k]
|
|
return default
|
|
|
|
def _safe_int(x: Any, default: int = 10**9) -> int:
|
|
try:
|
|
return int(x)
|
|
except Exception:
|
|
return default
|
|
|
|
def _sort_chunks(chs: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Return the chunks in a stable order suitable for next/prev linking.

    Ordering criteria, in priority order:
      1. ``index`` (chunks without an ``index`` come after those with one),
      2. ``ord`` (same rule for missing values),
      3. the string form of ``chunk_id`` (or ``id`` when ``chunk_id`` is falsy).

    Values that cannot be converted to ``int`` are ranked via the fallback of
    ``_safe_int``.
    """

    def _order_key(chunk: Dict[str, Any]) -> Tuple[Any, ...]:
        position = _get(chunk, "index")
        ordinal = _get(chunk, "ord")
        ident = _get(chunk, "chunk_id") or _get(chunk, "id") or ""
        return (
            position is None,
            _safe_int(position),
            ordinal is None,
            _safe_int(ordinal),
            str(ident),
        )

    return sorted(chs, key=_order_key)
|
|
|
|
def _make_edge(kind: str,
|
|
source_id: str,
|
|
target_id: str,
|
|
note_id: str,
|
|
chunk_id: str,
|
|
rule_id: str,
|
|
confidence: float,
|
|
scope: str = "chunk") -> Dict[str, Any]:
|
|
return {
|
|
"kind": kind,
|
|
"scope": scope,
|
|
"source_id": source_id,
|
|
"target_id": target_id,
|
|
"note_id": note_id,
|
|
"chunk_id": chunk_id,
|
|
"rule_id": rule_id,
|
|
"confidence": float(confidence),
|
|
}
|
|
|
|
def _dedupe_key(e: Dict[str, Any]) -> Tuple[Any, ...]:
|
|
return (
|
|
e.get("kind"),
|
|
e.get("scope"),
|
|
e.get("source_id"),
|
|
e.get("target_id"),
|
|
e.get("rule_id"),
|
|
)
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Parsing helpers
|
|
# -----------------------------------------------------------------------------
|
|
|
|
# Any [[...]] wikilink; group 1 is the raw label between the brackets.
RE_WIKILINK = re.compile(r"\[\[([^\]]+?)\]\]")

# Bracket form of an inline relation: [[rel:<kind> <Target>]]
# (whitespace around "rel" and ":" is tolerated, matching is case-insensitive).
RE_INLINE_BRACKET = re.compile(
    r"\[\[\s*rel\s*:\s*(?P<kind>[a-z_][a-z0-9_]*)\s+(?P<target>[^\]]+?)\s*\]\]",
    re.IGNORECASE,
)

# Prefix form of an inline relation: rel: <kind> [[Target1]] [[Target2]] ...
# The "links" group holds the whole run of wikilinks following the kind.
RE_INLINE_PREFIX = re.compile(
    r"(?m)\brel\s*:\s*(?P<kind>[a-z_][a-z0-9_]*)\s+(?P<links>(?:\[\[[^\]]+\]\]\s*)+)",
    re.IGNORECASE,
)

# Callout line: > [!edge] <kind>: <body>  (body = rest of that line).
RE_CALLOUT = re.compile(
    r"(?m)^\s*>\s*\[!edge\]\s*(?P<kind>[a-z_][a-z0-9_]*)\s*:\s*(?P<body>.+)$",
    re.IGNORECASE,
)
|
|
|
|
def _extract_wikilinks(text: str) -> List[str]:
    """Return the labels of all plain ``[[Title]]`` wikilinks in *text*.

    Links whose label starts with ``rel`` followed by a colon are reserved for
    inline relations and are skipped here. Whitespace between ``rel`` and the
    colon is tolerated, because the bracket-form relation pattern
    (``RE_INLINE_BRACKET``) tolerates it too; otherwise ``[[rel : kind X]]``
    would yield both an inline relation and a bogus plain reference.

    Args:
        text: Text to scan; ``None`` or empty text yields an empty list.

    Returns:
        Stripped, non-empty link labels in order of appearance (duplicates kept).
    """
    targets: List[str] = []
    for m in RE_WIKILINK.finditer(text or ""):
        label = m.group(1).strip()
        if not label:
            continue
        if re.match(r"rel\s*:", label, flags=re.IGNORECASE):
            # Handled by the inline relation parser.
            continue
        targets.append(label)
    return targets
|
|
|
|
def _extract_inline_relations(text: str) -> List[Tuple[str, str]]:
    """Collect ``(kind, target)`` pairs from inline relation syntax.

    Two notations are recognised:
      A) ``[[rel:kind Target]]``
      B) ``rel: kind [[Target1]] [[Target2]]`` (one pair per wikilink)

    All bracket-form pairs come first, followed by all prefix-form pairs.
    Kinds are lower-cased; targets are stripped of surrounding whitespace.
    """
    source = text or ""
    pairs: List[Tuple[str, str]] = []

    # A) Bracket form
    for hit in RE_INLINE_BRACKET.finditer(source):
        rel_kind = hit.group("kind").strip().lower()
        rel_target = hit.group("target").strip()
        if rel_kind and rel_target:
            pairs.append((rel_kind, rel_target))

    # B) Prefix form, possibly with several wikilinks after the kind
    for hit in RE_INLINE_PREFIX.finditer(source):
        rel_kind = hit.group("kind").strip().lower()
        link_run = hit.group("links") or ""
        names = (link.group(1).strip() for link in RE_WIKILINK.finditer(link_run))
        pairs.extend((rel_kind, name) for name in names if name)

    return pairs
|
|
|
|
def _extract_callout_relations(text: str) -> List[Tuple[str, str]]:
    """Collect ``(kind, target)`` pairs from ``> [!edge] kind: ...`` callout lines.

    The callout body is read as wikilinks when it contains any
    (``[[Target1]] [[Target2]]``); only when it contains none is it split on
    commas (``Target, Target2``). Kinds are lower-cased, empty targets dropped.
    """
    pairs: List[Tuple[str, str]] = []
    for hit in RE_CALLOUT.finditer(text or ""):
        rel_kind = (hit.group("kind") or "").strip().lower()
        payload = hit.group("body") or ""
        linked = [link.group(1).strip() for link in RE_WIKILINK.finditer(payload)]
        if linked:
            # Wikilinks take precedence over the comma-separated fallback.
            candidates = linked
        else:
            candidates = [piece.strip() for piece in payload.split(",")]
        pairs.extend((rel_kind, name) for name in candidates if name)
    return pairs
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Edge builder
|
|
# -----------------------------------------------------------------------------
|
|
|
|
def _edge_defaults_for_type(types_cfg: Optional[Dict[str, Any]], note_type: Optional[str]) -> List[str]:
|
|
"""Read defaults from types.yaml structure: {'types': {<type>: {'edge_defaults': [..]}}}"""
|
|
if not types_cfg or not note_type:
|
|
return []
|
|
t = types_cfg.get("types", {}).get(note_type, {})
|
|
vals = t.get("edge_defaults", []) or []
|
|
return [str(v) for v in vals if isinstance(v, (str,))]
|
|
|
|
def _append_with_dedupe(edges: List[Dict[str, Any]], new_edges: Iterable[Dict[str, Any]]) -> None:
    """Append to *edges* (in place) every edge of *new_edges* not already present.

    Presence is decided by ``_dedupe_key``; of several new edges sharing a key
    only the first one is kept. Edges already in *edges* are left untouched.
    """
    known = set(map(_dedupe_key, edges))
    for candidate in new_edges:
        key = _dedupe_key(candidate)
        if key not in known:
            known.add(key)
            edges.append(candidate)
|
|
|
|
def _structure_edges(note_id: str, title: str, chunks: Sequence[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Derive the structural edges of a note from its chunks.

    Produces, with confidence 1.0:
      - one ``belongs_to`` edge (chunk -> note) per chunk that has an id,
      - a ``next``/``prev`` edge pair for every two neighbouring chunks in
        sorted order, provided both have an id.

    Args:
        note_id: Id of the owning note.
        title: Note title; accepted for signature compatibility, not used.
        chunks: Chunk dicts carrying ``chunk_id`` or ``id``.

    Returns:
        All ``belongs_to`` edges first, then the ``next``/``prev`` pairs.
    """
    chunk_ids = [_get(chunk, "chunk_id", "id") for chunk in _sort_chunks(chunks)]

    # belongs_to
    result: List[Dict[str, Any]] = [
        _make_edge("belongs_to", cid, note_id, note_id, cid, "structure:belongs_to", 1.0)
        for cid in chunk_ids
        if cid
    ]

    # next/prev between direct neighbours
    for left, right in zip(chunk_ids, chunk_ids[1:]):
        if left and right:
            result.append(_make_edge("next", left, right, note_id, left, "structure:next", 1.0))
            result.append(_make_edge("prev", right, left, note_id, right, "structure:prev", 1.0))

    return result
|
|
|
|
def _explicit_and_inline_edges(note_id: str,
                               title: str,
                               note_type: Optional[str],
                               chunks: Sequence[Dict[str, Any]],
                               types_cfg: Optional[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], List[str]]:
    """Derive content-based edges for all chunks of a note.

    Per chunk (in sorted order) the chunk text (``text``, else ``window``) is
    scanned for:
      - plain wikilinks     -> kind "references", rule "explicit:wikilink", 1.0
      - inline relations    -> kind from text,    rule "inline:rel",        0.95
      - callout relations   -> kind from text,    rule "callout:edge",      0.90
    Afterwards, for every default kind configured for *note_type*, an edge
    from each chunk to each distinct referenced target is added
    (rule "edge_defaults:<note_type>:<kind>", 0.70).
    Kinds listed in ``SYMMETRIC_KINDS`` additionally get a reversed edge.

    Chunks without an id are skipped entirely, so no edge with an empty
    source is emitted. Default kinds are applied in configuration order to
    keep the output order reproducible between runs.

    Args:
        note_id: Id of the owning note.
        title: Note title; accepted for signature compatibility, not used.
        note_type: Type name used to look up ``edge_defaults``.
        chunks: Chunk dicts with ``chunk_id``/``id`` and ``text``/``window``.
        types_cfg: Parsed type configuration, or ``None``.

    Returns:
        ``(edges, referenced_targets)`` where *referenced_targets* lists every
        target found, in discovery order and including duplicates.
    """
    edges: List[Dict[str, Any]] = []
    referenced: List[str] = []

    def _emit(kind: str, cid: str, target: str, rule_id: str, confidence: float) -> None:
        # Forward edge, plus the reverse edge for symmetric kinds.
        edges.append(_make_edge(kind, cid, target, note_id, cid, rule_id, confidence))
        if kind in SYMMETRIC_KINDS:
            edges.append(_make_edge(kind, target, cid, note_id, cid, rule_id, confidence))

    # Ordered de-duplication instead of set(): set iteration order of strings
    # varies between interpreter runs.
    default_kinds = list(dict.fromkeys(_edge_defaults_for_type(types_cfg, note_type)))

    # Sort once and keep only chunks that can act as an edge source.
    ordered = [ch for ch in _sort_chunks(chunks) if _get(ch, "chunk_id", "id")]

    for ch in ordered:
        cid = _get(ch, "chunk_id", "id")
        text = _get(ch, "text", "window", default="") or ""

        # --- Explicit wikilinks -> references
        for target in _extract_wikilinks(text):
            referenced.append(target)
            edges.append(_make_edge("references", cid, target, note_id, cid, "explicit:wikilink", 1.0))

        # --- Inline relations (both forms)
        for kind, target in _extract_inline_relations(text):
            referenced.append(target)
            _emit(kind, cid, target, "inline:rel", 0.95)

        # --- Callout relations
        for kind, target in _extract_callout_relations(text):
            referenced.append(target)
            _emit(kind, cid, target, "callout:edge", 0.90)

    # --- Type-based defaults: apply to the union of referenced targets
    if default_kinds and referenced:
        uniq_targets = list(dict.fromkeys(referenced))  # preserve order, drop dups
        for ch in ordered:
            cid = _get(ch, "chunk_id", "id")
            for kind in default_kinds:
                rule_id = f"edge_defaults:{note_type}:{kind}"
                for target in uniq_targets:
                    _emit(kind, cid, target, rule_id, 0.70)

    return edges, referenced
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# Public API (backward-compatible)
|
|
# -----------------------------------------------------------------------------
|
|
|
|
def build_edges_for_note(*args, **kwargs) -> List[Dict[str, Any]]:
    """
    Backward-compatible entry point.

    Supported call styles:
      1) build_edges_for_note(note_id, title, note_type, chunks, types_cfg)
         (each argument may also be passed by keyword)
      2) build_edges_for_note(note_payload, chunks, types_cfg)
         build_edges_for_note(note_payload=dict, chunks=[...], types_cfg=dict)
         where note_payload contains: note_id/id, title, type

    A positional first argument is treated as a note payload when it is a
    dict containing at least one of the keys title, type, note_id or id.

    Returns:
        Structure edges followed by explicit/inline/callout/default edges,
        de-duplicated by (kind, scope, source_id, target_id, rule_id).
    """
    # Detect signature form
    payload_kw = kwargs.get("note_payload")
    if isinstance(payload_kw, dict):
        # Keyword payload form; any positionals are (chunks, types_cfg).
        note: Optional[Dict[str, Any]] = payload_kw
        rest = args
    elif args and isinstance(args[0], dict) and any(
        key in args[0] for key in ("title", "type", "note_id", "id")
    ):
        note = args[0]
        rest = args[1:]
    else:
        note = None
        rest = args

    if note is not None:
        chunks = rest[0] if len(rest) > 0 else kwargs.get("chunks", [])
        types_cfg = rest[1] if len(rest) > 1 else kwargs.get("types_cfg")
        note_id = _get(note, "note_id", "id")
        title = _get(note, "title") or ""
        note_type = _get(note, "type")
    else:
        # Positional legacy form
        note_id = args[0] if len(args) > 0 else kwargs.get("note_id")
        title = args[1] if len(args) > 1 else kwargs.get("title", "")
        note_type = args[2] if len(args) > 2 else kwargs.get("note_type")
        chunks = args[3] if len(args) > 3 else kwargs.get("chunks", [])
        types_cfg = args[4] if len(args) > 4 else kwargs.get("types_cfg")

    chunks = list(chunks or [])

    # Structure edges
    edges = _structure_edges(note_id, title, chunks)

    # Explicit / inline / callout / defaults
    more, _ = _explicit_and_inline_edges(note_id, title, note_type, chunks, types_cfg)
    _append_with_dedupe(edges, more)

    return edges
|