All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 5s
279 lines
9.0 KiB
Python
279 lines
9.0 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from typing import Dict, List, Tuple, Iterable, Set
|
|
|
|
# --------------------------------------------
|
|
# Hilfsfunktionen
|
|
# --------------------------------------------
|
|
|
|
# Any double-bracket link: [[Target]]. Group 1 is the raw text between the
# brackets (it may not contain "]").
WIKILINK_RE = re.compile(r"\[\[([^\]]+?)\]\]")

# Inline relations:
#   [[rel:depends_on | Target]]  or  [[rel:related_to Target]]
# Group 1 is the relation token; the target ends up in group 2 (pipe form)
# or group 3 (whitespace form, including its leading whitespace).
INLINE_REL_RE = re.compile(
    r"\[\[\s*rel\s*:\s*"                   # opening brackets and "rel:" prefix
    r"([a-zA-Z_][\w\-]*)"                  # relation token
    r"\s*"
    r"(?:\|\s*([^\]]+?)|(\s+[^\]]+?))"     # "| Target" or " Target"
    r"\s*\]\]"                             # closing brackets
)

# Callout lines:
#   > [!edge] related_to: [[A]] [[B]]
# Whitespace is flexible and matching is case-insensitive; the relation token
# follows [a-zA-Z_][\w-]*. Group 1 is the relation token, group 2 the rest of
# the line after the colon.
CALLOUT_LINE_RE = re.compile(
    r"^\s*>\s*"                            # blockquote marker
    r"\[\s*!edge\s*\]\s*"                  # "[!edge]" callout tag
    r"([a-zA-Z_][\w\-]*)"                  # relation token
    r"\s*:\s*"
    r"(.+?)\s*$",                          # remainder of the line
    flags=re.IGNORECASE,
)
|
|
|
|
|
|
def _chunk_text(payload: Dict) -> str:
    """Return the textual content of a chunk payload.

    The ``"text"`` entry is preferred. If it is missing or falsy, ``"window"``
    is used instead; if that is missing or falsy as well, an empty string is
    returned.
    """
    for field in ("text", "window"):
        content = payload.get(field)
        if content:
            return content
    return ""
|
|
|
|
|
|
def _make_edge(
    *,
    note_id: str,
    chunk_id: str | None,
    source_id: str,
    target_id: str,
    relation: str,
    rule_id: str,
    scope: str = "chunk",
    confidence: float | None = None,
) -> Dict:
    """Build the payload dict for a single edge.

    All arguments are keyword-only.

    Args:
        note_id: Stored under ``"note_id"``.
        chunk_id: Stored under ``"chunk_id"``; any falsy value is stored as
            ``None``.
        source_id: Stored under ``"source_id"``.
        target_id: Stored under ``"target_id"``.
        relation: Relation name. Written to both ``"kind"`` (kept for
            backward compatibility) and ``"relation"`` (read by evaluation
            scripts).
        rule_id: Stored under ``"rule_id"``.
        scope: Stored under ``"scope"``; defaults to ``"chunk"``.
        confidence: Optional score. The ``"confidence"`` key is only present
            when this is not ``None``.

    Returns:
        The edge payload as a plain dict.
    """
    edge: Dict = dict(
        note_id=note_id,
        chunk_id=chunk_id or None,
        scope=scope,
        kind=relation,
        relation=relation,
        source_id=source_id,
        target_id=target_id,
        rule_id=rule_id,
    )
    if confidence is None:
        return edge
    edge["confidence"] = confidence
    return edge
|
|
|
|
|
|
def _dedup(edges: Iterable[Dict]) -> List[Dict]:
    """Drop duplicate edges while keeping the first occurrence and its order.

    Two edges count as duplicates when they agree on source id, target id,
    relation (``"relation"``, falling back to ``"kind"``) and rule id. Each
    component is compared as a string, with missing or falsy values treated
    as the empty string.
    """

    def fingerprint(edge: Dict) -> Tuple[str, ...]:
        relation = edge.get("relation") or edge.get("kind")
        parts = (
            edge.get("source_id"),
            edge.get("target_id"),
            relation,
            edge.get("rule_id"),
        )
        return tuple(str(part or "") for part in parts)

    known: Set[Tuple[str, ...]] = set()
    unique: List[Dict] = []
    for edge in edges:
        mark = fingerprint(edge)
        if mark not in known:
            known.add(mark)
            unique.append(edge)
    return unique
|
|
|
|
|
|
def _wikilink_targets(text: str) -> List[str]:
    """Return the targets of all plain wikilinks (``[[Target]]``) in *text*.

    Targets are returned in order of appearance with surrounding whitespace
    stripped. Two kinds of matches are skipped:

    * Typed inline relations such as ``[[rel:depends_on | Target]]``. They
      also satisfy ``WIKILINK_RE``, but their bracket content
      (``"rel:depends_on | Target"``) is not a link target.
    * Links whose content is blank after stripping (``[[ ]]``), since an
      empty target cannot identify anything. ``_inline_relations`` drops
      empty targets in the same way.
    """
    targets: List[str] = []
    for link in WIKILINK_RE.finditer(text):
        # A wikilink span contains no "]" before its closing brackets, so any
        # inline-relation match inside it covers the link itself.
        if INLINE_REL_RE.search(link.group(0)):
            continue
        target = link.group(1).strip()
        if target:
            targets.append(target)
    return targets
|
|
|
|
|
|
def _inline_relations(text: str) -> List[Tuple[str, str]]:
    """Extract typed inline relations from *text*.

    Both spellings are accepted:

        [[rel:depends_on | Target]]
        [[rel:depends_on Target]]

    Returns:
        A list of ``(relation, target)`` tuples in order of appearance. The
        relation is lower-cased and the target is stripped of surrounding
        whitespace. Matches whose target ends up empty are left out.
    """
    pairs: List[Tuple[str, str]] = []
    for hit in INLINE_REL_RE.finditer(text):
        name, piped, spaced = hit.groups()
        target = (piped or spaced or "").strip()
        # The whitespace alternative of the pattern can capture a pipe
        # (e.g. "[[rel:x |]]"); remove it so it does not become the target.
        if target.startswith("|"):
            target = target[1:].strip()
        if not target:
            continue
        pairs.append((name.strip().lower(), target))
    return pairs
|
|
|
|
|
|
def _callout_relations(lines: List[str]) -> List[Tuple[str, List[str]]]:
    """Collect edge callouts from a list of text lines.

    A callout line looks like:

        > [!edge] related_to: [[A]] [[B]]

    Returns:
        A list of ``(relation, [targets...])`` tuples, one per matching line
        that contains at least one wikilink after the colon. The relation is
        lower-cased; lines are processed in the order given.
    """
    found: List[Tuple[str, List[str]]] = []
    # Non-matching lines yield None from match() and are filtered out here.
    for hit in filter(None, map(CALLOUT_LINE_RE.match, lines)):
        linked = _wikilink_targets(hit.group(2))
        if linked:
            found.append((hit.group(1).strip().lower(), linked))
    return found
|
|
|
|
|
|
# --------------------------------------------
|
|
# Öffentliche Hauptfunktion
|
|
# --------------------------------------------
|
|
def derive_edges(note_core: Dict, chunks: List[Dict], types_cfg: Dict | None = None) -> List[Dict]:
    """Derive all edges for one note and its chunks.

    Args:
        note_core: Note payload; the keys ``"note_id"``, ``"type"`` and
            ``"text"`` are read.
        chunks: Chunk payloads in document order; ``"chunk_id"`` and
            ``"text"`` / ``"window"`` are read.
        types_cfg: Loaded ``types.yaml`` as a dict, or ``None``. Only
            ``types_cfg["types"][<note type>]["edge_defaults"]`` is read.
            Missing or null levels are treated as "no defaults", and a single
            string is treated as a one-element list.

    Returns:
        De-duplicated edge payloads in this order:

        1. Structural edges per chunk: ``belongs_to`` (chunk -> note) plus
           ``next`` / ``prev`` between neighbouring chunks.
        2. Wikilinks found in each chunk text -> ``references``.
        3. Inline relations in each chunk text, e.g.
           ``[[rel:depends_on | Target]]`` -> ``depends_on``.
        4. Callouts in the note text, e.g.
           ``> [!edge] related_to: [[A]] [[B]]`` -> ``related_to``. They are
           attached to the first chunk so that the scope stays ``"chunk"``.
        5. Type defaults: every relation in ``edge_defaults`` is created from
           every chunk to every target collected in steps 2-4.

        An empty list is returned when there are no chunks, because every
        edge originates from a chunk.
    """
    if not chunks:
        return []

    nid = note_core.get("note_id")
    ntype = (note_core.get("type") or "").strip().lower()
    ntext = note_core.get("text") or ""

    edges: List[Dict] = []

    def add(cid, target, relation: str, rule_id: str, confidence: float) -> None:
        # Every derived edge starts at a chunk, so chunk_id and source_id
        # are always the same value.
        edges.append(
            _make_edge(
                note_id=nid,
                chunk_id=cid,
                source_id=cid,
                target_id=target,
                relation=relation,
                rule_id=rule_id,
                confidence=confidence,
            )
        )

    chunk_ids = [ch.get("chunk_id") for ch in chunks]
    chunk_texts = [_chunk_text(ch) for ch in chunks]

    # -------------------------------------------------
    # 1) Structural edges per chunk
    # -------------------------------------------------
    last = len(chunk_ids) - 1
    for pos, cid in enumerate(chunk_ids):
        add(cid, nid, "belongs_to", "struct:belongs_to", 1.0)
        if pos < last:
            nxt = chunk_ids[pos + 1]
            add(cid, nxt, "next", "struct:next", 0.99)
            add(nxt, cid, "prev", "struct:prev", 0.99)

    # -------------------------------------------------
    # 2) Explicit references from each chunk text (wikilinks)
    # -------------------------------------------------
    explicit_targets: Set[str] = set()
    for cid, txt in zip(chunk_ids, chunk_texts):
        for tgt in _wikilink_targets(txt):
            explicit_targets.add(tgt)
            add(cid, tgt, "references", "explicit:wikilink", 0.9)

    # -------------------------------------------------
    # 3) Inline relations (typed edges inside the text)
    #    Kept as a separate pass so all wikilink edges precede them.
    # -------------------------------------------------
    for cid, txt in zip(chunk_ids, chunk_texts):
        for rel, tgt in _inline_relations(txt):
            explicit_targets.add(tgt)
            add(cid, tgt, rel, f"inline:rel:v1:{rel}", 0.8)

    # -------------------------------------------------
    # 4) Callout relations (> [!edge] related_to: [[A]] [[B]])
    #    Defined at note level, but attached to the first chunk so that
    #    scope='chunk' is preserved.
    # -------------------------------------------------
    first_cid = chunk_ids[0]
    for rel, tgts in _callout_relations(ntext.splitlines()):
        for tgt in tgts:
            explicit_targets.add(tgt)
            add(first_cid, tgt, rel, f"callout:edge:v1:{rel}", 0.8)

    # -------------------------------------------------
    # 5) Type defaults (edge_defaults) from types.yaml
    #    For every chunk, create the default relations to all targets
    #    recognised above (wikilinks / inline / callout).
    # -------------------------------------------------
    raw_defaults = None
    if isinstance(types_cfg, dict):
        # YAML may yield None for "types:" or for an empty type entry, so
        # each level is checked before it is dereferenced.
        type_defs = types_cfg.get("types")
        type_def = type_defs.get(ntype) if isinstance(type_defs, dict) else None
        if isinstance(type_def, dict):
            raw_defaults = type_def.get("edge_defaults")
    if isinstance(raw_defaults, str):
        # A scalar would otherwise be split into single characters by list().
        raw_defaults = [raw_defaults]

    default_rels = [
        name
        for name in (str(rel).strip().lower() for rel in list(raw_defaults or []))
        if name
    ]
    if default_rels and explicit_targets:
        ordered_targets = sorted(explicit_targets)
        for cid in chunk_ids:
            for rel in default_rels:
                for tgt in ordered_targets:
                    add(cid, tgt, rel, f"edge_defaults:{ntype}:{rel}", 0.7)

    # -------------------------------------------------
    # 6) De-duplicate
    # -------------------------------------------------
    return _dedup(edges)
|