All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
242 lines
7.8 KiB
Python
242 lines
7.8 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from typing import Dict, Iterable, List, Optional, Tuple
|
|
|
|
|
|
# ------------------------------
|
|
# Edge payload helper
|
|
# ------------------------------
|
|
def _edge_payload(
|
|
*,
|
|
note_id: str,
|
|
chunk_id: Optional[str],
|
|
kind: str,
|
|
source_id: str,
|
|
target_id: str,
|
|
rule_id: str,
|
|
scope: str = "chunk",
|
|
confidence: Optional[float] = None,
|
|
) -> Dict:
|
|
p = {
|
|
"note_id": note_id,
|
|
"chunk_id": chunk_id,
|
|
"kind": kind,
|
|
"scope": scope,
|
|
"source_id": source_id,
|
|
"target_id": target_id,
|
|
"rule_id": rule_id,
|
|
}
|
|
if confidence is not None:
|
|
p["confidence"] = float(confidence)
|
|
return p
|
|
|
|
|
|
# ------------------------------
|
|
# Inline [[wikilink]] parser
|
|
# ------------------------------
|
|
_WIKILINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
|
|
|
|
def _iter_wikilinks(text: str) -> Iterable[str]:
|
|
for m in _WIKILINK_RE.finditer(text):
|
|
yield m.group(1).strip()
|
|
|
|
|
|
# ------------------------------
|
|
# Callout parser
|
|
# Syntax:
|
|
# > [!edge] related_to: [[Vector DB Basics]] [[Embeddings 101]]
|
|
# Multiple targets per line are allowed.
|
|
# ------------------------------
|
|
# Matches a line of the form "> [!edge] <relation>: <rest>" (case-insensitive).
# Group 1 is the relation name (letters and underscores), group 2 the rest
# of the line after the colon.
_CALLOUT_RE = re.compile(
    r"^\s*>\s*\[!edge\]\s*([a-z_]+)\s*:\s*(.+)$",
    flags=re.IGNORECASE,
)


def _parse_callout_line(line: str) -> Optional[Tuple[str, List[str]]]:
    """Parse one ``> [!edge] relation: [[A]] [[B]]`` callout line.

    Args:
        line: A single line of text.

    Returns:
        ``(relation, targets)`` where *relation* is lower-cased and *targets*
        are the stripped, non-empty ``[[...]]`` link texts found after the
        colon; ``None`` when the line is not an edge callout or contains no
        usable target.
    """
    match = _CALLOUT_RE.match(line)
    if match is None:
        return None
    relation = match.group(1).strip().lower()
    link_texts = _WIKILINK_RE.findall(match.group(2))
    targets = [t.strip() for t in link_texts if t.strip()]
    # A callout without any wikilink target carries no edge information.
    if not targets:
        return None
    return relation, targets
|
|
|
|
|
|
# ------------------------------
|
|
# Apply defaults from types.yaml (if configured)
|
|
# Example types_cfg:
|
|
# { "types": { "project": { "edge_defaults": ["references","depends_on"] }, ... } }
|
|
# ------------------------------
|
|
def _edge_defaults_for_type(types_cfg: Dict, note_type: str) -> List[str]:
|
|
tdef = (types_cfg or {}).get("types", {}).get(note_type, {})
|
|
vals = tdef.get("edge_defaults") or []
|
|
return [str(v).strip().lower() for v in vals if str(v).strip()]
|
|
|
|
|
|
# ------------------------------
|
|
# Main function: derive edges
|
|
# Expected inputs:
|
|
# note: { "note_id","title","type","text", ... }
|
|
# chunks: [ { "chunk_id","note_id","index","ord","text","window", ... }, ... ]
|
|
# types_cfg: loaded types.yaml as a dict
|
|
# ------------------------------
|
|
def derive_edges(
    note: Dict,
    chunks: List[Dict],
    types_cfg: Optional[Dict] = None,
) -> List[Dict]:
    """Derive the edge payloads for one note and its chunks.

    Edges are produced in five stages:

    1. Structure: for every chunk a ``belongs_to`` edge to the note, plus
       ``next`` / ``prev`` edges to the neighbouring chunks (no confidence).
    2. Inline wikilinks: a ``references`` edge per ``[[Title]]`` found in
       each chunk body (chunk scope) and in the note text (note scope),
       confidence 0.8. The link text itself is stored as ``target_id``.
    3. Callouts: for every ``> [!edge] relation: [[A]] [[B]]`` line in a
       chunk body, one edge of kind ``relation`` per target, confidence 0.7.
    4. Type defaults: for every relation configured as ``edge_defaults`` for
       the note's type, one edge per chunk pointing at the note title (or
       the note id when the title is empty), confidence 0.7.
    5. De-duplication on ``(source_id, target_id, kind, rule_id)``.

    A chunk's body is its ``"window"`` value, falling back to ``"text"``.
    Stage 2 scans the whole body, so links inside callout lines also yield
    ``references`` edges.

    Args:
        note: Note dict; reads ``"note_id"`` (falling back to ``"id"``),
            ``"title"``, ``"type"`` and ``"text"``.
        chunks: Chunk dicts in sequence order; reads ``"chunk_id"``,
            ``"window"`` and ``"text"``. ``None`` is treated as no chunks.
        types_cfg: Types configuration dict; optional.

    Returns:
        The de-duplicated edge payload dicts, ordered by first occurrence of
        each key; when a key occurs more than once the last payload wins.
    """
    note_id = note.get("note_id") or note.get("id")
    note_title = note.get("title") or ""
    note_type = (note.get("type") or "").strip().lower()
    text = note.get("text") or ""
    chunks = chunks or []

    # Read every chunk's id and body once; all stages below reuse them.
    # Neighbour ids are read with .get() like the current chunk's id, so a
    # chunk without "chunk_id" is handled the same way in every stage.
    chunk_ids = [ch.get("chunk_id") for ch in chunks]
    bodies = [ch.get("window") or ch.get("text") or "" for ch in chunks]

    edges: List[Dict] = []

    # 1) Sequence edges per note: belongs_to / next / prev
    last_index = len(chunk_ids) - 1
    for i, cid in enumerate(chunk_ids):
        edges.append(
            _edge_payload(
                note_id=note_id,
                chunk_id=cid,
                kind="belongs_to",
                source_id=cid,
                target_id=note_id,
                rule_id="structure:v1:belongs_to",
                scope="chunk",
            )
        )
        if i < last_index:
            edges.append(
                _edge_payload(
                    note_id=note_id,
                    chunk_id=cid,
                    kind="next",
                    source_id=cid,
                    target_id=chunk_ids[i + 1],
                    rule_id="structure:v1:next",
                    scope="chunk",
                )
            )
        if i > 0:
            edges.append(
                _edge_payload(
                    note_id=note_id,
                    chunk_id=cid,
                    kind="prev",
                    source_id=cid,
                    target_id=chunk_ids[i - 1],
                    rule_id="structure:v1:prev",
                    scope="chunk",
                )
            )

    # 2) Inline wikilinks ([[Title]]) => references (chunk scope + note scope)
    #    Note: target_id holds the link title here; a resolver can map it to
    #    a note_id later.
    # chunk scope: links in each chunk's window/text
    for cid, body in zip(chunk_ids, bodies):
        for tgt in _iter_wikilinks(body):
            edges.append(
                _edge_payload(
                    note_id=note_id,
                    chunk_id=cid,
                    kind="references",
                    source_id=cid,
                    target_id=tgt,  # title
                    rule_id="inline:rel:v1:references",
                    scope="chunk",
                    confidence=0.8,
                )
            )

    # note scope: links in the full note text
    for tgt in _iter_wikilinks(text):
        edges.append(
            _edge_payload(
                note_id=note_id,
                chunk_id=None,
                kind="references",
                source_id=note_id,
                target_id=tgt,  # title
                rule_id="explicit:ref:v1:wikilink",
                scope="note",
                confidence=0.8,
            )
        )

    # 3) Callouts:
    #    > [!edge] related_to: [[A]] [[B]]
    #    => one edge per target A/B with rule_id="callout:edge:v1:<relation>"
    for cid, body in zip(chunk_ids, bodies):
        for line in body.splitlines():
            parsed = _parse_callout_line(line)
            if not parsed:
                continue
            relation, targets = parsed
            # Normalize the relation name before tagging.
            relation = relation.lower()
            rule_tag = f"callout:edge:v1:{relation}"
            for tgt in targets:
                edges.append(
                    _edge_payload(
                        note_id=note_id,
                        chunk_id=cid,
                        kind=relation,
                        source_id=cid,
                        target_id=tgt,  # title
                        rule_id=rule_tag,
                        scope="chunk",
                        confidence=0.7,
                    )
                )

    # 4) Derived edges (edge_defaults) from the types configuration
    #    Example: project -> ["references", "depends_on"]
    #    Each chunk gets a weakly weighted default relation to the note
    #    title, intended as a navigation edge until a resolver maps titles
    #    to ids.
    defaults = _edge_defaults_for_type(types_cfg or {}, note_type)
    if defaults:
        rule_prefix = f"edge_defaults:{note_type}"
        soft_target = note_title or note_id
        for cid in chunk_ids:
            for rel in defaults:
                edges.append(
                    _edge_payload(
                        note_id=note_id,
                        chunk_id=cid,
                        kind=rel,
                        source_id=cid,
                        target_id=soft_target,
                        rule_id=f"{rule_prefix}:{rel}",
                        scope="chunk",
                        confidence=0.7,
                    )
                )

    # 5) De-duplication: key = (source_id, target_id, kind, rule_id).
    #    A dict keeps the position of a key's first insertion while a later
    #    assignment replaces its value.
    unique: Dict[Tuple[str, str, str, str], Dict] = {}
    for edge in edges:
        key = (edge["source_id"], edge["target_id"], edge["kind"], edge["rule_id"])
        unique[key] = edge
    return list(unique.values())
|