All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 2s
214 lines
7.8 KiB
Python
214 lines
7.8 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Modul: app/core/derive_edges.py
|
||
Version: 1.5.0 (Mindnet V2)
|
||
Status: Stable
|
||
|
||
Ziele
|
||
-----
|
||
1) Beibehalten der bewährten Edge-Ableitung:
|
||
- belongs_to (chunk -> note)
|
||
- next / prev (Chunk-Kette)
|
||
- references (chunk-scope) aus Chunk.window/text via `extract_wikilinks`
|
||
|
||
2) Ergänzung: typenbasierte, abgeleitete Kanten aus `config/types.yaml`:
|
||
- Für jede gefundene Referenz werden zusätzliche Relationen aus
|
||
`edge_defaults` des Notiztyps erzeugt (z. B. "depends_on", "related_to").
|
||
- Optional symmetrische Relationen (z. B. "related_to", "similar_to").
|
||
- Dedupe bleibt kompatibel (Key: kind, source_id, target_id, scope).
|
||
|
||
Hinweise
|
||
--------
|
||
- Es werden keine Markdown-Links neu geparst; wir bleiben bei der
|
||
vorhandenen Parser-Logik (`extract_wikilinks`) zur Sicherung der Kompatibilität.
|
||
- `edge_defaults` werden sowohl für Chunk-scope-Referenzen als auch – falls
|
||
`include_note_scope_refs=True` – für Note-scope-Referenzen angewendet.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
from typing import Dict, List, Optional, Iterable, Set
|
||
|
||
import os
|
||
import yaml
|
||
|
||
# Wikilinks-Parser beibehalten (Kompatibilität!)
|
||
from app.core.parser import extract_wikilinks
|
||
|
||
|
||
# ---------------------------- Utilities ------------------------------------
|
||
|
||
def _get(d: dict, *keys, default=None):
|
||
for k in keys:
|
||
if k in d and d[k] is not None:
|
||
return d[k]
|
||
return default
|
||
|
||
def _chunk_text_for_refs(chunk: dict) -> str:
    """Pick the chunk field that is scanned for references.

    The fields are tried in the order 'window', 'text', 'content', 'raw';
    the first one holding a truthy value wins. If none does, an empty
    string is returned.
    """
    for field in ("window", "text", "content", "raw"):
        candidate = _get(chunk, field)
        if candidate:
            return candidate
    return ""
|
||
|
||
def _dedupe(seq: Iterable[str]) -> List[str]:
|
||
seen: Set[str] = set()
|
||
out: List[str] = []
|
||
for s in seq:
|
||
if s not in seen:
|
||
seen.add(s)
|
||
out.append(s)
|
||
return out
|
||
|
||
def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, extra: Optional[dict] = None) -> dict:
|
||
pl = {
|
||
"kind": kind,
|
||
"scope": scope, # "chunk" | "note"
|
||
"source_id": source_id,
|
||
"target_id": target_id,
|
||
"note_id": note_id, # Träger/Quelle der Kante (aktuelle Note)
|
||
}
|
||
if extra:
|
||
pl.update(extra)
|
||
return pl
|
||
|
||
|
||
# ---------------------- Typen-Registry (types.yaml) ------------------------
|
||
|
||
SYM_REL = {"related_to", "similar_to"} # symmetric relation types
|
||
|
||
def _env(n: str, default: Optional[str] = None) -> str:
|
||
v = os.getenv(n)
|
||
return v if v is not None else (default or "")
|
||
|
||
def _load_types_registry() -> dict:
    """Load the type registry from a YAML file.

    The path is read from the ``MINDNET_TYPES_FILE`` environment variable and
    falls back to ``./config/types.yaml`` when that variable is unset.

    Returns:
        The parsed document when its root is a mapping, otherwise ``{}``.
        ``{}`` is also returned when the file cannot be opened or parsed.
    """
    path = _env("MINDNET_TYPES_FILE", "./config/types.yaml")
    try:
        with open(path, "r", encoding="utf-8") as fh:
            data = yaml.safe_load(fh)
    except Exception:
        # Deliberate best effort: an unreadable or malformed registry yields
        # an empty registry instead of an error for the caller.
        return {}
    # safe_load may return None (empty file), a list, or a scalar. Callers
    # use dict methods on the result, so anything that is not a mapping is
    # normalised to an empty registry.
    return data if isinstance(data, dict) else {}
|
||
|
||
def _get_types_map(reg: dict) -> dict:
|
||
if isinstance(reg, dict) and isinstance(reg.get("types"), dict):
|
||
return reg["types"]
|
||
return reg if isinstance(reg, dict) else {}
|
||
|
||
def _edge_defaults_for(note_type: Optional[str], reg: dict) -> List[str]:
|
||
"""
|
||
Liefert die edge_defaults-Liste für den gegebenen Notiztyp.
|
||
Fallback-Reihenfolge:
|
||
1) reg['types'][note_type]['edge_defaults']
|
||
2) reg['defaults']['edge_defaults'] (oder 'default'/'global')
|
||
3) []
|
||
"""
|
||
types_map = _get_types_map(reg)
|
||
# 1) exakter Typ
|
||
if note_type and isinstance(types_map, dict):
|
||
t = types_map.get(note_type)
|
||
if isinstance(t, dict) and isinstance(t.get("edge_defaults"), list):
|
||
return [str(x) for x in t["edge_defaults"] if isinstance(x, str)]
|
||
# 2) Fallback
|
||
for key in ("defaults", "default", "global"):
|
||
v = reg.get(key)
|
||
if isinstance(v, dict) and isinstance(v.get("edge_defaults"), list):
|
||
return [str(x) for x in v["edge_defaults"] if isinstance(x, str)]
|
||
# 3) leer
|
||
return []
|
||
|
||
|
||
# --------------------------- Hauptfunktion ---------------------------------
|
||
|
||
def _derived_edges(
    defaults: List[str],
    note_type: Optional[str],
    scope: str,
    source_id: str,
    target_id: str,
    note_id: str,
    extra: Optional[dict] = None,
) -> List[dict]:
    """Build the edge_defaults-derived edges for one reference source -> target."""
    derived: List[dict] = []
    for rel in defaults:
        if rel == "references":
            continue  # the real 'references' edge is emitted by the caller
        payload = dict(extra or {})
        payload["rule_id"] = f"edge_defaults:{note_type}:{rel}"
        payload["confidence"] = 0.7
        derived.append(_edge(rel, scope, source_id, target_id, note_id, payload))
        if rel in SYM_REL:
            # symmetric relation: also emit the reverse direction
            derived.append(_edge(rel, scope, target_id, source_id, note_id, payload))
    return derived


def build_edges_for_note(
    note_id: str,
    chunks: List[dict],
    note_level_references: Optional[List[str]] = None,
    include_note_scope_refs: bool = False,
) -> List[dict]:
    """Create the edges for a single note.

    Edges are produced in this order:
      - belongs_to: one per chunk that has an id (chunk -> note)
      - next / prev: between consecutive chunks that both have an id
      - references (chunk scope): one per link that `extract_wikilinks`
        finds in the chunk's window/text, each followed by the edges derived
        from the note type's ``edge_defaults``
      - if ``include_note_scope_refs`` is true: note-scope references and
        backlinks for the deduplicated union of all chunk-level links and
        ``note_level_references``, again followed by derived edges

    Relations listed in ``SYM_REL`` are emitted in both directions. The note
    type is read from the 'type' field of the first chunk.

    Returns:
        The edge payloads, deduplicated on (kind, source_id, target_id, scope).
    """
    edges: List[dict] = []

    # --- 0) note type (expected on the first chunk) ---
    note_type = _get(chunks[0], "type") if chunks else None

    # --- 1) belongs_to ---
    for ch in chunks:
        cid = _get(ch, "chunk_id", "id")
        if not cid:
            continue
        edges.append(_edge("belongs_to", "chunk", cid, note_id, note_id, {"chunk_id": cid}))

    # --- 2) next/prev ---
    for a, b in zip(chunks, chunks[1:]):
        a_id = _get(a, "chunk_id", "id")
        b_id = _get(b, "chunk_id", "id")
        if not a_id or not b_id:
            continue
        edges.append(_edge("next", "chunk", a_id, b_id, note_id, {"chunk_id": a_id}))
        edges.append(_edge("prev", "chunk", b_id, a_id, note_id, {"chunk_id": b_id}))

    # --- 3) references (chunk scope) + derived relations per reference ---
    reg = _load_types_registry()
    defaults = _edge_defaults_for(note_type, reg)
    refs_all: List[str] = []

    for ch in chunks:
        cid = _get(ch, "chunk_id", "id")
        if not cid:
            continue
        txt = _chunk_text_for_refs(ch)
        refs = extract_wikilinks(txt)  # parser logic intentionally unchanged
        for r in refs:
            # the real reference
            edges.append(_edge("references", "chunk", cid, r, note_id, {"chunk_id": cid, "ref_text": r}))
            # derived edges, one per default relation
            edges.extend(_derived_edges(defaults, note_type, "chunk", cid, r, note_id, {"chunk_id": cid}))
        refs_all.extend(refs)

    # --- 4) optional: note-scope references/backlinks (+ defaults) ---
    if include_note_scope_refs:
        refs_note = list(refs_all)
        if note_level_references:
            refs_note.extend(r for r in note_level_references if isinstance(r, str) and r)
        for r in _dedupe(refs_note):
            # real note-scope reference and its backlink
            edges.append(_edge("references", "note", note_id, r, note_id))
            edges.append(_edge("backlink", "note", r, note_id, note_id))
            # plus the default relations at note scope
            edges.extend(_derived_edges(defaults, note_type, "note", note_id, r, note_id))

    # --- 5) dedupe ---
    # A later edge with the same key replaces the earlier payload but keeps
    # the position of the first occurrence (dict insertion order).
    dedup = {}
    for e in edges:
        dedup[(e["kind"], e["source_id"], e["target_id"], e.get("scope", ""))] = e
    return list(dedup.values())
|