mindnet/app/core/derive_edges.py
Lars 95b59e9b0a
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 2s
app/core/derive_edges.py aktualisiert
2025-11-17 11:09:05 +01:00

214 lines
7.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Module: app/core/derive_edges.py
Version: 1.5.0 (Mindnet V2)
Status: Stable

Goals
-----
1) Keep the proven edge derivation:
   - belongs_to (chunk -> note)
   - next / prev (chunk chain)
   - references (chunk scope) from Chunk.window/text via `extract_wikilinks`
2) Addition: type-based, derived edges from `config/types.yaml`:
   - For every reference found, additional relations are created from the
     note type's `edge_defaults` (e.g. "depends_on", "related_to").
   - Optionally symmetric relations (e.g. "related_to", "similar_to").
   - Dedupe stays compatible (key: kind, source_id, target_id, scope).

Notes
-----
- Markdown links are not re-parsed; the existing parser logic
  (`extract_wikilinks`) is kept to preserve compatibility.
- `edge_defaults` are applied to chunk-scope references and, if
  `include_note_scope_refs=True`, to note-scope references as well.
"""
from __future__ import annotations
from typing import Dict, List, Optional, Iterable, Set
import os
import yaml
# Wikilinks-Parser beibehalten (Kompatibilität!)
from app.core.parser import extract_wikilinks
# ---------------------------- Utilities ------------------------------------
def _get(d: dict, *keys, default=None):
for k in keys:
if k in d and d[k] is not None:
return d[k]
return default
def _chunk_text_for_refs(chunk: dict) -> str:
    """Pick the chunk field that is scanned for references.

    Fields are tried in priority order: 'window', then 'text', then
    'content', then 'raw'. The first truthy value wins; an empty string is
    returned when none of them holds a truthy value.
    """
    for field in ("window", "text", "content", "raw"):
        candidate = _get(chunk, field)
        if candidate:
            return candidate
    return ""
def _dedupe(seq: Iterable[str]) -> List[str]:
seen: Set[str] = set()
out: List[str] = []
for s in seq:
if s not in seen:
seen.add(s)
out.append(s)
return out
def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, extra: Optional[dict] = None) -> dict:
pl = {
"kind": kind,
"scope": scope, # "chunk" | "note"
"source_id": source_id,
"target_id": target_id,
"note_id": note_id, # Träger/Quelle der Kante (aktuelle Note)
}
if extra:
pl.update(extra)
return pl
# ---------------------- Typen-Registry (types.yaml) ------------------------
SYM_REL = {"related_to", "similar_to"} # symmetric relation kinds
def _env(n: str, default: Optional[str] = None) -> str:
v = os.getenv(n)
return v if v is not None else (default or "")
def _load_types_registry() -> dict:
    """Load the YAML type registry.

    The path is read from the ``MINDNET_TYPES_FILE`` environment variable
    and falls back to ``./config/types.yaml``.

    Returns:
        The parsed top-level mapping. An empty dict is returned when the
        file cannot be opened or parsed, when it is empty, or when its
        top-level node is not a mapping (e.g. a YAML list or scalar).
    """
    path = _env("MINDNET_TYPES_FILE", "./config/types.yaml")
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception:
        # Deliberately best-effort: a missing or broken registry must not
        # stop edge derivation; callers then simply get no edge_defaults.
        return {}
    # safe_load returns whatever the document's top-level node is (None,
    # list, str, ...). Only a mapping is a usable registry; anything else
    # would break callers that rely on the declared `dict` return type.
    return data if isinstance(data, dict) else {}
def _get_types_map(reg: dict) -> dict:
if isinstance(reg, dict) and isinstance(reg.get("types"), dict):
return reg["types"]
return reg if isinstance(reg, dict) else {}
def _edge_defaults_for(note_type: Optional[str], reg: dict) -> List[str]:
    """Return the ``edge_defaults`` list that applies to ``note_type``.

    Lookup order (the first section that has a list under "edge_defaults"
    wins, even if that list is empty):
      1) the entry for ``note_type`` in the types map of ``reg``
      2) ``reg['defaults']``, then ``reg['default']``, then ``reg['global']``
      3) ``[]``

    Args:
        note_type: Type name of the note; ``None``/empty skips step 1.
        reg: Parsed registry. A value that is not a dict is treated as an
            empty registry instead of raising.

    Returns:
        The string entries of the selected list; non-string items are
        dropped.
    """
    if not isinstance(reg, dict):
        # A registry whose top level is not a mapping has no usable
        # sections; without this guard `reg.get` below would raise.
        return []

    candidates = []
    if note_type:
        candidates.append(_get_types_map(reg).get(note_type))
    candidates.extend(reg.get(key) for key in ("defaults", "default", "global"))

    for section in candidates:
        if isinstance(section, dict) and isinstance(section.get("edge_defaults"), list):
            return [item for item in section["edge_defaults"] if isinstance(item, str)]
    return []
# --------------------------- Hauptfunktion ---------------------------------
def _default_relation_edges(
    defaults: List[str],
    scope: str,
    source_id: str,
    target_id: str,
    note_id: str,
    note_type: Optional[str],
    extra: Optional[dict] = None,
) -> List[dict]:
    """Return the ``edge_defaults``-derived edges for a single reference.

    For every relation in ``defaults`` except "references" one edge
    ``source_id -> target_id`` is produced. Relations listed in ``SYM_REL``
    additionally get the reverse edge ``target_id -> source_id``. Each
    derived edge carries ``rule_id`` and a fixed ``confidence`` of 0.7 on
    top of the fields in ``extra``.
    """
    derived: List[dict] = []
    for rel in defaults:
        if rel == "references":
            continue  # the real "references" edge is emitted by the caller
        payload = dict(extra or {})
        payload["rule_id"] = f"edge_defaults:{note_type}:{rel}"
        payload["confidence"] = 0.7
        derived.append(_edge(rel, scope, source_id, target_id, note_id, payload))
        if rel in SYM_REL:
            derived.append(_edge(rel, scope, target_id, source_id, note_id, payload))
    return derived


def build_edges_for_note(
    note_id: str,
    chunks: List[dict],
    note_level_references: Optional[List[str]] = None,
    include_note_scope_refs: bool = False,
) -> List[dict]:
    """Build all edges for one note.

    Produced edges, in this order:
      - belongs_to: one per chunk (chunk -> note)
      - next / prev: between consecutive chunks
      - references (chunk scope): one per wikilink that `extract_wikilinks`
        finds in the chunk's window/text, plus the type-based relations
        from the note type's ``edge_defaults`` for each such reference
      - if ``include_note_scope_refs`` is true: note-scope references and
        backlinks for the de-duplicated union of all chunk references and
        ``note_level_references``, again plus the ``edge_defaults``
        relations

    Args:
        note_id: Id of the note the edges belong to.
        chunks: Chunk dicts of the note in order. The id is read from
            "chunk_id" or "id"; chunks without an id are skipped. The note
            type is read from "type" of the first chunk. ``None`` is
            treated like an empty list.
        note_level_references: Extra reference targets that only feed the
            note-scope edges; non-string and empty entries are ignored.
        include_note_scope_refs: Enables the note-scope edges.

    Returns:
        The edges, de-duplicated on (kind, source_id, target_id, scope).
    """
    chunks = chunks or []
    edges: List[dict] = []

    # --- 0) note type (expected on the first chunk) ---
    note_type = _get(chunks[0], "type") if chunks else None

    # --- 1) belongs_to ---
    for ch in chunks:
        cid = _get(ch, "chunk_id", "id")
        if cid:
            edges.append(_edge("belongs_to", "chunk", cid, note_id, note_id, {"chunk_id": cid}))

    # --- 2) next/prev ---
    for a, b in zip(chunks, chunks[1:]):
        a_id = _get(a, "chunk_id", "id")
        b_id = _get(b, "chunk_id", "id")
        if not a_id or not b_id:
            continue
        edges.append(_edge("next", "chunk", a_id, b_id, note_id, {"chunk_id": a_id}))
        edges.append(_edge("prev", "chunk", b_id, a_id, note_id, {"chunk_id": b_id}))

    # --- 3) references (chunk scope) + derived relations per reference ---
    reg = _load_types_registry()
    defaults = _edge_defaults_for(note_type, reg)
    refs_all: List[str] = []
    for ch in chunks:
        cid = _get(ch, "chunk_id", "id")
        if not cid:
            continue
        refs = extract_wikilinks(_chunk_text_for_refs(ch))  # parser logic unchanged
        for r in refs:
            # the real reference found in the text
            edges.append(_edge("references", "chunk", cid, r, note_id, {"chunk_id": cid, "ref_text": r}))
            edges.extend(
                _default_relation_edges(defaults, "chunk", cid, r, note_id, note_type, {"chunk_id": cid})
            )
        refs_all.extend(refs)

    # --- 4) optional: note-scope references/backlinks (+ defaults) ---
    if include_note_scope_refs:
        refs_note = list(refs_all)
        if note_level_references:
            refs_note.extend(r for r in note_level_references if isinstance(r, str) and r)
        for r in _dedupe(refs_note):
            edges.append(_edge("references", "note", note_id, r, note_id))
            edges.append(_edge("backlink", "note", r, note_id, note_id))
            edges.extend(_default_relation_edges(defaults, "note", note_id, r, note_id, note_type))

    # --- 5) dedupe: position of the first occurrence, payload of the last ---
    dedup = {}
    for e in edges:
        dedup[(e["kind"], e["source_id"], e["target_id"], e.get("scope", ""))] = e
    return list(dedup.values())