mindnet/app/core/derive_edges.py
Lars 4eb5e34ea7
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 3s
app/core/derive_edges.py aktualisiert
2025-11-08 15:21:17 +01:00

143 lines
4.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Modul: app/core/derive_edges.py
Version: 1.5.0
Datum: 2025-11-08
Change
------
- Optional type-registry integration: if the note type's default edges include
  "references", note-scope references/backlinks are enabled **additively**,
  even when `include_note_scope_refs=False` is passed.
  (No breaking changes: the existing parameters are kept.)
  All other logic (belongs_to/prev/next and chunk-scope references) is unchanged.
"""
from __future__ import annotations
from typing import Dict, List, Optional, Iterable
# WICHTIG: benutze die Parser-Extraktion für saubere Wikilinks
from app.core.parser import extract_wikilinks
# Optional type registry. If it cannot be imported or loaded, a stub lookup
# and a minimal registry take its place so this module still works.
# NOTE(review): the original comment described this fallback as "disabled",
# yet the stub reports "references" as an edge default for every type, which
# switches on note-scope references in build_edges_for_note -- confirm that
# this is intended.
try:
    from app.core.type_registry import get_type_config, load_type_registry

    _REG = load_type_registry()  # loaded once at import time (process-wide cache)
except Exception:  # pragma: no cover
    def get_type_config(_t, _r):  # type: ignore
        """Stub lookup: ignore both arguments and return the default config."""
        return {"edge_defaults": ["references"]}

    _REG = {"types": {"concept": {"edge_defaults": ["references"]}}}
def _get(d: dict, *keys, default=None):
for k in keys:
if k in d and d[k] is not None:
return d[k]
return default
def _chunk_text_for_refs(chunk: dict) -> str:
# bevorzugt 'window' → dann 'text' → 'content' → 'raw'
return (
_get(chunk, "window")
or _get(chunk, "text")
or _get(chunk, "content")
or _get(chunk, "raw")
or ""
)
def _dedupe(seq: Iterable[str]) -> List[str]:
seen = set()
out: List[str] = []
for s in seq:
if s not in seen:
seen.add(s)
out.append(s)
return out
def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, extra: Optional[dict] = None) -> dict:
pl = {
"kind": kind,
"scope": scope, # "chunk" | "note"
"source_id": source_id,
"target_id": target_id,
"note_id": note_id, # Träger/Quelle der Kante (aktuelle Note)
}
if extra:
pl.update(extra)
return pl
def build_edges_for_note(
    note_id: str,
    chunks: List[dict],
    note_level_references: Optional[List[str]] = None,
    include_note_scope_refs: bool = False,
) -> List[dict]:
    """Build the edge payloads for one note.

    Edges are emitted in this order:

    1. ``belongs_to`` (chunk -> note) for every chunk that has an id.
    2. ``next`` / ``prev`` between each pair of adjacent chunks where both
       chunks have an id.
    3. Chunk-scope ``references``: one edge per link that
       ``extract_wikilinks`` finds in the chunk's text (the 'window' field
       is preferred over 'text', 'content' and 'raw').
    4. Note-scope ``references`` plus the matching ``backlink``, built from
       the de-duplicated union of all chunk links and
       *note_level_references*. This step runs when
       *include_note_scope_refs* is true, or additively when the type
       config of the note type lists "references" in its ``edge_defaults``.

    Args:
        note_id: Id of the note the chunks belong to.
        chunks: Chunk payload dicts; the chunk id is read from "chunk_id"
            or "id", the note type from "type".
        note_level_references: Optional extra reference targets for the
            note-scope edges; entries that are not non-empty strings are
            ignored.
        include_note_scope_refs: Request note-scope references/backlinks
            explicitly.

    Returns:
        The list of edge dicts in the order described above.
    """
    # The note type is taken from the first chunk carrying a non-blank
    # string under "type"; it stays None when no chunk has one.
    note_type = next(
        (
            raw_type.strip().lower()
            for raw_type in (chunk.get("type") for chunk in chunks)
            if isinstance(raw_type, str) and raw_type.strip()
        ),
        None,
    )
    type_cfg = get_type_config(note_type, _REG)
    registry_wants_refs = any(
        isinstance(name, str) and name == "references"
        for name in (type_cfg.get("edge_defaults") or [])
    )
    want_note_scope_refs = bool(include_note_scope_refs) or registry_wants_refs

    # Resolve every chunk id once; entries are falsy for chunks without an id.
    chunk_ids = [_get(chunk, "chunk_id", "id") for chunk in chunks]

    # belongs_to: chunk -> note
    edges: List[dict] = [
        _edge("belongs_to", "chunk", cid, note_id, note_id, {"chunk_id": cid})
        for cid in chunk_ids
        if cid
    ]

    # next / prev between adjacent chunks; a pair is skipped when either
    # side has no id.
    for left_id, right_id in zip(chunk_ids, chunk_ids[1:]):
        if left_id and right_id:
            edges.append(_edge("next", "chunk", left_id, right_id, note_id, {"chunk_id": left_id}))
            edges.append(_edge("prev", "chunk", right_id, left_id, note_id, {"chunk_id": right_id}))

    # Chunk-scope references, extracted with the parser's wikilink logic.
    collected_links: List[str] = []
    for chunk, cid in zip(chunks, chunk_ids):
        if not cid:
            continue
        links = extract_wikilinks(_chunk_text_for_refs(chunk))
        edges.extend(
            _edge("references", "chunk", cid, link, note_id, {"chunk_id": cid, "ref_text": link})
            for link in links
        )
        collected_links.extend(links)

    if not want_note_scope_refs:
        return edges

    # Note-scope references/backlinks over the de-duplicated link set.
    explicit_links = [
        link for link in (note_level_references or []) if isinstance(link, str) and link
    ]
    for target in _dedupe(collected_links + explicit_links):
        edges.append(_edge("references", "note", note_id, target, note_id))
        edges.append(_edge("backlink", "note", target, note_id, note_id))
    return edges