mindnet/app/core/derive_edges.py
Lars 44e468fc21
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 2s
app/core/derive_edges.py aktualisiert
2025-11-08 07:59:24 +01:00

125 lines
4.0 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Module: app/core/derive_edges.py
Version: 1.9.0
Date: 2025-11-07

Purpose
-------
Robust edge derivation for mindnet (notes/chunks):
  • belongs_to (chunk → note)
  • next / prev (chunk sequence)
  • references (chunk scope) from Chunk.window/text
  • optional note-scope references/backlink (flag)
  • optional default edges per note type from the type registry (if available)

Backward compatible with v1.4.0 (the registry is not required).
"""
from __future__ import annotations
from typing import Dict, List, Optional, Iterable
from app.core.parser import extract_wikilinks
# Type-Registry (optional)
try:
    from app.core.type_registry import get_edge_defaults  # type: ignore
except Exception:
    # The registry is optional. Any failure while importing it (not only a
    # missing module) falls back to a stub so the rest of this module can call
    # get_edge_defaults unconditionally.
    def get_edge_defaults(_note_type: str) -> List[str]:
        """Fallback stub: report no default edge kinds for any note type."""
        return []
def _get(d: dict, *keys, default=None):
for k in keys:
if k in d and d[k] is not None:
return d[k]
return default
def _chunk_text_for_refs(chunk: dict) -> str:
    """Pick the chunk field that is scanned for references.

    The fields ``window``, ``text``, ``content`` and ``raw`` are tried in that
    order and the first truthy value wins. An empty string is returned when
    none of them holds a truthy value.
    """
    for field in ("window", "text", "content", "raw"):
        candidate = _get(chunk, field)
        if candidate:
            return candidate
    return ""
def _dedupe(seq: Iterable[str]) -> List[str]:
seen = set()
out: List[str] = []
for s in seq:
if s not in seen:
seen.add(s)
out.append(s)
return out
def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, extra: Optional[dict] = None) -> dict:
pl = {"kind": kind, "scope": scope, "source_id": source_id, "target_id": target_id, "note_id": note_id}
if extra:
pl.update(extra)
return pl
def build_edges_for_note(
    note_id: str,
    chunks: List[dict],
    note_level_references: Optional[List[str]] = None,
    include_note_scope_refs: bool = False,
) -> List[dict]:
    """Derive all edge payloads for one note and its chunks.

    Edges are appended in this fixed order:

    1. ``belongs_to`` (chunk -> note) for every chunk that has an id.
    2. ``next`` / ``prev`` between adjacent chunks; a pair is skipped when
       either of its chunks has no id.
    3. Chunk-scope ``references``: one edge per entry that
       ``extract_wikilinks`` returns for the chunk's reference text.
    4. Only when *include_note_scope_refs* is true: a note-scope
       ``references`` edge plus a ``backlink`` edge for every distinct target
       found in step 3 or listed in *note_level_references*.
    5. One note-scope self-edge (note -> same note) per edge kind that
       ``get_edge_defaults`` returns for the note type.

    A chunk's id is read from ``chunk_id``, falling back to ``id``. The note
    type is read from the ``type`` field of the first chunk (empty string if
    there are no chunks or the field is missing).

    Args:
        note_id: Id of the note the chunks belong to.
        chunks: Chunk dicts in document order.
        note_level_references: Additional reference targets for step 4;
            entries that are not non-empty strings are dropped. Ignored when
            *include_note_scope_refs* is false.
        include_note_scope_refs: Enables step 4.

    Returns:
        The list of edge payload dicts built by ``_edge``.
    """
    edges: List[dict] = []

    # Resolve every chunk id once; all chunk-level stages below reuse it.
    chunk_ids = [_get(ch, "chunk_id", "id") for ch in chunks]

    # belongs_to
    for cid in chunk_ids:
        if cid:
            edges.append(_edge("belongs_to", "chunk", cid, note_id, note_id, {"chunk_id": cid}))

    # next / prev between neighbouring chunks
    for a_id, b_id in zip(chunk_ids, chunk_ids[1:]):
        if a_id and b_id:
            edges.append(_edge("next", "chunk", a_id, b_id, note_id, {"chunk_id": a_id}))
            edges.append(_edge("prev", "chunk", b_id, a_id, note_id, {"chunk_id": b_id}))

    # references (chunk scope)
    refs_all: List[str] = []
    for ch, cid in zip(chunks, chunk_ids):
        if not cid:
            continue
        refs = extract_wikilinks(_chunk_text_for_refs(ch))
        for r in refs:
            edges.append(_edge("references", "chunk", cid, r, note_id, {"chunk_id": cid, "ref_text": r}))
        refs_all.extend(refs)

    # optional: note-scope references / backlinks
    if include_note_scope_refs:
        extra_refs = [r for r in (note_level_references or []) if isinstance(r, str) and r]
        for target in _dedupe(refs_all + extra_refs):
            edges.append(_edge("references", "note", note_id, target, note_id))
            edges.append(_edge("backlink", "note", target, note_id, note_id))

    # Optional: default edges from the type registry (no-op when it returns
    # nothing). Example: task -> depends_on, concept -> related_to, etc.
    # These are type hints without real targets, so each one is stored as a
    # note-scope self-edge; per the original author's note they are meant for
    # retriever weighting later on.
    try:
        note_type = str(_get(chunks[0], "type", default="") or "") if chunks else ""
        for default_kind in get_edge_defaults(note_type):
            edges.append(_edge(default_kind, "note", note_id, note_id, note_id))
    except Exception:
        # Best effort: a broken registry must not prevent the structural edges
        # above from being returned, but the failure should stay visible.
        import logging

        logging.getLogger(__name__).warning(
            "Skipping default edges for note %r: type registry lookup failed",
            note_id,
            exc_info=True,
        )
    return edges