From 44e468fc212906d98f7caee2d8a1b3e60123682f Mon Sep 17 00:00:00 2001 From: Lars Date: Sat, 8 Nov 2025 07:59:24 +0100 Subject: [PATCH] app/core/derive_edges.py aktualisiert --- app/core/derive_edges.py | 73 ++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 40 deletions(-) diff --git a/app/core/derive_edges.py b/app/core/derive_edges.py index 82ae58b..3bccd32 100644 --- a/app/core/derive_edges.py +++ b/app/core/derive_edges.py @@ -2,40 +2,33 @@ # -*- coding: utf-8 -*- """ Modul: app/core/derive_edges.py -Version: 1.4.0 -Datum: 2025-10-01 +Version: 1.9.0 +Datum: 2025-11-07 Zweck ----- Robuste Kantenbildung für mindnet (Notes/Chunks): -- belongs_to (chunk -> note) -- next / prev (chunk-Kette) -- references (chunk-scope) aus Chunk.window/text -- optional references/backlink (note-scope) + • belongs_to (chunk → note) + • next / prev (Chunk-Sequenz) + • references (chunk-scope) aus Chunk.window/text + • optional note-scope references/backlink (Flag) + • optional Default-Kanten pro Note-Type aus Type-Registry (falls vorhanden) -Wichtig: Wikilinks werden mit der Parser-Funktion `extract_wikilinks` extrahiert, -damit Varianten wie [[id#anchor]] oder [[id|label]] korrekt auf 'id' reduziert werden. - -Erwartete Chunk-Payload-Felder: - { - "note_id": "...", - "chunk_id": "...", # Alias "id" ist zulässig - "id": "...", - "chunk_index": int, - "seq": int, - "window": str, - "text": str, - "path": "rel/path.md", - ... - } +Abwärtskompatibel zu v1.4.0 (keine Pflicht auf Registry). """ from __future__ import annotations from typing import Dict, List, Optional, Iterable -# WICHTIG: benutze die Parser-Extraktion für saubere Wikilinks from app.core.parser import extract_wikilinks +# Type-Registry (optional) +try: + from app.core.type_registry import get_edge_defaults # type: ignore +except Exception: + def get_edge_defaults(_note_type: str) -> List[str]: + return [] + def _get(d: dict, *keys, default=None): for k in keys: if k in d and d[k] is not None: @@ -43,7 +36,6 @@ def _get(d: dict, *keys, default=None): return default def _chunk_text_for_refs(chunk: dict) -> str: - # bevorzugt 'window' → dann 'text' → 'content' → 'raw' return ( _get(chunk, "window") or _get(chunk, "text") @@ -62,13 +54,7 @@ def _dedupe(seq: Iterable[str]) -> List[str]: return out def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, extra: Optional[dict] = None) -> dict: - pl = { - "kind": kind, - "scope": scope, # "chunk" | "note" - "source_id": source_id, - "target_id": target_id, - "note_id": note_id, # Träger/Quelle der Kante (aktuelle Note) - } + pl = {"kind": kind, "scope": scope, "source_id": source_id, "target_id": target_id, "note_id": note_id} if extra: pl.update(extra) return pl @@ -79,14 +65,6 @@ def build_edges_for_note( note_level_references: Optional[List[str]] = None, include_note_scope_refs: bool = False, ) -> List[dict]: - """ - Erzeugt Kanten für eine Note. - - - belongs_to: für jeden Chunk (chunk -> note) - - next / prev: zwischen aufeinanderfolgenden Chunks - - references: pro Chunk aus window/text - - optional note-scope references/backlinks: dedupliziert über alle Chunk-Funde + note_level_references - """ edges: List[dict] = [] # belongs_to @@ -106,14 +84,14 @@ def build_edges_for_note( edges.append(_edge("next", "chunk", a_id, b_id, note_id, {"chunk_id": a_id})) edges.append(_edge("prev", "chunk", b_id, a_id, note_id, {"chunk_id": b_id})) - # references (chunk-scope) – Links aus window bevorzugen (Overlap-fest) + # references (chunk-scope) refs_all: List[str] = [] for ch in chunks: cid = _get(ch, "chunk_id", "id") if not cid: continue txt = _chunk_text_for_refs(ch) - refs = extract_wikilinks(txt) # <— Parser-Logik, kompatibel zu deinem System + refs = extract_wikilinks(txt) for r in refs: edges.append(_edge("references", "chunk", cid, r, note_id, {"chunk_id": cid, "ref_text": r})) refs_all.extend(refs) @@ -128,4 +106,19 @@ def build_edges_for_note( edges.append(_edge("references", "note", note_id, r, note_id)) edges.append(_edge("backlink", "note", r, note_id, note_id)) + # optional: Default-Kanten aus Registry (no-op, wenn leer) + # Beispiel: task → depends_on, concept → related_to etc. + # Wir erzeugen nur "formale" Kanten note→note_id selbst nicht; diese dienen Retri­ever-Gewichtung später. + # (Hier keine Targets – das sind Typ-Hinweise und werden als self-hints abgelegt.) + try: + note_type = "" + if chunks: + note_type = str(_get(chunks[0], "type", default="") or "") + defaults = get_edge_defaults(note_type) + for k in defaults: + # Self-hint-Kante (kann vom Retriever gewichtet werden). Scope 'note' + edges.append(_edge(k, "note", note_id, note_id, note_id)) + except Exception: + pass + return edges