diff --git a/app/core/graph/graph_derive_edges.py b/app/core/graph/graph_derive_edges.py index 284e789..d12c5e8 100644 --- a/app/core/graph/graph_derive_edges.py +++ b/app/core/graph/graph_derive_edges.py @@ -1,10 +1,11 @@ """ FILE: app/core/graph/graph_derive_edges.py DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung. + AUDIT: Integriert parse_link_target für saubere Graphen-Topologie. """ from typing import List, Optional, Dict, Tuple from .graph_utils import ( - _get, _edge, _mk_edge_id, _dedupe_seq, + _get, _edge, _mk_edge_id, _dedupe_seq, parse_link_target, PROVENANCE_PRIORITY, load_types_registry, get_edge_defaults_for ) from .graph_extractors import ( @@ -53,47 +54,79 @@ def build_edges_for_note( # Typed & Candidate Pool (WP-15b Integration) typed, rem = extract_typed_relations(raw) - for k, t in typed: - edges.append(_edge(k, "chunk", cid, t, note_id, { + for k, raw_t in typed: + t, sec = parse_link_target(raw_t, note_id) + if not t: continue + + payload = { "chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", "inline:rel"), "provenance": "explicit", "rule_id": "inline:rel", "confidence": PROVENANCE_PRIORITY["inline:rel"] - })) + } + if sec: payload["target_section"] = sec + + edges.append(_edge(k, "chunk", cid, t, note_id, payload)) pool = ch.get("candidate_pool") or ch.get("candidate_edges") or [] for cand in pool: - t, k, p = cand.get("to"), cand.get("kind", "related_to"), cand.get("provenance", "semantic_ai") + raw_t, k, p = cand.get("to"), cand.get("kind", "related_to"), cand.get("provenance", "semantic_ai") + t, sec = parse_link_target(raw_t, note_id) if t: - edges.append(_edge(k, "chunk", cid, t, note_id, { + payload = { "chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", f"candidate:{p}"), "provenance": p, "rule_id": f"candidate:{p}", "confidence": PROVENANCE_PRIORITY.get(p, 0.90) - })) + } + if sec: payload["target_section"] = sec + + edges.append(_edge(k, "chunk", cid, t, note_id, payload)) # Callouts & Wikilinks 
call_pairs, rem2 = extract_callout_relations(rem) - for k, t in call_pairs: - edges.append(_edge(k, "chunk", cid, t, note_id, { + for k, raw_t in call_pairs: + t, sec = parse_link_target(raw_t, note_id) + if not t: continue + + payload = { "chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", "callout:edge"), "provenance": "explicit", "rule_id": "callout:edge", "confidence": PROVENANCE_PRIORITY["callout:edge"] - })) + } + if sec: payload["target_section"] = sec + + edges.append(_edge(k, "chunk", cid, t, note_id, payload)) refs = extract_wikilinks(rem2) - for r in refs: - edges.append(_edge("references", "chunk", cid, r, note_id, { - "chunk_id": cid, "ref_text": r, "edge_id": _mk_edge_id("references", cid, r, "chunk", "explicit:wikilink"), + for raw_r in refs: + r, sec = parse_link_target(raw_r, note_id) + if not r: continue + + payload = { + "chunk_id": cid, "ref_text": raw_r, "edge_id": _mk_edge_id("references", cid, r, "chunk", "explicit:wikilink"), "provenance": "explicit", "rule_id": "explicit:wikilink", "confidence": PROVENANCE_PRIORITY["explicit:wikilink"] - })) + } + if sec: payload["target_section"] = sec + + edges.append(_edge("references", "chunk", cid, r, note_id, payload)) + for rel in defaults: if rel != "references": - edges.append(_edge(rel, "chunk", cid, r, note_id, { + def_payload = { "chunk_id": cid, "edge_id": _mk_edge_id(rel, cid, r, "chunk", f"edge_defaults:{rel}"), "provenance": "rule", "rule_id": f"edge_defaults:{rel}", "confidence": PROVENANCE_PRIORITY["edge_defaults"] - })) - refs_all.extend(refs) + } + if sec: def_payload["target_section"] = sec + edges.append(_edge(rel, "chunk", cid, r, note_id, def_payload)) + + # Für Note-Scope Sammlung nutzen wir den Original-String zur Dedup, aber gesäubert + refs_all.extend([parse_link_target(r, note_id)[0] for r in refs]) # 3) Note-Scope & De-Duplizierung if include_note_scope_refs: - refs_note = _dedupe_seq((refs_all or []) + (note_level_references or [])) + # refs_all ist jetzt schon 
def parse_link_target(raw: str, current_note_id: Optional[str] = None) -> Tuple[str, Optional[str]]:
    """Split a link such as ``'Note#Section'`` into target id and section.

    The string is split at the first ``#`` only, so any further ``#``
    characters stay part of the section. Both parts are stripped of
    surrounding whitespace.

    Self-links (``'#Section'``) have no target of their own; when
    ``current_note_id`` is given it is substituted as the target.

    Args:
        raw: The raw link text, e.g. ``'Note'``, ``'Note#Section'`` or
            ``'#Section'``.
        current_note_id: Id of the note containing the link; used to
            resolve self-links.

    Returns:
        A ``(target_id, target_section)`` tuple. ``target_id`` is ``""``
        when no target could be determined; ``target_section`` is ``None``
        when the link carries no (non-empty) section.
    """
    # Empty values and non-string values (e.g. malformed candidate entries)
    # cannot name a target; report "no target" so callers can skip them.
    if not raw or not isinstance(raw, str):
        return "", None

    target_part, _, section_part = raw.partition("#")
    target = target_part.strip()
    # A trailing '#' without any anchor text means "no section": normalise
    # the empty string to None so the Optional contract holds.
    section = section_part.strip() or None

    # Self-link [[#Section]] -> the target becomes the current note.
    if not target and section and current_note_id:
        target = current_note_id

    return target, section