WP4d #16
|
|
@ -1,10 +1,11 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/core/graph/graph_derive_edges.py
|
FILE: app/core/graph/graph_derive_edges.py
|
||||||
DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung.
|
DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung.
|
||||||
|
AUDIT: Integriert parse_link_target für saubere Graphen-Topologie.
|
||||||
"""
|
"""
|
||||||
from typing import List, Optional, Dict, Tuple
|
from typing import List, Optional, Dict, Tuple
|
||||||
from .graph_utils import (
|
from .graph_utils import (
|
||||||
_get, _edge, _mk_edge_id, _dedupe_seq,
|
_get, _edge, _mk_edge_id, _dedupe_seq, parse_link_target,
|
||||||
PROVENANCE_PRIORITY, load_types_registry, get_edge_defaults_for
|
PROVENANCE_PRIORITY, load_types_registry, get_edge_defaults_for
|
||||||
)
|
)
|
||||||
from .graph_extractors import (
|
from .graph_extractors import (
|
||||||
|
|
@ -53,47 +54,79 @@ def build_edges_for_note(
|
||||||
|
|
||||||
# Typed & Candidate Pool (WP-15b Integration)
|
# Typed & Candidate Pool (WP-15b Integration)
|
||||||
typed, rem = extract_typed_relations(raw)
|
typed, rem = extract_typed_relations(raw)
|
||||||
for k, t in typed:
|
for k, raw_t in typed:
|
||||||
edges.append(_edge(k, "chunk", cid, t, note_id, {
|
t, sec = parse_link_target(raw_t, note_id)
|
||||||
|
if not t: continue
|
||||||
|
|
||||||
|
payload = {
|
||||||
"chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", "inline:rel"),
|
"chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", "inline:rel"),
|
||||||
"provenance": "explicit", "rule_id": "inline:rel", "confidence": PROVENANCE_PRIORITY["inline:rel"]
|
"provenance": "explicit", "rule_id": "inline:rel", "confidence": PROVENANCE_PRIORITY["inline:rel"]
|
||||||
}))
|
}
|
||||||
|
if sec: payload["target_section"] = sec
|
||||||
|
|
||||||
|
edges.append(_edge(k, "chunk", cid, t, note_id, payload))
|
||||||
|
|
||||||
pool = ch.get("candidate_pool") or ch.get("candidate_edges") or []
|
pool = ch.get("candidate_pool") or ch.get("candidate_edges") or []
|
||||||
for cand in pool:
|
for cand in pool:
|
||||||
t, k, p = cand.get("to"), cand.get("kind", "related_to"), cand.get("provenance", "semantic_ai")
|
raw_t, k, p = cand.get("to"), cand.get("kind", "related_to"), cand.get("provenance", "semantic_ai")
|
||||||
|
t, sec = parse_link_target(raw_t, note_id)
|
||||||
if t:
|
if t:
|
||||||
edges.append(_edge(k, "chunk", cid, t, note_id, {
|
payload = {
|
||||||
"chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", f"candidate:{p}"),
|
"chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", f"candidate:{p}"),
|
||||||
"provenance": p, "rule_id": f"candidate:{p}", "confidence": PROVENANCE_PRIORITY.get(p, 0.90)
|
"provenance": p, "rule_id": f"candidate:{p}", "confidence": PROVENANCE_PRIORITY.get(p, 0.90)
|
||||||
}))
|
}
|
||||||
|
if sec: payload["target_section"] = sec
|
||||||
|
|
||||||
|
edges.append(_edge(k, "chunk", cid, t, note_id, payload))
|
||||||
|
|
||||||
# Callouts & Wikilinks
|
# Callouts & Wikilinks
|
||||||
call_pairs, rem2 = extract_callout_relations(rem)
|
call_pairs, rem2 = extract_callout_relations(rem)
|
||||||
for k, t in call_pairs:
|
for k, raw_t in call_pairs:
|
||||||
edges.append(_edge(k, "chunk", cid, t, note_id, {
|
t, sec = parse_link_target(raw_t, note_id)
|
||||||
|
if not t: continue
|
||||||
|
|
||||||
|
payload = {
|
||||||
"chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", "callout:edge"),
|
"chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", "callout:edge"),
|
||||||
"provenance": "explicit", "rule_id": "callout:edge", "confidence": PROVENANCE_PRIORITY["callout:edge"]
|
"provenance": "explicit", "rule_id": "callout:edge", "confidence": PROVENANCE_PRIORITY["callout:edge"]
|
||||||
}))
|
}
|
||||||
|
if sec: payload["target_section"] = sec
|
||||||
|
|
||||||
|
edges.append(_edge(k, "chunk", cid, t, note_id, payload))
|
||||||
|
|
||||||
refs = extract_wikilinks(rem2)
|
refs = extract_wikilinks(rem2)
|
||||||
for r in refs:
|
for raw_r in refs:
|
||||||
edges.append(_edge("references", "chunk", cid, r, note_id, {
|
r, sec = parse_link_target(raw_r, note_id)
|
||||||
"chunk_id": cid, "ref_text": r, "edge_id": _mk_edge_id("references", cid, r, "chunk", "explicit:wikilink"),
|
if not r: continue
|
||||||
|
|
||||||
|
payload = {
|
||||||
|
"chunk_id": cid, "ref_text": raw_r, "edge_id": _mk_edge_id("references", cid, r, "chunk", "explicit:wikilink"),
|
||||||
"provenance": "explicit", "rule_id": "explicit:wikilink", "confidence": PROVENANCE_PRIORITY["explicit:wikilink"]
|
"provenance": "explicit", "rule_id": "explicit:wikilink", "confidence": PROVENANCE_PRIORITY["explicit:wikilink"]
|
||||||
}))
|
}
|
||||||
|
if sec: payload["target_section"] = sec
|
||||||
|
|
||||||
|
edges.append(_edge("references", "chunk", cid, r, note_id, payload))
|
||||||
|
|
||||||
for rel in defaults:
|
for rel in defaults:
|
||||||
if rel != "references":
|
if rel != "references":
|
||||||
edges.append(_edge(rel, "chunk", cid, r, note_id, {
|
def_payload = {
|
||||||
"chunk_id": cid, "edge_id": _mk_edge_id(rel, cid, r, "chunk", f"edge_defaults:{rel}"),
|
"chunk_id": cid, "edge_id": _mk_edge_id(rel, cid, r, "chunk", f"edge_defaults:{rel}"),
|
||||||
"provenance": "rule", "rule_id": f"edge_defaults:{rel}", "confidence": PROVENANCE_PRIORITY["edge_defaults"]
|
"provenance": "rule", "rule_id": f"edge_defaults:{rel}", "confidence": PROVENANCE_PRIORITY["edge_defaults"]
|
||||||
}))
|
}
|
||||||
refs_all.extend(refs)
|
if sec: def_payload["target_section"] = sec
|
||||||
|
edges.append(_edge(rel, "chunk", cid, r, note_id, def_payload))
|
||||||
|
|
||||||
|
# Für Note-Scope Sammlung nutzen wir den Original-String zur Dedup, aber gesäubert
|
||||||
|
refs_all.extend([parse_link_target(r, note_id)[0] for r in refs])
|
||||||
|
|
||||||
# 3) Note-Scope & De-Duplizierung
|
# 3) Note-Scope & De-Duplizierung
|
||||||
if include_note_scope_refs:
|
if include_note_scope_refs:
|
||||||
refs_note = _dedupe_seq((refs_all or []) + (note_level_references or []))
|
# refs_all ist jetzt schon gesäubert (nur Targets)
|
||||||
|
# note_level_references müssen auch gesäubert werden
|
||||||
|
cleaned_note_refs = [parse_link_target(r, note_id)[0] for r in (note_level_references or [])]
|
||||||
|
refs_note = _dedupe_seq((refs_all or []) + cleaned_note_refs)
|
||||||
|
|
||||||
for r in refs_note:
|
for r in refs_note:
|
||||||
|
if not r: continue
|
||||||
edges.append(_edge("references", "note", note_id, r, note_id, {
|
edges.append(_edge("references", "note", note_id, r, note_id, {
|
||||||
"edge_id": _mk_edge_id("references", note_id, r, "note", "explicit:note_scope"),
|
"edge_id": _mk_edge_id("references", note_id, r, "note", "explicit:note_scope"),
|
||||||
"provenance": "explicit", "confidence": PROVENANCE_PRIORITY["explicit:note_scope"]
|
"provenance": "explicit", "confidence": PROVENANCE_PRIORITY["explicit:note_scope"]
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,11 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/core/graph/graph_utils.py
|
FILE: app/core/graph/graph_utils.py
|
||||||
DESCRIPTION: Basale Werkzeuge, ID-Generierung und Provenance-Konfiguration für den Graphen.
|
DESCRIPTION: Basale Werkzeuge, ID-Generierung und Provenance-Konfiguration für den Graphen.
|
||||||
|
AUDIT: Erweitert um parse_link_target für sauberes Section-Splitting (WP-Fix).
|
||||||
"""
|
"""
|
||||||
import os
|
import os
|
||||||
import hashlib
|
import hashlib
|
||||||
from typing import Iterable, List, Optional, Set, Any
|
from typing import Iterable, List, Optional, Set, Any, Tuple
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import yaml
|
import yaml
|
||||||
|
|
@ -59,6 +60,27 @@ def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, e
|
||||||
if extra: pl.update(extra)
|
if extra: pl.update(extra)
|
||||||
return pl
|
return pl
|
||||||
|
|
||||||
|
def parse_link_target(raw: str, current_note_id: Optional[str] = None) -> Tuple[str, Optional[str]]:
    """
    Split a link such as 'Note#Section' into its target id and section anchor.

    Only the first '#' separates target and section; any further '#'
    characters stay part of the section. Both parts are stripped of
    surrounding whitespace.

    Self-links ('#Section') have no target of their own; when
    ``current_note_id`` is given it is used as the target instead.

    Args:
        raw: The raw link text. Falsy values (``None``, ``""``) are accepted.
        current_note_id: Id substituted as the target for self-links.

    Returns:
        (target_id, target_section). ``target_id`` is ``""`` when no target
        could be determined; ``target_section`` is ``None`` when the link has
        no anchor or only an empty one (e.g. 'Note#').
    """
    if not raw:
        return "", None

    # partition() always yields three parts, so no length check is needed.
    head, _, tail = raw.partition("#")
    target = head.strip()
    # Normalise a missing or empty anchor to None so that "no section" has
    # exactly one representation for callers.
    section = tail.strip() or None

    # Self-link [[#Section]] -> the target becomes the current note.
    if not target and section and current_note_id:
        target = current_note_id

    return target, section
|
||||||
|
|
||||||
def load_types_registry() -> dict:
|
def load_types_registry() -> dict:
|
||||||
"""Lädt die YAML-Registry."""
|
"""Lädt die YAML-Registry."""
|
||||||
p = os.getenv("MINDNET_TYPES_FILE", "./config/types.yaml")
|
p = os.getenv("MINDNET_TYPES_FILE", "./config/types.yaml")
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,10 @@
|
||||||
"""
|
"""
|
||||||
FILE: app/models/dto.py
|
FILE: app/models/dto.py
|
||||||
DESCRIPTION: Pydantic-Modelle (DTOs) für Request/Response Bodies. Definiert das API-Schema.
|
DESCRIPTION: Pydantic-Modelle (DTOs) für Request/Response Bodies. Definiert das API-Schema.
|
||||||
VERSION: 0.6.6 (WP-22 Debug & Stability Update)
|
VERSION: 0.6.7 (WP-Fix: Target Section Support)
|
||||||
STATUS: Active
|
STATUS: Active
|
||||||
DEPENDENCIES: pydantic, typing, uuid
|
DEPENDENCIES: pydantic, typing, uuid
|
||||||
LAST_ANALYSIS: 2025-12-18
|
LAST_ANALYSIS: 2025-12-29
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
@ -43,6 +43,7 @@ class EdgeDTO(BaseModel):
|
||||||
direction: Literal["out", "in", "undirected"] = "out"
|
direction: Literal["out", "in", "undirected"] = "out"
|
||||||
provenance: Optional[Literal["explicit", "rule", "smart", "structure"]] = "explicit"
|
provenance: Optional[Literal["explicit", "rule", "smart", "structure"]] = "explicit"
|
||||||
confidence: float = 1.0
|
confidence: float = 1.0
|
||||||
|
target_section: Optional[str] = None # Neu: Speichert den Anker (z.B. #Abschnitt)
|
||||||
|
|
||||||
|
|
||||||
# --- Request Models ---
|
# --- Request Models ---
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user