2025-12-27 22:15:27 +01:00
9 changed files with 477 additions and 635 deletions
--- a/app/core/derive_edges.py
+++ b/app/core/derive_edges.py
@@ -1,394 +1,10 @@
 """
 FILE: app/core/derive_edges.py
-DESCRIPTION: Extrahiert Graph-Kanten aus Text. Unterstützt Wikilinks, Inline-Relations ([[rel:type|target]]) und Obsidian Callouts.
-             WP-15b: Integration des Candidate-Pools und Provenance-Priorisierung.
-             Sichert die Graph-Integrität durch confidence-basiertes De-Duplicating.
-VERSION: 2.1.0
-STATUS: Active
-DEPENDENCIES: re, os, yaml, typing, hashlib
-EXTERNAL_CONFIG: config/types.yaml
-LAST_ANALYSIS: 2025-12-26
+DESCRIPTION: Facade für das neue graph Package.
+             WP-14: Modularisierung abgeschlossen.
+VERSION: 2.2.0
 """
+from .graph.graph_derive_edges import build_edges_for_note
+from .graph.graph_utils import PROVENANCE_PRIORITY

-from __future__ import annotations
-
-import os
-import re
-import hashlib
-from typing import Iterable, List, Optional, Tuple, Set, Dict
-
-try:
-    import yaml  # optional, nur für types.yaml
-except Exception:  # pragma: no cover
-    yaml = None
-
-# --------------------------------------------------------------------------- #
-# 1. Utilities & ID Generation
-# --------------------------------------------------------------------------- #
-
-def _get(d: dict, *keys, default=None):
-    """Sicherer Zugriff auf verschachtelte Dictionary-Keys."""
-    for k in keys:
-        if isinstance(d, dict) and k in d and d[k] is not None:
-            return d[k]
-    return default
-
-def _chunk_text_for_refs(chunk: dict) -> str:
-    """Extrahiert den relevanten Text für die Referenzsuche (bevorzugt Window)."""
-    return (
-        _get(chunk, "window")
-        or _get(chunk, "text")
-        or _get(chunk, "content")
-        or _get(chunk, "raw")
-        or ""
-    )
-
-def _dedupe_seq(seq: Iterable[str]) -> List[str]:
-    """Dedupliziert eine Sequenz von Strings unter Beibehaltung der Reihenfolge."""
-    seen: Set[str] = set()
-    out: List[str] = []
-    for s in seq:
-        if s not in seen:
-            seen.add(s)
-            out.append(s)
-    return out
-
-def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, extra: Optional[dict] = None) -> dict:
-    """Konstruiert ein valides Kanten-Payload-Objekt für Qdrant."""
-    pl = {
-        "kind": kind,
-        "relation": kind,   # Alias für Abwärtskompatibilität (v2)
-        "scope": scope,     # "chunk" | "note"
-        "source_id": source_id,
-        "target_id": target_id,
-        "note_id": note_id, # Träger-Note der Kante
-    }
-    if extra:
-        pl.update(extra)
-    return pl
-
-def _mk_edge_id(kind: str, s: str, t: str, scope: str, rule_id: Optional[str] = None) -> str:
-    """Erzeugt eine deterministische 12-Byte ID mittels BLAKE2s."""
-    base = f"{kind}:{s}->{t}#{scope}"
-    if rule_id:
-        base += f"|{rule_id}"
-    try:
-        return hashlib.blake2s(base.encode("utf-8"), digest_size=12).hexdigest()
-    except Exception:  # pragma: no cover
-        return base
-
-# --------------------------------------------------------------------------- #
-# 2. Konfiguration & Provenance-Skala
-# --------------------------------------------------------------------------- #
-
-# WP-15b: Prioritäten-Ranking für die De-Duplizierung
-PROVENANCE_PRIORITY = {
-    "explicit:wikilink": 1.00,
-    "inline:rel": 0.95,
-    "callout:edge": 0.90,
-    "semantic_ai": 0.90,           # Validierte KI-Kanten
-    "structure:belongs_to": 1.00,
-    "structure:order": 0.95,       # next/prev
-    "explicit:note_scope": 1.00,
-    "derived:backlink": 0.90,
-    "edge_defaults": 0.70          # Heuristik (types.yaml)
-}
-
-def _env(n: str, default: Optional[str] = None) -> str:
-    v = os.getenv(n)
-    return v if v is not None else (default or "")
-
-def _load_types_registry() -> dict:
-    """Lädt die YAML-Registry zur Ermittlung von Standard-Kanten."""
-    p = _env("MINDNET_TYPES_FILE", "./config/types.yaml")
-    if not os.path.isfile(p) or yaml is None:
-        return {}
-    try:
-        with open(p, "r", encoding="utf-8") as f:
-            data = yaml.safe_load(f) or {}
-            return data
-    except Exception:
-        return {}
-
-def _get_types_map(reg: dict) -> dict:
-    if isinstance(reg, dict) and isinstance(reg.get("types"), dict):
-        return reg["types"]
-    return reg if isinstance(reg, dict) else {}
-
-def _edge_defaults_for(note_type: Optional[str], reg: dict) -> List[str]:
-    """Liefert die edge_defaults-Liste für den gegebenen Notiztyp."""
-    types_map = _get_types_map(reg)
-    if note_type and isinstance(types_map, dict):
-        t = types_map.get(note_type)
-        if isinstance(t, dict) and isinstance(t.get("edge_defaults"), list):
-            return [str(x) for x in t["edge_defaults"] if isinstance(x, str)]
-    for key in ("defaults", "default", "global"):
-        v = reg.get(key)
-        if isinstance(v, dict) and isinstance(v.get("edge_defaults"), list):
-            return [str(x) for x in v["edge_defaults"] if isinstance(x, str)]
-    return []
-
-# --------------------------------------------------------------------------- #
-# 3. Parser für Links / Relationen (Core Logik v2.0.0)
-# --------------------------------------------------------------------------- #
-
-# Normale Wikilinks (Fallback)
-_WIKILINK_RE = re.compile(r"\[\[(?:[^\|\]]+\|)?([a-zA-Z0-9_\-#:. ]+)\]\]")
-
-# Getypte Inline-Relationen
-_REL_PIPE  = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s*\|\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
-_REL_SPACE = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s+(?P<target>[^\]]+?)\s*\]\]",   re.IGNORECASE)
-_REL_TEXT  = re.compile(r"rel\s*:\s*(?P<kind>[a-z_]+)\s*\[\[\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
-
-def _extract_typed_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
-    """Extrahiert [[rel:KIND|Target]] und entfernt sie zur Vermeidung von Dubletten."""
-    pairs: List[Tuple[str,str]] = []
-    def _collect(m):
-        k = (m.group("kind") or "").strip().lower()
-        t = (m.group("target") or "").strip()
-        if k and t:
-            pairs.append((k, t))
-        return ""  # Link entfernen
-
-    text = _REL_PIPE.sub(_collect, text)
-    text = _REL_SPACE.sub(_collect, text)
-    text = _REL_TEXT.sub(_collect, text)
-    return pairs, text
-
-# Obsidian Callout Parser für mehrzeilige Blöcke
-_CALLOUT_START = re.compile(r"^\s*>\s*\[!edge\]\s*(.*)$", re.IGNORECASE)
-_REL_LINE      = re.compile(r"^(?P<kind>[a-z_]+)\s*:\s*(?P<targets>.+?)\s*$", re.IGNORECASE)
-_WIKILINKS_IN_LINE = re.compile(r"\[\[([^\]]+)\]\]")
-
-def _extract_callout_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
-    """Verarbeitet [!edge]-Callouts und entfernt diese aus dem Textfluss."""
-    if not text:
-        return [], text
-
-    lines = text.splitlines()
-    out_pairs: List[Tuple[str,str]] = []
-    keep_lines: List[str] = []
-    i = 0
-
-    while i < len(lines):
-        m = _CALLOUT_START.match(lines[i])
-        if not m:
-            keep_lines.append(lines[i])
-            i += 1
-            continue
-
-        block_lines: List[str] = []
-        first_rest = m.group(1) or ""
-        if first_rest.strip():
-            block_lines.append(first_rest)
-
-        i += 1
-        while i < len(lines) and lines[i].lstrip().startswith('>'):
-            block_lines.append(lines[i].lstrip()[1:].lstrip())
-            i += 1
-
-        for bl in block_lines:
-            mrel = _REL_LINE.match(bl)
-            if not mrel:
-                continue
-            kind = (mrel.group("kind") or "").strip().lower()
-            targets = mrel.group("targets") or ""
-            found = _WIKILINKS_IN_LINE.findall(targets)
-            if found:
-                for t in found:
-                    t = t.strip()
-                    if t:
-                        out_pairs.append((kind, t))
-            else:
-                for raw in re.split(r"[,;]", targets):
-                    t = raw.strip()
-                    if t:
-                        out_pairs.append((kind, t))
-        continue
-
-    remainder = "\n".join(keep_lines)
-    return out_pairs, remainder
-
-def _extract_wikilinks(text: str) -> List[str]:
-    """Extrahiert Standard-Wikilinks aus dem verbleibenden Text."""
-    ids: List[str] = []
-    for m in _WIKILINK_RE.finditer(text or ""):
-        ids.append(m.group(1).strip())
-    return ids
-
-# --------------------------------------------------------------------------- #
-# 4. Hauptfunktion (build_edges_for_note)
-# --------------------------------------------------------------------------- #
-
-def build_edges_for_note(
-    note_id: str,
-    chunks: List[dict],
-    note_level_references: Optional[List[str]] = None,
-    include_note_scope_refs: bool = False,
-) -> List[dict]:
-    """
-    Erzeugt und aggregiert alle Kanten für eine Note inklusive WP-15b Candidate-Processing.
-    Setzt Provenance-Ranking zur Graph-Stabilisierung ein.
-    """
-    edges: List[dict] = []
-    note_type = _get(chunks[0], "type") if chunks else "concept"
-
-    # 1) Struktur-Kanten: belongs_to (Chunk -> Note)
-    for ch in chunks:
-        cid = _get(ch, "chunk_id", "id")
-        if not cid:
-            continue
-        edges.append(_edge("belongs_to", "chunk", cid, note_id, note_id, {
-            "chunk_id": cid,
-            "edge_id": _mk_edge_id("belongs_to", cid, note_id, "chunk", "structure:belongs_to"),
-            "provenance": "structure",
-            "rule_id": "structure:belongs_to",
-            "confidence": PROVENANCE_PRIORITY["structure:belongs_to"],
-        }))
-
-    # 2) Struktur-Kanten: next / prev (Sequenz)
-    for i in range(len(chunks) - 1):
-        a, b = chunks[i], chunks[i + 1]
-        a_id = _get(a, "chunk_id", "id")
-        b_id = _get(b, "chunk_id", "id")
-        if not a_id or not b_id:
-            continue
-        edges.append(_edge("next", "chunk", a_id, b_id, note_id, {
-            "chunk_id": a_id,
-            "edge_id": _mk_edge_id("next", a_id, b_id, "chunk", "structure:order"),
-            "provenance": "structure",
-            "rule_id": "structure:order",
-            "confidence": PROVENANCE_PRIORITY["structure:order"],
-        }))
-        edges.append(_edge("prev", "chunk", b_id, a_id, note_id, {
-            "chunk_id": b_id,
-            "edge_id": _mk_edge_id("prev", b_id, a_id, "chunk", "structure:order"),
-            "provenance": "structure",
-            "rule_id": "structure:order",
-            "confidence": PROVENANCE_PRIORITY["structure:order"],
-        }))
-
-    # 3) Inhaltliche Kanten (Refs, Inlines, Callouts, Candidates)
-    reg = _load_types_registry()
-    defaults = _edge_defaults_for(note_type, reg)
-    refs_all: List[str] = []
-
-    for ch in chunks:
-        cid = _get(ch, "chunk_id", "id")
-        if not cid:
-            continue
-        raw = _chunk_text_for_refs(ch)
-
-        # 3a) Typed Inline Relations
-        typed, remainder = _extract_typed_relations(raw)
-        for kind, target in typed:
-            k = kind.strip().lower()
-            if not k or not target: continue
-            edges.append(_edge(k, "chunk", cid, target, note_id, {
-                "chunk_id": cid,
-                "edge_id": _mk_edge_id(k, cid, target, "chunk", "inline:rel"),
-                "provenance": "explicit",
-                "rule_id": "inline:rel",
-                "confidence": PROVENANCE_PRIORITY["inline:rel"],
-            }))
-
-        # 3b) WP-15b Candidate Pool Integration (KI-validierte Kanten)
-        # Verarbeitet Kanten, die bereits in der Ingestion semantisch geprüft wurden.
-        pool = ch.get("candidate_pool") or ch.get("candidate_edges") or []
-        for cand in pool:
-            target = cand.get("to")
-            kind = cand.get("kind", "related_to")
-            prov = cand.get("provenance", "semantic_ai")
-            if not target: continue
-            edges.append(_edge(kind, "chunk", cid, target, note_id, {
-                "chunk_id": cid,
-                "edge_id": _mk_edge_id(kind, cid, target, "chunk", f"candidate:{prov}"),
-                "provenance": prov,
-                "rule_id": f"candidate:{prov}",
-                "confidence": PROVENANCE_PRIORITY.get(prov, 0.90),
-            }))
-
-        # 3c) Obsidian Callouts
-        call_pairs, remainder2 = _extract_callout_relations(remainder)
-        for kind, target in call_pairs:
-            k = (kind or "").strip().lower()
-            if not k or not target: continue
-            edges.append(_edge(k, "chunk", cid, target, note_id, {
-                "chunk_id": cid,
-                "edge_id": _mk_edge_id(k, cid, target, "chunk", "callout:edge"),
-                "provenance": "explicit",
-                "rule_id": "callout:edge",
-                "confidence": PROVENANCE_PRIORITY["callout:edge"],
-            }))
-
-        # 3d) Standard-Wikilinks -> references (+ defaults)
-        refs = _extract_wikilinks(remainder2)
-        for r in refs:
-            edges.append(_edge("references", "chunk", cid, r, note_id, {
-                "chunk_id": cid,
-                "ref_text": r,
-                "edge_id": _mk_edge_id("references", cid, r, "chunk", "explicit:wikilink"),
-                "provenance": "explicit",
-                "rule_id": "explicit:wikilink",
-                "confidence": PROVENANCE_PRIORITY["explicit:wikilink"],
-            }))
-            # Regelbasierte Kanten aus types.yaml anhängen
-            for rel in defaults:
-                if rel == "references": continue
-                edges.append(_edge(rel, "chunk", cid, r, note_id, {
-                    "chunk_id": cid,
-                    "edge_id": _mk_edge_id(rel, cid, r, "chunk", f"edge_defaults:{note_type}:{rel}"),
-                    "provenance": "rule",
-                    "rule_id": f"edge_defaults:{note_type}:{rel}",
-                    "confidence": PROVENANCE_PRIORITY["edge_defaults"],
-                }))
-
-        refs_all.extend(refs)
-
-    # 4) Optionale Note-Scope Referenzen & Backlinks
-    if include_note_scope_refs:
-        refs_note = list(refs_all or [])
-        if note_level_references:
-            refs_note.extend([r for r in note_level_references if isinstance(r, str) and r])
-        refs_note = _dedupe_seq(refs_note)
-        
-        for r in refs_note:
-            edges.append(_edge("references", "note", note_id, r, note_id, {
-                "edge_id": _mk_edge_id("references", note_id, r, "note", "explicit:note_scope"),
-                "provenance": "explicit",
-                "rule_id": "explicit:note_scope",
-                "confidence": PROVENANCE_PRIORITY["explicit:note_scope"],
-            }))
-            # Backlink-Erzeugung zur Graphen-Stärkung
-            edges.append(_edge("backlink", "note", r, note_id, note_id, {
-                "edge_id": _mk_edge_id("backlink", r, note_id, "note", "derived:backlink"),
-                "provenance": "rule",
-                "rule_id": "derived:backlink",
-                "confidence": PROVENANCE_PRIORITY["derived:backlink"],
-            }))
-            for rel in defaults:
-                if rel == "references": continue
-                edges.append(_edge(rel, "note", note_id, r, note_id, {
-                    "edge_id": _mk_edge_id(rel, note_id, r, "note", f"edge_defaults:{note_type}:{rel}"),
-                    "provenance": "rule",
-                    "rule_id": f"edge_defaults:{note_type}:{rel}",
-                    "confidence": PROVENANCE_PRIORITY["edge_defaults"],
-                }))
-
-    # 5) WP-15b: Confidence-basierte De-Duplizierung
-    # Wenn dieselbe Relation mehrfach existiert, gewinnt die mit der höchsten Confidence.
-    unique_map: Dict[Tuple[str, str, str], dict] = {}
-    
-    for e in edges:
-        s, t = str(e.get("source_id")), str(e.get("target_id"))
-        rel = str(e.get("relation") or e.get("kind") or "edge")
-        key = (s, t, rel)
-        
-        if key not in unique_map:
-            unique_map[key] = e
-        else:
-            # Vergleich der Vertrauenswürdigkeit (Provenance Ranking)
-            if e.get("confidence", 0) > unique_map[key].get("confidence", 0):
-                unique_map[key] = e
-                
-    return list(unique_map.values())
+__all__ = ["build_edges_for_note", "PROVENANCE_PRIORITY"]
--- a/app/core/graph/__init__.py
+++ b/app/core/graph/__init__.py
@@ -0,0 +1,16 @@
+"""
+FILE: app/core/graph/__init__.py
+DESCRIPTION: Unified Graph Package. Exportiert Kanten-Ableitung und Graph-Adapter.
+"""
+from .graph_derive_edges import build_edges_for_note
+from .graph_utils import PROVENANCE_PRIORITY
+from .graph_subgraph import Subgraph, expand
+from .graph_weights import EDGE_BASE_WEIGHTS
+
+__all__ = [
+    "build_edges_for_note", 
+    "PROVENANCE_PRIORITY", 
+    "Subgraph", 
+    "expand", 
+    "EDGE_BASE_WEIGHTS"
+]
--- a/app/core/graph/graph_db_adapter.py
+++ b/app/core/graph/graph_db_adapter.py
@@ -0,0 +1,56 @@
+"""
+FILE: app/core/graph/graph_db_adapter.py
+DESCRIPTION: Datenbeschaffung aus Qdrant für den Graphen.
+"""
+from typing import List, Dict, Optional
+from qdrant_client import QdrantClient
+from qdrant_client.http import models as rest
+from app.core.qdrant import collection_names
+
+def fetch_edges_from_qdrant(
+    client: QdrantClient,
+    prefix: str,
+    seeds: List[str],
+    edge_types: Optional[List[str]] = None,
+    limit: int = 2048,
+) -> List[Dict]:
+    """
+    Fetch edge payloads that touch any of the given seed IDs.
+
+    An edge qualifies when one of its ``source_id``, ``target_id`` or
+    ``note_id`` payload fields equals a seed (compared as ``str``). If
+    ``edge_types`` is given, the edge's ``kind`` must additionally be one
+    of them.
+
+    Args:
+        client: Qdrant client used for the scroll request.
+        prefix: Collection prefix passed to ``collection_names``.
+        seeds: IDs to look up; an empty list short-circuits to ``[]``.
+        edge_types: Optional whitelist of edge kinds.
+        limit: Maximum number of points requested; ``<= 0`` returns ``[]``.
+
+    Returns:
+        The non-empty payloads of the returned points as plain dicts. Only
+        a single scroll call is made; the second element of its result is
+        discarded.
+    """
+    if not seeds or limit <= 0:
+        return []
+
+    _first, _second, edges_collection = collection_names(prefix)
+
+    def _equals(field: str, value) -> rest.FieldCondition:
+        # Exact-match condition on one payload field.
+        return rest.FieldCondition(key=field, match=rest.MatchValue(value=str(value)))
+
+    sub_filters = []
+
+    # Any seed may appear in any of the three ID fields (logical OR).
+    seed_matches = [
+        _equals(field, seed)
+        for field in ("source_id", "target_id", "note_id")
+        for seed in seeds
+    ]
+    if seed_matches:
+        sub_filters.append(rest.Filter(should=seed_matches))
+
+    # Optional restriction to the requested edge kinds (logical OR).
+    if edge_types:
+        sub_filters.append(
+            rest.Filter(should=[_equals("kind", kind) for kind in edge_types])
+        )
+
+    # Both groups must hold at the same time (logical AND).
+    scroll_filter = rest.Filter(must=sub_filters) if sub_filters else None
+
+    points, _unused = client.scroll(
+        collection_name=edges_collection,
+        scroll_filter=scroll_filter,
+        limit=limit,
+        with_payload=True,
+        with_vectors=False,
+    )
+
+    payloads = []
+    for point in points:
+        if point.payload:
+            payloads.append(dict(point.payload))
+    return payloads
--- a/app/core/graph/graph_derive_edges.py
+++ b/app/core/graph/graph_derive_edges.py
@@ -0,0 +1,112 @@
+"""
+FILE: app/core/graph/graph_derive_edges.py
+DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung.
+"""
+from typing import List, Optional, Dict, Tuple
+from .graph_utils import (
+    _get, _edge, _mk_edge_id, _dedupe_seq, 
+    PROVENANCE_PRIORITY, load_types_registry, get_edge_defaults_for
+)
+from .graph_extractors import (
+    extract_typed_relations, extract_callout_relations, extract_wikilinks
+)
+
+def build_edges_for_note(
+    note_id: str,
+    chunks: List[dict],
+    note_level_references: Optional[List[str]] = None,
+    include_note_scope_refs: bool = False,
+) -> List[dict]:
+    """
+    Build and de-duplicate all edge payloads for one note (WP-15b).
+
+    Edges are collected from, in order: structural links (belongs_to,
+    next/prev), typed inline relations, the chunk's candidate pool,
+    [!edge] callouts, plain wikilinks (plus the note type's edge_defaults)
+    and, optionally, note-scope references with backlinks.
+
+    Args:
+        note_id: ID of the note that carries the edges.
+        chunks: Chunk payloads. A chunk without "chunk_id"/"id" yields no
+            edges. The note type is the first chunk's "type" ("concept"
+            when ``chunks`` is empty).
+        note_level_references: Extra note-scope targets; entries that are
+            not non-empty strings are ignored.
+        include_note_scope_refs: Also emit note-scope references,
+            backlinks and edge_defaults.
+
+    Returns:
+        One payload per (source_id, target_id, kind). Among duplicates the
+        edge with the highest "confidence" wins; on ties the first one.
+    """
+    edges: List[dict] = []
+    prio = PROVENANCE_PRIORITY
+    note_type = _get(chunks[0], "type") if chunks else "concept"
+    defaults = get_edge_defaults_for(note_type, load_types_registry())
+
+    def add(kind, scope, source, target, rule_id, provenance, confidence, **extra) -> None:
+        """Append one edge payload; rule_id also feeds the hashed edge_id."""
+        extra.update(
+            edge_id=_mk_edge_id(kind, source, target, scope, rule_id),
+            provenance=provenance,
+            rule_id=rule_id,
+            confidence=confidence,
+        )
+        edges.append(_edge(kind, scope, source, target, note_id, extra))
+
+    def add_defaults(scope, source, target, **extra) -> None:
+        """Append the note type's edge_defaults (except "references") for one target."""
+        for rel in defaults:
+            if rel != "references":
+                # Keep note_type in the rule id: it is hashed into edge_id, so
+                # dropping it would change the IDs of previously derived edges.
+                add(rel, scope, source, target, f"edge_defaults:{note_type}:{rel}",
+                    "rule", prio["edge_defaults"], **extra)
+
+    # 1) Structural edges (belongs_to, next/prev)
+    for idx, ch in enumerate(chunks):
+        cid = _get(ch, "chunk_id", "id")
+        if not cid:
+            continue
+        add("belongs_to", "chunk", cid, note_id, "structure:belongs_to",
+            "structure", prio["structure:belongs_to"], chunk_id=cid)
+        next_id = _get(chunks[idx + 1], "chunk_id", "id") if idx + 1 < len(chunks) else None
+        if next_id:
+            add("next", "chunk", cid, next_id, "structure:order",
+                "structure", prio["structure:order"], chunk_id=cid)
+            add("prev", "chunk", next_id, cid, "structure:order",
+                "structure", prio["structure:order"], chunk_id=next_id)
+
+    # 2) Content edges
+    refs_all: List[str] = []
+    for ch in chunks:
+        cid = _get(ch, "chunk_id", "id")
+        if not cid:
+            continue
+        # Prefer the window; fall back through the other text fields.
+        raw = (
+            _get(ch, "window")
+            or _get(ch, "text")
+            or _get(ch, "content")
+            or _get(ch, "raw")
+            or ""
+        )
+
+        # Typed inline relations; they are removed from the text so they
+        # are not counted again as plain wikilinks.
+        typed, rem = extract_typed_relations(raw)
+        for k, t in typed:
+            if k and t:
+                add(k, "chunk", cid, t, "inline:rel", "explicit",
+                    prio["inline:rel"], chunk_id=cid)
+
+        # Candidate pool (WP-15b)
+        for cand in ch.get("candidate_pool") or ch.get("candidate_edges") or []:
+            t = cand.get("to")
+            if not t:
+                continue
+            k = cand.get("kind", "related_to")
+            p = cand.get("provenance", "semantic_ai")
+            add(k, "chunk", cid, t, f"candidate:{p}", p,
+                prio.get(p, 0.90), chunk_id=cid)
+
+        # [!edge] callouts
+        call_pairs, rem2 = extract_callout_relations(rem)
+        for k, t in call_pairs:
+            if k and t:
+                add(k, "chunk", cid, t, "callout:edge", "explicit",
+                    prio["callout:edge"], chunk_id=cid)
+
+        # Plain wikilinks -> references (+ rule-based defaults)
+        refs = extract_wikilinks(rem2)
+        for r in refs:
+            add("references", "chunk", cid, r, "explicit:wikilink", "explicit",
+                prio["explicit:wikilink"], chunk_id=cid, ref_text=r)
+            add_defaults("chunk", cid, r, chunk_id=cid)
+        refs_all.extend(refs)
+
+    # 3) Optional note-scope references, backlinks and defaults
+    if include_note_scope_refs:
+        extra_refs = [
+            r for r in (note_level_references or []) if isinstance(r, str) and r
+        ]
+        for r in _dedupe_seq(refs_all + extra_refs):
+            add("references", "note", note_id, r, "explicit:note_scope",
+                "explicit", prio["explicit:note_scope"])
+            add("backlink", "note", r, note_id, "derived:backlink",
+                "rule", prio["derived:backlink"])
+            add_defaults("note", note_id, r)
+
+    # 4) Confidence-based de-duplication per (source, target, kind)
+    unique_map: Dict[Tuple[str, str, str], dict] = {}
+    for e in edges:
+        key = (str(e.get("source_id")), str(e.get("target_id")), str(e.get("kind")))
+        best = unique_map.get(key)
+        if best is None or e.get("confidence", 0) > best.get("confidence", 0):
+            unique_map[key] = e
+
+    return list(unique_map.values())
--- a/app/core/graph/graph_extractors.py
+++ b/app/core/graph/graph_extractors.py
@@ -0,0 +1,55 @@
+"""
+FILE: app/core/graph/graph_extractors.py
+DESCRIPTION: Regex-basierte Extraktion von Relationen aus Text.
+"""
+import re
+from typing import List, Tuple
+
+# Plain wikilink "[[...]]". An optional "prefix|" part is skipped and the text
+# after it is captured; the capture is limited to ASCII letters, digits,
+# "_", "-", "#", ":", "." and spaces.
+_WIKILINK_RE = re.compile(r"\[\[(?:[^\|\]]+\|)?([a-zA-Z0-9_\-#:. ]+)\]\]")
+# Typed inline relations (case-insensitive), three spellings:
+#   [[rel:kind|target]]
+_REL_PIPE  = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s*\|\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
+#   [[rel:kind target]]
+_REL_SPACE = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s+(?P<target>[^\]]+?)\s*\]\]",   re.IGNORECASE)
+#   rel:kind [[target]]
+_REL_TEXT  = re.compile(r"rel\s*:\s*(?P<kind>[a-z_]+)\s*\[\[\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
+
+# First line of an edge callout: "> [!edge] <rest of line>"; the rest is captured.
+_CALLOUT_START = re.compile(r"^\s*>\s*\[!edge\]\s*(.*)$", re.IGNORECASE)
+# One relation line inside a callout: "kind: targets".
+_REL_LINE      = re.compile(r"^(?P<kind>[a-z_]+)\s*:\s*(?P<targets>.+?)\s*$", re.IGNORECASE)
+# Any "[[...]]" inside a targets string; the inner text is captured unrestricted.
+_WIKILINKS_IN_LINE = re.compile(r"\[\[([^\]]+)\]\]")
+
+def extract_typed_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
+    """
+    Pull typed inline relations out of ``text``.
+
+    Recognises ``[[rel:kind|target]]``, ``[[rel:kind target]]`` and
+    ``rel:kind [[target]]``. Every match is removed from the text so that
+    it is not picked up again as a plain wikilink.
+
+    Returns:
+        ``(pairs, remainder)``: ``pairs`` holds ``(kind, target)`` tuples
+        with lower-cased kind, and ``remainder`` is the text without the
+        matched relations.
+    """
+    found: List[Tuple[str, str]] = []
+
+    def _consume(match) -> str:
+        kind = (match.group("kind") or "").strip().lower()
+        target = (match.group("target") or "").strip()
+        if kind and target:
+            found.append((kind, target))
+        # Replace the relation with nothing, whether or not it was usable.
+        return ""
+
+    for pattern in (_REL_PIPE, _REL_SPACE, _REL_TEXT):
+        text = pattern.sub(_consume, text)
+    return found, text
+
+def extract_callout_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
+    """
+    Extract relations from Obsidian ``[!edge]`` callouts.
+
+    A callout starts on a line matching ``> [!edge] ...`` and continues
+    over all directly following lines that begin with ``>``. Each callout
+    line of the form ``kind: targets`` contributes one pair per target;
+    targets are either wikilinks or, if the line has none, a comma or
+    semicolon separated list. Blank targets are dropped.
+
+    Returns:
+        ``(pairs, remainder)``: ``pairs`` holds ``(kind, target)`` tuples
+        with lower-cased kind, and ``remainder`` is the input without the
+        callout lines, re-joined with ``"\\n"``. Falsy input is returned
+        unchanged with an empty pair list.
+    """
+    if not text:
+        return [], text
+
+    lines = text.splitlines()
+    pairs: List[Tuple[str, str]] = []
+    kept: List[str] = []
+    i = 0
+    while i < len(lines):
+        start = _CALLOUT_START.match(lines[i])
+        if not start:
+            kept.append(lines[i])
+            i += 1
+            continue
+
+        # Callout body: text after the marker plus the following ">" lines.
+        first = start.group(1)
+        block = [first] if first.strip() else []
+        i += 1
+        while i < len(lines) and lines[i].lstrip().startswith(">"):
+            block.append(lines[i].lstrip()[1:].lstrip())
+            i += 1
+
+        for entry in block:
+            rel = _REL_LINE.match(entry)
+            if not rel:
+                continue
+            kind = rel.group("kind").strip().lower()
+            targets = rel.group("targets") or ""
+            links = _WIKILINKS_IN_LINE.findall(targets)
+            # Without wikilinks, fall back to a comma/semicolon separated list.
+            candidates = links if links else re.split(r"[,;]", targets)
+            # Skip blank targets such as "[[ ]]"; they would otherwise turn
+            # into edges with an empty target_id.
+            pairs.extend((kind, c.strip()) for c in candidates if c.strip())
+    return pairs, "\n".join(kept)
+
+def extract_wikilinks(text: str) -> List[str]:
+    """Return the stripped capture of every plain wikilink in ``text``, in order of appearance; falsy input gives ``[]``."""
+    targets: List[str] = []
+    for match in _WIKILINK_RE.finditer(text or ""):
+        targets.append(match.group(1).strip())
+    return targets
--- a/app/core/graph/graph_subgraph.py
+++ b/app/core/graph/graph_subgraph.py
@@ -0,0 +1,106 @@
+"""
+FILE: app/core/graph/graph_subgraph.py
+DESCRIPTION: In-Memory Repräsentation eines Graphen für Scoring und Analyse.
+"""
+import math
+from collections import defaultdict
+from typing import Dict, List, Optional, DefaultDict, Any, Set
+from qdrant_client import QdrantClient
+from .graph_weights import EDGE_BASE_WEIGHTS, calculate_edge_weight
+from .graph_db_adapter import fetch_edges_from_qdrant
+
+class Subgraph:
+    """In-memory edge index: forward/reverse adjacency lists plus degree counters."""
+
+    def __init__(self) -> None:
+        # node id -> outgoing records {"target", "kind", "weight"}
+        self.adj: DefaultDict[str, List[Dict]] = defaultdict(list)
+        # node id -> incoming records {"source", "kind", "weight"[, "via_context"]}
+        self.reverse_adj: DefaultDict[str, List[Dict]] = defaultdict(list)
+        self.in_degree: DefaultDict[str, int] = defaultdict(int)
+        self.out_degree: DefaultDict[str, int] = defaultdict(int)
+
+    def add_edge(self, e: Dict) -> None:
+        """
+        Index one edge given as a dict with "source", "target", "kind",
+        optional "weight" and optional "note_id".
+
+        Edges without source or target are ignored. A missing "weight"
+        falls back to ``EDGE_BASE_WEIGHTS[kind]`` (0.0 for unknown kinds).
+        If "note_id" is set and differs from the source, the edge is
+        additionally registered as outgoing from that note.
+        """
+        source, target = e.get("source"), e.get("target")
+        kind = e.get("kind")
+        weight = e.get("weight", EDGE_BASE_WEIGHTS.get(kind, 0.0))
+        owner = e.get("note_id")
+
+        if not (source and target):
+            return
+
+        def outgoing() -> Dict:
+            # Fresh dict per call so the stored records never alias each other.
+            return {"target": target, "kind": kind, "weight": weight}
+
+        def incoming(origin, **flags) -> Dict:
+            return {"source": origin, "kind": kind, "weight": weight, **flags}
+
+        # Forward index and degrees
+        self.adj[source].append(outgoing())
+        self.out_degree[source] += 1
+        self.in_degree[target] += 1
+
+        # Reverse index (WP-04b explanation)
+        self.reverse_adj[target].append(incoming(source))
+
+        # Context note: mirror the edge from the owning note
+        if not owner or owner == source:
+            return
+        self.adj[owner].append(outgoing())
+        self.out_degree[owner] += 1
+        if owner != target:
+            self.reverse_adj[target].append(incoming(owner, via_context=True))
+            # NOTE(review): this bumps the owner's in-degree although the
+            # mirrored edge points from owner to target - confirm whether
+            # in_degree[target] was intended.
+            self.in_degree[owner] += 1
+
+    def aggregate_edge_bonus(self, node_id: str) -> float:
+        """Sum of the weights of all outgoing edges of ``node_id`` (hub score)."""
+        total = 0
+        for record in self.adj.get(node_id, []):
+            total += record["weight"]
+        return total
+
+    def edge_bonus(self, node_id: str) -> float:
+        """Retriever-facing alias of ``aggregate_edge_bonus`` (WP-04a compatibility)."""
+        return self.aggregate_edge_bonus(node_id)
+
+    def centrality_bonus(self, node_id: str) -> float:
+        """Return ``log1p(in_degree) / 10`` capped at 0.15; 0.0 without incoming edges."""
+        incoming_count = self.in_degree.get(node_id, 0)
+        if incoming_count > 0:
+            return min(math.log1p(incoming_count) / 10.0, 0.15)
+        return 0.0
+
+    def get_outgoing_edges(self, node_id: str) -> List[Dict[str, Any]]:
+        """Outgoing edge records of ``node_id``; empty list for unknown nodes."""
+        return self.adj.get(node_id, [])
+
+    def get_incoming_edges(self, node_id: str) -> List[Dict[str, Any]]:
+        """Incoming edge records of ``node_id``; empty list for unknown nodes."""
+        return self.reverse_adj.get(node_id, [])
+
+
+def expand(
+    client: QdrantClient,
+    prefix: str,
+    seeds: List[str],
+    depth: int = 1,
+    edge_types: Optional[List[str]] = None,
+) -> Subgraph:
+    """
+    Build a Subgraph by following edges outward from ``seeds``.
+
+    Each round fetches the edges touching the current frontier, adds every
+    edge that has both source and target, and queues the targets not yet
+    visited for the next round.
+
+    Args:
+        client: Qdrant client handed to ``fetch_edges_from_qdrant``.
+        prefix: Collection prefix handed to ``fetch_edges_from_qdrant``.
+        seeds: Start node IDs.
+        depth: Number of rounds; values below 0 are treated as 0.
+        edge_types: Optional whitelist of edge kinds.
+
+    Returns:
+        The populated Subgraph (empty if nothing was fetched).
+    """
+    graph = Subgraph()
+    pending = set(seeds)
+    seen = set()
+
+    for _round in range(max(depth, 0)):
+        if not pending:
+            break
+
+        discovered: Set[str] = set()
+        fetched = fetch_edges_from_qdrant(client, prefix, list(pending), edge_types)
+
+        for payload in fetched:
+            source = payload.get("source_id")
+            target = payload.get("target_id")
+            if source and target:
+                graph.add_edge({
+                    "source": source,
+                    "target": target,
+                    "kind": payload.get("kind", "edge"),
+                    "weight": calculate_edge_weight(payload),
+                    "note_id": payload.get("note_id"),
+                })
+                # Only targets are followed into the next round.
+                if target not in seen:
+                    discovered.add(str(target))
+
+        seen.update(pending)
+        pending = discovered.difference(seen)
+
+    return graph
--- a/app/core/graph/graph_utils.py
+++ b/app/core/graph/graph_utils.py
@ -0,0 +1,81 @@
+"""
+FILE: app/core/graph/graph_utils.py
+DESCRIPTION: Basale Werkzeuge, ID-Generierung und Provenance-Konfiguration für den Graphen.
+"""
+import os
+import hashlib
+from typing import Iterable, List, Optional, Set, Any
+
+try:
+    import yaml
+except ImportError:
+    yaml = None
+
+# WP-15b: priority ranking used for de-duplication
+PROVENANCE_PRIORITY = {
+    "explicit:wikilink": 1.00,
+    "inline:rel": 0.95,
+    "callout:edge": 0.90,
+    "semantic_ai": 0.90,           # validated AI edges
+    "structure:belongs_to": 1.00,
+    "structure:order": 0.95,       # next/prev
+    "explicit:note_scope": 1.00,
+    "derived:backlink": 0.90,
+    "edge_defaults": 0.70          # heuristic (types.yaml)
+}
+
+def _get(d: dict, *keys, default=None):
+    """Return the value of the first key in ``keys`` that ``d`` maps to a non-None value.
+
+    The keys are alternatives tried in order, not a nested path.
+
+    Args:
+        d: Mapping to read from; anything that is not a ``dict`` yields ``default``.
+        *keys: Candidate keys, tried left to right.
+        default: Value returned when no candidate key holds a non-None value.
+    """
+    if not isinstance(d, dict):
+        return default
+    for candidate in keys:
+        if candidate not in d:
+            continue
+        value = d[candidate]
+        if value is not None:
+            return value
+    return default
+
+def _dedupe_seq(seq: Iterable[str]) -> List[str]:
+    """Return the items of ``seq`` without duplicates, keeping first-seen order."""
+    # dict keys are unique and keep insertion order, which gives exactly that.
+    return list(dict.fromkeys(seq))
+
+def _mk_edge_id(kind: str, s: str, t: str, scope: str, rule_id: Optional[str] = None) -> str:
+    """Derive a deterministic edge id from the edge's identifying fields.
+
+    The id is the hex form of a 12-byte BLAKE2s digest (24 hex characters)
+    over ``"{kind}:{s}->{t}#{scope}"``, with ``"|{rule_id}"`` appended when
+    ``rule_id`` is truthy.
+    """
+    fragments = [f"{kind}:{s}->{t}#{scope}"]
+    if rule_id:
+        fragments.append(f"|{rule_id}")
+    digest = hashlib.blake2s("".join(fragments).encode("utf-8"), digest_size=12)
+    return digest.hexdigest()
+
+def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, extra: Optional[dict] = None) -> dict:
+    """Assemble the payload dict for one edge.
+
+    ``kind`` is stored under both ``"kind"`` and ``"relation"``. Entries from
+    ``extra`` are merged in last, so they override the base fields on a key
+    clash.
+    """
+    payload = dict(
+        kind=kind,
+        relation=kind,
+        scope=scope,
+        source_id=source_id,
+        target_id=target_id,
+        note_id=note_id,
+    )
+    payload.update(extra or {})
+    return payload
+
+def load_types_registry() -> dict:
+    """Load the note-type registry from its YAML file.
+
+    The path comes from the ``MINDNET_TYPES_FILE`` environment variable and
+    defaults to ``./config/types.yaml``.
+
+    Returns:
+        The parsed top-level mapping. An empty dict is returned when the file
+        does not exist, PyYAML is not importable, reading or parsing fails, or
+        the document root is not a mapping.
+    """
+    path = os.getenv("MINDNET_TYPES_FILE", "./config/types.yaml")
+    if not os.path.isfile(path) or yaml is None:
+        return {}
+    try:
+        with open(path, "r", encoding="utf-8") as handle:
+            data = yaml.safe_load(handle)
+    except Exception:
+        # Deliberate best effort: a broken registry must not stop edge derivation.
+        return {}
+    # A YAML root that is a list or scalar would break callers relying on ``.get``.
+    return data if isinstance(data, dict) else {}
+
+def get_edge_defaults_for(note_type: Optional[str], reg: dict) -> List[str]:
+    """Return the default edge kinds configured for ``note_type``.
+
+    Lookup order:
+      1. ``reg["types"][note_type]["edge_defaults"]`` (``reg`` itself acts as
+         the type map when it has no ``"types"`` key),
+      2. the first of ``reg["defaults"]``, ``reg["default"]``, ``reg["global"]``
+         that carries an ``edge_defaults`` list.
+
+    Args:
+        note_type: Type name to look up; falsy values skip straight to the
+            global fallbacks.
+        reg: Parsed registry. Anything that is not a dict yields ``[]``.
+
+    Returns:
+        The string entries of the first matching ``edge_defaults`` list
+        (non-string entries are dropped), or ``[]`` when nothing matches.
+    """
+    # The fallback lookups below call ``reg.get``; a non-dict registry
+    # (for example ``None``) has no defaults to offer.
+    if not isinstance(reg, dict):
+        return []
+
+    candidates = []
+    types_map = reg.get("types", reg)
+    if note_type and isinstance(types_map, dict):
+        candidates.append(types_map.get(note_type))
+    candidates.extend(reg.get(key) for key in ("defaults", "default", "global"))
+
+    for entry in candidates:
+        if isinstance(entry, dict) and isinstance(entry.get("edge_defaults"), list):
+            return [str(x) for x in entry["edge_defaults"] if isinstance(x, str)]
+    return []
--- a/app/core/graph/graph_weights.py
+++ b/app/core/graph/graph_weights.py
@ -0,0 +1,39 @@
+"""
+FILE: app/core/graph/graph_weights.py
+DESCRIPTION: Definition der Basisgewichte und Berechnung der Kanteneffektivität.
+"""
+from typing import Dict
+
+# Base weights per edge type (WP-04a config)
+EDGE_BASE_WEIGHTS: Dict[str, float] = {
+    # Structural
+    "belongs_to":     0.10,
+    "next":           0.06,
+    "prev":           0.06,
+    "backlink":       0.04,
+    "references_at":  0.08,
+
+    # Knowledge
+    "references":     0.20,
+    "depends_on":     0.18,
+    "related_to":     0.15,
+    "similar_to":     0.12,
+}
+
+def calculate_edge_weight(pl: Dict) -> float:
+    """Compute the effective weight of an edge payload from its kind and confidence.
+
+    Args:
+        pl: Edge payload; ``"kind"`` (default ``"edge"``) and the optional
+            ``"confidence"`` entry are read.
+
+    Returns:
+        The base weight of the kind from ``EDGE_BASE_WEIGHTS`` (``0.0`` for an
+        unknown kind). When a confidence is present and convertible to float,
+        the base weight is scaled by that confidence clamped to ``[0.0, 1.0]``.
+    """
+    base_weight = EDGE_BASE_WEIGHTS.get(pl.get("kind", "edge"), 0.0)
+
+    raw_confidence = pl.get("confidence")
+    if raw_confidence is None:
+        return base_weight
+
+    try:
+        confidence = float(raw_confidence)
+    except Exception:
+        # A confidence that cannot be converted is treated as absent.
+        return base_weight
+
+    return base_weight * max(0.0, min(1.0, confidence))
--- a/app/core/graph_adapter.py
+++ b/app/core/graph_adapter.py
@ -1,249 +1,10 @@
 """
 FILE: app/core/graph_adapter.py
-DESCRIPTION: Lädt Kanten aus Qdrant und baut einen In-Memory Subgraphen für Scoring (Centrality) und Explanation.
-VERSION: 0.4.0
-STATUS: Active
-DEPENDENCIES: qdrant_client, app.core.qdrant
-LAST_ANALYSIS: 2025-12-15
+DESCRIPTION: Facade für das neue graph Package (Adapter-Teil).
+             WP-14: Modularisierung abgeschlossen.
+VERSION: 0.5.0
 """
+from .graph.graph_subgraph import Subgraph, expand
+from .graph.graph_weights import EDGE_BASE_WEIGHTS

-from __future__ import annotations
-
-from typing import Dict, List, Optional, DefaultDict, Any
-from collections import defaultdict
-
-from qdrant_client import QdrantClient
-from qdrant_client.http import models as rest
-
-from app.core.qdrant import collection_names
-
-# Legacy-Import Fallback
-try:  # pragma: no cover
-    from app.core.qdrant_points import get_edges_for_sources  # type: ignore
-except Exception:  # pragma: no cover
-    get_edges_for_sources = None  # type: ignore
-
-
-# Basisgewichte je Edge-Typ (WP-04a Config)
-EDGE_BASE_WEIGHTS: Dict[str, float] = {
-    # Struktur
-    "belongs_to":     0.10,
-    "next":           0.06,
-    "prev":           0.06,
-    "backlink":       0.04,
-    "references_at":  0.08,
-
-    # Wissen
-    "references":     0.20,
-    "depends_on":     0.18,
-    "related_to":     0.15,
-    "similar_to":     0.12,
-}
-
-
-def _edge_weight(pl: Dict) -> float:
-    """Berechnet das effektive Edge-Gewicht aus kind + confidence."""
-    kind = pl.get("kind", "edge")
-    base = EDGE_BASE_WEIGHTS.get(kind, 0.0)
-
-    conf_raw = pl.get("confidence", None)
-    try:
-        conf = float(conf_raw) if conf_raw is not None else None
-    except Exception:
-        conf = None
-
-    if conf is None:
-        return base
-
-    if conf < 0.0: conf = 0.0
-    if conf > 1.0: conf = 1.0
-
-    return base * conf
-
-
-def _fetch_edges(
-    client: QdrantClient,
-    prefix: str,
-    seeds: List[str],
-    edge_types: Optional[List[str]] = None,
-    limit: int = 2048,
-) -> List[Dict]:
-    """
-    Holt Edges direkt aus der *_edges Collection.
-    Filter: source_id IN seeds OR target_id IN seeds OR note_id IN seeds
-    """
-    if not seeds or limit <= 0:
-        return []
-
-    _, _, edges_col = collection_names(prefix)
-
-    seed_conditions = []
-    for field in ("source_id", "target_id", "note_id"):
-        for s in seeds:
-            seed_conditions.append(
-                rest.FieldCondition(key=field, match=rest.MatchValue(value=str(s)))
-            )
-    seeds_filter = rest.Filter(should=seed_conditions) if seed_conditions else None
-
-    type_filter = None
-    if edge_types:
-        type_conds = [
-            rest.FieldCondition(key="kind", match=rest.MatchValue(value=str(k)))
-            for k in edge_types
-        ]
-        type_filter = rest.Filter(should=type_conds)
-
-    must = []
-    if seeds_filter: must.append(seeds_filter)
-    if type_filter: must.append(type_filter)
-    
-    flt = rest.Filter(must=must) if must else None
-
-    pts, _ = client.scroll(
-        collection_name=edges_col,
-        scroll_filter=flt,
-        limit=limit,
-        with_payload=True,
-        with_vectors=False,
-    )
-
-    out: List[Dict] = []
-    for p in pts or []:
-        pl = dict(p.payload or {})
-        if pl:
-            out.append(pl)
-    return out
-
-
-class Subgraph:
-    """Leichtgewichtiger Subgraph mit Adjazenzlisten & Kennzahlen."""
-
-    def __init__(self) -> None:
-        # Forward: source -> [targets]
-        self.adj: DefaultDict[str, List[Dict]] = defaultdict(list)
-        # Reverse: target -> [sources] (Neu für WP-04b Explanation)
-        self.reverse_adj: DefaultDict[str, List[Dict]] = defaultdict(list)
-        
-        self.in_degree: DefaultDict[str, int] = defaultdict(int)
-        self.out_degree: DefaultDict[str, int] = defaultdict(int)
-
-    def add_edge(self, e: Dict) -> None:
-        """
-        Fügt eine Kante hinzu und aktualisiert Forward/Reverse Indizes.
-        e muss enthalten: source, target, kind, weight.
-        """
-        src = e.get("source")
-        tgt = e.get("target")
-        kind = e.get("kind")
-        weight = e.get("weight", EDGE_BASE_WEIGHTS.get(kind, 0.0))
-        owner = e.get("note_id")
-
-        if not src or not tgt:
-            return
-
-        # 1. Primäre Adjazenz (Forward)
-        edge_data = {"target": tgt, "kind": kind, "weight": weight}
-        self.adj[src].append(edge_data)
-        self.out_degree[src] += 1
-        self.in_degree[tgt] += 1
-
-        # 2. Reverse Adjazenz (Neu für Explanation)
-        # Wir speichern, woher die Kante kam.
-        rev_data = {"source": src, "kind": kind, "weight": weight}
-        self.reverse_adj[tgt].append(rev_data)
-
-        # 3. Kontext-Note Handling (Forward & Reverse)
-        # Wenn eine Kante "im Kontext einer Note" (owner) definiert ist,
-        # schreiben wir sie der Note gut, damit der Retriever Scores auf Note-Ebene findet.
-        if owner and owner != src:
-            # Forward: Owner -> Target
-            self.adj[owner].append(edge_data)
-            self.out_degree[owner] += 1
-            
-            # Reverse: Target wird vom Owner referenziert (indirekt)
-            if owner != tgt:
-                rev_owner_data = {"source": owner, "kind": kind, "weight": weight, "via_context": True}
-                self.reverse_adj[tgt].append(rev_owner_data)
-                self.in_degree[owner] += 1 # Leichter Centrality Boost für den Owner
-
-    def aggregate_edge_bonus(self, node_id: str) -> float:
-        """Summe der ausgehenden Kantengewichte (Hub-Score)."""
-        return sum(edge["weight"] for edge in self.adj.get(node_id, []))
-
-    def edge_bonus(self, node_id: str) -> float:
-        """API für Retriever (WP-04a Kompatibilität)."""
-        return self.aggregate_edge_bonus(node_id)
-
-    def centrality_bonus(self, node_id: str) -> float:
-        """Log-gedämpfte Zentralität (In-Degree)."""
-        import math
-        indeg = self.in_degree.get(node_id, 0)
-        if indeg <= 0:
-            return 0.0
-        return min(math.log1p(indeg) / 10.0, 0.15)
-
-    # --- WP-04b Explanation Helpers ---
-
-    def get_outgoing_edges(self, node_id: str) -> List[Dict[str, Any]]:
-        """Liefert Liste aller Ziele, auf die dieser Knoten zeigt."""
-        return self.adj.get(node_id, [])
-
-    def get_incoming_edges(self, node_id: str) -> List[Dict[str, Any]]:
-        """Liefert Liste aller Quellen, die auf diesen Knoten zeigen."""
-        return self.reverse_adj.get(node_id, [])
-
-
-def expand(
-    client: QdrantClient,
-    prefix: str,
-    seeds: List[str],
-    depth: int = 1,
-    edge_types: Optional[List[str]] = None,
-) -> Subgraph:
-    """
-    Expandiert ab Seeds entlang von Edges (bis `depth`).
-    """
-    sg = Subgraph()
-    frontier = set(seeds)
-    visited = set()
-
-    max_depth = max(depth, 0)
-
-    for _ in range(max_depth):
-        if not frontier:
-            break
-
-        edges_payloads = _fetch_edges(
-            client=client,
-            prefix=prefix,
-            seeds=list(frontier),
-            edge_types=edge_types,
-            limit=2048,
-        )
-
-        next_frontier = set()
-        for pl in edges_payloads:
-            src = pl.get("source_id")
-            tgt = pl.get("target_id")
-            
-            # Skip invalid edges
-            if not src or not tgt:
-                continue
-
-            e = {
-                "source": src,
-                "target": tgt,
-                "kind": pl.get("kind", "edge"),
-                "weight": _edge_weight(pl),
-                "note_id": pl.get("note_id"),
-            }
-            sg.add_edge(e)
-
-            # Nur weitersuchen, wenn Target noch nicht besucht
-            if tgt and tgt not in visited:
-                next_frontier.add(tgt)
-
-        visited |= frontier
-        frontier = next_frontier - visited
-
-    return sg
+__all__ = ["Subgraph", "expand", "EDGE_BASE_WEIGHTS"]