Merge pull request 'WP15b' (#15) from WP15b into main
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
All checks were successful
Deploy mindnet to llm-node / deploy (push) Successful in 4s
Reviewed-on: #15 #### PR-Zusammenfassung: WP-14 Modularisierung & WP-15b Two-Pass Ingestion Dieser Merge schließt die technische Konsolidierung der Architektur (WP-14) und die Optimierung der Ingestion-Pipeline (WP-15b) ab. Das System wurde von einer monolithischen Struktur in eine domänengesteuerte Paket-Hierarchie überführt. **Kernänderungen:** * **WP-14 (Modularisierung):** * Aufteilung von `app/core/` in spezialisierte Pakete: `database/`, `ingestion/`, `retrieval/` und `graph/`. * Einführung von Proxy-Modulen (z.B. `graph_adapter.py`, `retriever.py`) zur Sicherstellung der Abwärtskompatibilität. * Zentralisierung neutraler Logik in `app/core/registry.py` zur Beseitigung von Zirkelbezügen. * **WP-15b (Intelligence 2.0):** * Umstellung der Ingestion auf einen **Two-Pass Workflow**. * **Pass 1:** Globaler Pre-Scan zur Befüllung des `LocalBatchCache`. * **Pass 2:** Binäre semantische Validierung von Kanten gegen den Kontext des Caches zur Eliminierung von Halluzinationen. **Betroffene Komponenten:** * `app.core.database`: Qdrant-Infrastruktur & Point-Mapping. * `app.core.retrieval`: Scoring-Engine (WP-22) & Orchestrierung. * `app.core.graph`: Subgraph-Modell & Traversierung. * Sämtliche Dokumentations-Module (v2.9.1 Update). **Teststatus:** ✅ Inkrementelle Ingestion (Pass 2 Skip) verifiziert. ✅ Hybrid-Scoring (WP-22) via isolated package verifiziert. ✅ Circular Import Audit erfolgreich abgeschlossen.
This commit is contained in:
commit
23b1cb2966
|
|
@ -1,176 +0,0 @@
|
|||
"""
|
||||
FILE: app/core/chunk_payload.py
|
||||
DESCRIPTION: Baut das JSON-Objekt für 'mindnet_chunks'.
|
||||
FEATURES:
|
||||
- Inkludiert Nachbarschafts-IDs (prev/next) und Titel.
|
||||
- FIX 3: Robuste Erkennung des Inputs (Frontmatter-Dict vs. Note-Objekt), damit Overrides ankommen.
|
||||
VERSION: 2.3.0
|
||||
STATUS: Active
|
||||
DEPENDENCIES: yaml, os
|
||||
EXTERNAL_CONFIG: config/types.yaml
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from typing import Any, Dict, List, Optional
|
||||
import os, yaml
|
||||
|
||||
def _env(n: str, d: Optional[str]=None) -> str:
|
||||
v = os.getenv(n)
|
||||
return v if v is not None else (d or "")
|
||||
|
||||
def _load_types() -> dict:
    """Load the type registry from MINDNET_TYPES_FILE; return {} on any failure."""
    path = _env("MINDNET_TYPES_FILE", "./config/types.yaml")
    try:
        with open(path, "r", encoding="utf-8") as handle:
            data = yaml.safe_load(handle)
    except Exception:
        # Missing or unreadable file -> behave as if the registry were empty.
        return {}
    return data or {}
|
||||
|
||||
def _get_types_map(reg: dict) -> dict:
|
||||
if isinstance(reg, dict) and isinstance(reg.get("types"), dict):
|
||||
return reg["types"]
|
||||
return reg if isinstance(reg, dict) else {}
|
||||
|
||||
def _get_defaults(reg: dict) -> dict:
|
||||
if isinstance(reg, dict) and isinstance(reg.get("defaults"), dict):
|
||||
return reg["defaults"]
|
||||
if isinstance(reg, dict) and isinstance(reg.get("global"), dict):
|
||||
return reg["global"]
|
||||
return {}
|
||||
|
||||
def _as_float(x: Any):
|
||||
try: return float(x)
|
||||
except Exception: return None
|
||||
|
||||
def _resolve_chunk_profile_from_config(note_type: str, reg: dict) -> Optional[str]:
    """
    Resolve the chunking profile name from the registry.

    Lookup order: type-level entry for *note_type*, then the defaults
    section; both accept either 'chunking_profile' or 'chunk_profile'.
    Returns None when no non-empty string profile is configured.
    """
    candidates = (
        _get_types_map(reg).get(note_type, {}),  # 1. type level
        _get_defaults(reg),                       # 2. defaults level
    )
    for source in candidates:
        if not isinstance(source, dict):
            continue
        profile = source.get("chunking_profile") or source.get("chunk_profile")
        if isinstance(profile, str) and profile:
            return profile
    return None
|
||||
|
||||
def _resolve_retriever_weight_from_config(note_type: str, reg: dict) -> float:
    """
    Read the retriever weight from config only (type level > defaults).

    Called when the frontmatter carries no weight. Entries that are set
    but not convertible to float are skipped; falls back to 1.0.
    """
    candidates = (
        _get_types_map(reg).get(note_type, {}),  # 1. type level
        _get_defaults(reg),                       # 2. defaults level
    )
    for source in candidates:
        if isinstance(source, dict) and source.get("retriever_weight") is not None:
            weight = _as_float(source.get("retriever_weight"))
            if weight is not None:
                return float(weight)
    return 1.0
|
||||
|
||||
def _as_list(x):
|
||||
if x is None: return []
|
||||
if isinstance(x, list): return x
|
||||
return [x]
|
||||
|
||||
def make_chunk_payloads(note: Dict[str, Any],
                        note_path: str,
                        chunks_from_chunker: List[Any],
                        *,
                        note_text: str = "",
                        types_cfg: Optional[dict] = None,
                        file_path: Optional[str] = None) -> List[Dict[str, Any]]:
    """
    Build the JSON payloads for the chunks ('mindnet_chunks' collection).

    Argument 'note' can be:
      A) a complex object/dict with a "frontmatter" key (legacy / tests)
      B) the frontmatter dictionary itself (call from ingestion.py)

    Precedence for profile and retriever weight: frontmatter override ->
    types registry (type level -> defaults) -> hard-coded fallback.
    Returns one payload dict per entry in *chunks_from_chunker*.
    """

    # --- FIX 3: intelligent detection of the input shape ---
    # Is 'note' a container WITH 'frontmatter', or IS it the frontmatter?
    if isinstance(note, dict) and "frontmatter" in note and isinstance(note["frontmatter"], dict):
        # Case A: container (unwrap it)
        fm = note["frontmatter"]
    else:
        # Case B: direct dict (this is how ingestion.py calls it!)
        fm = note or {}

    note_type = fm.get("type") or note.get("type") or "concept"

    # Title extraction (fallback chain).
    title = fm.get("title") or note.get("title") or fm.get("id") or "Untitled"

    # Registry: explicit override from the caller, else load from disk.
    reg = types_cfg if isinstance(types_cfg, dict) else _load_types()

    # --- Profile resolution ---
    # With 'fm' correctly resolved, frontmatter overrides work again:
    cp = fm.get("chunking_profile") or fm.get("chunk_profile")

    if not cp:
        cp = _resolve_chunk_profile_from_config(note_type, reg)
    if not cp:
        cp = "sliding_standard"

    # --- Retriever weight resolution ---
    rw = fm.get("retriever_weight")

    if rw is None:
        rw = _resolve_retriever_weight_from_config(note_type, reg)

    try:
        rw = float(rw)
    except Exception:
        rw = 1.0

    tags = fm.get("tags") or []
    if isinstance(tags, str):
        tags = [tags]

    out: List[Dict[str, Any]] = []
    for idx, ch in enumerate(chunks_from_chunker):
        # Chunks may be objects (dataclass) or plain dicts — read both shapes.
        cid = getattr(ch, "id", None) or (ch.get("id") if isinstance(ch, dict) else None)
        nid = getattr(ch, "note_id", None) or (ch.get("note_id") if isinstance(ch, dict) else fm.get("id"))
        index = getattr(ch, "index", None) or (ch.get("index") if isinstance(ch, dict) else idx)
        text = getattr(ch, "text", None) or (ch.get("text") if isinstance(ch, dict) else "")
        window = getattr(ch, "window", None) or (ch.get("window") if isinstance(ch, dict) else text)
        prev_id = getattr(ch, "neighbors_prev", None) or (ch.get("neighbors_prev") if isinstance(ch, dict) else None)
        next_id = getattr(ch, "neighbors_next", None) or (ch.get("neighbors_next") if isinstance(ch, dict) else None)

        pl: Dict[str, Any] = {
            "note_id": nid,
            "chunk_id": cid,
            "title": title,
            "index": int(index),
            "ord": int(index) + 1,  # 1-based ordinal
            "type": note_type,
            "tags": tags,
            "text": text,
            "window": window,
            "neighbors_prev": _as_list(prev_id),
            "neighbors_next": _as_list(next_id),
            "section": getattr(ch, "section", None) or (ch.get("section") if isinstance(ch, dict) else ""),
            "path": note_path,
            "source_path": file_path or note_path,
            "retriever_weight": float(rw),
            "chunk_profile": cp,  # now finally carries the override value!
        }

        # Cleanup of legacy alias keys (defensive — not set above).
        for alias in ("chunk_num", "Chunk_Number"):
            pl.pop(alias, None)

        out.append(pl)

    return out
|
||||
|
|
@ -1,474 +0,0 @@
|
|||
"""
|
||||
FILE: app/core/chunker.py
|
||||
DESCRIPTION: Zerlegt Texte in Chunks (Sliding Window oder nach Headings).
|
||||
Orchestriert die Smart-Edge-Allocation via SemanticAnalyzer.
|
||||
FIX V3: Support für mehrzeilige Callouts und Section-Propagation.
|
||||
VERSION: 3.1.0 (Full Compatibility Merge)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Dict, Optional, Tuple, Any, Set
|
||||
import re
|
||||
import math
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
# Services
|
||||
from app.services.semantic_analyzer import get_semantic_analyzer
|
||||
|
||||
# Core Imports
|
||||
# Wir importieren build_edges_for_note nur, um kompatibel zur Signatur zu bleiben
|
||||
# oder für den Fallback.
|
||||
try:
|
||||
from app.core.derive_edges import build_edges_for_note
|
||||
except ImportError:
|
||||
# Mock für Tests
|
||||
def build_edges_for_note(note_id, chunks, note_level_references=None, include_note_scope_refs=False): return []
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ==========================================
|
||||
# 1. HELPER & CONFIG
|
||||
# ==========================================
|
||||
|
||||
BASE_DIR = Path(__file__).resolve().parent.parent.parent
|
||||
CONFIG_PATH = BASE_DIR / "config" / "types.yaml"
|
||||
# Fallback Default, falls types.yaml fehlt
|
||||
DEFAULT_PROFILE = {"strategy": "sliding_window", "target": 400, "max": 600, "overlap": (50, 80)}
|
||||
_CONFIG_CACHE = None
|
||||
|
||||
def _load_yaml_config() -> Dict[str, Any]:
    """Load and memoize config/types.yaml.

    Returns {} when the file is missing, unreadable, or empty. The parsed
    mapping is cached in the module-level _CONFIG_CACHE so the file is
    read at most once per process.
    """
    global _CONFIG_CACHE
    if _CONFIG_CACHE is not None:
        return _CONFIG_CACHE
    if not CONFIG_PATH.exists():
        return {}
    try:
        with open(CONFIG_PATH, "r", encoding="utf-8") as f:
            # FIX: an empty YAML file makes safe_load return None, which
            # previously leaked out (violating the return type) and was
            # cached as None, forcing a re-read on every call.
            data = yaml.safe_load(f) or {}
        _CONFIG_CACHE = data
        return data
    except Exception:
        return {}
|
||||
|
||||
def get_chunk_config(note_type: str) -> Dict[str, Any]:
    """
    Load the chunking strategy for *note_type* from types.yaml.

    Resolution: type-level 'chunking_profile' -> defaults section ->
    'sliding_standard'; an unknown profile name falls back to
    DEFAULT_PROFILE. Keeps compatibility with WP-15 profiles.
    """
    full_config = _load_yaml_config()
    profiles = full_config.get("chunking_profiles", {})
    type_def = full_config.get("types", {}).get(note_type.lower(), {})

    # Which profile does this type use? (e.g. 'sliding_smart_edges')
    profile_name = type_def.get("chunking_profile")

    if not profile_name:
        profile_name = full_config.get("defaults", {}).get("chunking_profile", "sliding_standard")

    # Copy so callers can mutate the dict without poisoning the cache.
    config = profiles.get(profile_name, DEFAULT_PROFILE).copy()

    # YAML parses the overlap pair as a list; the chunker expects a tuple.
    if "overlap" in config and isinstance(config["overlap"], list):
        config["overlap"] = tuple(config["overlap"])

    return config
|
||||
|
||||
def extract_frontmatter_from_text(md_text: str) -> Tuple[Dict[str, Any], str]:
    """Split *md_text* into (frontmatter dict, stripped body).

    Returns ({}, md_text) when no leading '---' frontmatter block exists.
    Frontmatter that is invalid YAML or not a mapping yields {}.
    """
    fm_match = re.match(r'^\s*---\s*\n(.*?)\n---', md_text, re.DOTALL)
    if not fm_match:
        return {}, md_text
    try:
        frontmatter = yaml.safe_load(fm_match.group(1))
        if not isinstance(frontmatter, dict):
            frontmatter = {}
    except yaml.YAMLError:
        frontmatter = {}
    # FIX: slice at the existing match's end instead of re-scanning the whole
    # document with re.sub — identical result (the pattern is anchored at the
    # start), one regex pass instead of two.
    return frontmatter, md_text[fm_match.end():].strip()
|
||||
|
||||
# ==========================================
|
||||
# 2. DATA CLASSES & TEXT TOOLS
|
||||
# ==========================================
|
||||
|
||||
_SENT_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9„(])'); _WS = re.compile(r'\s+')
|
||||
|
||||
def estimate_tokens(text: str) -> int:
    """Rough token estimate: ~4 characters per token, minimum 1."""
    char_count = len(text.strip())
    return max(1, math.ceil(char_count / 4))
|
||||
|
||||
def split_sentences(text: str) -> list[str]:
    """Split whitespace-normalized text into sentences via the module regexes."""
    normalized = _WS.sub(' ', text.strip())
    if not normalized:
        return []
    return [piece.strip() for piece in _SENT_SPLIT.split(normalized) if piece.strip()]
|
||||
|
||||
@dataclass
class RawBlock:
    # One logical markdown unit produced by parse_blocks.
    # kind: "paragraph" or "heading"; level: heading level (None for paragraphs);
    # section_path/section_title describe the enclosing H2 section.
    kind: str; text: str; level: Optional[int]; section_path: str; section_title: Optional[str]
|
||||
|
||||
@dataclass
class Chunk:
    # Final chunk for embedding and graph storage. `text` is the stored body;
    # `window` is text plus a context prefix (e.g. the H1 title) used for
    # embedding quality. neighbors_prev/next are sibling chunk ids filled in
    # after assembly; suggested_edges holds confirmed "kind:target" strings.
    id: str; note_id: str; index: int; text: str; window: str; token_count: int
    section_title: Optional[str]; section_path: str
    neighbors_prev: Optional[str]; neighbors_next: Optional[str]
    suggested_edges: Optional[List[str]] = None
|
||||
|
||||
# ==========================================
|
||||
# 3. PARSING & STRATEGIES
|
||||
# ==========================================
|
||||
|
||||
def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
    """
    Split markdown text into logical blocks (paragraphs, headings).

    Important for the 'by_heading' strategy: each H2 opens a new section
    and the paragraphs below inherit its path/title.
    Returns (blocks, h1_title) where h1_title is the document title.
    """
    blocks = []
    h1_title = "Dokument"
    section_path = "/"
    current_h2 = None

    # Frontmatter is metadata, not content — strip before parsing.
    fm, text_without_fm = extract_frontmatter_from_text(md_text)

    # First H1 becomes the document title (the H1 line itself is skipped below).
    h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
    if h1_match:
        h1_title = h1_match.group(1).strip()

    lines = text_without_fm.split('\n')
    buffer = []

    for line in lines:
        stripped = line.strip()
        if stripped.startswith('# '):
            # H1 already captured as the title.
            continue
        elif stripped.startswith('## '):
            # New H2: flush the buffered paragraph into the previous section.
            if buffer:
                content = "\n".join(buffer).strip()
                if content:
                    blocks.append(RawBlock("paragraph", content, None, section_path, current_h2))
                buffer = []
            current_h2 = stripped[3:].strip()
            section_path = f"/{current_h2}"
            blocks.append(RawBlock("heading", stripped, 2, section_path, current_h2))
        elif not stripped:
            # Blank line terminates the current paragraph.
            if buffer:
                content = "\n".join(buffer).strip()
                if content:
                    blocks.append(RawBlock("paragraph", content, None, section_path, current_h2))
                buffer = []
        else:
            buffer.append(line)

    # Flush the trailing paragraph, if any.
    if buffer:
        content = "\n".join(buffer).strip()
        if content:
            blocks.append(RawBlock("paragraph", content, None, section_path, current_h2))

    return blocks, h1_title
|
||||
|
||||
def _strategy_sliding_window(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "", context_prefix: str = "") -> List[Chunk]:
    """
    The default strategy from WP-15.

    Accumulates blocks and cuts at ~'target' tokens (sentence-aware).
    Accumulations above 'max' tokens are re-split at sentence boundaries
    with a token overlap carried into the next chunk. *context_prefix*
    (e.g. the H1 title) is prepended to each chunk's embedding window
    only — never to the stored text.
    """
    target = config.get("target", 400)
    max_tokens = config.get("max", 600)
    overlap_val = config.get("overlap", (50, 80))
    # Overlap may be a (min, max) tuple — use the midpoint as the budget.
    overlap = sum(overlap_val) // 2 if isinstance(overlap_val, tuple) else overlap_val
    chunks = []; buf = []

    def _create_chunk(txt, win, sec, path):
        # Append a Chunk; the id encodes the running index ("<note>#c00", ...).
        idx = len(chunks)
        chunks.append(Chunk(
            id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
            text=txt, window=win, token_count=estimate_tokens(txt),
            section_title=sec, section_path=path, neighbors_prev=None, neighbors_next=None,
            suggested_edges=[]
        ))

    def flush_buffer():
        # Emit the buffered blocks as one chunk — or several, if oversized.
        nonlocal buf
        if not buf: return

        text_body = "\n\n".join([b.text for b in buf])
        sec_title = buf[-1].section_title if buf else None
        sec_path = buf[-1].section_path if buf else "/"

        # Prepend the context prefix (e.g. H1) for embedding quality.
        win_body = f"{context_prefix}\n{text_body}".strip() if context_prefix else text_body

        if estimate_tokens(text_body) <= max_tokens:
            _create_chunk(text_body, win_body, sec_title, sec_path)
        else:
            # Too large -> sentence-wise split.
            sentences = split_sentences(text_body)
            current_chunk_sents = []
            current_len = 0

            for sent in sentences:
                sent_len = estimate_tokens(sent)
                if current_len + sent_len > target and current_chunk_sents:
                    c_txt = " ".join(current_chunk_sents)
                    c_win = f"{context_prefix}\n{c_txt}".strip() if context_prefix else c_txt
                    _create_chunk(c_txt, c_win, sec_title, sec_path)

                    # Build the overlap for the next chunk from trailing sentences.
                    overlap_sents = []
                    ov_len = 0
                    for s in reversed(current_chunk_sents):
                        if ov_len + estimate_tokens(s) < overlap:
                            overlap_sents.insert(0, s)
                            ov_len += estimate_tokens(s)
                        else:
                            break

                    current_chunk_sents = list(overlap_sents)
                    current_chunk_sents.append(sent)
                    current_len = ov_len + sent_len
                else:
                    current_chunk_sents.append(sent)
                    current_len += sent_len

            # Remainder.
            if current_chunk_sents:
                c_txt = " ".join(current_chunk_sents)
                c_win = f"{context_prefix}\n{c_txt}".strip() if context_prefix else c_txt
                _create_chunk(c_txt, c_win, sec_title, sec_path)

        buf = []

    for b in blocks:
        if b.kind == "heading": continue  # headings only steer sectioning
        current_buf_text = "\n\n".join([x.text for x in buf])
        if estimate_tokens(current_buf_text) + estimate_tokens(b.text) >= target:
            flush_buffer()
        buf.append(b)
        if estimate_tokens(b.text) >= target:
            # A single oversized block is flushed immediately on its own.
            flush_buffer()

    flush_buffer()
    return chunks
|
||||
|
||||
def _strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
    """
    Strategy for structured notes (profiles, value lists).

    Delegates to the sliding-window strategy; parse_blocks has already
    forced cuts at headings, and the document title is passed as the
    embedding-context prefix.
    """
    title_prefix = f"# {doc_title}"
    return _strategy_sliding_window(blocks, config, note_id, doc_title, context_prefix=title_prefix)
|
||||
|
||||
# ==========================================
|
||||
# 4. ROBUST EDGE PARSING & PROPAGATION (NEU)
|
||||
# ==========================================
|
||||
|
||||
def _parse_edges_robust(text: str) -> Set[str]:
|
||||
"""
|
||||
NEU: Findet Kanten im Text, auch wenn sie mehrzeilig oder 'kaputt' formatiert sind.
|
||||
Erkennt:
|
||||
> [!edge] type
|
||||
> [[Link]]
|
||||
Returns: Set von Strings "kind:target"
|
||||
"""
|
||||
found_edges = set()
|
||||
|
||||
# A. Inline [[rel:type|target]] (Standard)
|
||||
inlines = re.findall(r'\[\[rel:([^\|\]]+)\|?([^\]]*)\]\]', text)
|
||||
for kind, target in inlines:
|
||||
k = kind.strip()
|
||||
t = target.strip()
|
||||
if k and t: found_edges.add(f"{k}:{t}")
|
||||
|
||||
# B. Multiline Callouts Parsing (Der Fix für dein Problem)
|
||||
lines = text.split('\n')
|
||||
current_edge_type = None
|
||||
|
||||
for line in lines:
|
||||
stripped = line.strip()
|
||||
|
||||
# 1. Start Blockquote: > [!edge] type
|
||||
# (Erlaubt optionalen Doppelpunkt)
|
||||
callout_match = re.match(r'>\s*\[!edge\]\s*([^:\s]+)', stripped)
|
||||
if callout_match:
|
||||
current_edge_type = callout_match.group(1).strip()
|
||||
|
||||
# Check: Sind Links noch in der GLEICHEN Zeile?
|
||||
links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
|
||||
for l in links:
|
||||
if "rel:" not in l:
|
||||
found_edges.add(f"{current_edge_type}:{l}")
|
||||
continue
|
||||
|
||||
# 2. Continuation Line: > [[Target]]
|
||||
# Wenn wir noch im 'edge mode' sind und die Zeile ein Zitat ist
|
||||
if current_edge_type and stripped.startswith('>'):
|
||||
links = re.findall(r'\[\[([^\]]+)\]\]', stripped)
|
||||
for l in links:
|
||||
if "rel:" not in l:
|
||||
found_edges.add(f"{current_edge_type}:{l}")
|
||||
|
||||
# 3. End of Blockquote (kein '>') -> Reset Type
|
||||
elif not stripped.startswith('>'):
|
||||
current_edge_type = None
|
||||
|
||||
return found_edges
|
||||
|
||||
def _propagate_section_edges(chunks: List[Chunk]) -> List[Chunk]:
    """
    NEW: distribute edges within a section.

    Solves the problem that a callout at the top of a chapter applies to
    all chunks below it: edges found anywhere in a section are injected
    as inline [[rel:kind|target]] tokens into every chunk of that section.
    """
    # Step 1: collect edges per section.
    section_map = {}  # path -> set("kind:target")

    for ch in chunks:
        # Root level "/" is usually skipped — too global to propagate.
        if not ch.section_path or ch.section_path == "/": continue

        edges = _parse_edges_robust(ch.text)
        if edges:
            if ch.section_path not in section_map:
                section_map[ch.section_path] = set()
            section_map[ch.section_path].update(edges)

    # Step 2: inject (broadcast) into every chunk of the section.
    for ch in chunks:
        if ch.section_path in section_map:
            edges_to_add = section_map[ch.section_path]
            if not edges_to_add: continue

            injections = []
            for e_str in edges_to_add:
                kind, target = e_str.split(':', 1)
                # Skip edges already present as inline tokens in this chunk.
                token = f"[[rel:{kind}|{target}]]"
                if token not in ch.text:
                    injections.append(token)

            if injections:
                # Write the edges "physically" into the text so that
                # derive_edges.py is guaranteed to find them later.
                block = "\n\n\n" + " ".join(injections)
                ch.text += block
                # Also into the window for embedding context.
                ch.window += block

    return chunks
|
||||
|
||||
# ==========================================
|
||||
# 5. ORCHESTRATION (ASYNC)
|
||||
# ==========================================
|
||||
|
||||
async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Optional[Dict] = None) -> List[Chunk]:
    """
    Main entry point. Wires together parsing, splitting and edge allocation.

    Pipeline: load config -> parse blocks -> split (strategy) -> propagate
    section edges -> optional LLM smart-edge allocation -> link neighbor
    ids. Returns [] when no chunks were produced.
    """
    # 1. Load config (WP-15 compatibility).
    if config is None:
        config = get_chunk_config(note_type)

    fm, body_text = extract_frontmatter_from_text(md_text)
    note_status = fm.get("status", "").lower()

    primary_strategy = config.get("strategy", "sliding_window")
    enable_smart_edges = config.get("enable_smart_edge_allocation", False)

    # Drafts skip the LLM to save cost/time.
    if enable_smart_edges and note_status in ["draft", "initial_gen"]:
        logger.info(f"Chunker: Skipping Smart Edges for draft '{note_id}'.")
        enable_smart_edges = False

    # 2. Parsing & splitting (strategies are CPU-bound -> worker thread).
    blocks, doc_title = parse_blocks(md_text)

    if primary_strategy == "by_heading":
        chunks = await asyncio.to_thread(_strategy_by_heading, blocks, config, note_id, doc_title)
    else:
        chunks = await asyncio.to_thread(_strategy_sliding_window, blocks, config, note_id, doc_title)

    if not chunks:
        return []

    # 3. NEW: propagation BEFORE smart edge allocation —
    # repairs the edges missing from multiline callouts.
    chunks = _propagate_section_edges(chunks)

    # 4. Smart edges (LLM).
    if enable_smart_edges:
        chunks = await _run_smart_edge_allocation(chunks, md_text, note_id, note_type)

    # 5. Neighbor linking (prev/next chunk ids).
    for i, ch in enumerate(chunks):
        ch.neighbors_prev = chunks[i-1].id if i > 0 else None
        ch.neighbors_next = chunks[i+1].id if i < len(chunks)-1 else None

    return chunks
|
||||
|
||||
def _extract_all_edges_from_md(md_text: str, note_id: str, note_type: str) -> List[str]:
    """
    Helper: collect ALL edge candidates for the LLM candidate pool.

    Combines the standard derive_edges extraction (run over the full note
    as one dummy chunk) with the robust multiline-callout parser. Returns
    deduplicated "kind:target" strings; structural kinds
    (belongs_to/next/prev/backlink) are excluded.
    """
    # A. Via derive_edges (standard path).
    dummy_chunk = {
        "chunk_id": f"{note_id}#full",
        "text": md_text,
        "content": md_text,
        "window": md_text,
        "type": note_type
    }
    # Mind the adjusted signature (WP-15 fix).
    raw_edges = build_edges_for_note(
        note_id,
        [dummy_chunk],
        note_level_references=None,
        include_note_scope_refs=False
    )
    all_candidates = set()
    for e in raw_edges:
        kind = e.get("kind")
        target = e.get("target_id")
        if target and kind not in ["belongs_to", "next", "prev", "backlink"]:
            all_candidates.add(f"{kind}:{target}")

    # B. Via the robust parser (NEW) — catches the multiline callouts.
    robust_edges = _parse_edges_robust(md_text)
    all_candidates.update(robust_edges)

    return list(all_candidates)
|
||||
|
||||
async def _run_smart_edge_allocation(chunks: List[Chunk], full_text: str, note_id: str, note_type: str) -> List[Chunk]:
    """
    The LLM step (WP-15): filter irrelevant edges per chunk.

    Each chunk is checked concurrently against the full candidate pool;
    confirmed edges are written physically into text/window as
    [[rel:kind|target]] tokens. Candidates the LLM assigned to no chunk
    are broadcast to ALL chunks as a safety fallback so nothing is lost.
    """
    analyzer = get_semantic_analyzer()
    candidate_list = _extract_all_edges_from_md(full_text, note_id, note_type)

    if not candidate_list:
        return chunks

    # One LLM call per chunk, gathered concurrently.
    tasks = []
    for chunk in chunks:
        tasks.append(analyzer.assign_edges_to_chunk(chunk.text, candidate_list, note_type))

    results_per_chunk = await asyncio.gather(*tasks)

    assigned_edges_global = set()

    for i, confirmed_edges in enumerate(results_per_chunk):
        chunk = chunks[i]
        chunk.suggested_edges = confirmed_edges
        assigned_edges_global.update(confirmed_edges)

        if confirmed_edges:
            # Smart edges are also written physically into the text.
            # NOTE(review): e.split(':')[1] keeps only the first segment after
            # the colon — verify that targets never contain ':' themselves.
            injection_str = "\n" + " ".join([f"[[rel:{e.split(':')[0]}|{e.split(':')[1]}]]" for e in confirmed_edges if ':' in e])
            chunk.text += injection_str
            chunk.window += injection_str

    # Fallback for edges the LLM assigned nowhere
    # (safety net so nothing gets lost).
    unassigned = set(candidate_list) - assigned_edges_global
    if unassigned:
        fallback_str = "\n" + " ".join([f"[[rel:{e.split(':')[0]}|{e.split(':')[1]}]]" for e in unassigned if ':' in e])
        for chunk in chunks:
            chunk.text += fallback_str
            chunk.window += fallback_str
            if chunk.suggested_edges is None: chunk.suggested_edges = []
            chunk.suggested_edges.extend(list(unassigned))

    return chunks
|
||||
10
app/core/chunking/__init__.py
Normal file
10
app/core/chunking/__init__.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
"""
|
||||
FILE: app/core/chunking/__init__.py
|
||||
DESCRIPTION: Package-Einstiegspunkt für Chunking. Exportiert assemble_chunks.
|
||||
VERSION: 3.3.0
|
||||
"""
|
||||
from .chunking_processor import assemble_chunks
|
||||
from .chunking_utils import get_chunk_config, extract_frontmatter_from_text
|
||||
from .chunking_models import Chunk
|
||||
|
||||
__all__ = ["assemble_chunks", "get_chunk_config", "extract_frontmatter_from_text", "Chunk"]
|
||||
31
app/core/chunking/chunking_models.py
Normal file
31
app/core/chunking/chunking_models.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
"""
|
||||
FILE: app/core/chunking/chunking_models.py
|
||||
DESCRIPTION: Datenklassen für das Chunking-System.
|
||||
"""
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Dict, Optional, Any
|
||||
|
||||
@dataclass
class RawBlock:
    """Represents one logical block produced by markdown parsing."""
    kind: str                     # "paragraph" or "heading"
    text: str                     # raw block content
    level: Optional[int]          # heading level (None for paragraphs)
    section_path: str             # path of the enclosing section, e.g. "/Intro"
    section_title: Optional[str]  # title of the enclosing H2 section
|
||||
|
||||
@dataclass
class Chunk:
    """The final chunk object for embedding and graph storage."""
    id: str                        # chunk id, e.g. "<note_id>#c00"
    note_id: str                   # owning note
    index: int                     # zero-based position within the note
    text: str                      # chunk body as stored
    window: str                    # text plus context prefix, used for embedding
    token_count: int               # estimated token count of `text`
    section_title: Optional[str]   # title of the enclosing H2 section
    section_path: str              # path of the enclosing section
    neighbors_prev: Optional[str]  # id of the previous chunk, if any
    neighbors_next: Optional[str]  # id of the next chunk, if any
    candidate_pool: List[Dict[str, Any]] = field(default_factory=list)  # WP-15b edge candidates
    suggested_edges: Optional[List[str]] = None  # confirmed "kind:target" strings
|
||||
93
app/core/chunking/chunking_parser.py
Normal file
93
app/core/chunking/chunking_parser.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
"""
|
||||
FILE: app/core/chunking/chunking_parser.py
|
||||
DESCRIPTION: Zerlegt Markdown in Blöcke und extrahiert Kanten-Strings.
|
||||
"""
|
||||
import re
|
||||
from typing import List, Tuple, Set
|
||||
from .chunking_models import RawBlock
|
||||
from .chunking_utils import extract_frontmatter_from_text
|
||||
|
||||
_WS = re.compile(r'\s+')
|
||||
_SENT_SPLIT = re.compile(r'(?<=[.!?])\s+(?=[A-ZÄÖÜ0-9„(])')
|
||||
|
||||
def split_sentences(text: str) -> list[str]:
    """Split whitespace-normalized text into sentences."""
    collapsed = _WS.sub(' ', text.strip())
    if not collapsed:
        return []
    pieces = _SENT_SPLIT.split(collapsed)
    return [piece.strip() for piece in pieces if piece.strip()]
|
||||
|
||||
def parse_blocks(md_text: str) -> Tuple[List[RawBlock], str]:
    """Split text into logical units (paragraphs, headings).

    Returns (blocks, h1_title). H2 headings open a new section; H3-H6
    keep the parent H2's section_path but record their own level.
    """
    blocks = []
    h1_title = "Dokument"; section_path = "/"; current_h2 = None
    fm, text_without_fm = extract_frontmatter_from_text(md_text)
    # First H1 becomes the document title.
    h1_match = re.search(r'^#\s+(.*)', text_without_fm, re.MULTILINE)
    if h1_match: h1_title = h1_match.group(1).strip()
    lines = text_without_fm.split('\n')
    buffer = []

    for line in lines:
        stripped = line.strip()

        # Ignore H1 (it is the doc title).
        if stripped.startswith('# '):
            continue

        # Generic heading detection (H2 to H6) for flexible split levels.
        heading_match = re.match(r'^(#{2,6})\s+(.*)', stripped)
        if heading_match:
            # Flush the buffer (close the previous paragraph).
            if buffer:
                content = "\n".join(buffer).strip()
                if content: blocks.append(RawBlock("paragraph", content, None, section_path, current_h2))
                buffer = []

            level = len(heading_match.group(1))
            title = heading_match.group(2).strip()

            # Path logic: H2 sets the main path.
            if level == 2:
                current_h2 = title
                section_path = f"/{current_h2}"
            # For H3+ the section_path stays with the parent, only the level is recorded.

            blocks.append(RawBlock("heading", stripped, level, section_path, current_h2))

        elif not stripped:
            # Blank line terminates the current paragraph.
            if buffer:
                content = "\n".join(buffer).strip()
                if content: blocks.append(RawBlock("paragraph", content, None, section_path, current_h2))
                buffer = []
        else:
            buffer.append(line)

    # Flush the trailing paragraph, if any.
    if buffer:
        content = "\n".join(buffer).strip()
        if content: blocks.append(RawBlock("paragraph", content, None, section_path, current_h2))
    return blocks, h1_title
|
||||
|
||||
def parse_edges_robust(text: str) -> Set[str]:
    """Extract edge candidates (inline wikilinks and [!edge] callouts).

    Edge kinds are normalized to lower case. Returns "kind:target" strings.
    """
    edges = set()

    # Inline [[rel:kind|target]] links.
    for raw_kind, raw_target in re.findall(r'\[\[rel:([^\|\]]+)\|?([^\]]*)\]\]', text):
        kind = raw_kind.strip().lower()
        target = raw_target.strip()
        if kind and target:
            edges.add(f"{kind}:{target}")

    # Multiline callout blocks.
    active_type = None
    for raw_line in text.split('\n'):
        line = raw_line.strip()
        header = re.match(r'>\s*\[!edge\]\s*([^:\s]+)', line)
        if header:
            active_type = header.group(1).strip().lower()
            # Links may appear on the header line itself.
            for link in re.findall(r'\[\[([^\]]+)\]\]', line):
                if "rel:" not in link:
                    edges.add(f"{active_type}:{link}")
            continue
        if active_type and line.startswith('>'):
            # Continuation line inside the blockquote.
            for link in re.findall(r'\[\[([^\]]+)\]\]', line):
                if "rel:" not in link:
                    edges.add(f"{active_type}:{link}")
        elif not line.startswith('>'):
            # Blockquote ended -> leave edge mode.
            active_type = None

    return edges
|
||||
94
app/core/chunking/chunking_processor.py
Normal file
94
app/core/chunking/chunking_processor.py
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
"""
|
||||
FILE: app/core/chunking/chunking_processor.py
|
||||
DESCRIPTION: Der zentrale Orchestrator für das Chunking-System.
|
||||
AUDIT v3.3.3: Wiederherstellung der "Gold-Standard" Qualität.
|
||||
- Integriert physikalische Kanten-Injektion (Propagierung).
|
||||
- Stellt H1-Kontext-Fenster sicher.
|
||||
- Baut den Candidate-Pool für die WP-15b Ingestion auf.
|
||||
"""
|
||||
import asyncio
|
||||
import re
|
||||
import logging
|
||||
from typing import List, Dict, Optional
|
||||
from .chunking_models import Chunk
|
||||
from .chunking_utils import get_chunk_config, extract_frontmatter_from_text
|
||||
from .chunking_parser import parse_blocks, parse_edges_robust
|
||||
from .chunking_strategies import strategy_sliding_window, strategy_by_heading
|
||||
from .chunking_propagation import propagate_section_edges
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def assemble_chunks(note_id: str, md_text: str, note_type: str, config: Optional[Dict] = None) -> List[Chunk]:
    """
    Split a note into chunks and enrich them for ingestion.

    Combines the splitting strategies with physical edge-context enrichment
    and builds the WP-15b candidate pool on every chunk.
    """
    # 1. Configuration & parsing
    cfg = config if config is not None else get_chunk_config(note_type)
    _fm, body_text = extract_frontmatter_from_text(md_text)
    blocks, doc_title = parse_blocks(md_text)

    # H1 prefix used for the embedding windows.
    h1_prefix = f"# {doc_title}" if doc_title else ""

    # 2. Apply the splitting strategy (CPU-bound, run in a worker thread).
    if cfg.get("strategy") == "by_heading":
        chunks = await asyncio.to_thread(strategy_by_heading, blocks, cfg, note_id, doc_title)
    else:
        # sliding_window uses the context prefix for the window field.
        chunks = await asyncio.to_thread(strategy_sliding_window, blocks, cfg, note_id, context_prefix=h1_prefix)

    if not chunks:
        return []

    # 3. Physical context enrichment: writes callout/inline edges into the text.
    chunks = propagate_section_edges(chunks)

    def _append_candidates(chunk, edge_strings, provenance):
        # Turn "kind:target" strings into candidate-pool entries.
        for edge in edge_strings:
            parts = edge.split(':', 1)
            if len(parts) == 2:
                chunk.candidate_pool.append({"kind": parts[0], "to": parts[1], "provenance": provenance})

    # 4. WP-15b candidate pool: explicit edges from the (enriched) chunk text.
    for chunk in chunks:
        _append_candidates(chunk, parse_edges_robust(chunk.text), "explicit")

    # 5. Global pool: unassigned edges collected at the end of the document.
    pool_match = re.search(
        r'###?\s*(?:Unzugeordnete Kanten|Edge Pool|Candidates)\s*\n(.*?)(?:\n#|$)',
        body_text,
        re.DOTALL | re.IGNORECASE
    )
    if pool_match:
        # These edges are flagged as "global_pool" for the later AI validation.
        global_edges = parse_edges_robust(pool_match.group(1))
        for chunk in chunks:
            _append_candidates(chunk, global_edges, "global_pool")

    # 6. De-duplicate each candidate pool.
    for chunk in chunks:
        seen = set()
        deduped = []
        for cand in chunk.candidate_pool:
            key = (cand["kind"], cand["to"], cand["provenance"])
            if key in seen:
                continue
            seen.add(key)
            deduped.append(cand)
        chunk.candidate_pool = deduped

    # Link neighbor chunk IDs for graph traversal.
    last = len(chunks) - 1
    for i, chunk in enumerate(chunks):
        chunk.neighbors_prev = chunks[i - 1].id if i > 0 else None
        chunk.neighbors_next = chunks[i + 1].id if i < last else None

    return chunks
|
||||
59
app/core/chunking/chunking_propagation.py
Normal file
59
app/core/chunking/chunking_propagation.py
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
"""
|
||||
FILE: app/core/chunking/chunking_propagation.py
|
||||
DESCRIPTION: Injiziert Sektions-Kanten physisch in den Text (Embedding-Enrichment).
|
||||
Stellt die "Gold-Standard"-Qualität von v3.1.0 wieder her.
|
||||
VERSION: 3.3.1
|
||||
STATUS: Active
|
||||
"""
|
||||
from typing import List, Dict, Set
|
||||
from .chunking_models import Chunk
|
||||
from .chunking_parser import parse_edges_robust
|
||||
|
||||
def propagate_section_edges(chunks: List[Chunk]) -> List[Chunk]:
    """
    Aggregate explicit edges per section and broadcast them into every chunk
    of that section (text AND window). Essential so relations end up in the
    embedding vectors.
    """
    # Phase 1: collect all explicit edges per section path.
    edges_by_section: Dict[str, Set[str]] = {}
    for chunk in chunks:
        # Root level "/" is ignored (too global); focus on concrete chapters.
        path = chunk.section_path
        if not path or path == "/":
            continue
        found = parse_edges_robust(chunk.text)
        if found:
            edges_by_section.setdefault(path, set()).update(found)

    # Phase 2: inject the aggregated edges back into every chunk of the section.
    for chunk in chunks:
        section_edges = edges_by_section.get(chunk.section_path)
        if not section_edges:
            continue

        missing_tokens = []
        for edge in section_edges:
            kind, target = edge.split(':', 1)
            token = f"[[rel:{kind}|{target}]]"
            # Only inject edges that are not already present in the text.
            if token not in chunk.text:
                missing_tokens.append(token)

        if not missing_tokens:
            continue

        # Physical enrichment (the v3.1.0 quality fix); the triple newline
        # keeps the injected block cleanly separated in the embedding window.
        addition = "\n\n\n" + " ".join(missing_tokens)
        chunk.text += addition

        # Crucial: the window is what Qdrant searches, so enrich it as well.
        if chunk.window:
            chunk.window += addition
        else:
            chunk.window = chunk.text

    return chunks
|
||||
142
app/core/chunking/chunking_strategies.py
Normal file
142
app/core/chunking/chunking_strategies.py
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
"""
|
||||
FILE: app/core/chunking/chunking_strategies.py
|
||||
DESCRIPTION: Mathematische Splitting-Strategien.
|
||||
AUDIT v3.3.2: 100% Konformität zur 'by_heading' Spezifikation.
|
||||
- Implementiert Hybrid-Safety-Net (Sliding Window für Übergrößen).
|
||||
- Breadcrumb-Kontext im Window (H1 > H2).
|
||||
- Sliding Window mit H1-Kontext (Gold-Standard v3.1.0).
|
||||
"""
|
||||
from typing import List, Dict, Any, Optional
|
||||
from .chunking_models import RawBlock, Chunk
|
||||
from .chunking_utils import estimate_tokens
|
||||
from .chunking_parser import split_sentences
|
||||
|
||||
def _create_context_win(doc_title: str, sec_title: Optional[str], text: str) -> str:
|
||||
"""Baut den Breadcrumb-Kontext für das Embedding-Fenster."""
|
||||
parts = []
|
||||
if doc_title: parts.append(doc_title)
|
||||
if sec_title and sec_title != doc_title: parts.append(sec_title)
|
||||
prefix = " > ".join(parts)
|
||||
return f"{prefix}\n{text}".strip() if prefix else text
|
||||
|
||||
def strategy_sliding_window(blocks: List[RawBlock],
                            config: Dict[str, Any],
                            note_id: str,
                            context_prefix: str = "") -> List[Chunk]:
    """
    Merge consecutive blocks and cut at roughly 'target' tokens.

    Heading blocks are skipped during splitting to keep paragraph context
    together; oversized buffers are re-split on sentence boundaries with a
    token overlap carried between successive chunks.

    Args:
        blocks: Parsed raw blocks of the note (paragraphs and headings).
        config: Chunking profile; reads 'target', 'max' and 'overlap'.
        note_id: ID of the parent note, used to derive chunk IDs.
        context_prefix: Optional H1 prefix prepended to each chunk's window.

    Returns:
        Ordered list of Chunk objects (neighbor links are set by the caller).
    """
    target = config.get("target", 400)
    max_tokens = config.get("max", 600)
    overlap_val = config.get("overlap", (50, 80))
    # Overlap may be configured as a (min, max) tuple; use its midpoint.
    overlap = sum(overlap_val) // 2 if isinstance(overlap_val, tuple) else overlap_val

    chunks: List[Chunk] = []
    buf: List[RawBlock] = []

    def _add(txt, sec, path):
        # Create one chunk; the window gets the H1 context prefix for embedding.
        idx = len(chunks)
        win = f"{context_prefix}\n{txt}".strip() if context_prefix else txt
        chunks.append(Chunk(
            id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
            text=txt, window=win, token_count=estimate_tokens(txt),
            section_title=sec, section_path=path,
            neighbors_prev=None, neighbors_next=None
        ))

    def flush():
        # Emit the buffered blocks as one chunk, or sentence-split when too big.
        nonlocal buf
        if not buf: return
        text_body = "\n\n".join([b.text for b in buf])
        # Section metadata is taken from the last buffered block.
        sec_title = buf[-1].section_title; sec_path = buf[-1].section_path

        if estimate_tokens(text_body) <= max_tokens:
            _add(text_body, sec_title, sec_path)
        else:
            # Oversized buffer: re-split on sentences, carrying ~'overlap'
            # tokens of trailing context into each following chunk.
            sents = split_sentences(text_body); cur_sents = []; cur_len = 0
            for s in sents:
                slen = estimate_tokens(s)
                if cur_len + slen > target and cur_sents:
                    _add(" ".join(cur_sents), sec_title, sec_path)
                    # Collect trailing sentences up to (strictly under) 'overlap' tokens.
                    ov_s = []; ov_l = 0
                    for os in reversed(cur_sents):
                        if ov_l + estimate_tokens(os) < overlap:
                            ov_s.insert(0, os); ov_l += estimate_tokens(os)
                        else: break
                    cur_sents = list(ov_s); cur_sents.append(s); cur_len = ov_l + slen
                else:
                    cur_sents.append(s); cur_len += slen
            if cur_sents:
                _add(" ".join(cur_sents), sec_title, sec_path)
        buf = []

    for b in blocks:
        # H2 headings are ignored so surrounding paragraphs keep their context.
        if b.kind == "heading": continue
        # Flush before the buffer would reach the target size.
        if estimate_tokens("\n\n".join([x.text for x in buf])) + estimate_tokens(b.text) >= target:
            flush()
        buf.append(b)
    flush()
    return chunks
|
||||
|
||||
def strategy_by_heading(blocks: List[RawBlock], config: Dict[str, Any], note_id: str, doc_title: str = "") -> List[Chunk]:
    """
    Split text on markdown headings with a hybrid safety net.

    Headings above 'split_level' always flush the buffer; headings at
    'split_level' flush either always (strict mode) or once 'target' tokens
    have accumulated. Buffers that still exceed 'max' tokens are re-split on
    sentence boundaries with a token overlap.

    Args:
        blocks: Parsed raw blocks (headings and paragraphs) in document order.
        config: Chunking profile; reads 'strict_heading_split', 'target',
            'max', 'split_level' and 'overlap'.
        note_id: ID of the parent note, used to derive chunk IDs.
        doc_title: Document (H1) title used for the breadcrumb window.

    Returns:
        Ordered list of Chunk objects (neighbor links are set by the caller).
    """
    strict = config.get("strict_heading_split", False)
    target = config.get("target", 400)
    max_tokens = config.get("max", 600)
    split_level = config.get("split_level", 2)
    # Midpoint of the configured (min, max) overlap range.
    overlap = sum(config.get("overlap", (50, 80))) // 2

    chunks: List[Chunk] = []
    buf: List[str] = []
    cur_tokens = 0

    def _add_to_chunks(txt, title, path):
        # Create one chunk with a breadcrumb context window (H1 > H2).
        idx = len(chunks)
        win = _create_context_win(doc_title, title, txt)
        chunks.append(Chunk(
            id=f"{note_id}#c{idx:02d}", note_id=note_id, index=idx,
            text=txt, window=win, token_count=estimate_tokens(txt),
            section_title=title, section_path=path,
            neighbors_prev=None, neighbors_next=None
        ))

    def _flush(title, path):
        # Emit the buffered text; sentence-split with overlap when oversized.
        nonlocal buf, cur_tokens
        if not buf: return
        full_text = "\n\n".join(buf)
        if estimate_tokens(full_text) <= max_tokens:
            _add_to_chunks(full_text, title, path)
        else:
            sents = split_sentences(full_text); cur_sents = []; sub_len = 0
            for s in sents:
                slen = estimate_tokens(s)
                if sub_len + slen > target and cur_sents:
                    _add_to_chunks(" ".join(cur_sents), title, path)
                    # Carry up to (strictly under) 'overlap' trailing tokens forward.
                    ov_s = []; ov_l = 0
                    for os in reversed(cur_sents):
                        if ov_l + estimate_tokens(os) < overlap:
                            ov_s.insert(0, os); ov_l += estimate_tokens(os)
                        else: break
                    cur_sents = list(ov_s); cur_sents.append(s); sub_len = ov_l + slen
                else: cur_sents.append(s); sub_len += slen
            if cur_sents: _add_to_chunks(" ".join(cur_sents), title, path)
        buf = []; cur_tokens = 0

    for b in blocks:
        if b.kind == "heading":
            # Higher-level headings always flush; split-level headings flush
            # in strict mode or once the target size has been reached.
            if b.level < split_level: _flush(b.section_title, b.section_path)
            elif b.level == split_level:
                if strict or cur_tokens >= target: _flush(b.section_title, b.section_path)
            continue
        bt = estimate_tokens(b.text)
        # Safety net: never let the buffer grow past 'max' tokens.
        if cur_tokens + bt > max_tokens and buf: _flush(b.section_title, b.section_path)
        buf.append(b.text); cur_tokens += bt
    if buf:
        # Final flush, attributed to the last block's section (if any).
        last_b = blocks[-1] if blocks else None
        _flush(last_b.section_title if last_b else None, last_b.section_path if last_b else "/")
    return chunks
|
||||
55
app/core/chunking/chunking_utils.py
Normal file
55
app/core/chunking/chunking_utils.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
"""
|
||||
FILE: app/core/chunking/chunking_utils.py
|
||||
DESCRIPTION: Hilfswerkzeuge für Token-Schätzung und YAML-Konfiguration.
|
||||
"""
|
||||
import math
|
||||
import yaml
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Project base directory (four `.parent` hops up from this file).
BASE_DIR = Path(__file__).resolve().parent.parent.parent.parent
# Central chunking/type configuration file.
CONFIG_PATH = BASE_DIR / "config" / "types.yaml"
# Fallback profile used when types.yaml is missing or has no matching profile.
DEFAULT_PROFILE = {"strategy": "sliding_window", "target": 400, "max": 600, "overlap": (50, 80)}

# Process-wide cache for the parsed YAML config (filled on first successful load).
_CONFIG_CACHE = None
|
||||
|
||||
def load_yaml_config() -> Dict[str, Any]:
    """
    Load and memoize the global YAML configuration (config/types.yaml).

    Returns:
        The parsed configuration dict. Returns {} when the file is missing,
        empty, unreadable or does not contain a mapping. The result is
        cached process-wide after the first successful load.
    """
    global _CONFIG_CACHE
    if _CONFIG_CACHE is not None:
        return _CONFIG_CACHE
    if not CONFIG_PATH.exists():
        return {}
    try:
        with open(CONFIG_PATH, "r", encoding="utf-8") as f:
            # An empty YAML document parses to None; normalize to {} so the
            # declared Dict return type always holds and caching takes effect
            # (previously None was returned and never cached).
            data = yaml.safe_load(f) or {}
    except Exception as e:
        # Keep the original best-effort behavior, but leave a trace.
        logger.warning("Failed to load config %s: %s", CONFIG_PATH, e)
        return {}
    if not isinstance(data, dict):
        # A non-mapping top level (e.g. a list) would break all callers.
        data = {}
    _CONFIG_CACHE = data
    return data
|
||||
|
||||
def get_chunk_config(note_type: str) -> Dict[str, Any]:
    """Resolve the chunking profile for the given note type."""
    cfg = load_yaml_config()
    profiles = cfg.get("chunking_profiles", {})
    type_def = cfg.get("types", {}).get(note_type.lower(), {})
    # The per-type profile wins; otherwise fall back to the global default.
    default_name = cfg.get("defaults", {}).get("chunking_profile", "sliding_standard")
    profile_name = type_def.get("chunking_profile") or default_name
    resolved = profiles.get(profile_name, DEFAULT_PROFILE).copy()
    # YAML lists become tuples so overlap can be treated as a (min, max) pair.
    overlap = resolved.get("overlap")
    if isinstance(overlap, list):
        resolved["overlap"] = tuple(overlap)
    return resolved
|
||||
|
||||
def estimate_tokens(text: str) -> int:
    """Rough token estimate: ~4 characters per token, never below 1."""
    stripped_len = len(text.strip())
    approx = math.ceil(stripped_len / 4)
    return approx if approx >= 1 else 1
|
||||
|
||||
def extract_frontmatter_from_text(md_text: str) -> Tuple[Dict[str, Any], str]:
    """
    Split YAML frontmatter from a markdown document.

    Args:
        md_text: Raw markdown, optionally starting with a '--- ... ---' block.

    Returns:
        (frontmatter, body): the parsed frontmatter dict ({} when absent or
        invalid) and the remaining text with the frontmatter stripped.
    """
    import re
    fm_match = re.match(r'^\s*---\s*\n(.*?)\n---', md_text, re.DOTALL)
    if not fm_match:
        return {}, md_text
    try:
        frontmatter = yaml.safe_load(fm_match.group(1))
        if not isinstance(frontmatter, dict):
            frontmatter = {}
    except Exception:
        frontmatter = {}
    # Slice at the match end instead of running the regex a second time —
    # the previous re.sub pass duplicated the work of re.match above.
    return frontmatter, md_text[fm_match.end():].strip()
|
||||
35
app/core/database/__init__.py
Normal file
35
app/core/database/__init__.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
"""
|
||||
PACKAGE: app.core.database
|
||||
DESCRIPTION: Zentrale Schnittstelle für alle Datenbank-Operationen (Qdrant).
|
||||
Bündelt Client-Initialisierung und Point-Konvertierung.
|
||||
"""
|
||||
from .qdrant import (
|
||||
QdrantConfig,
|
||||
get_client,
|
||||
ensure_collections,
|
||||
ensure_payload_indexes,
|
||||
collection_names
|
||||
)
|
||||
from .qdrant_points import (
|
||||
points_for_note,
|
||||
points_for_chunks,
|
||||
points_for_edges,
|
||||
upsert_batch,
|
||||
get_edges_for_sources,
|
||||
search_chunks_by_vector
|
||||
)
|
||||
|
||||
# Public export surface for the whole system.
__all__ = [
    "QdrantConfig",
    "get_client",
    "ensure_collections",
    "ensure_payload_indexes",
    "collection_names",
    "points_for_note",
    "points_for_chunks",
    "points_for_edges",
    "upsert_batch",
    "get_edges_for_sources",
    "search_chunks_by_vector"
]
|
||||
169
app/core/database/qdrant.py
Normal file
169
app/core/database/qdrant.py
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
"""
|
||||
FILE: app/core/database/qdrant.py
|
||||
DESCRIPTION: Qdrant-Client Factory und Schema-Management.
|
||||
Erstellt Collections und Payload-Indizes.
|
||||
MODULARISIERUNG: Verschoben in das database-Paket für WP-14.
|
||||
VERSION: 2.2.1
|
||||
STATUS: Active
|
||||
DEPENDENCIES: qdrant_client, dataclasses, os
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Tuple, Dict, List
|
||||
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Konfiguration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass
class QdrantConfig:
    """Connection and schema settings for the Qdrant backend."""
    host: Optional[str] = None
    port: Optional[int] = None
    url: Optional[str] = None
    api_key: Optional[str] = None
    prefix: str = "mindnet"
    dim: int = 384
    distance: str = "Cosine"  # Cosine | Dot | Euclid
    on_disk_payload: bool = True

    @classmethod
    def from_env(cls) -> "QdrantConfig":
        """Build the configuration from environment variables.

        Either QDRANT_URL or QDRANT_HOST/QDRANT_PORT is used; the API key
        is optional.
        """
        env = os.getenv
        raw_port = env("QDRANT_PORT")
        return cls(
            host=env("QDRANT_HOST") or None,
            port=int(raw_port) if raw_port else None,
            url=env("QDRANT_URL") or None,
            api_key=env("QDRANT_API_KEY") or None,
            prefix=env("COLLECTION_PREFIX") or "mindnet",
            dim=int(env("VECTOR_DIM") or 384),
            distance=env("DISTANCE", "Cosine"),
            on_disk_payload=env("ON_DISK_PAYLOAD", "true").lower() == "true",
        )
|
||||
|
||||
|
||||
def get_client(cfg: QdrantConfig) -> QdrantClient:
    """Create a Qdrant client from the configuration (URL wins over host/port)."""
    if cfg.url:
        return QdrantClient(url=cfg.url, api_key=cfg.api_key, timeout=60.0)
    host = cfg.host or "127.0.0.1"
    port = cfg.port or 6333
    return QdrantClient(host=host, port=port, api_key=cfg.api_key, timeout=60.0)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Collections
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def collection_names(prefix: str) -> Tuple[str, str, str]:
    """Return the standardized (notes, chunks, edges) collection names."""
    return tuple(f"{prefix}_{suffix}" for suffix in ("notes", "chunks", "edges"))
|
||||
|
||||
|
||||
def _vector_params(dim: int, distance: str) -> rest.VectorParams:
    """Build the vector parameters for a collection schema."""
    # Distance: "Cosine" | "Dot" | "Euclid"; unknown names fall back to COSINE.
    # NOTE(review): qdrant enum attributes are upper-case (COSINE/DOT/EUCLID),
    # so the capitalized lookup may always hit the fallback — confirm.
    resolved = getattr(rest.Distance, distance.capitalize(), rest.Distance.COSINE)
    return rest.VectorParams(size=dim, distance=resolved)
|
||||
|
||||
|
||||
def ensure_collections(client: QdrantClient, prefix: str, dim: int) -> None:
    """Create the notes, chunks and edges collections if they do not exist."""
    notes, chunks, edges = collection_names(prefix)
    distance = os.getenv("DISTANCE", "Cosine")

    # Edges only carry a 1-dim dummy vector; they are queried via payload filters.
    plan = (
        (notes, _vector_params(dim, distance)),
        (chunks, _vector_params(dim, distance)),
        (edges, _vector_params(1, "Dot")),
    )
    for name, vector_cfg in plan:
        if not client.collection_exists(name):
            client.create_collection(
                collection_name=name,
                vectors_config=vector_cfg,
                on_disk_payload=True,
            )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Payload-Indizes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _ensure_index(client: QdrantClient, collection: str, field: str, schema: rest.PayloadSchemaType) -> None:
    """Idempotently create a payload index for a single field."""
    try:
        client.create_payload_index(
            collection_name=collection,
            field_name=field,
            field_schema=schema,
            wait=True,
        )
    except Exception as e:
        # Qdrant raises when the index already exists; treat that as a no-op.
        logger.debug(f"Index check for {field} in {collection}: {e}")
|
||||
|
||||
|
||||
def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None:
    """
    Ensure all payload indexes needed for search exist.

    - notes: note_id, type, title, updated, tags
    - chunks: note_id, chunk_id, index, type, tags
    - edges: note_id, kind, scope, source_id, target_id, chunk_id
    """
    notes, chunks, edges = collection_names(prefix)
    keyword = rest.PayloadSchemaType.KEYWORD
    text = rest.PayloadSchemaType.TEXT
    integer = rest.PayloadSchemaType.INTEGER

    # Table-driven: one (field, schema) list per collection.
    index_plan = {
        notes: (
            ("note_id", keyword), ("type", keyword), ("title", text),
            ("updated", integer), ("tags", keyword),
        ),
        chunks: (
            ("note_id", keyword), ("chunk_id", keyword), ("index", integer),
            ("type", keyword), ("tags", keyword),
        ),
        edges: (
            ("note_id", keyword), ("kind", keyword), ("scope", keyword),
            ("source_id", keyword), ("target_id", keyword), ("chunk_id", keyword),
        ),
    }
    for collection, fields in index_plan.items():
        for field, schema in fields:
            _ensure_index(client, collection, field, schema)
|
||||
|
||||
|
||||
# Public API of this module: client factory plus schema management.
__all__ = [
    "QdrantConfig",
    "get_client",
    "ensure_collections",
    "ensure_payload_indexes",
    "collection_names",
]
|
||||
296
app/core/database/qdrant_points.py
Normal file
296
app/core/database/qdrant_points.py
Normal file
|
|
@ -0,0 +1,296 @@
|
|||
"""
|
||||
FILE: app/core/database/qdrant_points.py
|
||||
DESCRIPTION: Object-Mapper für Qdrant. Konvertiert JSON-Payloads (Notes, Chunks, Edges) in PointStructs und generiert deterministische UUIDs.
|
||||
VERSION: 1.5.0
|
||||
STATUS: Active
|
||||
DEPENDENCIES: qdrant_client, uuid, os
|
||||
LAST_ANALYSIS: 2025-12-15
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import os
|
||||
import uuid
|
||||
from typing import List, Tuple, Iterable, Optional, Dict, Any
|
||||
|
||||
from qdrant_client.http import models as rest
|
||||
from qdrant_client import QdrantClient
|
||||
|
||||
# --------------------- ID helpers ---------------------
|
||||
|
||||
def _to_uuid(stable_key: str) -> str:
|
||||
return str(uuid.uuid5(uuid.NAMESPACE_URL, stable_key))
|
||||
|
||||
def _names(prefix: str) -> Tuple[str, str, str]:
|
||||
return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges"
|
||||
|
||||
# --------------------- Points builders ---------------------
|
||||
|
||||
def points_for_note(prefix: str, note_payload: dict, note_vec: List[float] | None, dim: int) -> Tuple[str, List[rest.PointStruct]]:
    """Map a note payload (plus optional vector) onto a single PointStruct."""
    notes_col = _names(prefix)[0]
    # Missing embeddings become a zero vector of the configured dimension.
    vector = [0.0] * int(dim) if note_vec is None else note_vec
    stable_id = note_payload.get("note_id") or note_payload.get("id") or "missing-note-id"
    point = rest.PointStruct(id=_to_uuid(stable_id), vector=vector, payload=note_payload)
    return notes_col, [point]
|
||||
|
||||
def points_for_chunks(prefix: str, chunk_payloads: List[dict], vectors: List[List[float]]) -> Tuple[str, List[rest.PointStruct]]:
    """Map chunk payloads and their vectors onto PointStructs for <prefix>_chunks."""
    chunks_col = _names(prefix)[1]
    points: List[rest.PointStruct] = []
    for position, (payload, vec) in enumerate(zip(chunk_payloads, vectors), start=1):
        chunk_id = payload.get("chunk_id") or payload.get("id")
        if not chunk_id:
            # Derive a stable fallback ID from the parent note and the position.
            parent = payload.get("note_id") or payload.get("parent_note_id") or "missing-note"
            chunk_id = f"{parent}#{position}"
        # Normalize: make sure the payload carries its chunk_id.
        payload["chunk_id"] = chunk_id
        points.append(rest.PointStruct(id=_to_uuid(chunk_id), vector=vec, payload=payload))
    return chunks_col, points
|
||||
|
||||
def _normalize_edge_payload(pl: dict) -> dict:
|
||||
kind = pl.get("kind") or pl.get("edge_type") or "edge"
|
||||
source_id = pl.get("source_id") or pl.get("src_id") or "unknown-src"
|
||||
target_id = pl.get("target_id") or pl.get("dst_id") or "unknown-tgt"
|
||||
seq = pl.get("seq") or pl.get("order") or pl.get("index")
|
||||
|
||||
pl.setdefault("kind", kind)
|
||||
pl.setdefault("source_id", source_id)
|
||||
pl.setdefault("target_id", target_id)
|
||||
if seq is not None and "seq" not in pl:
|
||||
pl["seq"] = seq
|
||||
return pl
|
||||
|
||||
def points_for_edges(prefix: str, edge_payloads: List[dict]) -> Tuple[str, List[rest.PointStruct]]:
    """Map edge payloads onto PointStructs (1-dim dummy vector; payload-driven)."""
    edges_col = _names(prefix)[2]
    points: List[rest.PointStruct] = []
    for raw in edge_payloads:
        pl = _normalize_edge_payload(raw)
        edge_id = pl.get("edge_id")
        if not edge_id:
            # Deterministic fallback ID: kind:source->target#seq
            edge_id = "{}:{}->{}#{}".format(
                pl.get("kind", "edge"),
                pl.get("source_id", "unknown-src"),
                pl.get("target_id", "unknown-tgt"),
                pl.get("seq") or "",
            )
        pl["edge_id"] = edge_id
        points.append(rest.PointStruct(id=_to_uuid(edge_id), vector=[0.0], payload=pl))
    return edges_col, points
|
||||
|
||||
# --------------------- Vector schema & overrides ---------------------
|
||||
|
||||
def _preferred_name(candidates: List[str]) -> str:
|
||||
for k in ("text", "default", "embedding", "content"):
|
||||
if k in candidates:
|
||||
return k
|
||||
return sorted(candidates)[0]
|
||||
|
||||
def _env_override_for_collection(collection: str) -> Optional[str]:
|
||||
"""
|
||||
Returns:
|
||||
- "__single__" to force single-vector
|
||||
- concrete name (str) to force named-vector with that name
|
||||
- None to auto-detect
|
||||
"""
|
||||
base = os.getenv("MINDNET_VECTOR_NAME")
|
||||
if collection.endswith("_notes"):
|
||||
base = os.getenv("NOTES_VECTOR_NAME", base)
|
||||
elif collection.endswith("_chunks"):
|
||||
base = os.getenv("CHUNKS_VECTOR_NAME", base)
|
||||
elif collection.endswith("_edges"):
|
||||
base = os.getenv("EDGES_VECTOR_NAME", base)
|
||||
|
||||
if not base:
|
||||
return None
|
||||
val = base.strip()
|
||||
if val.lower() in ("__single__", "single"):
|
||||
return "__single__"
|
||||
return val # concrete name
|
||||
|
||||
def _get_vector_schema(client: QdrantClient, collection_name: str) -> dict:
    """
    Inspect a collection's vector layout.

    Returns {"kind": "single", "size": int|None} or
    {"kind": "named", "names": [...], "primary": str}.
    """
    try:
        info = client.get_collection(collection_name=collection_name)
        vecs = getattr(info, "vectors", None)
        # A single-vector schema exposes an integer .size directly.
        size = getattr(vecs, "size", None)
        if isinstance(size, int):
            return {"kind": "single", "size": size}
        # A named-vector schema carries a dict-like .config mapping.
        cfg = getattr(vecs, "config", None)
        if isinstance(cfg, dict) and cfg:
            names = list(cfg)
            return {"kind": "named", "names": names, "primary": _preferred_name(names)}
    except Exception:
        # Best effort: fall through to the single-vector default below.
        pass
    return {"kind": "single", "size": None}
|
||||
|
||||
def _as_named(points: List[rest.PointStruct], name: str) -> List[rest.PointStruct]:
    """Rewrap point vectors as named vectors under *name* (best effort)."""
    converted: List[rest.PointStruct] = []
    for pt in points:
        vec = getattr(pt, "vector", None)
        if isinstance(vec, dict):
            if name in vec:
                converted.append(pt)
                continue
            # Reuse any existing named entry; an empty dict falls back to [0.0].
            try:
                fallback = list(next(iter(vec.values())))
            except Exception:
                fallback = [0.0]
            converted.append(rest.PointStruct(id=pt.id, vector={name: fallback}, payload=pt.payload))
        elif vec is not None:
            converted.append(rest.PointStruct(id=pt.id, vector={name: vec}, payload=pt.payload))
        else:
            converted.append(pt)
    return converted
|
||||
|
||||
# --------------------- Qdrant ops ---------------------
|
||||
|
||||
def upsert_batch(client: QdrantClient, collection: str, points: List[rest.PointStruct]) -> None:
    """
    Upsert points, adapting to the collection's vector schema.

    Resolution order: explicit ENV override, then auto-detected schema,
    then a plain single-vector upsert as the fallback.
    """
    if not points:
        return

    def _do(batch):
        client.upsert(collection_name=collection, points=batch, wait=True)

    # 1) ENV overrides come first.
    override = _env_override_for_collection(collection)
    if override == "__single__":
        _do(points)
        return
    if isinstance(override, str):
        _do(_as_named(points, override))
        return

    # 2) Auto-detect the collection schema.
    schema = _get_vector_schema(client, collection)
    if schema.get("kind") == "named":
        name = schema.get("primary") or _preferred_name(schema.get("names") or [])
        _do(_as_named(points, name))
        return

    # 3) Fallback: single-vector collection.
    _do(points)
|
||||
|
||||
# --- Optional search helpers ---
|
||||
|
||||
def _filter_any(field: str, values: Iterable[str]) -> rest.Filter:
    """Build an OR-filter matching *field* against any of *values*."""
    conditions = [rest.FieldCondition(key=field, match=rest.MatchValue(value=v)) for v in values]
    return rest.Filter(should=conditions)
|
||||
|
||||
def _merge_filters(*filters: Optional[rest.Filter]) -> Optional[rest.Filter]:
    """AND-combine filters; 'should' groups are nested to preserve OR semantics."""
    present = [f for f in filters if f is not None]
    if not present:
        return None
    if len(present) == 1:
        return present[0]
    combined = []
    for f in present:
        if getattr(f, "must", None):
            combined.extend(f.must)
        if getattr(f, "should", None):
            combined.append(rest.Filter(should=f.should))
    return rest.Filter(must=combined)
|
||||
|
||||
def _filter_from_dict(filters: Optional[Dict[str, Any]]) -> Optional[rest.Filter]:
    """Translate a simple {field: value-or-values} mapping into a Qdrant filter."""
    if not filters:
        return None
    parts = []
    for field, value in filters.items():
        if isinstance(value, (list, tuple, set)):
            # Multiple values: OR across them.
            parts.append(_filter_any(field, [str(x) for x in value]))
        else:
            parts.append(rest.Filter(must=[rest.FieldCondition(key=field, match=rest.MatchValue(value=value))]))
    return _merge_filters(*parts)
|
||||
|
||||
def search_chunks_by_vector(client: QdrantClient, prefix: str, vector: List[float], top: int = 10, filters: Optional[Dict[str, Any]] = None) -> List[Tuple[str, float, dict]]:
    """Vector search over <prefix>_chunks; returns (id, score, payload) triples."""
    chunks_col = _names(prefix)[1]
    hits = client.search(
        collection_name=chunks_col,
        query_vector=vector,
        limit=top,
        with_payload=True,
        with_vectors=False,
        query_filter=_filter_from_dict(filters),
    )
    return [(str(h.id), float(h.score), dict(h.payload or {})) for h in hits]
|
||||
|
||||
|
||||
# --- Edge retrieval helper ---
|
||||
|
||||
def get_edges_for_sources(
    client: QdrantClient,
    prefix: str,
    source_ids: Iterable[str],
    edge_types: Optional[Iterable[str]] = None,
    limit: int = 2048,
) -> List[Dict[str, Any]]:
    """Retrieve edge payloads from the <prefix>_edges collection.

    Edges are matched on ``source_id`` (typically chunk_ids or note_ids) and
    optionally restricted to the given ``edge_types`` via their ``kind`` field.
    At most ``limit`` payload dicts are returned; each dict carries keys such
    as ``note_id``, ``chunk_id``, ``kind``, ``scope``, ``source_id``,
    ``target_id``, ``rule_id`` and ``confidence``.
    """
    ids = [str(s) for s in source_ids]
    if not ids or limit <= 0:
        return []

    # Resolve the edges collection name for this prefix.
    _, _, edges_col = _names(prefix)

    # source_id IN ids, optionally AND kind IN edge_types.
    source_filter = _filter_any("source_id", ids)
    kind_filter = _filter_any("kind", [str(k) for k in edge_types]) if edge_types else None
    scroll_filter = _merge_filters(source_filter, kind_filter)

    payloads: List[Dict[str, Any]] = []
    offset = None
    budget = int(limit)

    # Paginated scroll; we only need payloads, never vectors.
    while budget > 0:
        records, offset = client.scroll(
            collection_name=edges_col,
            scroll_filter=scroll_filter,
            limit=min(256, budget),
            with_payload=True,
            with_vectors=False,
            offset=offset,
        )
        if not records:
            break
        for record in records:
            payloads.append(dict(record.payload or {}))
            budget -= 1
            if budget <= 0:
                break
        if offset is None:
            # Qdrant signals "no more pages" with a None offset.
            break

    return payloads
|
||||
|
|
@ -1,420 +1,10 @@
|
|||
"""
|
||||
FILE: app/core/derive_edges.py
|
||||
DESCRIPTION: Extrahiert Graph-Kanten aus Text. Unterstützt Wikilinks, Inline-Relations ([[rel:type|target]]) und Obsidian Callouts.
|
||||
VERSION: 2.0.0
|
||||
STATUS: Active
|
||||
DEPENDENCIES: re, os, yaml, typing
|
||||
EXTERNAL_CONFIG: config/types.yaml
|
||||
LAST_ANALYSIS: 2025-12-15
|
||||
DESCRIPTION: Facade für das neue graph Package.
|
||||
WP-14: Modularisierung abgeschlossen.
|
||||
VERSION: 2.2.0
|
||||
"""
|
||||
from .graph.graph_derive_edges import build_edges_for_note
|
||||
from .graph.graph_utils import PROVENANCE_PRIORITY
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from typing import Iterable, List, Optional, Tuple, Set, Dict
|
||||
|
||||
try:
|
||||
import yaml # optional, nur für types.yaml
|
||||
except Exception: # pragma: no cover
|
||||
yaml = None
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Utilities
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
def _get(d: dict, *keys, default=None):
|
||||
for k in keys:
|
||||
if isinstance(d, dict) and k in d and d[k] is not None:
|
||||
return d[k]
|
||||
return default
|
||||
|
||||
def _chunk_text_for_refs(chunk: dict) -> str:
    """Pick the best text field of a chunk: window > text > content > raw, else ""."""
    for field in ("window", "text", "content", "raw"):
        value = _get(chunk, field)
        if value:
            return value
    return ""
|
||||
|
||||
def _dedupe_seq(seq: Iterable[str]) -> List[str]:
|
||||
seen: Set[str] = set()
|
||||
out: List[str] = []
|
||||
for s in seq:
|
||||
if s not in seen:
|
||||
seen.add(s)
|
||||
out.append(s)
|
||||
return out
|
||||
|
||||
def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, extra: Optional[dict] = None) -> dict:
|
||||
pl = {
|
||||
"kind": kind,
|
||||
"relation": kind, # Alias (v2)
|
||||
"scope": scope, # "chunk" | "note"
|
||||
"source_id": source_id,
|
||||
"target_id": target_id,
|
||||
"note_id": note_id, # Träger-Note der Kante
|
||||
}
|
||||
if extra:
|
||||
pl.update(extra)
|
||||
return pl
|
||||
|
||||
def _mk_edge_id(kind: str, s: str, t: str, scope: str, rule_id: Optional[str] = None) -> str:
|
||||
base = f"{kind}:{s}->{t}#{scope}"
|
||||
if rule_id:
|
||||
base += f"|{rule_id}"
|
||||
try:
|
||||
import hashlib
|
||||
return hashlib.blake2s(base.encode("utf-8"), digest_size=12).hexdigest()
|
||||
except Exception: # pragma: no cover
|
||||
return base
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Typen-Registry (types.yaml)
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
def _env(n: str, default: Optional[str] = None) -> str:
|
||||
v = os.getenv(n)
|
||||
return v if v is not None else (default or "")
|
||||
|
||||
def _load_types_registry() -> dict:
|
||||
"""Lädt die YAML-Registry aus MINDNET_TYPES_FILE oder ./config/types.yaml"""
|
||||
p = _env("MINDNET_TYPES_FILE", "./config/types.yaml")
|
||||
if not os.path.isfile(p) or yaml is None:
|
||||
return {}
|
||||
try:
|
||||
with open(p, "r", encoding="utf-8") as f:
|
||||
data = yaml.safe_load(f) or {}
|
||||
return data
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
def _get_types_map(reg: dict) -> dict:
|
||||
if isinstance(reg, dict) and isinstance(reg.get("types"), dict):
|
||||
return reg["types"]
|
||||
return reg if isinstance(reg, dict) else {}
|
||||
|
||||
def _edge_defaults_for(note_type: Optional[str], reg: dict) -> List[str]:
    """Resolve the edge_defaults list for *note_type*.

    Lookup order:
      1. reg['types'][note_type]['edge_defaults']
      2. reg['defaults'|'default'|'global']['edge_defaults']
      3. [] when nothing matches
    """
    types_map = _get_types_map(reg)
    if note_type and isinstance(types_map, dict):
        entry = types_map.get(note_type)
        if isinstance(entry, dict) and isinstance(entry.get("edge_defaults"), list):
            return [str(item) for item in entry["edge_defaults"] if isinstance(item, str)]
    for fallback_key in ("defaults", "default", "global"):
        section = reg.get(fallback_key)
        if isinstance(section, dict) and isinstance(section.get("edge_defaults"), list):
            return [str(item) for item in section["edge_defaults"] if isinstance(item, str)]
    return []
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Parser für Links / Relationen
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
# Normale Wikilinks (Fallback)
|
||||
_WIKILINK_RE = re.compile(r"\[\[(?:[^\|\]]+\|)?([a-zA-Z0-9_\-#:. ]+)\]\]")
|
||||
|
||||
# Getypte Inline-Relationen:
|
||||
# [[rel:KIND | Target]]
|
||||
# [[rel:KIND Target]]
|
||||
_REL_PIPE = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s*\|\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
|
||||
_REL_SPACE = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s+(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
|
||||
# rel: KIND [[Target]] (reines Textmuster)
|
||||
_REL_TEXT = re.compile(r"rel\s*:\s*(?P<kind>[a-z_]+)\s*\[\[\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
|
||||
|
||||
def _extract_typed_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
|
||||
"""
|
||||
Gibt Liste (kind, target) zurück und den Text mit entfernten getypten Relation-Links,
|
||||
damit die generische Wikilink-Erkennung sie nicht doppelt zählt.
|
||||
Unterstützt drei Varianten:
|
||||
- [[rel:KIND | Target]]
|
||||
- [[rel:KIND Target]]
|
||||
- rel: KIND [[Target]]
|
||||
"""
|
||||
pairs: List[Tuple[str,str]] = []
|
||||
def _collect(m):
|
||||
k = (m.group("kind") or "").strip().lower()
|
||||
t = (m.group("target") or "").strip()
|
||||
if k and t:
|
||||
pairs.append((k, t))
|
||||
return "" # Link entfernen
|
||||
|
||||
text = _REL_PIPE.sub(_collect, text)
|
||||
text = _REL_SPACE.sub(_collect, text)
|
||||
text = _REL_TEXT.sub(_collect, text)
|
||||
return pairs, text
|
||||
|
||||
# Obsidian Callout Parser
|
||||
_CALLOUT_START = re.compile(r"^\s*>\s*\[!edge\]\s*(.*)$", re.IGNORECASE)
|
||||
_REL_LINE = re.compile(r"^(?P<kind>[a-z_]+)\s*:\s*(?P<targets>.+?)\s*$", re.IGNORECASE)
|
||||
_WIKILINKS_IN_LINE = re.compile(r"\[\[([^\]]+)\]\]")
|
||||
|
||||
def _extract_callout_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
|
||||
"""
|
||||
Findet [!edge]-Callouts und extrahiert (kind, target). Entfernt den gesamten
|
||||
Callout-Block aus dem Text (damit Wikilinks daraus nicht zusätzlich als
|
||||
"references" gezählt werden).
|
||||
"""
|
||||
if not text:
|
||||
return [], text
|
||||
|
||||
lines = text.splitlines()
|
||||
out_pairs: List[Tuple[str,str]] = []
|
||||
keep_lines: List[str] = []
|
||||
i = 0
|
||||
|
||||
while i < len(lines):
|
||||
m = _CALLOUT_START.match(lines[i])
|
||||
if not m:
|
||||
keep_lines.append(lines[i])
|
||||
i += 1
|
||||
continue
|
||||
|
||||
block_lines: List[str] = []
|
||||
first_rest = m.group(1) or ""
|
||||
if first_rest.strip():
|
||||
block_lines.append(first_rest)
|
||||
|
||||
i += 1
|
||||
while i < len(lines) and lines[i].lstrip().startswith('>'):
|
||||
block_lines.append(lines[i].lstrip()[1:].lstrip())
|
||||
i += 1
|
||||
|
||||
for bl in block_lines:
|
||||
mrel = _REL_LINE.match(bl)
|
||||
if not mrel:
|
||||
continue
|
||||
kind = (mrel.group("kind") or "").strip().lower()
|
||||
targets = mrel.group("targets") or ""
|
||||
found = _WIKILINKS_IN_LINE.findall(targets)
|
||||
if found:
|
||||
for t in found:
|
||||
t = t.strip()
|
||||
if t:
|
||||
out_pairs.append((kind, t))
|
||||
else:
|
||||
for raw in re.split(r"[,;]", targets):
|
||||
t = raw.strip()
|
||||
if t:
|
||||
out_pairs.append((kind, t))
|
||||
|
||||
# Callout wird NICHT in keep_lines übernommen
|
||||
continue
|
||||
|
||||
remainder = "\n".join(keep_lines)
|
||||
return out_pairs, remainder
|
||||
|
||||
def _extract_wikilinks(text: str) -> List[str]:
    """Return the (stripped) targets of all plain wikilinks in *text*."""
    return [match.group(1).strip() for match in _WIKILINK_RE.finditer(text or "")]
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Hauptfunktion
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
def build_edges_for_note(
    note_id: str,
    chunks: List[dict],
    note_level_references: Optional[List[str]] = None,
    include_note_scope_refs: bool = False,
) -> List[dict]:
    """
    Build all edges for one note.

    - belongs_to: for every chunk (chunk -> note)
    - next / prev: between consecutive chunks
    - references: per chunk from window/text (via wikilinks)
    - typed inline relations: [[rel:KIND | Target]] / [[rel:KIND Target]] / rel: KIND [[Target]]
    - Obsidian callouts: > [!edge] KIND: [[Target]] [[Target2]]
    - optional note-scope references/backlinks: deduplicated over all chunk hits + note_level_references
    - type-based default edges (edge_defaults) per found reference

    Returns the final, de-duplicated list of edge payload dicts.
    """
    edges: List[dict] = []

    # Note type (expected on the first chunk; None when absent)
    note_type = None
    if chunks:
        note_type = _get(chunks[0], "type")

    # 1) belongs_to: one structural edge per chunk pointing at its note
    for ch in chunks:
        cid = _get(ch, "chunk_id", "id")
        if not cid:
            continue
        edges.append(_edge("belongs_to", "chunk", cid, note_id, note_id, {
            "chunk_id": cid,
            "edge_id": _mk_edge_id("belongs_to", cid, note_id, "chunk", "structure:belongs_to"),
            "provenance": "rule",
            "rule_id": "structure:belongs_to",
            "confidence": 1.0,
        }))

    # 2) next / prev: both directions between consecutive chunks
    for i in range(len(chunks) - 1):
        a, b = chunks[i], chunks[i + 1]
        a_id = _get(a, "chunk_id", "id")
        b_id = _get(b, "chunk_id", "id")
        if not a_id or not b_id:
            continue
        edges.append(_edge("next", "chunk", a_id, b_id, note_id, {
            "chunk_id": a_id,
            "edge_id": _mk_edge_id("next", a_id, b_id, "chunk", "structure:order"),
            "provenance": "rule",
            "rule_id": "structure:order",
            "confidence": 0.95,
        }))
        edges.append(_edge("prev", "chunk", b_id, a_id, note_id, {
            "chunk_id": b_id,
            "edge_id": _mk_edge_id("prev", b_id, a_id, "chunk", "structure:order"),
            "provenance": "rule",
            "rule_id": "structure:order",
            "confidence": 0.95,
        }))

    # 3) references + typed inline + callouts + defaults (chunk scope)
    reg = _load_types_registry()
    defaults = _edge_defaults_for(note_type, reg)
    refs_all: List[str] = []

    for ch in chunks:
        cid = _get(ch, "chunk_id", "id")
        if not cid:
            continue
        raw = _chunk_text_for_refs(ch)

        # 3a) typed inline relations; matched links are stripped from the text
        typed, remainder = _extract_typed_relations(raw)
        for kind, target in typed:
            kind = kind.strip().lower()
            if not kind or not target:
                continue
            edges.append(_edge(kind, "chunk", cid, target, note_id, {
                "chunk_id": cid,
                "edge_id": _mk_edge_id(kind, cid, target, "chunk", "inline:rel"),
                "provenance": "explicit",
                "rule_id": "inline:rel",
                "confidence": 0.95,
            }))
            # symmetric relation kinds get a mirrored edge
            if kind in {"related_to", "similar_to"}:
                edges.append(_edge(kind, "chunk", target, cid, note_id, {
                    "chunk_id": cid,
                    "edge_id": _mk_edge_id(kind, target, cid, "chunk", "inline:rel"),
                    "provenance": "explicit",
                    "rule_id": "inline:rel",
                    "confidence": 0.95,
                }))

        # 3b) Obsidian callouts; the callout block is stripped from the text
        call_pairs, remainder2 = _extract_callout_relations(remainder)
        for kind, target in call_pairs:
            k = (kind or "").strip().lower()
            if not k or not target:
                continue
            edges.append(_edge(k, "chunk", cid, target, note_id, {
                "chunk_id": cid,
                "edge_id": _mk_edge_id(k, cid, target, "chunk", "callout:edge"),
                "provenance": "explicit",
                "rule_id": "callout:edge",
                "confidence": 0.95,
            }))
            if k in {"related_to", "similar_to"}:
                edges.append(_edge(k, "chunk", target, cid, note_id, {
                    "chunk_id": cid,
                    "edge_id": _mk_edge_id(k, target, cid, "chunk", "callout:edge"),
                    "provenance": "explicit",
                    "rule_id": "callout:edge",
                    "confidence": 0.95,
                }))

        # 3c) generic wikilinks -> references (+ type-based default edges per ref)
        refs = _extract_wikilinks(remainder2)
        for r in refs:
            edges.append(_edge("references", "chunk", cid, r, note_id, {
                "chunk_id": cid,
                "ref_text": r,
                "edge_id": _mk_edge_id("references", cid, r, "chunk", "explicit:wikilink"),
                "provenance": "explicit",
                "rule_id": "explicit:wikilink",
                "confidence": 1.0,
            }))
            for rel in defaults:
                if rel == "references":
                    continue
                edges.append(_edge(rel, "chunk", cid, r, note_id, {
                    "chunk_id": cid,
                    "edge_id": _mk_edge_id(rel, cid, r, "chunk", f"edge_defaults:{note_type}:{rel}"),
                    "provenance": "rule",
                    "rule_id": f"edge_defaults:{note_type}:{rel}",
                    "confidence": 0.7,
                }))
                if rel in {"related_to", "similar_to"}:
                    edges.append(_edge(rel, "chunk", r, cid, note_id, {
                        "chunk_id": cid,
                        "edge_id": _mk_edge_id(rel, r, cid, "chunk", f"edge_defaults:{note_type}:{rel}"),
                        "provenance": "rule",
                        "rule_id": f"edge_defaults:{note_type}:{rel}",
                        "confidence": 0.7,
                    }))

        refs_all.extend(refs)

    # 4) optional note-scope refs/backlinks (+ defaults), de-duplicated
    if include_note_scope_refs:
        refs_note = list(refs_all or [])
        if note_level_references:
            refs_note.extend([r for r in note_level_references if isinstance(r, str) and r])
        refs_note = _dedupe_seq(refs_note)
        for r in refs_note:
            edges.append(_edge("references", "note", note_id, r, note_id, {
                "edge_id": _mk_edge_id("references", note_id, r, "note", "explicit:note_scope"),
                "provenance": "explicit",
                "rule_id": "explicit:note_scope",
                "confidence": 1.0,
            }))
            edges.append(_edge("backlink", "note", r, note_id, note_id, {
                "edge_id": _mk_edge_id("backlink", r, note_id, "note", "derived:backlink"),
                "provenance": "rule",
                "rule_id": "derived:backlink",
                "confidence": 0.9,
            }))
            for rel in defaults:
                if rel == "references":
                    continue
                edges.append(_edge(rel, "note", note_id, r, note_id, {
                    "edge_id": _mk_edge_id(rel, note_id, r, "note", f"edge_defaults:{note_type}:{rel}"),
                    "provenance": "rule",
                    "rule_id": f"edge_defaults:{note_type}:{rel}",
                    "confidence": 0.7,
                }))
                if rel in {"related_to", "similar_to"}:
                    edges.append(_edge(rel, "note", r, note_id, note_id, {
                        "edge_id": _mk_edge_id(rel, r, note_id, "note", f"edge_defaults:{note_type}:{rel}"),
                        "provenance": "rule",
                        "rule_id": f"edge_defaults:{note_type}:{rel}",
                        "confidence": 0.7,
                    }))

    # 5) de-dupe on (source_id, target_id, relation, rule_id); first occurrence wins
    seen: Set[Tuple[str,str,str,str]] = set()
    out: List[dict] = []
    for e in edges:
        s = str(e.get("source_id") or "")
        t = str(e.get("target_id") or "")
        rel = str(e.get("relation") or e.get("kind") or "edge")
        rule = str(e.get("rule_id") or "")
        key = (s, t, rel, rule)
        if key in seen:
            continue
        seen.add(key)
        out.append(e)
    return out
|
||||
__all__ = ["build_edges_for_note", "PROVENANCE_PRIORITY"]
|
||||
16
app/core/graph/__init__.py
Normal file
16
app/core/graph/__init__.py
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
"""
|
||||
FILE: app/core/graph/__init__.py
|
||||
DESCRIPTION: Unified Graph Package. Exportiert Kanten-Ableitung und Graph-Adapter.
|
||||
"""
|
||||
from .graph_derive_edges import build_edges_for_note
|
||||
from .graph_utils import PROVENANCE_PRIORITY
|
||||
from .graph_subgraph import Subgraph, expand
|
||||
from .graph_weights import EDGE_BASE_WEIGHTS
|
||||
|
||||
__all__ = [
|
||||
"build_edges_for_note",
|
||||
"PROVENANCE_PRIORITY",
|
||||
"Subgraph",
|
||||
"expand",
|
||||
"EDGE_BASE_WEIGHTS"
|
||||
]
|
||||
63
app/core/graph/graph_db_adapter.py
Normal file
63
app/core/graph/graph_db_adapter.py
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
"""
|
||||
FILE: app/core/graph/graph_db_adapter.py
|
||||
DESCRIPTION: Datenbeschaffung aus Qdrant für den Graphen.
|
||||
AUDIT v1.1.0: Nutzt nun die zentrale database-Infrastruktur für Namen.
|
||||
"""
|
||||
from typing import List, Dict, Optional
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
# ENTSCHEIDENDER FIX: Nutzt die neue Infrastruktur für konsistente Collection-Namen
|
||||
from app.core.database import collection_names
|
||||
|
||||
def fetch_edges_from_qdrant(
    client: QdrantClient,
    prefix: str,
    seeds: List[str],
    edge_types: Optional[List[str]] = None,
    limit: int = 2048,
) -> List[Dict]:
    """Fetch edge payloads whose source_id, target_id or note_id hits a seed.

    Optionally restricted to the given edge kinds; at most *limit* payloads.
    """
    if not seeds or limit <= 0:
        return []

    # Consistent name resolution via the database package.
    _, _, edges_col = collection_names(prefix)

    # Seed match: OR over (source_id | target_id | note_id) x seeds.
    seed_conditions = [
        rest.FieldCondition(key=field, match=rest.MatchValue(value=str(seed)))
        for field in ("source_id", "target_id", "note_id")
        for seed in seeds
    ]
    clauses = [rest.Filter(should=seed_conditions)] if seed_conditions else []

    # Optional restriction on the edge kind.
    if edge_types:
        kind_conditions = [
            rest.FieldCondition(key="kind", match=rest.MatchValue(value=str(kind)))
            for kind in edge_types
        ]
        clauses.append(rest.Filter(should=kind_conditions))

    scroll_filter = rest.Filter(must=clauses) if clauses else None

    # Single-page scroll; payloads only, no vectors.
    records, _ = client.scroll(
        collection_name=edges_col,
        scroll_filter=scroll_filter,
        limit=limit,
        with_payload=True,
        with_vectors=False,
    )
    return [dict(record.payload) for record in records if record.payload]
|
||||
112
app/core/graph/graph_derive_edges.py
Normal file
112
app/core/graph/graph_derive_edges.py
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
"""
|
||||
FILE: app/core/graph/graph_derive_edges.py
|
||||
DESCRIPTION: Hauptlogik zur Kanten-Aggregation und De-Duplizierung.
|
||||
"""
|
||||
from typing import List, Optional, Dict, Tuple
|
||||
from .graph_utils import (
|
||||
_get, _edge, _mk_edge_id, _dedupe_seq,
|
||||
PROVENANCE_PRIORITY, load_types_registry, get_edge_defaults_for
|
||||
)
|
||||
from .graph_extractors import (
|
||||
extract_typed_relations, extract_callout_relations, extract_wikilinks
|
||||
)
|
||||
|
||||
def build_edges_for_note(
    note_id: str,
    chunks: List[dict],
    note_level_references: Optional[List[str]] = None,
    include_note_scope_refs: bool = False,
) -> List[dict]:
    """Build and aggregate all edges for one note (WP-15b).

    Sources of edges: structural order (belongs_to/next/prev), typed inline
    relations, the per-chunk candidate pool, [!edge] callouts, plain wikilinks
    (plus type-based default edges per reference) and, optionally, note-scope
    references/backlinks.  Duplicates on (source, target, kind) are collapsed,
    keeping the highest-confidence edge.
    """
    edges: List[dict] = []
    # Note type is taken from the first chunk; "concept" is the fallback type.
    note_type = _get(chunks[0], "type") if chunks else "concept"

    # 1) Structural edges (belongs_to, next/prev between consecutive chunks)
    # NOTE(review): confidences come from PROVENANCE_PRIORITY["structure:*"] —
    # assumes those keys exist in graph_utils; a missing key raises KeyError.
    for idx, ch in enumerate(chunks):
        cid = _get(ch, "chunk_id", "id")
        if not cid: continue
        edges.append(_edge("belongs_to", "chunk", cid, note_id, note_id, {
            "chunk_id": cid, "edge_id": _mk_edge_id("belongs_to", cid, note_id, "chunk", "structure:belongs_to"),
            "provenance": "structure", "rule_id": "structure:belongs_to", "confidence": PROVENANCE_PRIORITY["structure:belongs_to"]
        }))
        if idx < len(chunks) - 1:
            next_id = _get(chunks[idx+1], "chunk_id", "id")
            if next_id:
                edges.append(_edge("next", "chunk", cid, next_id, note_id, {
                    "chunk_id": cid, "edge_id": _mk_edge_id("next", cid, next_id, "chunk", "structure:order"),
                    "provenance": "structure", "rule_id": "structure:order", "confidence": PROVENANCE_PRIORITY["structure:order"]
                }))
                edges.append(_edge("prev", "chunk", next_id, cid, note_id, {
                    "chunk_id": next_id, "edge_id": _mk_edge_id("prev", next_id, cid, "chunk", "structure:order"),
                    "provenance": "structure", "rule_id": "structure:order", "confidence": PROVENANCE_PRIORITY["structure:order"]
                }))

    # 2) Content edges (typed relations, AI candidates, callouts, wikilinks)
    reg = load_types_registry()
    defaults = get_edge_defaults_for(note_type, reg)
    refs_all: List[str] = []

    for ch in chunks:
        cid = _get(ch, "chunk_id", "id")
        if not cid: continue
        # Only "window"/"text" are considered here (unlike the legacy facade,
        # which also fell back to "content"/"raw").
        raw = _get(ch, "window") or _get(ch, "text") or ""

        # Typed inline relations & candidate pool (WP-15b integration)
        typed, rem = extract_typed_relations(raw)
        for k, t in typed:
            edges.append(_edge(k, "chunk", cid, t, note_id, {
                "chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", "inline:rel"),
                "provenance": "explicit", "rule_id": "inline:rel", "confidence": PROVENANCE_PRIORITY["inline:rel"]
            }))

        # Pre-validated semantic candidates attached by the ingestion pipeline.
        # NOTE(review): candidate dicts are assumed to carry "to"/"kind"/"provenance" —
        # confirm against the Pass-2 writer.
        pool = ch.get("candidate_pool") or ch.get("candidate_edges") or []
        for cand in pool:
            t, k, p = cand.get("to"), cand.get("kind", "related_to"), cand.get("provenance", "semantic_ai")
            if t:
                edges.append(_edge(k, "chunk", cid, t, note_id, {
                    "chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", f"candidate:{p}"),
                    "provenance": p, "rule_id": f"candidate:{p}", "confidence": PROVENANCE_PRIORITY.get(p, 0.90)
                }))

        # Callouts & plain wikilinks (callout blocks are stripped before the
        # wikilink pass so their links are not double-counted)
        call_pairs, rem2 = extract_callout_relations(rem)
        for k, t in call_pairs:
            edges.append(_edge(k, "chunk", cid, t, note_id, {
                "chunk_id": cid, "edge_id": _mk_edge_id(k, cid, t, "chunk", "callout:edge"),
                "provenance": "explicit", "rule_id": "callout:edge", "confidence": PROVENANCE_PRIORITY["callout:edge"]
            }))

        refs = extract_wikilinks(rem2)
        for r in refs:
            edges.append(_edge("references", "chunk", cid, r, note_id, {
                "chunk_id": cid, "ref_text": r, "edge_id": _mk_edge_id("references", cid, r, "chunk", "explicit:wikilink"),
                "provenance": "explicit", "rule_id": "explicit:wikilink", "confidence": PROVENANCE_PRIORITY["explicit:wikilink"]
            }))
            # Type-based default edges for every reference (except plain "references")
            for rel in defaults:
                if rel != "references":
                    edges.append(_edge(rel, "chunk", cid, r, note_id, {
                        "chunk_id": cid, "edge_id": _mk_edge_id(rel, cid, r, "chunk", f"edge_defaults:{rel}"),
                        "provenance": "rule", "rule_id": f"edge_defaults:{rel}", "confidence": PROVENANCE_PRIORITY["edge_defaults"]
                    }))
        refs_all.extend(refs)

    # 3) Note scope & de-duplication
    if include_note_scope_refs:
        refs_note = _dedupe_seq((refs_all or []) + (note_level_references or []))
        for r in refs_note:
            # NOTE(review): unlike the chunk-scope edges above, these payloads
            # carry no "rule_id" key — confirm downstream consumers tolerate that.
            edges.append(_edge("references", "note", note_id, r, note_id, {
                "edge_id": _mk_edge_id("references", note_id, r, "note", "explicit:note_scope"),
                "provenance": "explicit", "confidence": PROVENANCE_PRIORITY["explicit:note_scope"]
            }))
            edges.append(_edge("backlink", "note", r, note_id, note_id, {
                "edge_id": _mk_edge_id("backlink", r, note_id, "note", "derived:backlink"),
                "provenance": "rule", "confidence": PROVENANCE_PRIORITY["derived:backlink"]
            }))

    # Collapse duplicates on (source, target, kind); highest confidence wins.
    unique_map: Dict[Tuple[str, str, str], dict] = {}
    for e in edges:
        key = (str(e.get("source_id")), str(e.get("target_id")), str(e.get("kind")))
        if key not in unique_map or e.get("confidence", 0) > unique_map[key].get("confidence", 0):
            unique_map[key] = e

    return list(unique_map.values())
|
||||
55
app/core/graph/graph_extractors.py
Normal file
55
app/core/graph/graph_extractors.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
"""
|
||||
FILE: app/core/graph/graph_extractors.py
|
||||
DESCRIPTION: Regex-basierte Extraktion von Relationen aus Text.
|
||||
"""
|
||||
import re
|
||||
from typing import List, Tuple
|
||||
|
||||
# Plain wikilinks: [[Target]] or [[Alias|Target]] (captures the part after "|")
_WIKILINK_RE = re.compile(r"\[\[(?:[^\|\]]+\|)?([a-zA-Z0-9_\-#:. ]+)\]\]")
# Typed inline relation with pipe: [[rel:KIND | Target]]
_REL_PIPE = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s*\|\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
# Typed inline relation without pipe: [[rel:KIND Target]]
_REL_SPACE = re.compile(r"\[\[\s*rel:(?P<kind>[a-z_]+)\s+(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)
# Plain-text form outside the brackets: rel: KIND [[Target]]
_REL_TEXT = re.compile(r"rel\s*:\s*(?P<kind>[a-z_]+)\s*\[\[\s*(?P<target>[^\]]+?)\s*\]\]", re.IGNORECASE)

# Obsidian callout header: "> [!edge] ..." (case-insensitive)
_CALLOUT_START = re.compile(r"^\s*>\s*\[!edge\]\s*(.*)$", re.IGNORECASE)
# Relation line inside a callout body: "KIND: targets..."
_REL_LINE = re.compile(r"^(?P<kind>[a-z_]+)\s*:\s*(?P<targets>.+?)\s*$", re.IGNORECASE)
# All wikilinks within a single targets line
_WIKILINKS_IN_LINE = re.compile(r"\[\[([^\]]+)\]\]")
|
||||
|
||||
def extract_typed_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
    """Extrahiert [[rel:KIND|Target]]."""
    found: List[Tuple[str, str]] = []

    def _take(match):
        kind = (match.group("kind") or "").strip().lower()
        target = (match.group("target") or "").strip()
        if kind and target:
            found.append((kind, target))
        return ""  # strip the matched link from the text

    for pattern in (_REL_PIPE, _REL_SPACE, _REL_TEXT):
        text = pattern.sub(_take, text)
    return found, text
|
||||
|
||||
def extract_callout_relations(text: str) -> Tuple[List[Tuple[str,str]], str]:
    """Verarbeitet Obsidian [!edge]-Callouts."""
    if not text:
        return [], text

    out_pairs: List[Tuple[str, str]] = []
    keep_lines: List[str] = []
    lines = text.splitlines()
    idx = 0

    while idx < len(lines):
        start = _CALLOUT_START.match(lines[idx])
        if start is None:
            keep_lines.append(lines[idx])
            idx += 1
            continue

        # Collect the quoted callout body (plus any text after the header tag).
        head = start.group(1)
        body = [head] if head.strip() else []
        idx += 1
        while idx < len(lines) and lines[idx].lstrip().startswith('>'):
            body.append(lines[idx].lstrip()[1:].lstrip())
            idx += 1

        for line in body:
            rel = _REL_LINE.match(line)
            if rel is None:
                continue
            kind = rel.group("kind").strip().lower()
            targets = rel.group("targets") or ""
            links = _WIKILINKS_IN_LINE.findall(targets)
            if links:
                for link in links:
                    out_pairs.append((kind, link.strip()))
            else:
                for piece in re.split(r"[,;]", targets):
                    if piece.strip():
                        out_pairs.append((kind, piece.strip()))
        # Callout lines are dropped from the remaining text.

    return out_pairs, "\n".join(keep_lines)
|
||||
|
||||
def extract_wikilinks(text: str) -> List[str]:
    """Extrahiert Standard-Wikilinks."""
    targets: List[str] = []
    for match in _WIKILINK_RE.finditer(text or ""):
        targets.append(match.group(1).strip())
    return targets
|
||||
129
app/core/graph/graph_subgraph.py
Normal file
129
app/core/graph/graph_subgraph.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
"""
|
||||
FILE: app/core/graph/graph_subgraph.py
|
||||
DESCRIPTION: In-Memory Repräsentation eines Graphen für Scoring und Analyse.
|
||||
Zentrale Komponente für die Graph-Expansion (BFS) und Bonus-Berechnung.
|
||||
MODULARISIERUNG: Teil des graph-Pakets (WP-14).
|
||||
VERSION: 1.1.0
|
||||
STATUS: Active
|
||||
"""
|
||||
import math
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Optional, DefaultDict, Any, Set
|
||||
from qdrant_client import QdrantClient
|
||||
|
||||
# Lokale Paket-Imports
|
||||
from .graph_weights import EDGE_BASE_WEIGHTS, calculate_edge_weight
|
||||
from .graph_db_adapter import fetch_edges_from_qdrant
|
||||
|
||||
class Subgraph:
    """
    Lightweight subgraph with adjacency lists and degree counters.

    Used by the retriever to compute graph bonuses (hub score, centrality).
    Nodes are plain string ids; edges are stored as small dicts.
    """

    def __init__(self) -> None:
        # adj[src]     -> outgoing edges: {"target", "kind", "weight"}
        self.adj: DefaultDict[str, List[Dict]] = defaultdict(list)
        # reverse_adj[tgt] -> incoming edges: {"source", "kind", "weight"[, "via_context"]}
        self.reverse_adj: DefaultDict[str, List[Dict]] = defaultdict(list)
        self.in_degree: DefaultDict[str, int] = defaultdict(int)
        self.out_degree: DefaultDict[str, int] = defaultdict(int)

    def add_edge(self, e: Dict) -> None:
        """
        Add one edge and update all indices.

        Expects keys "source", "target", "kind" and optionally "weight" and
        "note_id".  When "note_id" (the owning note) differs from the source,
        an extra context edge owner->target is added to raise the centrality
        of the parent note.
        """
        src = e.get("source")
        tgt = e.get("target")
        kind = e.get("kind")
        # Missing weight falls back to the per-kind base weight (0.0 for unknown kinds).
        weight = e.get("weight", EDGE_BASE_WEIGHTS.get(kind, 0.0))
        owner = e.get("note_id")

        if not src or not tgt:
            return

        # 1. Forward edge
        self.adj[src].append({"target": tgt, "kind": kind, "weight": weight})
        self.out_degree[src] += 1
        self.in_degree[tgt] += 1

        # 2. Reverse edge (for the WP-04b explanation layer)
        self.reverse_adj[tgt].append({"source": src, "kind": kind, "weight": weight})

        # 3. Context-note handling (raises the centrality of the parent note)
        if owner and owner != src:
            self.adj[owner].append({"target": tgt, "kind": kind, "weight": weight})
            self.out_degree[owner] += 1
            if owner != tgt:
                self.reverse_adj[tgt].append({"source": owner, "kind": kind, "weight": weight, "via_context": True})
                # NOTE(review): this bumps the OWNER's in-degree although the
                # reverse edge just added points INTO tgt — confirm whether
                # in_degree[tgt] was intended here.
                self.in_degree[owner] += 1

    def aggregate_edge_bonus(self, node_id: str) -> float:
        """Sum of outgoing edge weights (hub score)."""
        return sum(edge["weight"] for edge in self.adj.get(node_id, []))

    def edge_bonus(self, node_id: str) -> float:
        """Retriever-facing alias (WP-04a compatibility)."""
        return self.aggregate_edge_bonus(node_id)

    def centrality_bonus(self, node_id: str) -> float:
        """
        Log-damped centrality based on the in-degree.

        Capped at a maximum boost of 0.15; returns 0.0 for unknown nodes.
        """
        indeg = self.in_degree.get(node_id, 0)
        if indeg <= 0:
            return 0.0
        return min(math.log1p(indeg) / 10.0, 0.15)

    def get_outgoing_edges(self, node_id: str) -> List[Dict[str, Any]]:
        """Return all outgoing edges of a node ([] for unknown nodes)."""
        return self.adj.get(node_id, [])

    def get_incoming_edges(self, node_id: str) -> List[Dict[str, Any]]:
        """Return all incoming edges of a node ([] for unknown nodes)."""
        return self.reverse_adj.get(node_id, [])
|
||||
|
||||
|
||||
def expand(
    client: QdrantClient,
    prefix: str,
    seeds: List[str],
    depth: int = 1,
    edge_types: Optional[List[str]] = None,
) -> Subgraph:
    """
    Breadth-first expansion from the seed nodes, up to ``depth`` levels.

    Edges are loaded in batches via fetch_edges_from_qdrant.
    """
    graph = Subgraph()
    frontier: Set[str] = set(seeds)
    seen: Set[str] = set()

    for _ in range(max(depth, 0)):
        if not frontier:
            break

        # One batched edge query per BFS level
        next_level: Set[str] = set()
        for payload in fetch_edges_from_qdrant(client, prefix, list(frontier), edge_types):
            src = payload.get("source_id")
            tgt = payload.get("target_id")
            if not src or not tgt:
                continue  # skip malformed edge payloads

            graph.add_edge({
                "source": src,
                "target": tgt,
                "kind": payload.get("kind", "edge"),
                "weight": calculate_edge_weight(payload),
                "note_id": payload.get("note_id"),
            })

            # BFS bookkeeping: queue unvisited targets for the next level
            if tgt not in seen:
                next_level.add(str(tgt))

        seen |= frontier
        frontier = next_level - seen

    return graph
|
||||
81
app/core/graph/graph_utils.py
Normal file
81
app/core/graph/graph_utils.py
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
"""
|
||||
FILE: app/core/graph/graph_utils.py
|
||||
DESCRIPTION: Basale Werkzeuge, ID-Generierung und Provenance-Konfiguration für den Graphen.
|
||||
"""
|
||||
import os
|
||||
import hashlib
|
||||
from typing import Iterable, List, Optional, Set, Any
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
yaml = None
|
||||
|
||||
# WP-15b: Prioritäten-Ranking für die De-Duplizierung
|
||||
PROVENANCE_PRIORITY = {
|
||||
"explicit:wikilink": 1.00,
|
||||
"inline:rel": 0.95,
|
||||
"callout:edge": 0.90,
|
||||
"semantic_ai": 0.90, # Validierte KI-Kanten
|
||||
"structure:belongs_to": 1.00,
|
||||
"structure:order": 0.95, # next/prev
|
||||
"explicit:note_scope": 1.00,
|
||||
"derived:backlink": 0.90,
|
||||
"edge_defaults": 0.70 # Heuristik (types.yaml)
|
||||
}
|
||||
|
||||
def _get(d: dict, *keys, default=None):
|
||||
"""Sicherer Zugriff auf verschachtelte Keys."""
|
||||
for k in keys:
|
||||
if isinstance(d, dict) and k in d and d[k] is not None:
|
||||
return d[k]
|
||||
return default
|
||||
|
||||
def _dedupe_seq(seq: Iterable[str]) -> List[str]:
|
||||
"""Dedupliziert Strings unter Beibehaltung der Reihenfolge."""
|
||||
seen: Set[str] = set()
|
||||
out: List[str] = []
|
||||
for s in seq:
|
||||
if s not in seen:
|
||||
seen.add(s); out.append(s)
|
||||
return out
|
||||
|
||||
def _mk_edge_id(kind: str, s: str, t: str, scope: str, rule_id: Optional[str] = None) -> str:
|
||||
"""Erzeugt eine deterministische 12-Byte ID mittels BLAKE2s."""
|
||||
base = f"{kind}:{s}->{t}#{scope}"
|
||||
if rule_id: base += f"|{rule_id}"
|
||||
return hashlib.blake2s(base.encode("utf-8"), digest_size=12).hexdigest()
|
||||
|
||||
def _edge(kind: str, scope: str, source_id: str, target_id: str, note_id: str, extra: Optional[dict] = None) -> dict:
|
||||
"""Konstruiert ein Kanten-Payload für Qdrant."""
|
||||
pl = {
|
||||
"kind": kind,
|
||||
"relation": kind,
|
||||
"scope": scope,
|
||||
"source_id": source_id,
|
||||
"target_id": target_id,
|
||||
"note_id": note_id,
|
||||
}
|
||||
if extra: pl.update(extra)
|
||||
return pl
|
||||
|
||||
def load_types_registry() -> dict:
    """Load the YAML type registry; returns {} when missing, unparsable, or yaml is absent."""
    path = os.getenv("MINDNET_TYPES_FILE", "./config/types.yaml")
    if not os.path.isfile(path) or yaml is None:
        return {}
    try:
        with open(path, "r", encoding="utf-8") as fh:
            data = yaml.safe_load(fh)
        return data or {}
    except Exception:
        # Best effort: a broken registry file must never break the graph build.
        return {}
||||
|
||||
def get_edge_defaults_for(note_type: Optional[str], reg: dict) -> List[str]:
    """
    Resolve the default edge kinds for a note type.

    Lookup order:
      1. ``reg["types"][note_type]["edge_defaults"]`` (or flat ``reg[note_type]``
         when no "types" key exists),
      2. a global fallback under "defaults" / "default" / "global",
      3. an empty list.

    Non-string entries in edge_defaults lists are filtered out.
    """
    # FIX: the fallback loop below calls reg.get(...); previously a non-dict
    # reg (which the types_map guard shows is an expected input) raised
    # AttributeError instead of returning the documented empty default.
    if not isinstance(reg, dict):
        return []

    types_map = reg.get("types", reg)
    if note_type and isinstance(types_map, dict):
        t = types_map.get(note_type)
        if isinstance(t, dict) and isinstance(t.get("edge_defaults"), list):
            return [str(x) for x in t["edge_defaults"] if isinstance(x, str)]

    # Global fallback sections, checked in priority order.
    for key in ("defaults", "default", "global"):
        v = reg.get(key)
        if isinstance(v, dict) and isinstance(v.get("edge_defaults"), list):
            return [str(x) for x in v["edge_defaults"] if isinstance(x, str)]
    return []
||||
39
app/core/graph/graph_weights.py
Normal file
39
app/core/graph/graph_weights.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
"""
|
||||
FILE: app/core/graph/graph_weights.py
|
||||
DESCRIPTION: Definition der Basisgewichte und Berechnung der Kanteneffektivität.
|
||||
"""
|
||||
from typing import Dict
|
||||
|
||||
# Base weight per edge type (WP-04a configuration)
EDGE_BASE_WEIGHTS: Dict[str, float] = {
    # Structural edges
    "belongs_to": 0.10,
    "next": 0.06,
    "prev": 0.06,
    "backlink": 0.04,
    "references_at": 0.08,
    # Knowledge edges
    "references": 0.20,
    "depends_on": 0.18,
    "related_to": 0.15,
    "similar_to": 0.12,
}


def calculate_edge_weight(pl: Dict) -> float:
    """Effective edge weight: base weight for the kind, scaled by a clamped confidence."""
    base = EDGE_BASE_WEIGHTS.get(pl.get("kind", "edge"), 0.0)

    raw = pl.get("confidence")
    if raw is None:
        return base
    try:
        confidence = float(raw)
    except Exception:
        # Unparsable confidence is ignored rather than failing the pipeline.
        return base

    # Clamp confidence into [0, 1] before scaling
    return base * max(0.0, min(1.0, confidence))
||||
|
|
@ -1,249 +1,10 @@
|
|||
"""
|
||||
FILE: app/core/graph_adapter.py
|
||||
DESCRIPTION: Lädt Kanten aus Qdrant und baut einen In-Memory Subgraphen für Scoring (Centrality) und Explanation.
|
||||
VERSION: 0.4.0
|
||||
STATUS: Active
|
||||
DEPENDENCIES: qdrant_client, app.core.qdrant
|
||||
LAST_ANALYSIS: 2025-12-15
|
||||
DESCRIPTION: Facade für das neue graph Package (Adapter-Teil).
|
||||
WP-14: Modularisierung abgeschlossen.
|
||||
VERSION: 0.5.0
|
||||
"""
|
||||
from .graph.graph_subgraph import Subgraph, expand
|
||||
from .graph.graph_weights import EDGE_BASE_WEIGHTS
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Dict, List, Optional, DefaultDict, Any
|
||||
from collections import defaultdict
|
||||
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
from app.core.qdrant import collection_names
|
||||
|
||||
# Legacy-Import Fallback
|
||||
try: # pragma: no cover
|
||||
from app.core.qdrant_points import get_edges_for_sources # type: ignore
|
||||
except Exception: # pragma: no cover
|
||||
get_edges_for_sources = None # type: ignore
|
||||
|
||||
|
||||
# Basisgewichte je Edge-Typ (WP-04a Config)
|
||||
EDGE_BASE_WEIGHTS: Dict[str, float] = {
|
||||
# Struktur
|
||||
"belongs_to": 0.10,
|
||||
"next": 0.06,
|
||||
"prev": 0.06,
|
||||
"backlink": 0.04,
|
||||
"references_at": 0.08,
|
||||
|
||||
# Wissen
|
||||
"references": 0.20,
|
||||
"depends_on": 0.18,
|
||||
"related_to": 0.15,
|
||||
"similar_to": 0.12,
|
||||
}
|
||||
|
||||
|
||||
def _edge_weight(pl: Dict) -> float:
|
||||
"""Berechnet das effektive Edge-Gewicht aus kind + confidence."""
|
||||
kind = pl.get("kind", "edge")
|
||||
base = EDGE_BASE_WEIGHTS.get(kind, 0.0)
|
||||
|
||||
conf_raw = pl.get("confidence", None)
|
||||
try:
|
||||
conf = float(conf_raw) if conf_raw is not None else None
|
||||
except Exception:
|
||||
conf = None
|
||||
|
||||
if conf is None:
|
||||
return base
|
||||
|
||||
if conf < 0.0: conf = 0.0
|
||||
if conf > 1.0: conf = 1.0
|
||||
|
||||
return base * conf
|
||||
|
||||
|
||||
def _fetch_edges(
    client: QdrantClient,
    prefix: str,
    seeds: List[str],
    edge_types: Optional[List[str]] = None,
    limit: int = 2048,
) -> List[Dict]:
    """
    Fetch edge payloads directly from the *_edges collection.

    Filter semantics: (source_id IN seeds OR target_id IN seeds OR
    note_id IN seeds) AND optionally (kind IN edge_types).

    Args:
        client: Connected Qdrant client.
        prefix: Collection-name prefix, resolved via collection_names().
        seeds: Node ids matched against the source/target/owner fields.
        edge_types: Optional whitelist of edge kinds.
        limit: Maximum number of points returned by the single scroll call.

    Returns:
        List of raw edge payload dicts; empty payloads are dropped.
    """
    if not seeds or limit <= 0:
        return []

    # collection_names() is expected to yield (notes, chunks, edges);
    # only the edges collection is used here.
    _, _, edges_col = collection_names(prefix)

    # OR-match every seed against each of the three id fields.
    seed_conditions = []
    for field in ("source_id", "target_id", "note_id"):
        for s in seeds:
            seed_conditions.append(
                rest.FieldCondition(key=field, match=rest.MatchValue(value=str(s)))
            )
    seeds_filter = rest.Filter(should=seed_conditions) if seed_conditions else None

    # Optional OR-filter restricting the edge kind.
    type_filter = None
    if edge_types:
        type_conds = [
            rest.FieldCondition(key="kind", match=rest.MatchValue(value=str(k)))
            for k in edge_types
        ]
        type_filter = rest.Filter(should=type_conds)

    # AND-combine the two sub-filters (each itself an OR group).
    must = []
    if seeds_filter: must.append(seeds_filter)
    if type_filter: must.append(type_filter)

    flt = rest.Filter(must=must) if must else None

    # Vectors are not needed for graph expansion — payload only.
    pts, _ = client.scroll(
        collection_name=edges_col,
        scroll_filter=flt,
        limit=limit,
        with_payload=True,
        with_vectors=False,
    )

    out: List[Dict] = []
    for p in pts or []:
        pl = dict(p.payload or {})
        if pl:
            out.append(pl)
    return out
||||
|
||||
|
||||
class Subgraph:
    """Lightweight in-memory subgraph with adjacency lists and degree counters."""

    def __init__(self) -> None:
        # Forward adjacency: source -> list of outgoing edge dicts
        self.adj: DefaultDict[str, List[Dict]] = defaultdict(list)
        # Reverse adjacency: target -> list of incoming edge dicts (WP-04b explanation)
        self.reverse_adj: DefaultDict[str, List[Dict]] = defaultdict(list)
        self.in_degree: DefaultDict[str, int] = defaultdict(int)
        self.out_degree: DefaultDict[str, int] = defaultdict(int)

    def add_edge(self, e: Dict) -> None:
        """
        Add one edge and keep the forward/reverse indexes in sync.

        Expected keys in ``e``: source, target, kind, weight (optional),
        note_id (optional context owner).
        """
        src = e.get("source")
        tgt = e.get("target")
        if not src or not tgt:
            return

        kind = e.get("kind")
        # Fall back to the configured base weight only when no explicit
        # "weight" key was supplied in the payload.
        weight = e["weight"] if "weight" in e else EDGE_BASE_WEIGHTS.get(kind, 0.0)
        owner = e.get("note_id")

        # Forward index (the same dict object is reused for the owner below)
        edge_data = {"target": tgt, "kind": kind, "weight": weight}
        self.adj[src].append(edge_data)
        self.out_degree[src] += 1
        self.in_degree[tgt] += 1

        # Reverse index: remember where the edge came from.
        self.reverse_adj[tgt].append({"source": src, "kind": kind, "weight": weight})

        # Context-note handling: when an edge is defined "in the context of"
        # an owner note, credit the note so the retriever finds note-level scores.
        if owner and owner != src:
            self.adj[owner].append(edge_data)
            self.out_degree[owner] += 1
            if owner != tgt:
                self.reverse_adj[tgt].append(
                    {"source": owner, "kind": kind, "weight": weight, "via_context": True}
                )
                self.in_degree[owner] += 1  # slight centrality boost for the owner

    def aggregate_edge_bonus(self, node_id: str) -> float:
        """Sum of outgoing edge weights (hub score)."""
        weights = [edge["weight"] for edge in self.adj.get(node_id, [])]
        return sum(weights)

    def edge_bonus(self, node_id: str) -> float:
        """Retriever-facing alias (WP-04a compatibility)."""
        return self.aggregate_edge_bonus(node_id)

    def centrality_bonus(self, node_id: str) -> float:
        """Log-damped centrality derived from the in-degree, capped at 0.15."""
        import math
        indeg = self.in_degree.get(node_id, 0)
        return min(math.log1p(indeg) / 10.0, 0.15) if indeg > 0 else 0.0

    # --- WP-04b explanation helpers ---

    def get_outgoing_edges(self, node_id: str) -> List[Dict[str, Any]]:
        """List every target this node points at."""
        return self.adj.get(node_id, [])

    def get_incoming_edges(self, node_id: str) -> List[Dict[str, Any]]:
        """List every source pointing at this node."""
        return self.reverse_adj.get(node_id, [])
||||
|
||||
|
||||
def expand(
    client: QdrantClient,
    prefix: str,
    seeds: List[str],
    depth: int = 1,
    edge_types: Optional[List[str]] = None,
) -> Subgraph:
    """Breadth-first expansion from the seeds along stored edges, up to ``depth`` levels."""
    sg = Subgraph()
    frontier = set(seeds)
    visited = set()

    for _ in range(max(depth, 0)):
        if not frontier:
            break

        # One batched edge fetch per BFS level
        payloads = _fetch_edges(
            client=client,
            prefix=prefix,
            seeds=list(frontier),
            edge_types=edge_types,
            limit=2048,
        )

        next_frontier = set()
        for pl in payloads:
            src, tgt = pl.get("source_id"), pl.get("target_id")
            if not src or not tgt:
                continue  # skip malformed edges

            sg.add_edge({
                "source": src,
                "target": tgt,
                "kind": pl.get("kind", "edge"),
                "weight": _edge_weight(pl),
                "note_id": pl.get("note_id"),
            })

            # Only continue the search through unvisited targets
            if tgt and tgt not in visited:
                next_frontier.add(tgt)

        visited |= frontier
        frontier = next_frontier - visited

    return sg
||||
__all__ = ["Subgraph", "expand", "EDGE_BASE_WEIGHTS"]
|
||||
|
|
@ -1,390 +0,0 @@
|
|||
"""
|
||||
FILE: app/core/ingestion.py
|
||||
DESCRIPTION: Haupt-Ingestion-Logik. Transformiert Markdown in den Graphen.
|
||||
WP-20: Optimiert für OpenRouter (mistralai/mistral-7b-instruct:free).
|
||||
WP-22: Content Lifecycle, Edge Registry Validation & Multi-Hash.
|
||||
FIX: Deep Fallback Logic (v2.11.14). Erkennt Policy Violations auch in validen
|
||||
JSON-Objekten und erzwingt den lokalen Ollama-Sprung, um Kantenverlust
|
||||
bei umfangreichen Protokollen zu verhindern.
|
||||
VERSION: 2.11.14
|
||||
STATUS: Active
|
||||
DEPENDENCIES: app.core.parser, app.core.note_payload, app.core.chunker, app.services.llm_service, app.services.edge_registry
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
import logging
|
||||
import asyncio
|
||||
import time
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
|
||||
# Core Module Imports
|
||||
from app.core.parser import (
|
||||
read_markdown,
|
||||
normalize_frontmatter,
|
||||
validate_required_frontmatter,
|
||||
extract_edges_with_context,
|
||||
)
|
||||
from app.core.note_payload import make_note_payload
|
||||
from app.core.chunker import assemble_chunks, get_chunk_config
|
||||
from app.core.chunk_payload import make_chunk_payloads
|
||||
|
||||
# Fallback für Edges
|
||||
try:
|
||||
from app.core.derive_edges import build_edges_for_note
|
||||
except ImportError:
|
||||
def build_edges_for_note(*args, **kwargs): return []
|
||||
|
||||
from app.core.qdrant import QdrantConfig, get_client, ensure_collections, ensure_payload_indexes
|
||||
from app.core.qdrant_points import (
|
||||
points_for_chunks,
|
||||
points_for_note,
|
||||
points_for_edges,
|
||||
upsert_batch,
|
||||
)
|
||||
|
||||
from app.services.embeddings_client import EmbeddingsClient
|
||||
from app.services.edge_registry import registry as edge_registry
|
||||
from app.services.llm_service import LLMService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# --- Global Helpers ---
|
||||
def extract_json_from_response(text: str) -> Any:
    """
    Extract JSON data from a raw LLM response string.

    Strips Mistral/Llama control tokens (<s>, </s>, [OUT], [/OUT]) and
    markdown code fences, then attempts increasingly permissive parses:
      1. the cleaned payload as-is,
      2. the outermost ``[...]`` slice (list recovery),
      3. the outermost ``{...}`` slice (object recovery).

    Returns:
        The parsed JSON value, or ``[]`` when nothing parseable is found
        or *text* is empty / not a string.
    """
    if not text or not isinstance(text, str):
        return []

    # 1. Remove Mistral/Llama control tokens and tags
    clean = text.replace("<s>", "").replace("</s>", "")
    clean = clean.replace("[OUT]", "").replace("[/OUT]", "")
    clean = clean.strip()

    # 2. Prefer the content of a fenced ```json block when present
    match = re.search(r"```(?:json)?\s*(.*?)\s*```", clean, re.DOTALL)
    payload = match.group(1) if match else clean

    try:
        return json.loads(payload.strip())
    except json.JSONDecodeError:
        pass

    # 3. Recovery: outermost list slice.
    # FIX: these recovery attempts previously used bare `except:` clauses
    # that swallowed every exception (including KeyboardInterrupt); they
    # now catch only JSON decode failures.
    start = payload.find('[')
    end = payload.rfind(']') + 1
    if start != -1 and end > start:
        try:
            return json.loads(payload[start:end])
        except json.JSONDecodeError:
            pass

    # 4. Second recovery: outermost object slice
    start_obj = payload.find('{')
    end_obj = payload.rfind('}') + 1
    if start_obj != -1 and end_obj > start_obj:
        try:
            return json.loads(payload[start_obj:end_obj])
        except json.JSONDecodeError:
            pass

    return []
||||
|
||||
def load_type_registry(custom_path: Optional[str] = None) -> dict:
    """Load types.yaml, which drives type-specific ingestion; returns {} on any failure."""
    import yaml
    from app.config import get_settings

    registry_path = custom_path or get_settings().MINDNET_TYPES_FILE
    if not os.path.exists(registry_path):
        return {}
    try:
        with open(registry_path, "r", encoding="utf-8") as fh:
            return yaml.safe_load(fh) or {}
    except Exception:
        # Best effort: a broken registry file must not abort ingestion.
        return {}
||||
|
||||
# --- Service Class ---
|
||||
class IngestionService:
|
||||
def __init__(self, collection_prefix: str = None):
|
||||
from app.config import get_settings
|
||||
self.settings = get_settings()
|
||||
|
||||
self.prefix = collection_prefix or self.settings.COLLECTION_PREFIX
|
||||
self.cfg = QdrantConfig.from_env()
|
||||
self.cfg.prefix = self.prefix
|
||||
self.client = get_client(self.cfg)
|
||||
self.dim = self.settings.VECTOR_SIZE
|
||||
self.registry = load_type_registry()
|
||||
self.embedder = EmbeddingsClient()
|
||||
self.llm = LLMService()
|
||||
|
||||
self.active_hash_mode = self.settings.CHANGE_DETECTION_MODE
|
||||
|
||||
try:
|
||||
ensure_collections(self.client, self.prefix, self.dim)
|
||||
ensure_payload_indexes(self.client, self.prefix)
|
||||
except Exception as e:
|
||||
logger.warning(f"DB init warning: {e}")
|
||||
|
||||
def _resolve_note_type(self, requested: Optional[str]) -> str:
|
||||
"""Bestimmt den finalen Notiz-Typ (Fallback auf 'concept')."""
|
||||
types = self.registry.get("types", {})
|
||||
if requested and requested in types: return requested
|
||||
return "concept"
|
||||
|
||||
def _get_chunk_config_by_profile(self, profile_name: str, note_type: str) -> Dict[str, Any]:
|
||||
"""Holt die Chunker-Parameter für ein spezifisches Profil aus der Registry."""
|
||||
profiles = self.registry.get("chunking_profiles", {})
|
||||
if profile_name in profiles:
|
||||
cfg = profiles[profile_name].copy()
|
||||
if "overlap" in cfg and isinstance(cfg["overlap"], list):
|
||||
cfg["overlap"] = tuple(cfg["overlap"])
|
||||
return cfg
|
||||
return get_chunk_config(note_type)
|
||||
|
||||
async def _perform_smart_edge_allocation(self, text: str, note_id: str) -> List[Dict]:
|
||||
"""
|
||||
KI-Extraktion mit Deep-Fallback Logik.
|
||||
Erzwingt den lokalen Ollama-Sprung, wenn die Cloud-Antwort keine verwertbaren
|
||||
Kanten liefert (häufig bei Policy Violations auf OpenRouter).
|
||||
"""
|
||||
provider = self.settings.MINDNET_LLM_PROVIDER
|
||||
model = self.settings.OPENROUTER_MODEL if provider == "openrouter" else self.settings.GEMINI_MODEL
|
||||
|
||||
logger.info(f"🚀 [Ingestion] Turbo-Mode: Extracting edges for '{note_id}' using {model} on {provider}")
|
||||
|
||||
edge_registry.ensure_latest()
|
||||
valid_types_str = ", ".join(sorted(list(edge_registry.valid_types)))
|
||||
|
||||
template = self.llm.get_prompt("edge_extraction", provider)
|
||||
|
||||
try:
|
||||
try:
|
||||
# Wir begrenzen den Kontext auf 6000 Zeichen (ca. 1500 Token)
|
||||
prompt = template.format(
|
||||
text=text[:6000],
|
||||
note_id=note_id,
|
||||
valid_types=valid_types_str
|
||||
)
|
||||
except KeyError as ke:
|
||||
logger.error(f"❌ [Ingestion] Prompt-Template Fehler (Variable {ke} fehlt).")
|
||||
return []
|
||||
|
||||
# 1. Versuch: Anfrage an den primären Cloud-Provider
|
||||
response_json = await self.llm.generate_raw_response(
|
||||
prompt=prompt, priority="background", force_json=True,
|
||||
provider=provider, model_override=model
|
||||
)
|
||||
|
||||
# Initiales Parsing
|
||||
raw_data = extract_json_from_response(response_json)
|
||||
|
||||
# 2. Dictionary Recovery (Versuche Liste aus Dict zu extrahieren)
|
||||
candidates = []
|
||||
if isinstance(raw_data, list):
|
||||
candidates = raw_data
|
||||
elif isinstance(raw_data, dict):
|
||||
logger.info(f"ℹ️ [Ingestion] LLM returned dict, checking for embedded lists in {note_id}")
|
||||
for k in ["edges", "links", "results", "kanten", "matches", "edge_list"]:
|
||||
if k in raw_data and isinstance(raw_data[k], list):
|
||||
candidates = raw_data[k]
|
||||
break
|
||||
# Wenn immer noch keine Liste gefunden, versuche Key-Value Paare (Dict Recovery)
|
||||
if not candidates:
|
||||
for k, v in raw_data.items():
|
||||
if isinstance(v, str): candidates.append(f"{k}:{v}")
|
||||
elif isinstance(v, list): [candidates.append(f"{k}:{i}") for i in v if isinstance(i, str)]
|
||||
|
||||
# 3. DEEP FALLBACK: Wenn nach allen Recovery-Versuchen die Liste leer ist UND wir in der Cloud waren
|
||||
# Triggert den Fallback bei "Data Policy Violations" (leere oder Fehler-JSONs).
|
||||
if not candidates and provider != "ollama" and self.settings.LLM_FALLBACK_ENABLED:
|
||||
logger.warning(
|
||||
f"🛑 [Ingestion] Cloud-Antwort für {note_id} lieferte keine verwertbaren Kanten. "
|
||||
f"Mögliche Policy Violation oder Refusal. Erzwinge LOKALEN FALLBACK via Ollama..."
|
||||
)
|
||||
response_json_local = await self.llm.generate_raw_response(
|
||||
prompt=prompt, priority="background", force_json=True, provider="ollama"
|
||||
)
|
||||
raw_data_local = extract_json_from_response(response_json_local)
|
||||
|
||||
# Wiederhole Recovery für lokale Antwort
|
||||
if isinstance(raw_data_local, list):
|
||||
candidates = raw_data_local
|
||||
elif isinstance(raw_data_local, dict):
|
||||
for k in ["edges", "links", "results"]:
|
||||
if k in raw_data_local and isinstance(raw_data_local[k], list):
|
||||
candidates = raw_data_local[k]; break
|
||||
|
||||
if not candidates:
|
||||
logger.warning(f"⚠️ [Ingestion] Auch nach Fallback keine extrahierbaren Kanten für {note_id}")
|
||||
return []
|
||||
|
||||
processed = []
|
||||
for item in candidates:
|
||||
if isinstance(item, dict) and "to" in item:
|
||||
item["provenance"] = "semantic_ai"
|
||||
item["line"] = f"ai-{provider}"
|
||||
processed.append(item)
|
||||
elif isinstance(item, str) and ":" in item:
|
||||
parts = item.split(":", 1)
|
||||
processed.append({
|
||||
"to": parts[1].strip(),
|
||||
"kind": parts[0].strip(),
|
||||
"provenance": "semantic_ai",
|
||||
"line": f"ai-{provider}"
|
||||
})
|
||||
return processed
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ [Ingestion] Smart Edge Allocation failed for {note_id}: {e}")
|
||||
return []
|
||||
|
||||
async def process_file(
|
||||
self, file_path: str, vault_root: str,
|
||||
force_replace: bool = False, apply: bool = False, purge_before: bool = False,
|
||||
note_scope_refs: bool = False, hash_source: str = "parsed", hash_normalize: str = "canonical"
|
||||
) -> Dict[str, Any]:
|
||||
"""Transformiert eine Markdown-Datei in den Graphen (Notes, Chunks, Edges)."""
|
||||
result = {"path": file_path, "status": "skipped", "changed": False, "error": None}
|
||||
|
||||
# 1. Parse & Lifecycle Gate
|
||||
try:
|
||||
parsed = read_markdown(file_path)
|
||||
if not parsed: return {**result, "error": "Empty file"}
|
||||
fm = normalize_frontmatter(parsed.frontmatter)
|
||||
validate_required_frontmatter(fm)
|
||||
except Exception as e:
|
||||
return {**result, "error": f"Validation failed: {str(e)}"}
|
||||
|
||||
# WP-22: Filter für Systemdateien und Entwürfe
|
||||
status = fm.get("status", "draft").lower().strip()
|
||||
if status in ["system", "template", "archive", "hidden"]:
|
||||
return {**result, "status": "skipped", "reason": f"lifecycle_{status}"}
|
||||
|
||||
# 2. Config Resolution & Payload Construction
|
||||
note_type = self._resolve_note_type(fm.get("type"))
|
||||
fm["type"] = note_type
|
||||
|
||||
try:
|
||||
note_pl = make_note_payload(parsed, vault_root=vault_root, hash_normalize=hash_normalize, hash_source=hash_source, file_path=file_path)
|
||||
note_id = note_pl["note_id"]
|
||||
except Exception as e:
|
||||
return {**result, "error": f"Payload failed: {str(e)}"}
|
||||
|
||||
# 3. Change Detection (Strikte DoD Umsetzung)
|
||||
old_payload = None if force_replace else self._fetch_note_payload(note_id)
|
||||
check_key = f"{self.active_hash_mode}:{hash_source}:{hash_normalize}"
|
||||
old_hash = (old_payload or {}).get("hashes", {}).get(check_key)
|
||||
new_hash = note_pl.get("hashes", {}).get(check_key)
|
||||
|
||||
# Prüfung auf fehlende Artefakte in Qdrant
|
||||
chunks_missing, edges_missing = self._artifacts_missing(note_id)
|
||||
|
||||
should_write = force_replace or (not old_payload) or (old_hash != new_hash) or chunks_missing or edges_missing
|
||||
|
||||
if not should_write:
|
||||
return {**result, "status": "unchanged", "note_id": note_id}
|
||||
|
||||
if not apply:
|
||||
return {**result, "status": "dry-run", "changed": True, "note_id": note_id}
|
||||
|
||||
# 4. Processing (Chunking, Embedding, AI Edges)
|
||||
try:
|
||||
body_text = getattr(parsed, "body", "") or ""
|
||||
edge_registry.ensure_latest()
|
||||
|
||||
# Profil-gesteuertes Chunking
|
||||
profile = fm.get("chunk_profile") or fm.get("chunking_profile") or "sliding_standard"
|
||||
chunk_cfg = self._get_chunk_config_by_profile(profile, note_type)
|
||||
chunks = await assemble_chunks(fm["id"], body_text, fm["type"], config=chunk_cfg)
|
||||
chunk_pls = make_chunk_payloads(fm, note_pl["path"], chunks, note_text=body_text)
|
||||
|
||||
# Vektorisierung
|
||||
vecs = []
|
||||
if chunk_pls:
|
||||
texts = [c.get("window") or c.get("text") or "" for c in chunk_pls]
|
||||
vecs = await self.embedder.embed_documents(texts)
|
||||
|
||||
# Kanten-Extraktion
|
||||
edges = []
|
||||
context = {"file": file_path, "note_id": note_id}
|
||||
|
||||
# A. Explizite Kanten (User / Wikilinks)
|
||||
for e in extract_edges_with_context(parsed):
|
||||
e["kind"] = edge_registry.resolve(edge_type=e["kind"], provenance="explicit", context={**context, "line": e.get("line")})
|
||||
edges.append(e)
|
||||
|
||||
# B. KI Kanten (Turbo Mode mit v2.11.14 Fallback)
|
||||
ai_edges = await self._perform_smart_edge_allocation(body_text, note_id)
|
||||
for e in ai_edges:
|
||||
valid_kind = edge_registry.resolve(edge_type=e.get("kind"), provenance="semantic_ai", context={**context, "line": e.get("line")})
|
||||
e["kind"] = valid_kind
|
||||
edges.append(e)
|
||||
|
||||
# C. System Kanten (Struktur)
|
||||
try:
|
||||
sys_edges = build_edges_for_note(note_id, chunk_pls, note_level_references=note_pl.get("references", []), include_note_scope_refs=note_scope_refs)
|
||||
except:
|
||||
sys_edges = build_edges_for_note(note_id, chunk_pls)
|
||||
|
||||
for e in sys_edges:
|
||||
valid_kind = edge_registry.resolve(edge_type=e.get("kind", "belongs_to"), provenance="structure", context={**context, "line": "system"})
|
||||
if valid_kind:
|
||||
e["kind"] = valid_kind
|
||||
edges.append(e)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Processing failed for {file_path}: {e}", exc_info=True)
|
||||
return {**result, "error": f"Processing failed: {str(e)}"}
|
||||
|
||||
# 5. DB Upsert
|
||||
try:
|
||||
if purge_before and old_payload: self._purge_artifacts(note_id)
|
||||
|
||||
n_name, n_pts = points_for_note(self.prefix, note_pl, None, self.dim)
|
||||
upsert_batch(self.client, n_name, n_pts)
|
||||
|
||||
if chunk_pls and vecs:
|
||||
c_name, c_pts = points_for_chunks(self.prefix, chunk_pls, vecs)
|
||||
upsert_batch(self.client, c_name, c_pts)
|
||||
|
||||
if edges:
|
||||
e_name, e_pts = points_for_edges(self.prefix, edges)
|
||||
upsert_batch(self.client, e_name, e_pts)
|
||||
|
||||
return {"path": file_path, "status": "success", "changed": True, "note_id": note_id, "chunks_count": len(chunk_pls), "edges_count": len(edges)}
|
||||
except Exception as e:
|
||||
return {**result, "error": f"DB Upsert failed: {e}"}
|
||||
|
||||
def _fetch_note_payload(self, note_id: str) -> Optional[dict]:
|
||||
"""Holt die Metadaten einer Note aus Qdrant."""
|
||||
from qdrant_client.http import models as rest
|
||||
try:
|
||||
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
|
||||
pts, _ = self.client.scroll(collection_name=f"{self.prefix}_notes", scroll_filter=f, limit=1, with_payload=True)
|
||||
return pts[0].payload if pts else None
|
||||
except: return None
|
||||
|
||||
def _artifacts_missing(self, note_id: str) -> Tuple[bool, bool]:
|
||||
"""Prüft Qdrant aktiv auf vorhandene Chunks und Edges."""
|
||||
from qdrant_client.http import models as rest
|
||||
try:
|
||||
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
|
||||
c_pts, _ = self.client.scroll(collection_name=f"{self.prefix}_chunks", scroll_filter=f, limit=1)
|
||||
e_pts, _ = self.client.scroll(collection_name=f"{self.prefix}_edges", scroll_filter=f, limit=1)
|
||||
return (not bool(c_pts)), (not bool(e_pts))
|
||||
except: return True, True
|
||||
|
||||
def _purge_artifacts(self, note_id: str):
|
||||
"""Löscht verwaiste Chunks/Edges vor einem Re-Import."""
|
||||
from qdrant_client.http import models as rest
|
||||
f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
|
||||
for suffix in ["chunks", "edges"]:
|
||||
try: self.client.delete(collection_name=f"{self.prefix}_{suffix}", points_selector=rest.FilterSelector(filter=f))
|
||||
except: pass
|
||||
|
||||
async def create_from_text(self, markdown_content: str, filename: str, vault_root: str, folder: str = "00_Inbox") -> Dict[str, Any]:
    """Persist *markdown_content* as a vault note and ingest it.

    Writes the file below ``vault_root/folder`` (creating the folder if
    needed) and then runs the full ingestion pipeline with replace
    semantics. Returns the ingestion result dict.
    """
    destination = os.path.join(vault_root, folder)
    os.makedirs(destination, exist_ok=True)
    note_path = os.path.join(destination, filename)

    with open(note_path, "w", encoding="utf-8") as handle:
        handle.write(markdown_content)

    # Brief pause before the pipeline re-reads the freshly written file.
    await asyncio.sleep(0.1)
    return await self.process_file(
        file_path=note_path,
        vault_root=vault_root,
        apply=True,
        force_replace=True,
        purge_before=True,
    )
|
||||
26
app/core/ingestion/__init__.py
Normal file
26
app/core/ingestion/__init__.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
"""
|
||||
FILE: app/core/ingestion/__init__.py
|
||||
DESCRIPTION: Package-Einstiegspunkt für Ingestion. Exportiert den IngestionService.
|
||||
AUDIT v2.13.10: Abschluss der Modularisierung (WP-14).
|
||||
Bricht Zirkelbezüge durch Nutzung der neutralen registry.py auf.
|
||||
VERSION: 2.13.10
|
||||
"""
|
||||
# Der IngestionService ist der primäre Orchestrator für den Datenimport
|
||||
from .ingestion_processor import IngestionService
|
||||
|
||||
# Hilfswerkzeuge für JSON-Verarbeitung und Konfigurations-Management
|
||||
# load_type_registry wird hier re-exportiert, um die Abwärtskompatibilität zu wahren,
|
||||
# obwohl die Implementierung nun in app.core.registry liegt.
|
||||
from .ingestion_utils import (
|
||||
extract_json_from_response,
|
||||
load_type_registry,
|
||||
resolve_note_type
|
||||
)
|
||||
|
||||
# Public API of the package (names exported via `from app.core.ingestion import *`).
__all__ = [
    "IngestionService",
    "extract_json_from_response",
    "load_type_registry",
    "resolve_note_type"
]
|
||||
114
app/core/ingestion/ingestion_chunk_payload.py
Normal file
114
app/core/ingestion/ingestion_chunk_payload.py
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
"""
|
||||
FILE: app/core/ingestion/ingestion_chunk_payload.py
|
||||
DESCRIPTION: Baut das JSON-Objekt für 'mindnet_chunks'.
|
||||
Fix v2.4.3: Integration der zentralen Registry (WP-14) für konsistente Defaults.
|
||||
VERSION: 2.4.3
|
||||
STATUS: Active
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
# ENTSCHEIDENDER FIX: Import der neutralen Registry-Logik zur Vermeidung von Circular Imports
|
||||
from app.core.registry import load_type_registry
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Resolution Helpers (Audited)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _as_list(x):
|
||||
"""Sichert die Listen-Integrität für Metadaten wie Tags."""
|
||||
if x is None: return []
|
||||
return x if isinstance(x, list) else [x]
|
||||
|
||||
def _resolve_val(note_type: str, reg: dict, key: str, default: Any) -> Any:
|
||||
"""
|
||||
Hierarchische Suche in der Registry: Type-Spezifisch > Globaler Default.
|
||||
WP-14: Erlaubt dynamische Konfiguration via types.yaml.
|
||||
"""
|
||||
types = reg.get("types", {})
|
||||
if isinstance(types, dict):
|
||||
t_cfg = types.get(note_type, {})
|
||||
if isinstance(t_cfg, dict):
|
||||
# Fallback für Key-Varianten (z.B. chunking_profile vs chunk_profile)
|
||||
val = t_cfg.get(key) or t_cfg.get(key.replace("ing", ""))
|
||||
if val is not None: return val
|
||||
|
||||
defs = reg.get("defaults", {}) or reg.get("global", {})
|
||||
if isinstance(defs, dict):
|
||||
val = defs.get(key) or defs.get(key.replace("ing", ""))
|
||||
if val is not None: return val
|
||||
|
||||
return default
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Haupt-API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_chunk_payloads(note: Dict[str, Any], note_path: str, chunks_from_chunker: List[Any], **kwargs) -> List[Dict[str, Any]]:
    """Build the Qdrant payload dicts for every chunk of one note.

    Args:
        note: either a parsed note dict containing a "frontmatter" key or a
            bare frontmatter dict (both shapes accepted so overrides work).
        note_path: logical vault path stored on each chunk.
        chunks_from_chunker: chunk objects or dicts produced by the chunker.
        **kwargs: optional "types_cfg" (pre-loaded registry) and
            "file_path" (physical source path).

    Returns:
        One payload dict per chunk; type/weight/profile are resolved via
        the hierarchy: frontmatter override > registry > hard default.
    """
    # Robust input detection: full note object vs. bare frontmatter dict.
    if isinstance(note, dict) and "frontmatter" in note:
        fm = note["frontmatter"]
    else:
        fm = note or {}

    # WP-14: use the caller-supplied registry or load the global one.
    reg = kwargs.get("types_cfg") or load_type_registry()

    note_type = fm.get("type") or "concept"
    title = fm.get("title") or fm.get("id") or "Untitled"
    tags = _as_list(fm.get("tags") or [])

    # Resolution hierarchy: frontmatter override first, then registry.
    cp = fm.get("chunking_profile") or fm.get("chunk_profile")
    if not cp:
        cp = _resolve_val(note_type, reg, "chunking_profile", "sliding_standard")

    rw = fm.get("retriever_weight")
    if rw is None:
        rw = _resolve_val(note_type, reg, "retriever_weight", 1.0)
    try:
        rw = float(rw)
    except (TypeError, ValueError):
        # Fix: catch only conversion failures; the previous bare except
        # also masked unrelated errors (and KeyboardInterrupt).
        rw = 1.0

    out: List[Dict[str, Any]] = []
    for idx, ch in enumerate(chunks_from_chunker):
        # Chunks may be dicts or chunker objects; read both uniformly.
        is_dict = isinstance(ch, dict)
        cid = getattr(ch, "id", None) if not is_dict else ch.get("id")
        nid = getattr(ch, "note_id", None) if not is_dict else ch.get("note_id")
        index = getattr(ch, "index", idx) if not is_dict else ch.get("index", idx)
        text = getattr(ch, "text", "") if not is_dict else ch.get("text", "")
        window = getattr(ch, "window", text) if not is_dict else ch.get("window", text)
        prev_id = getattr(ch, "neighbors_prev", None) if not is_dict else ch.get("neighbors_prev")
        next_id = getattr(ch, "neighbors_next", None) if not is_dict else ch.get("neighbors_next")
        section = getattr(ch, "section_title", "") if not is_dict else ch.get("section", "")

        pl: Dict[str, Any] = {
            "note_id": nid or fm.get("id"),
            "chunk_id": cid,
            "title": title,
            "index": int(index),
            "ord": int(index) + 1,  # 1-based ordinal alongside the 0-based index
            "type": note_type,
            "tags": tags,
            "text": text,
            "window": window,
            "neighbors_prev": _as_list(prev_id),
            "neighbors_next": _as_list(next_id),
            "section": section,
            "path": note_path,
            "source_path": kwargs.get("file_path") or note_path,
            "retriever_weight": rw,
            "chunk_profile": cp
        }
        # Fix: removed the dead "cleanup pop" loop - the payload is built
        # locally above and never contains the legacy alias keys it popped.
        out.append(pl)

    return out
|
||||
39
app/core/ingestion/ingestion_db.py
Normal file
39
app/core/ingestion/ingestion_db.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
"""
|
||||
FILE: app/core/ingestion/ingestion_db.py
|
||||
DESCRIPTION: Datenbank-Schnittstelle für Note-Metadaten und Artefakt-Prüfung.
|
||||
WP-14: Umstellung auf zentrale database-Infrastruktur.
|
||||
"""
|
||||
from typing import Optional, Tuple
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
# Import der modularisierten Namen-Logik zur Sicherstellung der Konsistenz
|
||||
from app.core.database import collection_names
|
||||
|
||||
def fetch_note_payload(client: QdrantClient, prefix: str, note_id: str) -> Optional[dict]:
    """Fetch the stored metadata payload of a note from Qdrant via scroll.

    Returns the payload of the first matching point in the notes
    collection, or None when the note does not exist or the lookup fails.
    """
    notes_col, _, _ = collection_names(prefix)
    try:
        f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
        pts, _ = client.scroll(collection_name=notes_col, scroll_filter=f, limit=1, with_payload=True)
        return pts[0].payload if pts else None
    except Exception:
        # Fix: a bare `except:` also swallowed SystemExit/KeyboardInterrupt.
        return None
|
||||
|
||||
def artifacts_missing(client: QdrantClient, prefix: str, note_id: str) -> Tuple[bool, bool]:
    """Actively probe Qdrant for existing chunks and edges of *note_id*.

    Returns ``(chunks_missing, edges_missing)``; on any probe failure both
    are reported missing so callers re-ingest defensively.
    """
    _, chunks_col, edges_col = collection_names(prefix)
    try:
        f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
        c_pts, _ = client.scroll(collection_name=chunks_col, scroll_filter=f, limit=1)
        e_pts, _ = client.scroll(collection_name=edges_col, scroll_filter=f, limit=1)
        return (not bool(c_pts)), (not bool(e_pts))
    except Exception:
        # Fix: narrowed from a bare `except:` so interrupts still propagate.
        return True, True
|
||||
|
||||
def purge_artifacts(client: QdrantClient, prefix: str, note_id: str):
    """Delete orphaned chunks/edges of *note_id* before a re-import.

    Best-effort per collection: a failing delete (e.g. collection not yet
    created) is ignored so the import itself is not aborted.
    """
    _, chunks_col, edges_col = collection_names(prefix)
    f = rest.Filter(must=[rest.FieldCondition(key="note_id", match=rest.MatchValue(value=note_id))])
    # Iterate over the centrally managed collection names (WP-14).
    for col in [chunks_col, edges_col]:
        try:
            client.delete(collection_name=col, points_selector=rest.FilterSelector(filter=f))
        except Exception:
            # Fix: narrowed from a bare `except:` (kept deliberately best-effort).
            pass
|
||||
160
app/core/ingestion/ingestion_note_payload.py
Normal file
160
app/core/ingestion/ingestion_note_payload.py
Normal file
|
|
@ -0,0 +1,160 @@
|
|||
"""
|
||||
FILE: app/core/ingestion/ingestion_note_payload.py
|
||||
DESCRIPTION: Baut das JSON-Objekt für mindnet_notes.
|
||||
FEATURES:
|
||||
- Multi-Hash (body/full) für flexible Change Detection.
|
||||
- Fix v2.4.4: Integration der zentralen Registry (WP-14) für konsistente Defaults.
|
||||
VERSION: 2.4.4
|
||||
STATUS: Active
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from typing import Any, Dict, Tuple, Optional
|
||||
import os
|
||||
import json
|
||||
import pathlib
|
||||
import hashlib
|
||||
|
||||
# Import der zentralen Registry-Logik
|
||||
from app.core.registry import load_type_registry
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _as_dict(x) -> Dict[str, Any]:
|
||||
"""Versucht, ein Objekt in ein Dict zu überführen."""
|
||||
if isinstance(x, dict): return dict(x)
|
||||
out: Dict[str, Any] = {}
|
||||
for attr in ("frontmatter", "body", "id", "note_id", "title", "path", "tags", "type", "created", "modified", "date"):
|
||||
if hasattr(x, attr):
|
||||
val = getattr(x, attr)
|
||||
if val is not None: out[attr] = val
|
||||
if not out: out["raw"] = str(x)
|
||||
return out
|
||||
|
||||
def _ensure_list(x) -> list:
|
||||
"""Sichert String-Listen Integrität."""
|
||||
if x is None: return []
|
||||
if isinstance(x, list): return [str(i) for i in x]
|
||||
if isinstance(x, (set, tuple)): return [str(i) for i in x]
|
||||
return [str(x)]
|
||||
|
||||
def _compute_hash(content: str) -> str:
|
||||
"""SHA-256 Hash-Berechnung."""
|
||||
if not content: return ""
|
||||
return hashlib.sha256(content.encode("utf-8")).hexdigest()
|
||||
|
||||
def _get_hash_source_content(n: Dict[str, Any], mode: str) -> str:
|
||||
"""Generiert den Hash-Input-String basierend auf Body oder Metadaten."""
|
||||
body = str(n.get("body") or "")
|
||||
if mode == "body": return body
|
||||
if mode == "full":
|
||||
fm = n.get("frontmatter") or {}
|
||||
meta_parts = []
|
||||
# Sortierte Liste für deterministische Hashes
|
||||
for k in sorted(["title", "type", "status", "tags", "chunking_profile", "chunk_profile", "retriever_weight"]):
|
||||
val = fm.get(k)
|
||||
if val is not None: meta_parts.append(f"{k}:{val}")
|
||||
return f"{'|'.join(meta_parts)}||{body}"
|
||||
return body
|
||||
|
||||
def _cfg_for_type(note_type: str, reg: dict) -> dict:
|
||||
"""Extrahiert Typ-spezifische Config aus der Registry."""
|
||||
if not isinstance(reg, dict): return {}
|
||||
types = reg.get("types") if isinstance(reg.get("types"), dict) else reg
|
||||
return types.get(note_type, {}) if isinstance(types, dict) else {}
|
||||
|
||||
def _cfg_defaults(reg: dict) -> dict:
|
||||
"""Extrahiert globale Default-Werte aus der Registry."""
|
||||
if not isinstance(reg, dict): return {}
|
||||
for key in ("defaults", "default", "global"):
|
||||
v = reg.get(key)
|
||||
if isinstance(v, dict): return v
|
||||
return {}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Haupt-API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]:
    """Build the Qdrant payload for one note, including multi-hash data.

    Resolution order for configurable fields: frontmatter override >
    type-specific registry config > global registry defaults >
    environment/hard default.

    Keyword args:
        types_cfg: pre-loaded registry dict (loaded globally when absent).
        file_path: source path fallback when the note carries no path.
        hash_source / hash_normalize: labels baked into the hash keys.

    Returns:
        A JSON-serializable payload dict. The final ``json.loads(dumps(...))``
        round-trip raises if any value is not serializable.
    """
    n = _as_dict(note)

    # Use the caller-supplied registry or load the global one.
    reg = kwargs.get("types_cfg") or load_type_registry()
    hash_source = kwargs.get("hash_source", "parsed")
    hash_normalize = kwargs.get("hash_normalize", "canonical")

    fm = n.get("frontmatter") or {}
    note_type = str(fm.get("type") or n.get("type") or "concept")

    cfg_type = _cfg_for_type(note_type, reg)
    cfg_def = _cfg_defaults(reg)
    ingest_cfg = reg.get("ingestion_settings", {})

    # --- retriever_weight ---
    # Priority: frontmatter -> type config -> global config -> env var.
    default_rw = float(os.environ.get("MINDNET_DEFAULT_RETRIEVER_WEIGHT", 1.0))
    retriever_weight = fm.get("retriever_weight")
    if retriever_weight is None:
        retriever_weight = cfg_type.get("retriever_weight", cfg_def.get("retriever_weight", default_rw))
    try:
        retriever_weight = float(retriever_weight)
    except (TypeError, ValueError):
        # Fix: catch only conversion failures; the previous bare except
        # silently masked unrelated errors.
        retriever_weight = default_rw

    # --- chunk_profile ---
    # Primarily driven by ingestion_settings in the registry.
    chunk_profile = fm.get("chunking_profile") or fm.get("chunk_profile")
    if chunk_profile is None:
        chunk_profile = cfg_type.get("chunking_profile") or cfg_type.get("chunk_profile")
    if chunk_profile is None:
        chunk_profile = ingest_cfg.get("default_chunk_profile", cfg_def.get("chunking_profile", "sliding_standard"))

    # --- edge_defaults ---
    edge_defaults = fm.get("edge_defaults")
    if edge_defaults is None:
        edge_defaults = cfg_type.get("edge_defaults", cfg_def.get("edge_defaults", []))
    edge_defaults = _ensure_list(edge_defaults)

    # --- base metadata ---
    note_id = n.get("note_id") or n.get("id") or fm.get("id")
    title = n.get("title") or fm.get("title") or ""
    path = n.get("path") or kwargs.get("file_path") or ""
    if isinstance(path, pathlib.Path):
        path = str(path)

    payload: Dict[str, Any] = {
        "note_id": note_id,
        "title": title,
        "type": note_type,
        "path": path,
        "retriever_weight": retriever_weight,
        "chunk_profile": chunk_profile,
        "edge_defaults": edge_defaults,
        "hashes": {}
    }

    # --- multi-hash ---
    # One hash per mode so change detection can switch modes later.
    for mode in ["body", "full"]:
        content = _get_hash_source_content(n, mode)
        payload["hashes"][f"{mode}:{hash_source}:{hash_normalize}"] = _compute_hash(content)

    # Metadata enrichment.
    tags = fm.get("tags") or fm.get("keywords") or n.get("tags")
    if tags:
        payload["tags"] = _ensure_list(tags)
    if fm.get("aliases"):
        payload["aliases"] = _ensure_list(fm.get("aliases"))

    for k in ("created", "modified", "date"):
        v = fm.get(k) or n.get(k)
        if v:
            payload[k] = str(v)

    if n.get("body"):
        payload["fulltext"] = str(n["body"])

    # Final JSON serializability audit (raises on non-serializable values).
    json.loads(json.dumps(payload, ensure_ascii=False))

    return payload
|
||||
220
app/core/ingestion/ingestion_processor.py
Normal file
220
app/core/ingestion/ingestion_processor.py
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
"""
|
||||
FILE: app/core/ingestion/ingestion_processor.py
|
||||
DESCRIPTION: Der zentrale IngestionService (Orchestrator).
|
||||
WP-14: Modularisierung der Datenbank-Ebene (app.core.database).
|
||||
WP-15b: Two-Pass Workflow mit globalem Kontext-Cache.
|
||||
WP-20/22: Cloud-Resilienz und Content-Lifecycle integriert.
|
||||
AUDIT v2.13.10: Umstellung auf app.core.database Infrastruktur.
|
||||
VERSION: 2.13.10
|
||||
STATUS: Active
|
||||
"""
|
||||
import logging
|
||||
import asyncio
|
||||
import os
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
|
||||
# Core Module Imports
|
||||
from app.core.parser import (
|
||||
read_markdown, pre_scan_markdown, normalize_frontmatter,
|
||||
validate_required_frontmatter, NoteContext
|
||||
)
|
||||
from app.core.chunking import assemble_chunks
|
||||
|
||||
# MODULARISIERUNG: Neue Import-Pfade für die Datenbank-Ebene
|
||||
from app.core.database.qdrant import QdrantConfig, get_client, ensure_collections, ensure_payload_indexes
|
||||
from app.core.database.qdrant_points import points_for_chunks, points_for_note, points_for_edges, upsert_batch
|
||||
|
||||
# Services
|
||||
from app.services.embeddings_client import EmbeddingsClient
|
||||
from app.services.edge_registry import registry as edge_registry
|
||||
from app.services.llm_service import LLMService
|
||||
|
||||
# Package-Interne Imports (Refactoring WP-14)
|
||||
from .ingestion_utils import load_type_registry, resolve_note_type, get_chunk_config_by_profile
|
||||
from .ingestion_db import fetch_note_payload, artifacts_missing, purge_artifacts
|
||||
from .ingestion_validation import validate_edge_candidate
|
||||
from .ingestion_note_payload import make_note_payload
|
||||
from .ingestion_chunk_payload import make_chunk_payloads
|
||||
|
||||
# Fallback für Edges (Struktur-Verknüpfung)
|
||||
try:
|
||||
from app.core.derive_edges import build_edges_for_note
|
||||
except ImportError:
|
||||
def build_edges_for_note(*args, **kwargs): return []
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class IngestionService:
    """Central ingestion orchestrator (WP-14/WP-15b).

    Transforms markdown notes into graph artifacts (note payload, chunks,
    edges) and upserts them into Qdrant. `run_batch` implements the
    two-pass workflow: pass 1 pre-scans all files into the LocalBatchCache,
    pass 2 processes each file with semantic edge validation against it.
    """

    def __init__(self, collection_prefix: str = None):
        """Initialize the service on top of the app.core.database infrastructure.

        Args:
            collection_prefix: Qdrant collection prefix; falls back to the
                configured COLLECTION_PREFIX when None.
        """
        # Function-scope import of the settings accessor.
        from app.config import get_settings
        self.settings = get_settings()

        self.prefix = collection_prefix or self.settings.COLLECTION_PREFIX
        self.cfg = QdrantConfig.from_env()
        # Keep the Qdrant config in sync with the instance prefix.
        self.cfg.prefix = self.prefix
        self.client = get_client(self.cfg)
        self.dim = self.settings.VECTOR_SIZE
        self.registry = load_type_registry()
        self.embedder = EmbeddingsClient()
        self.llm = LLMService()

        self.active_hash_mode = self.settings.CHANGE_DETECTION_MODE
        self.batch_cache: Dict[str, NoteContext] = {}  # WP-15b LocalBatchCache

        try:
            # Modularized schema bootstrap (collections + payload indexes).
            ensure_collections(self.client, self.prefix, self.dim)
            ensure_payload_indexes(self.client, self.prefix)
        except Exception as e:
            logger.warning(f"DB initialization warning: {e}")

    async def run_batch(self, file_paths: List[str], vault_root: str) -> List[Dict[str, Any]]:
        """WP-15b: run the two-pass ingestion workflow over *file_paths*.

        Pass 1: pre-scan fills the context cache (3-way indexing).
        Pass 2: per-file processing uses the cache for semantic checks.

        Returns one result dict per file (same order as *file_paths*).
        """
        logger.info(f"🔍 [Pass 1] Pre-Scanning {len(file_paths)} files for Context Cache...")
        for path in file_paths:
            try:
                # Hand the registry over so the scan depth is configurable.
                ctx = pre_scan_markdown(path, registry=self.registry)
                if ctx:
                    # Index under note id, title AND file stem for robust lookup.
                    self.batch_cache[ctx.note_id] = ctx
                    self.batch_cache[ctx.title] = ctx
                    fname = os.path.splitext(os.path.basename(path))[0]
                    self.batch_cache[fname] = ctx
            except Exception as e:
                logger.warning(f"⚠️ Pre-scan failed for {path}: {e}")

        logger.info(f"🚀 [Pass 2] Semantic Processing of {len(file_paths)} files...")
        return [await self.process_file(p, vault_root, apply=True, purge_before=True) for p in file_paths]

    async def process_file(self, file_path: str, vault_root: str, **kwargs) -> Dict[str, Any]:
        """Transform one markdown file into graph artifacts and upsert them.

        Keyword args: apply (dry-run gate), force_replace, purge_before,
        note_scope_refs, hash_source, hash_normalize.

        Returns a result dict with "status" of "skipped" / "unchanged" /
        "dry-run" / "success", or an "error" message on failure.
        """
        apply = kwargs.get("apply", False)
        force_replace = kwargs.get("force_replace", False)
        purge_before = kwargs.get("purge_before", False)
        note_scope_refs = kwargs.get("note_scope_refs", False)
        hash_source = kwargs.get("hash_source", "parsed")
        hash_normalize = kwargs.get("hash_normalize", "canonical")

        result = {"path": file_path, "status": "skipped", "changed": False, "error": None}

        # 1. Parse & lifecycle gate.
        try:
            parsed = read_markdown(file_path)
            if not parsed: return {**result, "error": "Empty file"}
            fm = normalize_frontmatter(parsed.frontmatter)
            validate_required_frontmatter(fm)
        except Exception as e:
            return {**result, "error": f"Validation failed: {str(e)}"}

        # Dynamic lifecycle filter from the registry (WP-14).
        ingest_cfg = self.registry.get("ingestion_settings", {})
        ignore_list = ingest_cfg.get("ignore_statuses", ["system", "template", "archive", "hidden"])

        current_status = fm.get("status", "draft").lower().strip()
        if current_status in ignore_list:
            return {**result, "status": "skipped", "reason": "lifecycle_filter"}

        # 2. Payload & change detection (multi-hash).
        note_type = resolve_note_type(self.registry, fm.get("type"))
        note_pl = make_note_payload(
            parsed, vault_root=vault_root, file_path=file_path,
            hash_source=hash_source, hash_normalize=hash_normalize,
            types_cfg=self.registry
        )
        note_id = note_pl["note_id"]

        old_payload = None if force_replace else fetch_note_payload(self.client, self.prefix, note_id)
        check_key = f"{self.active_hash_mode}:{hash_source}:{hash_normalize}"
        old_hash = (old_payload or {}).get("hashes", {}).get(check_key)
        new_hash = note_pl.get("hashes", {}).get(check_key)

        # Re-ingest when forced, new, changed, or artifacts are missing.
        c_miss, e_miss = artifacts_missing(self.client, self.prefix, note_id)
        if not (force_replace or not old_payload or old_hash != new_hash or c_miss or e_miss):
            return {**result, "status": "unchanged", "note_id": note_id}

        if not apply:
            return {**result, "status": "dry-run", "changed": True, "note_id": note_id}

        # 3. Deep processing (chunking, validation, embedding).
        try:
            body_text = getattr(parsed, "body", "") or ""
            edge_registry.ensure_latest()
            profile = fm.get("chunk_profile") or fm.get("chunking_profile") or "sliding_standard"
            chunk_cfg = get_chunk_config_by_profile(self.registry, profile, note_type)
            enable_smart = chunk_cfg.get("enable_smart_edge_allocation", False)

            # WP-15b: the chunker prepares the per-chunk candidate pool.
            chunks = await assemble_chunks(note_id, body_text, note_type, config=chunk_cfg)
            for ch in chunks:
                filtered = []
                for cand in getattr(ch, "candidate_pool", []):
                    # WP-15b: only global_pool candidates need binary validation.
                    if cand.get("provenance") == "global_pool" and enable_smart:
                        if await validate_edge_candidate(ch.text, cand, self.batch_cache, self.llm, self.settings.MINDNET_LLM_PROVIDER):
                            filtered.append(cand)
                    else:
                        filtered.append(cand)
                ch.candidate_pool = filtered

            # Payload construction via the package-internal modules.
            chunk_pls = make_chunk_payloads(
                fm, note_pl["path"], chunks, file_path=file_path,
                types_cfg=self.registry
            )
            vecs = await self.embedder.embed_documents([c.get("window") or "" for c in chunk_pls]) if chunk_pls else []

            # Edge aggregation.
            edges = build_edges_for_note(
                note_id, chunk_pls,
                note_level_references=note_pl.get("references", []),
                include_note_scope_refs=note_scope_refs
            )
            for e in edges:
                e["kind"] = edge_registry.resolve(
                    e.get("kind", "related_to"),
                    provenance=e.get("provenance", "explicit"),
                    context={"file": file_path, "note_id": note_id, "line": e.get("line", "system")}
                )

            # 4. DB upsert via the modularized points logic.
            if purge_before and old_payload:
                purge_artifacts(self.client, self.prefix, note_id)

            n_name, n_pts = points_for_note(self.prefix, note_pl, None, self.dim)
            upsert_batch(self.client, n_name, n_pts)

            if chunk_pls and vecs:
                c_pts = points_for_chunks(self.prefix, chunk_pls, vecs)[1]
                upsert_batch(self.client, f"{self.prefix}_chunks", c_pts)

            if edges:
                e_pts = points_for_edges(self.prefix, edges)[1]
                upsert_batch(self.client, f"{self.prefix}_edges", e_pts)

            return {
                "path": file_path,
                "status": "success",
                "changed": True,
                "note_id": note_id,
                "chunks_count": len(chunk_pls),
                "edges_count": len(edges)
            }
        except Exception as e:
            logger.error(f"Processing failed: {e}", exc_info=True)
            return {**result, "error": str(e)}

    async def create_from_text(self, markdown_content: str, filename: str, vault_root: str, folder: str = "00_Inbox") -> Dict[str, Any]:
        """Create a note file from a text stream and trigger its ingestion.

        Writes below ``vault_root/folder`` and ingests with replace semantics.
        """
        target_path = os.path.join(vault_root, folder, filename)
        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        with open(target_path, "w", encoding="utf-8") as f:
            f.write(markdown_content)
        # Brief pause before the pipeline re-reads the freshly written file.
        await asyncio.sleep(0.1)
        return await self.process_file(file_path=target_path, vault_root=vault_root, apply=True, force_replace=True, purge_before=True)
|
||||
71
app/core/ingestion/ingestion_utils.py
Normal file
71
app/core/ingestion/ingestion_utils.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
"""
|
||||
FILE: app/core/ingestion/ingestion_utils.py
|
||||
DESCRIPTION: Hilfswerkzeuge für JSON-Recovery, Typ-Registry und Konfigurations-Lookups.
|
||||
AUDIT v2.13.9: Behebung des Circular Imports durch Nutzung der app.core.registry.
|
||||
"""
|
||||
import json
|
||||
import re
|
||||
from typing import Any, Optional, Dict
|
||||
|
||||
# ENTSCHEIDENDER FIX: Import der Basis-Logik aus dem neutralen Registry-Modul.
|
||||
# Dies bricht den Zirkelbezug auf, da dieses Modul keine Services mehr importiert.
|
||||
from app.core.registry import load_type_registry, clean_llm_text
|
||||
|
||||
def extract_json_from_response(text: str, registry: Optional[dict] = None) -> Any:
    """Extract a JSON value from a raw LLM response.

    The text is first cleaned of LLM control characters (WP-14 central
    ``clean_llm_text``), then a fenced ```json``` block is preferred.
    Failing a direct parse, recovery tries the outermost ``[...]`` list
    and finally the outermost ``{...}`` object. Returns [] when nothing
    parses (or the input is empty).
    """
    if not text:
        return []

    # 1. Central text cleanup via the neutral registry module.
    clean = clean_llm_text(text, registry)

    # 2. Prefer the content of a fenced markdown code block.
    match = re.search(r"```(?:json)?\s*(.*?)\s*```", clean, re.DOTALL)
    payload = match.group(1) if match else clean

    try:
        return json.loads(payload.strip())
    except json.JSONDecodeError:
        pass

    # Recovery: outermost JSON list.
    start = payload.find('[')
    end = payload.rfind(']') + 1
    if start != -1 and end > start:
        try:
            return json.loads(payload[start:end])
        except json.JSONDecodeError:
            # Fix: catch only parse errors instead of a bare except.
            pass

    # Recovery: outermost JSON object.
    start_obj = payload.find('{')
    end_obj = payload.rfind('}') + 1
    if start_obj != -1 and end_obj > start_obj:
        try:
            return json.loads(payload[start_obj:end_obj])
        except json.JSONDecodeError:
            pass
    return []
|
||||
|
||||
def resolve_note_type(registry: dict, requested: Optional[str]) -> str:
    """Resolve the effective note type.

    A requested type is honoured only when it is declared in the
    registry; otherwise the fallback configured under
    ``ingestion_settings.default_note_type`` is used ('concept' default).
    """
    known_types = registry.get("types", {})
    if requested and requested in known_types:
        return requested

    fallback_cfg = registry.get("ingestion_settings", {})
    return fallback_cfg.get("default_note_type", "concept")
|
||||
|
||||
def get_chunk_config_by_profile(registry: dict, profile_name: str, note_type: str) -> Dict[str, Any]:
    """Look up the chunker parameters for *profile_name* in the registry.

    Returns a copy of the profile dict (so callers can mutate it safely),
    with a YAML list "overlap" normalized to a tuple. Falls back to the
    type-based default configuration when the profile is not defined.

    Fix: the project import is now lazy - it only runs on the fallback
    path, so registry-defined profiles resolve without importing the
    chunking module.
    """
    profiles = registry.get("chunking_profiles", {})
    if profile_name in profiles:
        cfg = profiles[profile_name].copy()
        # YAML serializes tuples as lists; restore the expected tuple form.
        if "overlap" in cfg and isinstance(cfg["overlap"], list):
            cfg["overlap"] = tuple(cfg["overlap"])
        return cfg

    # Fallback: derive the config from the note type (lazy import).
    from app.core.chunking import get_chunk_config
    return get_chunk_config(note_type)
|
||||
67
app/core/ingestion/ingestion_validation.py
Normal file
67
app/core/ingestion/ingestion_validation.py
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
"""
|
||||
FILE: app/core/ingestion/ingestion_validation.py
|
||||
DESCRIPTION: WP-15b semantische Validierung von Kanten gegen den LocalBatchCache.
|
||||
AUDIT v2.12.3: Integration der zentralen Text-Bereinigung (WP-14).
|
||||
"""
|
||||
import logging
|
||||
from typing import Dict, Any
|
||||
from app.core.parser import NoteContext
|
||||
|
||||
# ENTSCHEIDENDER FIX: Import der neutralen Bereinigungs-Logik zur Vermeidung von Circular Imports
|
||||
from app.core.registry import clean_llm_text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def validate_edge_candidate(
    chunk_text: str,
    edge: Dict,
    batch_cache: Dict[str, NoteContext],
    llm_service: Any,
    provider: str
) -> bool:
    """WP-15b: semantically validate an edge candidate against its cached target.

    The LLM is asked for a binary verdict using the target's cached
    title/summary. Fail-open semantics: when the target is unknown, the
    edge has no target, or the LLM call errors out, the edge is allowed
    so links are never silently lost.
    """
    target_id = edge.get("to")

    # Fix: an edge without a target previously crashed on the
    # `"#" in target_id` anchor check below (TypeError on None).
    if not target_id:
        logger.info("ℹ️ [VALIDATION SKIP] Edge has no target - allowing link.")
        return True

    target_ctx = batch_cache.get(target_id)

    # Robust lookup fix (v2.12.2): support anchored ids ("note#section").
    if not target_ctx and "#" in target_id:
        base_id = target_id.split("#")[0]
        target_ctx = batch_cache.get(base_id)

    # Safety fallback (hard-link integrity): unknown target -> allow.
    if not target_ctx:
        logger.info(f"ℹ️ [VALIDATION SKIP] No context for '{target_id}' - allowing link.")
        return True

    template = llm_service.get_prompt("edge_validation", provider)

    try:
        logger.info(f"⚖️ [VALIDATING] Relation '{edge.get('kind')}' -> '{target_id}'...")
        prompt = template.format(
            chunk_text=chunk_text[:1500],
            target_title=target_ctx.title,
            target_summary=target_ctx.summary,
            edge_kind=edge.get("kind", "related_to")
        )

        # Request the raw verdict from the LLM service.
        raw_response = await llm_service.generate_raw_response(prompt, priority="background")

        # WP-14 fix: strip control characters before interpreting the verdict.
        response = clean_llm_text(raw_response)

        # Binary check: any "YES" in the answer confirms the relation.
        is_valid = "YES" in response.upper()

        if is_valid:
            logger.info(f"✅ [VALIDATED] Relation to '{target_id}' confirmed.")
        else:
            logger.info(f"🚫 [REJECTED] Relation to '{target_id}' irrelevant for this chunk.")
        return is_valid
    except Exception as e:
        logger.warning(f"⚠️ Validation error for {target_id}: {e}")
        # On doubt (timeout/error) allow the edge to avoid data loss.
        return True
|
||||
|
|
@ -1,268 +0,0 @@
|
|||
"""
|
||||
FILE: app/core/note_payload.py
|
||||
DESCRIPTION: Baut das JSON-Objekt.
|
||||
FEATURES:
|
||||
1. Multi-Hash: Berechnet immer 'body' AND 'full' Hashes für flexible Change Detection.
|
||||
2. Config-Fix: Liest korrekt 'chunking_profile' aus types.yaml (statt Legacy 'chunk_profile').
|
||||
VERSION: 2.3.0
|
||||
STATUS: Active
|
||||
DEPENDENCIES: yaml, os, json, pathlib, hashlib
|
||||
EXTERNAL_CONFIG: config/types.yaml
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Tuple, Optional
|
||||
import os
|
||||
import json
|
||||
import pathlib
|
||||
import hashlib
|
||||
|
||||
try:
|
||||
import yaml # type: ignore
|
||||
except Exception:
|
||||
yaml = None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _as_dict(x) -> Dict[str, Any]:
|
||||
"""Versucht, ein ParsedMarkdown-ähnliches Objekt in ein Dict zu überführen."""
|
||||
if isinstance(x, dict):
|
||||
return dict(x)
|
||||
|
||||
out: Dict[str, Any] = {}
|
||||
for attr in (
|
||||
"frontmatter",
|
||||
"body",
|
||||
"id",
|
||||
"note_id",
|
||||
"title",
|
||||
"path",
|
||||
"tags",
|
||||
"type",
|
||||
"created",
|
||||
"modified",
|
||||
"date",
|
||||
):
|
||||
if hasattr(x, attr):
|
||||
val = getattr(x, attr)
|
||||
if val is not None:
|
||||
out[attr] = val
|
||||
|
||||
if not out:
|
||||
out["raw"] = str(x)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
def _pick_args(*args, **kwargs) -> Tuple[Optional[str], Optional[dict]]:
|
||||
path = kwargs.get("path") or (args[0] if args else None)
|
||||
types_cfg = kwargs.get("types_cfg") or kwargs.get("types") or None
|
||||
return path, types_cfg
|
||||
|
||||
|
||||
def _env_float(name: str, default: float) -> float:
|
||||
try:
|
||||
return float(os.environ.get(name, default))
|
||||
except Exception:
|
||||
return default
|
||||
|
||||
|
||||
def _ensure_list(x) -> list:
|
||||
if x is None:
|
||||
return []
|
||||
if isinstance(x, list):
|
||||
return [str(i) for i in x]
|
||||
if isinstance(x, (set, tuple)):
|
||||
return [str(i) for i in x]
|
||||
return [str(x)]
|
||||
|
||||
# --- Hash Logic ---
|
||||
def _compute_hash(content: str) -> str:
|
||||
"""Berechnet einen SHA-256 Hash für den gegebenen String."""
|
||||
if not content:
|
||||
return ""
|
||||
return hashlib.sha256(content.encode("utf-8")).hexdigest()
|
||||
|
||||
def _get_hash_source_content(n: Dict[str, Any], mode: str) -> str:
|
||||
"""
|
||||
Stellt den String zusammen, der gehasht werden soll.
|
||||
"""
|
||||
body = str(n.get("body") or "")
|
||||
|
||||
if mode == "body":
|
||||
return body
|
||||
|
||||
if mode == "full":
|
||||
fm = n.get("frontmatter") or {}
|
||||
# Wichtig: Sortierte Keys für deterministisches Verhalten!
|
||||
# Wir nehmen alle steuernden Metadaten auf
|
||||
meta_parts = []
|
||||
# Hier checken wir keys, die eine Neu-Indizierung rechtfertigen würden
|
||||
for k in sorted(["title", "type", "status", "tags", "chunking_profile", "chunk_profile", "retriever_weight"]):
|
||||
val = fm.get(k)
|
||||
if val is not None:
|
||||
meta_parts.append(f"{k}:{val}")
|
||||
|
||||
meta_str = "|".join(meta_parts)
|
||||
return f"{meta_str}||{body}"
|
||||
|
||||
return body
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Type-Registry laden
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _load_types_config(explicit_cfg: Optional[dict] = None) -> dict:
|
||||
if explicit_cfg and isinstance(explicit_cfg, dict):
|
||||
return explicit_cfg
|
||||
|
||||
path = os.getenv("MINDNET_TYPES_FILE") or "./config/types.yaml"
|
||||
if not os.path.isfile(path) or yaml is None:
|
||||
return {}
|
||||
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
data = yaml.safe_load(f) or {}
|
||||
return data if isinstance(data, dict) else {}
|
||||
except Exception:
|
||||
return {}
|
||||
|
||||
|
||||
def _cfg_for_type(note_type: str, reg: dict) -> dict:
|
||||
if not isinstance(reg, dict):
|
||||
return {}
|
||||
types = reg.get("types") if isinstance(reg.get("types"), dict) else reg
|
||||
return types.get(note_type, {}) if isinstance(types, dict) else {}
|
||||
|
||||
|
||||
def _cfg_defaults(reg: dict) -> dict:
|
||||
if not isinstance(reg, dict):
|
||||
return {}
|
||||
for key in ("defaults", "default", "global"):
|
||||
v = reg.get(key)
|
||||
if isinstance(v, dict):
|
||||
return v
|
||||
return {}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Haupt-API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def make_note_payload(note: Any, *args, **kwargs) -> Dict[str, Any]:
    """
    Build the payload dict for the mindnet_notes collection.

    Includes multi-strategy hash computation ('body' AND 'full') and the
    corrected chunking-profile lookup chain (frontmatter > type config >
    defaults > 'sliding_standard').
    """
    data = _as_dict(note)
    path_arg, explicit_types = _pick_args(*args, **kwargs)
    registry = _load_types_config(explicit_types)

    # Hash parameters; the mode itself is fixed (both strategies are stored).
    hash_source = kwargs.get("hash_source", "parsed")
    hash_normalize = kwargs.get("hash_normalize", "canonical")

    fm = data.get("frontmatter") or {}
    note_type = str(fm.get("type") or data.get("type") or "concept")

    type_cfg = _cfg_for_type(note_type, registry)
    default_cfg = _cfg_defaults(registry)

    # --- retriever_weight: frontmatter > type config > defaults > env ---
    env_weight = _env_float("MINDNET_DEFAULT_RETRIEVER_WEIGHT", 1.0)
    weight = fm.get("retriever_weight")
    if weight is None:
        weight = type_cfg.get("retriever_weight",
                              default_cfg.get("retriever_weight", env_weight))
    try:
        weight = float(weight)
    except Exception:
        weight = env_weight

    # --- chunk profile: both spellings accepted (FIXED lookup chain) ---
    profile = fm.get("chunking_profile") or fm.get("chunk_profile")
    if profile is None:
        profile = type_cfg.get("chunking_profile")
    if profile is None:
        profile = default_cfg.get("chunking_profile", "sliding_standard")
    if not isinstance(profile, str) or not profile:
        profile = "sliding_standard"

    # --- edge defaults ---
    edge_defaults = fm.get("edge_defaults")
    if edge_defaults is None:
        edge_defaults = type_cfg.get("edge_defaults",
                                     default_cfg.get("edge_defaults", []))
    edge_defaults = _ensure_list(edge_defaults)

    # --- base metadata ---
    note_id = data.get("note_id") or data.get("id") or fm.get("id")
    title = data.get("title") or fm.get("title") or ""
    note_path = data.get("path") or path_arg
    if isinstance(note_path, pathlib.Path):
        note_path = str(note_path)

    payload: Dict[str, Any] = {
        "note_id": note_id,
        "title": title,
        "type": note_type,
        "path": note_path or "",
        "retriever_weight": weight,
        "chunk_profile": profile,
        "edge_defaults": edge_defaults,
        "hashes": {},
    }

    # Both hash strategies are always stored; the ingestion step decides via
    # ENV which one is compared (strategy decoupling).
    for mode in ("body", "full"):
        content = _get_hash_source_content(data, mode)
        key = f"{mode}:{hash_source}:{hash_normalize}"
        payload["hashes"][key] = _compute_hash(content)

    tags = fm.get("tags") or fm.get("keywords") or data.get("tags")
    if tags:
        payload["tags"] = _ensure_list(tags)

    aliases = fm.get("aliases")
    if aliases:
        payload["aliases"] = _ensure_list(aliases)

    # Timestamps are carried over as strings.
    for key in ("created", "modified", "date"):
        value = fm.get(key) or data.get(key)
        if value:
            payload[key] = str(value)

    if "body" in data and data["body"]:
        payload["fulltext"] = str(data["body"])

    # Round-trip to guarantee the payload is JSON-serializable.
    json.loads(json.dumps(payload, ensure_ascii=False))
    return payload
|
||||
|
|
@ -1,257 +0,0 @@
|
|||
"""
|
||||
FILE: app/core/parser.py
|
||||
DESCRIPTION: Liest Markdown-Dateien fehlertolerant (Encoding-Fallback). Trennt Frontmatter (YAML) vom Body.
|
||||
WP-22 Erweiterung: Kanten-Extraktion mit Zeilennummern für die EdgeRegistry.
|
||||
VERSION: 1.8.0
|
||||
STATUS: Active
|
||||
DEPENDENCIES: yaml, re, dataclasses, json, io, os
|
||||
LAST_ANALYSIS: 2025-12-23
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Optional, Tuple, Iterable, List
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
|
||||
try:
|
||||
import yaml # PyYAML
|
||||
except Exception as e: # pragma: no cover
|
||||
yaml = None # Fehler wird zur Laufzeit geworfen, falls wirklich benötigt
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Datamodell
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
@dataclass
class ParsedNote:
    """Container for a fully parsed Markdown file."""
    frontmatter: Dict[str, Any]  # parsed YAML frontmatter ({} when absent or invalid)
    body: str                    # Markdown body with the frontmatter block removed
    path: str                    # source path as passed to read_markdown()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Frontmatter-Erkennung
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
# Öffentliche Kompatibilitäts-Konstante: frühere Skripte importieren FRONTMATTER_RE
|
||||
FRONTMATTER_RE = re.compile(r"^\s*---\s*$") # <— public
|
||||
# Zusätzlich interner Alias (falls jemand ihn referenziert)
|
||||
FRONTMATTER_END = FRONTMATTER_RE # <— public alias
|
||||
|
||||
# interne Namen bleiben bestehen
|
||||
_FRONTMATTER_HEAD = FRONTMATTER_RE
|
||||
_FRONTMATTER_END = FRONTMATTER_RE
|
||||
|
||||
|
||||
def _split_frontmatter(text: str) -> Tuple[Dict[str, Any], str]:
    """
    Split *text* into (frontmatter dict, body string).

    Frontmatter is only recognized when the first line is '---' and a second
    '---' follows within the first 2000 lines. YAML errors do NOT abort; they
    are logged and an empty dict is used instead.
    """
    lines = text.splitlines(True)  # keep line endings for exact reassembly
    if not lines or not _FRONTMATTER_HEAD.match(lines[0]):
        # No opening marker -> the whole text is body.
        return {}, text

    close_idx = next(
        (i for i in range(1, min(len(lines), 2000)) if _FRONTMATTER_END.match(lines[i])),
        None,
    )
    if close_idx is None:
        # Unterminated frontmatter block -> treat everything as body.
        return {}, text

    raw_yaml = "".join(lines[1:close_idx])
    body = "".join(lines[close_idx + 1:])

    if yaml is None:
        raise RuntimeError("PyYAML ist nicht installiert (pip install pyyaml).")

    try:
        parsed = yaml.safe_load(raw_yaml) or {}
        meta: Dict[str, Any] = parsed if isinstance(parsed, dict) else {}
    except Exception as e:
        # A broken frontmatter must not kill ingestion; log and continue.
        print(json.dumps({"warn": "frontmatter_yaml_parse_failed", "error": str(e)}))
        meta = {}

    # Cosmetic: drop a single leading blank line from the body.
    if body.startswith("\n"):
        body = body[1:]

    return meta, body
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Robustes Lesen mit Encoding-Fallback
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
_FALLBACK_ENCODINGS: Tuple[str, ...] = ("utf-8", "utf-8-sig", "cp1252", "latin-1")
|
||||
|
||||
|
||||
def _read_text_with_fallback(path: str) -> Tuple[str, str, bool]:
    """
    Read a file, trying several encodings in order.

    Returns (text, used_encoding, had_fallback); had_fallback is True for any
    encoding other than plain 'utf-8' (including the benign 'utf-8-sig').
    """
    last_err: Optional[str] = None
    for encoding in _FALLBACK_ENCODINGS:
        try:
            with io.open(path, "r", encoding=encoding, errors="strict") as handle:
                return handle.read(), encoding, (encoding != "utf-8")
        except UnicodeDecodeError as exc:
            last_err = f"{type(exc).__name__}: {exc}"

    # Absolute last resort: lossy UTF-8 decode that can never raise.
    with open(path, "rb") as handle:
        blob = handle.read()
    text = blob.decode("utf-8", errors="replace")
    print(json.dumps({
        "path": path,
        "warn": "encoding_fallback_exhausted",
        "info": last_err or "unknown"
    }, ensure_ascii=False))
    return text, "utf-8(replace)", True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# Öffentliche API
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
def read_markdown(path: str) -> Optional[ParsedNote]:
    """Read one Markdown file fault-tolerantly; returns None if *path* is missing."""
    if not os.path.exists(path):
        return None

    text, used_enc, fallback_used = _read_text_with_fallback(path)
    if fallback_used:
        # Surface non-UTF-8 sources so operators can fix their files.
        print(json.dumps({"path": path, "warn": "encoding_fallback_used", "used": used_enc}, ensure_ascii=False))

    meta, body = _split_frontmatter(text)
    return ParsedNote(frontmatter=meta or {}, body=body or "", path=path)
|
||||
|
||||
|
||||
def validate_required_frontmatter(fm: Dict[str, Any],
                                  required: Tuple[str, ...] = ("id", "title")) -> None:
    """
    Ensure all mandatory frontmatter fields are present and non-blank.

    Raises ValueError listing the missing fields; additionally rejects a
    'tags' value that is neither a list nor a tuple.
    """
    fm = fm or {}
    missing = []
    for key in required:
        value = fm.get(key)
        # Missing entirely, or a whitespace-only string, both count as absent.
        if value is None or (isinstance(value, str) and not value.strip()):
            missing.append(key)
    if missing:
        raise ValueError(f"Missing required frontmatter fields: {', '.join(missing)}")

    if "tags" in fm and fm["tags"] not in (None, "") and not isinstance(fm["tags"], (list, tuple)):
        raise ValueError("frontmatter 'tags' must be a list of strings")
|
||||
|
||||
|
||||
def normalize_frontmatter(fm: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize 'tags' into a list of stripped strings; coerce flags to bool."""
    result = dict(fm or {})

    if "tags" in result:
        tags = result["tags"]
        if isinstance(tags, str):
            stripped = tags.strip()
            result["tags"] = [stripped] if stripped else []
        elif isinstance(tags, list):
            result["tags"] = [str(t).strip() for t in tags if t is not None]
        elif tags in (None, ""):
            result["tags"] = []
        else:
            # Scalar fallback: wrap the stringified value.
            result["tags"] = [str(tags).strip()]

    if "embedding_exclude" in result:
        result["embedding_exclude"] = bool(result["embedding_exclude"])
    return result
|
||||
|
||||
|
||||
# ------------------------------ Wikilinks ---------------------------- #
|
||||
|
||||
_WIKILINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
|
||||
|
||||
|
||||
def extract_wikilinks(text: str) -> List[str]:
    """Extract wikilink targets ([[id]], [[id|label]], [[id#anchor]]) as plain IDs."""
    if not text:
        return []
    targets: List[str] = []
    for match in _WIKILINK_RE.finditer(text):
        inner = (match.group(1) or "").strip()
        if not inner:
            continue
        # Drop the alias part and any section anchor; only the note ID matters.
        inner = inner.split("|", 1)[0].strip()
        inner = inner.split("#", 1)[0].strip()
        if inner:
            targets.append(inner)
    return targets
|
||||
|
||||
|
||||
def extract_edges_with_context(parsed: ParsedNote) -> List[Dict[str, Any]]:
    """
    WP-22: Extract wikilinks [[target|kind]] from the body together with the
    1-based line number, ready for direct consumption by the ingestion.
    """
    results: List[Dict[str, Any]] = []
    if not parsed or not parsed.body:
        return results

    for line_no, line in enumerate(parsed.body.splitlines(), start=1):
        for match in _WIKILINK_RE.finditer(line):
            inner = (match.group(1) or "").strip()
            if not inner:
                continue

            # [[target|kind]] syntax; the kind falls back to 'related_to'.
            if "|" in inner:
                head, tail = inner.split("|", 1)
                target, kind = head.strip(), tail.strip()
            else:
                target, kind = inner, "related_to"

            # Anchors address sections; relations live at note (ID) level.
            target = target.split("#", 1)[0].strip()
            if target:
                results.append({
                    "to": target,
                    "kind": kind,
                    "line": line_no,
                    "provenance": "explicit",
                })
    return results
|
||||
22
app/core/parser/__init__.py
Normal file
22
app/core/parser/__init__.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
"""
|
||||
FILE: app/core/parser/__init__.py
|
||||
DESCRIPTION: Package-Einstiegspunkt für den Parser.
|
||||
Ermöglicht das Löschen der parser.py Facade.
|
||||
VERSION: 1.10.0
|
||||
"""
|
||||
from .parsing_models import ParsedNote, NoteContext
|
||||
from .parsing_utils import (
|
||||
FRONTMATTER_RE, validate_required_frontmatter,
|
||||
normalize_frontmatter, extract_wikilinks, extract_edges_with_context
|
||||
)
|
||||
from .parsing_markdown import read_markdown
|
||||
from .parsing_scanner import pre_scan_markdown
|
||||
|
||||
# Kompatibilitäts-Alias
|
||||
FRONTMATTER_END = FRONTMATTER_RE
|
||||
|
||||
__all__ = [
|
||||
"ParsedNote", "NoteContext", "FRONTMATTER_RE", "FRONTMATTER_END",
|
||||
"read_markdown", "pre_scan_markdown", "validate_required_frontmatter",
|
||||
"normalize_frontmatter", "extract_wikilinks", "extract_edges_with_context"
|
||||
]
|
||||
60
app/core/parser/parsing_markdown.py
Normal file
60
app/core/parser/parsing_markdown.py
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
"""
|
||||
FILE: app/core/parsing/parsing_markdown.py
|
||||
DESCRIPTION: Fehlertolerantes Einlesen von Markdown und Frontmatter-Splitting.
|
||||
"""
|
||||
import io
|
||||
import os
|
||||
import json
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
from .parsing_models import ParsedNote
|
||||
from .parsing_utils import FRONTMATTER_RE
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
yaml = None
|
||||
|
||||
_FALLBACK_ENCODINGS: Tuple[str, ...] = ("utf-8", "utf-8-sig", "cp1252", "latin-1")
|
||||
|
||||
def _split_frontmatter(text: str) -> Tuple[Dict[str, Any], str]:
    """Split raw text into a (frontmatter dict, body string) pair.

    Frontmatter requires '---' on line 1 and a closing '---' within the first
    2000 lines; YAML errors are logged and yield an empty dict.
    """
    lines = text.splitlines(True)
    if not lines or not FRONTMATTER_RE.match(lines[0]):
        return {}, text

    close_idx = next(
        (i for i in range(1, min(len(lines), 2000)) if FRONTMATTER_RE.match(lines[i])),
        None,
    )
    if close_idx is None:
        # Unterminated block -> everything is body.
        return {}, text

    raw_yaml = "".join(lines[1:close_idx])
    body = "".join(lines[close_idx + 1:])

    if yaml is None:
        raise RuntimeError("PyYAML not installed.")

    try:
        parsed = yaml.safe_load(raw_yaml) or {}
        meta: Dict[str, Any] = parsed if isinstance(parsed, dict) else {}
    except Exception as e:
        # Non-fatal: log and continue with empty frontmatter.
        print(json.dumps({"warn": "frontmatter_yaml_parse_failed", "error": str(e)}))
        meta = {}

    # Drop a single cosmetic leading blank line from the body.
    if body.startswith("\n"):
        body = body[1:]
    return meta, body
|
||||
|
||||
def _read_text_with_fallback(path: str) -> Tuple[str, str, bool]:
    """
    Read *path* trying each encoding in _FALLBACK_ENCODINGS in order.

    Returns (text, used_encoding, had_fallback); had_fallback is True for any
    encoding other than plain 'utf-8'. If every strict decode fails, a lossy
    UTF-8 'replace' decode is used so the function never raises.
    """
    last_err = None
    for enc in _FALLBACK_ENCODINGS:
        try:
            with io.open(path, "r", encoding=enc, errors="strict") as f:
                return f.read(), enc, (enc != "utf-8")
        except UnicodeDecodeError as e:
            last_err = str(e)
            continue
    # Last resort: lossy decode that cannot raise.
    with open(path, "rb") as fb:
        text = fb.read().decode("utf-8", errors="replace")
    # FIX: last_err was collected but never reported after the WP-14 split;
    # restore the legacy diagnostic so exhausted fallbacks remain visible.
    print(json.dumps({
        "path": path,
        "warn": "encoding_fallback_exhausted",
        "info": last_err or "unknown"
    }, ensure_ascii=False))
    return text, "utf-8(replace)", True
|
||||
|
||||
def read_markdown(path: str) -> Optional[ParsedNote]:
    """Public API: read a Markdown file fault-tolerantly; None when *path* is missing."""
    if not os.path.exists(path):
        return None
    text, enc, had_fb = _read_text_with_fallback(path)
    # FIX: 'enc'/'had_fb' were unpacked but silently discarded during the
    # WP-14 split; restore the legacy warning so operators can spot
    # mis-encoded source files.
    if had_fb:
        print(json.dumps({"path": path, "warn": "encoding_fallback_used", "used": enc}, ensure_ascii=False))
    fm, body = _split_frontmatter(text)
    return ParsedNote(frontmatter=fm or {}, body=body or "", path=path)
|
||||
22
app/core/parser/parsing_models.py
Normal file
22
app/core/parser/parsing_models.py
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
"""
|
||||
FILE: app/core/parsing/parsing_models.py
|
||||
DESCRIPTION: Datenklassen für das Parsing-System.
|
||||
"""
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List
|
||||
|
||||
@dataclass
class ParsedNote:
    """Container for a fully parsed Markdown file."""
    frontmatter: Dict[str, Any]  # parsed YAML frontmatter ({} when absent or invalid)
    body: str                    # Markdown body with the frontmatter block removed
    path: str                    # source path as passed to read_markdown()
|
||||
|
||||
@dataclass
class NoteContext:
    """Metadata container for the ephemeral LocalBatchCache (pass 1)."""
    note_id: str     # frontmatter 'id', or the file's basename as fallback
    title: str       # frontmatter 'title', defaults to note_id
    type: str        # note type, defaults to 'concept'
    summary: str     # cleaned, truncated body excerpt used for edge validation
    tags: List[str]  # frontmatter tags (empty list when missing or non-list)
|
||||
40
app/core/parser/parsing_scanner.py
Normal file
40
app/core/parser/parsing_scanner.py
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
"""
|
||||
FILE: app/core/parsing/parsing_scanner.py
|
||||
DESCRIPTION: Pre-Scan für den LocalBatchCache (Pass 1).
|
||||
AUDIT v1.1.0: Dynamisierung der Scan-Parameter (WP-14).
|
||||
"""
|
||||
import os
|
||||
import re
|
||||
from typing import Optional, Dict, Any
|
||||
from .parsing_models import NoteContext
|
||||
from .parsing_markdown import read_markdown
|
||||
|
||||
def pre_scan_markdown(path: str, registry: Optional[Dict[str, Any]] = None) -> Optional[NoteContext]:
    """
    Pass 1: extract identity and a short context snippet for edge validation.

    WP-14: scan depth and summary length are configurable via the registry's
    'summary_settings' section; sensible defaults apply otherwise.
    """
    parsed = read_markdown(path)
    if not parsed:
        return None

    # Configuration with defaults (WP-14 dynamization).
    settings = (registry or {}).get("summary_settings", {})
    scan_depth = settings.get("pre_scan_depth", 600)
    max_len = settings.get("max_summary_length", 500)

    fm = parsed.frontmatter
    # Identity: explicit frontmatter id, else the file's basename.
    fallback_id = os.path.splitext(os.path.basename(path))[0]
    note_id = str(fm.get("id") or fallback_id)

    # Strip basic Markdown markup and truncate to the configured limits.
    snippet = re.sub(r'[#*`>]', '', parsed.body[:scan_depth]).strip()
    if len(snippet) > max_len:
        snippet = snippet[:max_len] + "..."

    raw_tags = fm.get("tags")
    return NoteContext(
        note_id=note_id,
        title=str(fm.get("title", note_id)),
        type=str(fm.get("type", "concept")),
        summary=snippet,
        tags=raw_tags if isinstance(raw_tags, list) else [],
    )
|
||||
69
app/core/parser/parsing_utils.py
Normal file
69
app/core/parser/parsing_utils.py
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
"""
|
||||
FILE: app/core/parsing/parsing_utils.py
|
||||
DESCRIPTION: Werkzeuge zur Validierung, Normalisierung und Wikilink-Extraktion.
|
||||
"""
|
||||
import re
|
||||
from typing import Any, Dict, List, Tuple, Optional
|
||||
from .parsing_models import ParsedNote
|
||||
|
||||
# Öffentliche Konstanten für Abwärtskompatibilität
|
||||
FRONTMATTER_RE = re.compile(r"^\s*---\s*$")
|
||||
_WIKILINK_RE = re.compile(r"\[\[([^\]]+)\]\]")
|
||||
|
||||
def validate_required_frontmatter(fm: Dict[str, Any], required: Tuple[str, ...] = ("id", "title")) -> None:
    """Ensure all mandatory frontmatter fields are present and non-blank.

    Raises ValueError listing missing fields; also rejects a 'tags' value
    that is neither a list nor a tuple.
    """
    fm = fm or {}
    missing = []
    for key in required:
        value = fm.get(key)
        # Absent, None, or whitespace-only strings all count as missing.
        if value is None or (isinstance(value, str) and not value.strip()):
            missing.append(key)
    if missing:
        raise ValueError(f"Missing required frontmatter fields: {', '.join(missing)}")
    if "tags" in fm and fm["tags"] not in (None, "") and not isinstance(fm["tags"], (list, tuple)):
        raise ValueError("frontmatter 'tags' must be a list of strings")
|
||||
|
||||
def normalize_frontmatter(fm: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize 'tags' into a list of stripped strings; coerce flags to bool."""
    result = dict(fm or {})

    if "tags" in result:
        tags = result["tags"]
        if isinstance(tags, str):
            stripped = tags.strip()
            result["tags"] = [stripped] if stripped else []
        elif isinstance(tags, list):
            result["tags"] = [str(t).strip() for t in tags if t is not None]
        elif tags in (None, ""):
            result["tags"] = []
        else:
            # Scalar fallback: wrap the stringified value.
            result["tags"] = [str(tags).strip()]

    if "embedding_exclude" in result:
        result["embedding_exclude"] = bool(result["embedding_exclude"])
    return result
|
||||
|
||||
def extract_wikilinks(text: str) -> List[str]:
    """Extract wikilink targets as a flat list of note IDs."""
    if not text:
        return []
    targets: List[str] = []
    for match in _WIKILINK_RE.finditer(text):
        inner = (match.group(1) or "").strip()
        if not inner:
            continue
        # Drop the alias part and any section anchor; only the note ID matters.
        inner = inner.split("|", 1)[0].strip()
        inner = inner.split("#", 1)[0].strip()
        if inner:
            targets.append(inner)
    return targets
|
||||
|
||||
def extract_edges_with_context(parsed: ParsedNote) -> List[Dict[str, Any]]:
    """WP-22: Extract wikilinks plus their 1-based line numbers for the EdgeRegistry."""
    results: List[Dict[str, Any]] = []
    if not parsed or not parsed.body:
        return results

    for line_no, line in enumerate(parsed.body.splitlines(), start=1):
        for match in _WIKILINK_RE.finditer(line):
            inner = (match.group(1) or "").strip()
            if not inner:
                continue

            # [[target|kind]] syntax; the kind falls back to 'related_to'.
            if "|" in inner:
                head, tail = inner.split("|", 1)
                target, kind = head.strip(), tail.strip()
            else:
                target, kind = inner, "related_to"

            # Anchors address sections; relations live at note (ID) level.
            target = target.split("#", 1)[0].strip()
            if target:
                results.append({"to": target, "kind": kind, "line": line_no, "provenance": "explicit"})
    return results
|
||||
|
|
@ -1,157 +1,18 @@
|
|||
"""
|
||||
FILE: app/core/qdrant.py
|
||||
DESCRIPTION: Qdrant-Client Factory und Schema-Management. Erstellt Collections und Payload-Indizes.
|
||||
VERSION: 2.2.0
|
||||
STATUS: Active
|
||||
DEPENDENCIES: qdrant_client, dataclasses, os
|
||||
LAST_ANALYSIS: 2025-12-15
|
||||
DESCRIPTION: Proxy-Modul zur Aufrechterhaltung der Abwärtskompatibilität (WP-14).
|
||||
Leitet alle Aufrufe an das neue database-Paket weiter.
|
||||
STATUS: Proxy (Legacy-Support)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Tuple, Dict, List
|
||||
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.http import models as rest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Konfiguration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass
|
||||
class QdrantConfig:
|
||||
host: Optional[str] = None
|
||||
port: Optional[int] = None
|
||||
url: Optional[str] = None
|
||||
api_key: Optional[str] = None
|
||||
prefix: str = "mindnet"
|
||||
dim: int = 384
|
||||
distance: str = "Cosine" # Cosine | Dot | Euclid
|
||||
on_disk_payload: bool = True
|
||||
|
||||
@classmethod
|
||||
def from_env(cls) -> "QdrantConfig":
|
||||
# Entweder URL ODER Host/Port, API-Key optional
|
||||
url = os.getenv("QDRANT_URL") or None
|
||||
host = os.getenv("QDRANT_HOST") or None
|
||||
port = os.getenv("QDRANT_PORT")
|
||||
port = int(port) if port else None
|
||||
api_key = os.getenv("QDRANT_API_KEY") or None
|
||||
prefix = os.getenv("COLLECTION_PREFIX") or "mindnet"
|
||||
dim = int(os.getenv("VECTOR_DIM") or 384)
|
||||
distance = os.getenv("DISTANCE", "Cosine")
|
||||
on_disk_payload = (os.getenv("ON_DISK_PAYLOAD", "true").lower() == "true")
|
||||
return cls(
|
||||
host=host, port=port, url=url, api_key=api_key,
|
||||
prefix=prefix, dim=dim, distance=distance, on_disk_payload=on_disk_payload
|
||||
from .database.qdrant import (
|
||||
QdrantConfig,
|
||||
get_client,
|
||||
ensure_collections,
|
||||
ensure_payload_indexes,
|
||||
collection_names
|
||||
)
|
||||
|
||||
|
||||
def get_client(cfg: QdrantConfig) -> QdrantClient:
    """Build a QdrantClient from either a full URL or host/port settings."""
    if cfg.url:
        return QdrantClient(url=cfg.url, api_key=cfg.api_key, timeout=60.0)
    # Host/port mode with local defaults.
    return QdrantClient(
        host=cfg.host or "127.0.0.1",
        port=cfg.port or 6333,
        api_key=cfg.api_key,
        timeout=60.0,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Collections
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def collection_names(prefix: str) -> Tuple[str, str, str]:
    """Return the (notes, chunks, edges) collection names for *prefix*."""
    notes = f"{prefix}_notes"
    chunks = f"{prefix}_chunks"
    edges = f"{prefix}_edges"
    return notes, chunks, edges
|
||||
|
||||
|
||||
def _vector_params(dim: int, distance: str) -> rest.VectorParams:
    """Map a distance name ('Cosine' | 'Dot' | 'Euclid') to VectorParams."""
    # Unknown names silently fall back to cosine distance.
    resolved = getattr(rest.Distance, distance.capitalize(), rest.Distance.COSINE)
    return rest.VectorParams(size=dim, distance=resolved)
|
||||
|
||||
|
||||
def ensure_collections(client: QdrantClient, prefix: str, dim: int) -> None:
    """Create mindnet_notes, mindnet_chunks and mindnet_edges if missing."""
    notes, chunks, edges = collection_names(prefix)
    distance = os.getenv("DISTANCE", "Cosine")

    # notes and chunks share the embedding dimensionality.
    for name in (notes, chunks):
        if not client.collection_exists(name):
            client.create_collection(
                collection_name=name,
                vectors_config=_vector_params(dim, distance),
                on_disk_payload=True,
            )

    # edges only carry a dummy 1-dim vector; filtering happens via payload.
    if not client.collection_exists(edges):
        client.create_collection(
            collection_name=edges,
            vectors_config=_vector_params(1, "Dot"),
            on_disk_payload=True,
        )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Payload-Indizes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _ensure_index(client: QdrantClient, collection: str, field: str, schema: rest.PayloadSchemaType) -> None:
    """Idempotently create a payload index for one field.

    Errors (typically "already indexed") are deliberately swallowed so the
    call stays safe to repeat on every startup.
    """
    try:
        client.create_payload_index(collection_name=collection, field_name=field, field_schema=schema, wait=True)
    except Exception:
        # Duplicate-index responses are expected; add logging here if needed.
        pass
|
||||
|
||||
|
||||
def ensure_payload_indexes(client: QdrantClient, prefix: str) -> None:
    """
    Ensure that every required payload index exists.

    - notes:  note_id(KEYWORD), type(KEYWORD), title(TEXT), updated(INTEGER), tags(KEYWORD)
    - chunks: note_id(KEYWORD), chunk_id(KEYWORD), index(INTEGER), type(KEYWORD), tags(KEYWORD)
    - edges:  note_id(KEYWORD), kind(KEYWORD), scope(KEYWORD), source_id(KEYWORD),
              target_id(KEYWORD), chunk_id(KEYWORD)
    """
    notes, chunks, edges = collection_names(prefix)

    kw = rest.PayloadSchemaType.KEYWORD
    txt = rest.PayloadSchemaType.TEXT
    integer = rest.PayloadSchemaType.INTEGER

    # Data-driven index specification per collection.
    index_spec = {
        notes: [
            ("note_id", kw),
            ("type", kw),
            ("title", txt),
            ("updated", integer),
            ("tags", kw),
        ],
        chunks: [
            ("note_id", kw),
            ("chunk_id", kw),
            ("index", integer),
            ("type", kw),
            ("tags", kw),
        ],
        edges: [
            ("note_id", kw),
            ("kind", kw),
            ("scope", kw),
            ("source_id", kw),
            ("target_id", kw),
            ("chunk_id", kw),
        ],
    }

    for collection, fields in index_spec.items():
        for field_name, schema in fields:
            _ensure_index(client, collection, field_name, schema)
|
||||
|
||||
|
||||
# Re-Export für 100% Kompatibilität
|
||||
__all__ = [
|
||||
"QdrantConfig",
|
||||
"get_client",
|
||||
|
|
|
|||
|
|
@ -1,292 +1,24 @@
|
|||
"""
FILE: app/core/qdrant_points.py
DESCRIPTION: Proxy module that preserves backward compatibility (WP-14).
    Forwards point operations to the new database package.
    (Legacy role: object mapper for Qdrant — converted JSON payloads for
    notes, chunks and edges into PointStructs with deterministic UUIDs.)
VERSION: 1.5.0
STATUS: Proxy (Legacy-Support)
DEPENDENCIES: qdrant_client, uuid, os
LAST_ANALYSIS: 2025-12-15
"""
|
||||
from __future__ import annotations
|
||||
import os
|
||||
import uuid
|
||||
from typing import List, Tuple, Iterable, Optional, Dict, Any
|
||||
|
||||
from qdrant_client.http import models as rest
|
||||
from qdrant_client import QdrantClient
|
||||
|
||||
# --------------------- ID helpers ---------------------
|
||||
|
||||
def _to_uuid(stable_key: str) -> str:
|
||||
return str(uuid.uuid5(uuid.NAMESPACE_URL, stable_key))
|
||||
|
||||
def _names(prefix: str) -> Tuple[str, str, str]:
|
||||
return f"{prefix}_notes", f"{prefix}_chunks", f"{prefix}_edges"
|
||||
|
||||
# --------------------- Points builders ---------------------
|
||||
|
||||
def points_for_note(prefix: str, note_payload: dict, note_vec: List[float] | None, dim: int) -> Tuple[str, List[rest.PointStruct]]:
    """Map a note payload (plus optional embedding) to a single PointStruct.

    Falls back to a zero vector of length *dim* when no embedding is given,
    and to "missing-note-id" when the payload carries no id. Returns the
    target collection name and a one-element point list.
    """
    notes_col = _names(prefix)[0]
    if note_vec is None:
        vector = [0.0] * int(dim)
    else:
        vector = note_vec
    raw_note_id = note_payload.get("note_id") or note_payload.get("id") or "missing-note-id"
    point = rest.PointStruct(id=_to_uuid(raw_note_id), vector=vector, payload=note_payload)
    return notes_col, [point]
|
||||
|
||||
def points_for_chunks(prefix: str, chunk_payloads: List[dict], vectors: List[List[float]]) -> Tuple[str, List[rest.PointStruct]]:
    """Map chunk payloads and their embeddings to PointStructs.

    Payloads lacking a chunk_id get a synthetic "<note_id>#<index>" id
    (1-based position); the resolved id is always written back into the
    payload under "chunk_id".
    """
    chunks_col = _names(prefix)[1]
    points: List[rest.PointStruct] = []
    for position, (payload, vec) in enumerate(zip(chunk_payloads, vectors), start=1):
        cid = payload.get("chunk_id") or payload.get("id")
        if not cid:
            parent = payload.get("note_id") or payload.get("parent_note_id") or "missing-note"
            cid = f"{parent}#{position}"
        payload["chunk_id"] = cid
        points.append(rest.PointStruct(id=_to_uuid(cid), vector=vec, payload=payload))
    return chunks_col, points
|
||||
|
||||
def _normalize_edge_payload(pl: dict) -> dict:
|
||||
kind = pl.get("kind") or pl.get("edge_type") or "edge"
|
||||
source_id = pl.get("source_id") or pl.get("src_id") or "unknown-src"
|
||||
target_id = pl.get("target_id") or pl.get("dst_id") or "unknown-tgt"
|
||||
seq = pl.get("seq") or pl.get("order") or pl.get("index")
|
||||
|
||||
pl.setdefault("kind", kind)
|
||||
pl.setdefault("source_id", source_id)
|
||||
pl.setdefault("target_id", target_id)
|
||||
if seq is not None and "seq" not in pl:
|
||||
pl["seq"] = seq
|
||||
return pl
|
||||
|
||||
def points_for_edges(prefix: str, edge_payloads: List[dict]) -> Tuple[str, List[rest.PointStruct]]:
    """Map edge payloads to PointStructs for <prefix>_edges.

    Edges carry a 1-dim dummy vector ([0.0]); retrieval is payload/filter
    based. A missing edge_id is synthesized as "<kind>:<src>-><tgt>#<seq>"
    and written back into the payload.
    """
    _, _, edges_col = _names(prefix)
    points: List[rest.PointStruct] = []
    for raw in edge_payloads:
        pl = _normalize_edge_payload(raw)
        edge_id = pl.get("edge_id")
        if not edge_id:
            kind = pl.get("kind", "edge")
            s = pl.get("source_id", "unknown-src")
            t = pl.get("target_id", "unknown-tgt")
            # FIX: explicit None check keeps seq == 0 in the synthetic id
            # (previously `or ""` dropped it, risking edge_id collisions).
            seq = pl.get("seq")
            seq = "" if seq is None else seq
            edge_id = f"{kind}:{s}->{t}#{seq}"
        pl["edge_id"] = edge_id
        point_id = _to_uuid(edge_id)
        points.append(rest.PointStruct(id=point_id, vector=[0.0], payload=pl))
    return edges_col, points
|
||||
|
||||
# --------------------- Vector schema & overrides ---------------------
|
||||
|
||||
def _preferred_name(candidates: List[str]) -> str:
|
||||
for k in ("text", "default", "embedding", "content"):
|
||||
if k in candidates:
|
||||
return k
|
||||
return sorted(candidates)[0]
|
||||
|
||||
def _env_override_for_collection(collection: str) -> Optional[str]:
|
||||
"""
|
||||
Returns:
|
||||
- "__single__" to force single-vector
|
||||
- concrete name (str) to force named-vector with that name
|
||||
- None to auto-detect
|
||||
"""
|
||||
base = os.getenv("MINDNET_VECTOR_NAME")
|
||||
if collection.endswith("_notes"):
|
||||
base = os.getenv("NOTES_VECTOR_NAME", base)
|
||||
elif collection.endswith("_chunks"):
|
||||
base = os.getenv("CHUNKS_VECTOR_NAME", base)
|
||||
elif collection.endswith("_edges"):
|
||||
base = os.getenv("EDGES_VECTOR_NAME", base)
|
||||
|
||||
if not base:
|
||||
return None
|
||||
val = base.strip()
|
||||
if val.lower() in ("__single__", "single"):
|
||||
return "__single__"
|
||||
return val # concrete name
|
||||
|
||||
def _get_vector_schema(client: QdrantClient, collection_name: str) -> dict:
    """
    Return {"kind": "single", "size": int} or {"kind": "named", "names": [...], "primary": str}.

    Best-effort introspection of the collection's vector configuration.
    Any error (missing collection, unexpected client response shape) falls
    back to {"kind": "single", "size": None}, which callers treat as a
    plain single-vector collection.
    """
    try:
        info = client.get_collection(collection_name=collection_name)
        vecs = getattr(info, "vectors", None)
        # Single-vector config: the object exposes an integer .size directly.
        if hasattr(vecs, "size") and isinstance(vecs.size, int):
            return {"kind": "single", "size": vecs.size}
        # Named-vectors config (dict-like mapping name -> params in .config).
        cfg = getattr(vecs, "config", None)
        if isinstance(cfg, dict) and cfg:
            names = list(cfg.keys())
            if names:
                return {"kind": "named", "names": names, "primary": _preferred_name(names)}
    except Exception:
        # Deliberate best-effort: fall through to the single-vector default.
        pass
    return {"kind": "single", "size": None}
|
||||
|
||||
def _as_named(points: List[rest.PointStruct], name: str) -> List[rest.PointStruct]:
    """Rewrap every point's vector under the named-vector key *name*.

    Per point:
    - dict vector already containing *name*: kept as-is.
    - dict vector without *name*: an arbitrary existing entry is reused
      (or [0.0] if the dict is empty) and re-keyed under *name*.
    - plain list vector: wrapped as {name: vector}.
    - missing vector: point passed through unchanged.
    """
    out: List[rest.PointStruct] = []
    for pt in points:
        vec = getattr(pt, "vector", None)
        if isinstance(vec, dict):
            if name in vec:
                out.append(pt)
            else:
                # take any existing entry; if empty dict fallback to [0.0]
                fallback_vec = None
                try:
                    fallback_vec = list(next(iter(vec.values())))
                except Exception:
                    fallback_vec = [0.0]
                out.append(rest.PointStruct(id=pt.id, vector={name: fallback_vec}, payload=pt.payload))
        elif vec is not None:
            out.append(rest.PointStruct(id=pt.id, vector={name: vec}, payload=pt.payload))
        else:
            out.append(pt)
    return out
|
||||
|
||||
# --------------------- Qdrant ops ---------------------
|
||||
|
||||
def upsert_batch(client: QdrantClient, collection: str, points: List[rest.PointStruct]) -> None:
    """Upsert points, adapting them to the collection's vector schema.

    Resolution order:
      1. ENV override (see _env_override_for_collection) — forces single
         ("__single__") or a concrete named vector.
      2. Auto-detected schema via _get_vector_schema.
      3. Fallback: plain single-vector upsert.

    No-op for an empty point list. All upserts use wait=True (synchronous).
    """
    if not points:
        return

    # 1) ENV overrides come first
    override = _env_override_for_collection(collection)
    if override == "__single__":
        client.upsert(collection_name=collection, points=points, wait=True)
        return
    elif isinstance(override, str):
        client.upsert(collection_name=collection, points=_as_named(points, override), wait=True)
        return

    # 2) Auto-detect schema
    schema = _get_vector_schema(client, collection)
    if schema.get("kind") == "named":
        name = schema.get("primary") or _preferred_name(schema.get("names") or [])
        client.upsert(collection_name=collection, points=_as_named(points, name), wait=True)
        return

    # 3) Fallback single-vector
    client.upsert(collection_name=collection, points=points, wait=True)
|
||||
|
||||
# --- Optional search helpers ---
|
||||
|
||||
def _filter_any(field: str, values: Iterable[str]) -> rest.Filter:
    """Build an OR filter: payload[field] matches any of *values*."""
    conditions = []
    for candidate in values:
        conditions.append(rest.FieldCondition(key=field, match=rest.MatchValue(value=candidate)))
    return rest.Filter(should=conditions)
|
||||
|
||||
def _merge_filters(*filters: Optional[rest.Filter]) -> Optional[rest.Filter]:
    """AND-combine several optional filters into one.

    None entries are dropped; a single survivor is returned unchanged.
    For multiple filters, their `must` clauses are concatenated and each
    `should` group is wrapped in a nested Filter so its OR semantics are
    preserved inside the outer AND.
    """
    fs = [f for f in filters if f is not None]
    if not fs:
        return None
    if len(fs) == 1:
        return fs[0]
    must = []
    for f in fs:
        if getattr(f, "must", None):
            must.extend(f.must)
        if getattr(f, "should", None):
            # Keep OR groups intact by nesting them as a sub-filter.
            must.append(rest.Filter(should=f.should))
    return rest.Filter(must=must)
|
||||
|
||||
def _filter_from_dict(filters: Optional[Dict[str, Any]]) -> Optional[rest.Filter]:
    """Translate a plain {field: value | [values]} mapping into a Qdrant Filter.

    List/tuple/set values become OR conditions (values coerced to str);
    scalar values become exact-match MUST conditions. Returns None for an
    empty or None mapping.
    """
    if not filters:
        return None
    parts = []
    for k, v in filters.items():
        if isinstance(v, (list, tuple, set)):
            parts.append(_filter_any(k, [str(x) for x in v]))
        else:
            parts.append(rest.Filter(must=[rest.FieldCondition(key=k, match=rest.MatchValue(value=v))]))
    return _merge_filters(*parts)
|
||||
|
||||
def search_chunks_by_vector(client: QdrantClient, prefix: str, vector: List[float], top: int = 10, filters: Optional[Dict[str, Any]] = None) -> List[Tuple[str, float, dict]]:
    """Vector search over the <prefix>_chunks collection.

    Args:
        client: Qdrant client.
        prefix: Collection prefix.
        vector: Query embedding.
        top: Maximum number of hits.
        filters: Optional {field: value | [values]} payload filter
            (see _filter_from_dict).

    Returns:
        List of (point_id, score, payload) tuples; vectors are not returned.
    """
    _, chunks_col, _ = _names(prefix)
    flt = _filter_from_dict(filters)
    res = client.search(collection_name=chunks_col, query_vector=vector, limit=top, with_payload=True, with_vectors=False, query_filter=flt)
    out: List[Tuple[str, float, dict]] = []
    for r in res:
        out.append((str(r.id), float(r.score), dict(r.payload or {})))
    return out
|
||||
|
||||
|
||||
# --- Edge retrieval helper ---
|
||||
|
||||
def get_edges_for_sources(
    client: QdrantClient,
    prefix: str,
    source_ids: Iterable[str],
    edge_types: Optional[Iterable[str]] = None,
    limit: int = 2048,
) -> List[Dict[str, Any]]:
    """Retrieve edge payloads from the <prefix>_edges collection.

    Args:
        client: QdrantClient instance.
        prefix: Mindnet collection prefix (e.g. "mindnet").
        source_ids: Iterable of source_id values (typically chunk_ids or note_ids).
        edge_types: Optional iterable of edge kinds (e.g. ["references", "depends_on"]).
            If None, all kinds are returned.
        limit: Maximum number of edge payloads to return.

    Returns:
        A list of edge payload dicts, e.g.:
        {
            "note_id": "...",
            "chunk_id": "...",
            "kind": "references" | "depends_on" | ...,
            "scope": "chunk",
            "source_id": "...",
            "target_id": "...",
            "rule_id": "...",
            "confidence": 0.7,
            ...
        }
    """
    source_ids = list(source_ids)
    if not source_ids or limit <= 0:
        return []

    # Resolve collection name
    _, _, edges_col = _names(prefix)

    # Build filter: source_id IN source_ids
    src_filter = _filter_any("source_id", [str(s) for s in source_ids])

    # Optional: kind IN edge_types
    kind_filter = None
    if edge_types:
        kind_filter = _filter_any("kind", [str(k) for k in edge_types])

    flt = _merge_filters(src_filter, kind_filter)

    out: List[Dict[str, Any]] = []
    next_page = None
    remaining = int(limit)

    # Use paginated scroll API; we don't need vectors, only payloads.
    while remaining > 0:
        batch_limit = min(256, remaining)
        # FIX: the scroll() call was left unterminated by a botched merge —
        # an unrelated import block had been spliced into its argument list.
        res, next_page = client.scroll(
            collection_name=edges_col,
            scroll_filter=flt,
            limit=batch_limit,
            with_payload=True,
            with_vectors=False,
            offset=next_page,
        )

        if not res:
            break

        for r in res:
            out.append(dict(r.payload or {}))
            remaining -= 1
            if remaining <= 0:
                break

        if next_page is None or remaining <= 0:
            break

    return out
|
||||
# Re-export list for 100% backward compatibility.
__all__ = [
    "points_for_note",
    "points_for_chunks",
    "points_for_edges",
    "upsert_batch",
    "get_edges_for_sources",
    "search_chunks_by_vector",
]
|
||||
43
app/core/registry.py
Normal file
43
app/core/registry.py
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
"""
|
||||
FILE: app/core/registry.py
|
||||
DESCRIPTION: Zentraler Base-Layer für Konfigurations-Loading und Text-Bereinigung.
|
||||
Bricht Zirkelbezüge zwischen Ingestion und LLMService auf.
|
||||
VERSION: 1.0.0
|
||||
"""
|
||||
import os
|
||||
import yaml
|
||||
from typing import Optional, List
|
||||
|
||||
def load_type_registry(custom_path: Optional[str] = None) -> dict:
    """Load types.yaml, which drives the type-specific logic.

    Returns an empty dict when the file is missing, unreadable, or empty
    (best-effort semantics).
    """
    # Settings are imported lazily right here to avoid circular imports.
    from app.config import get_settings

    registry_path = custom_path if custom_path else get_settings().MINDNET_TYPES_FILE
    if not os.path.exists(registry_path):
        return {}
    try:
        with open(registry_path, "r", encoding="utf-8") as handle:
            loaded = yaml.safe_load(handle)
    except Exception:
        return {}
    return loaded if loaded else {}
|
||||
|
||||
def clean_llm_text(text: str, registry: Optional[dict] = None) -> str:
    """
    Strip LLM control tokens (<s>, [OUT], etc.) from *text*.

    Used both for JSON parsing and for chat answers. The pattern list comes
    from the registry's llm_settings.cleanup_patterns (WP-14); when absent,
    a built-in default set applies. Non-string or empty input yields "".
    Note: a falsy registry (None or {}) triggers a fresh registry load.
    """
    if not text or not isinstance(text, str):
        return ""

    fallback_patterns = ["<s>", "</s>", "[OUT]", "[/OUT]"]
    active_registry = registry if registry else load_type_registry()

    # Patterns configured under llm_settings (WP-14), else the defaults.
    patterns: List[str] = active_registry.get("llm_settings", {}).get("cleanup_patterns", fallback_patterns)

    result = text
    for token in patterns:
        result = result.replace(token, "")
    return result.strip()
|
||||
25
app/core/retrieval/__init__.py
Normal file
25
app/core/retrieval/__init__.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
"""
|
||||
PACKAGE: app.core.retrieval
|
||||
DESCRIPTION: Zentrale Schnittstelle für Retrieval-Operationen (Vektor- & Graph-Suche).
|
||||
Bündelt Suche und mathematische Scoring-Engine.
|
||||
"""
|
||||
from .retriever import (
|
||||
Retriever,
|
||||
hybrid_retrieve,
|
||||
semantic_retrieve
|
||||
)
|
||||
|
||||
from .retriever_scoring import (
|
||||
get_weights,
|
||||
compute_wp22_score,
|
||||
get_status_multiplier
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"Retriever",
|
||||
"hybrid_retrieve",
|
||||
"semantic_retrieve",
|
||||
"get_weights",
|
||||
"compute_wp22_score",
|
||||
"get_status_multiplier"
|
||||
]
|
||||
312
app/core/retrieval/retriever.py
Normal file
312
app/core/retrieval/retriever.py
Normal file
|
|
@ -0,0 +1,312 @@
|
|||
"""
|
||||
FILE: app/core/retrieval/retriever.py
|
||||
DESCRIPTION: Haupt-Schnittstelle für die Suche. Orchestriert Vektorsuche und Graph-Expansion.
|
||||
Nutzt retriever_scoring.py für die WP-22 Logik.
|
||||
MODULARISIERUNG: Verschoben in das retrieval-Paket für WP-14.
|
||||
VERSION: 0.6.16
|
||||
STATUS: Active
|
||||
DEPENDENCIES: app.config, app.models.dto, app.core.database*, app.core.graph_adapter
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
from typing import Any, Dict, List, Tuple, Iterable, Optional
|
||||
|
||||
from app.config import get_settings
|
||||
from app.models.dto import (
|
||||
QueryRequest, QueryResponse, QueryHit,
|
||||
Explanation, ScoreBreakdown, Reason, EdgeDTO
|
||||
)
|
||||
|
||||
# MODULARISIERUNG: Neue Import-Pfade für die Datenbank-Ebene
|
||||
import app.core.database.qdrant as qdr
|
||||
import app.core.database.qdrant_points as qp
|
||||
|
||||
import app.services.embeddings_client as ec
|
||||
import app.core.graph_adapter as ga
|
||||
|
||||
# Mathematische Engine importieren (Bleibt vorerst in app.core)
|
||||
from app.core.retriever_scoring import get_weights, compute_wp22_score
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ==============================================================================
|
||||
# 1. CORE HELPERS & CONFIG LOADERS
|
||||
# ==============================================================================
|
||||
|
||||
def _get_client_and_prefix() -> Tuple[Any, str]:
    """Build a Qdrant client from the environment and return it with the collection prefix."""
    config = qdr.QdrantConfig.from_env()
    client = qdr.get_client(config)
    return client, config.prefix
|
||||
|
||||
|
||||
def _get_query_vector(req: QueryRequest) -> List[float]:
    """
    Vectorize the request.

    Prefers an explicitly supplied query_vector; otherwise embeds req.query
    via the embeddings client.
    FIX: contains a try-except block for differing signatures of ec.embed_text.

    Raises:
        ValueError: when neither query text nor a query vector is provided.
    """
    if req.query_vector:
        return list(req.query_vector)
    if not req.query:
        raise ValueError("Kein Text oder Vektor für die Suche angegeben.")

    settings = get_settings()

    try:
        # Attempt the modern interface (WP-03 compatible).
        return ec.embed_text(req.query, model_name=settings.MODEL_NAME)
    except TypeError:
        # Fallback for signatures that do not accept 'model_name' as a keyword.
        logger.debug("ec.embed_text does not accept 'model_name' keyword. Falling back.")
        return ec.embed_text(req.query)
|
||||
|
||||
|
||||
def _semantic_hits(
    client: Any,
    prefix: str,
    vector: List[float],
    top_k: int,
    filters: Optional[Dict] = None
) -> List[Tuple[str, float, Dict[str, Any]]]:
    """Run the vector search through the database points module.

    Results are strictly normalized to (str id, float score, dict payload)
    tuples for downstream stability.
    """
    raw_hits = qp.search_chunks_by_vector(client, prefix, vector, top=top_k, filters=filters)
    normalized: List[Tuple[str, float, Dict[str, Any]]] = []
    for point_id, score, payload in raw_hits:
        normalized.append((str(point_id), float(score), dict(payload or {})))
    return normalized
|
||||
|
||||
# ==============================================================================
|
||||
# 2. EXPLANATION LAYER (DEBUG & VERIFIABILITY)
|
||||
# ==============================================================================
|
||||
|
||||
def _build_explanation(
    semantic_score: float,
    payload: Dict[str, Any],
    scoring_debug: Dict[str, Any],
    subgraph: Optional[ga.Subgraph],
    target_note_id: Optional[str],
    applied_boosts: Optional[Dict[str, float]] = None
) -> Explanation:
    """
    Transform mathematical scores and graph signals into a human-readable explanation.

    Avoids Pydantic ValidationErrors by forcing string conversion on edge fields.

    Args:
        semantic_score: Raw similarity score of the hit.
        payload: Chunk payload (reads retriever_weight, among others).
        scoring_debug: Intermediate values produced by compute_wp22_score
            (base_val, edge_impact_final, cent_impact_final, edge_bonus,
            cent_bonus, status_multiplier, graph_boost_factor).
        subgraph: Optional expanded subgraph used to collect related edges.
        target_note_id: Note id of the hit within the subgraph.
        applied_boosts: Optional {edge_kind: multiplier} injected by the router.
    """
    _, edge_w_cfg, _ = get_weights()
    base_val = scoring_debug["base_val"]

    # 1. Detailed mathematical breakdown of the final score.
    breakdown = ScoreBreakdown(
        semantic_contribution=base_val,
        edge_contribution=base_val * scoring_debug["edge_impact_final"],
        centrality_contribution=base_val * scoring_debug["cent_impact_final"],
        raw_semantic=semantic_score,
        raw_edge_bonus=scoring_debug["edge_bonus"],
        raw_centrality=scoring_debug["cent_bonus"],
        node_weight=float(payload.get("retriever_weight", 1.0)),
        status_multiplier=scoring_debug["status_multiplier"],
        graph_boost_factor=scoring_debug["graph_boost_factor"]
    )

    reasons: List[Reason] = []
    edges_dto: List[EdgeDTO] = []

    # 2. Reasons derived from semantic similarity (thresholds 0.85 / 0.70).
    if semantic_score > 0.85:
        reasons.append(Reason(kind="semantic", message="Sehr hohe textuelle Übereinstimmung.", score_impact=base_val))
    elif semantic_score > 0.70:
        reasons.append(Reason(kind="semantic", message="Inhaltliche Übereinstimmung.", score_impact=base_val))

    # 3. Reasons derived from the type profile (retriever_weight != 1.0).
    type_weight = float(payload.get("retriever_weight", 1.0))
    if type_weight != 1.0:
        msg = "Bevorzugt" if type_weight > 1.0 else "De-priorisiert"
        reasons.append(Reason(kind="type", message=f"{msg} durch Typ-Profil.", score_impact=base_val * (type_weight - 1.0)))

    # 4. Edge processing (graph intelligence): collect incoming and outgoing
    #    edges of the target note, if the subgraph exposes those accessors.
    if subgraph and target_note_id and scoring_debug["edge_bonus"] > 0:
        raw_edges = []
        if hasattr(subgraph, "get_incoming_edges"):
            raw_edges.extend(subgraph.get_incoming_edges(target_note_id) or [])
        if hasattr(subgraph, "get_outgoing_edges"):
            raw_edges.extend(subgraph.get_outgoing_edges(target_note_id) or [])

        for edge in raw_edges:
            # FIX: forced string conversion for Pydantic stability.
            src = str(edge.get("source") or "note_root")
            tgt = str(edge.get("target") or target_note_id or "unknown_target")
            kind = str(edge.get("kind", "related_to"))
            prov = str(edge.get("provenance", "rule"))
            conf = float(edge.get("confidence", 1.0))

            # Direction relative to the target note of this hit.
            direction = "in" if tgt == target_note_id else "out"

            edge_obj = EdgeDTO(
                id=f"{src}->{tgt}:{kind}",
                kind=kind,
                source=src,
                target=tgt,
                weight=conf,
                direction=direction,
                provenance=prov,
                confidence=conf
            )
            edges_dto.append(edge_obj)

        # Phrase the three most confident edges as human-readable reasons.
        top_edges = sorted(edges_dto, key=lambda e: e.confidence, reverse=True)
        for e in top_edges[:3]:
            peer = e.source if e.direction == "in" else e.target
            prov_txt = "Bestätigte" if e.provenance == "explicit" else "KI-basierte"
            boost_txt = f" [Boost x{applied_boosts.get(e.kind)}]" if applied_boosts and e.kind in applied_boosts else ""

            reasons.append(Reason(
                kind="edge",
                message=f"{prov_txt} Kante '{e.kind}'{boost_txt} von/zu '{peer}'.",
                score_impact=edge_w_cfg * e.confidence
            ))

    # Centrality only surfaces as a reason above a small noise threshold.
    if scoring_debug["cent_bonus"] > 0.01:
        reasons.append(Reason(kind="centrality", message="Die Notiz ist ein zentraler Informations-Hub.", score_impact=breakdown.centrality_contribution))

    return Explanation(
        breakdown=breakdown,
        reasons=reasons,
        related_edges=edges_dto if edges_dto else None,
        applied_boosts=applied_boosts
    )
|
||||
|
||||
# ==============================================================================
|
||||
# 3. CORE RETRIEVAL PIPELINE
|
||||
# ==============================================================================
|
||||
|
||||
def _build_hits_from_semantic(
    hits: Iterable[Tuple[str, float, Dict[str, Any]]],
    top_k: int,
    used_mode: str,
    subgraph: ga.Subgraph | None = None,
    explain: bool = False,
    dynamic_edge_boosts: Optional[Dict[str, float]] = None
) -> QueryResponse:
    """Convert raw semantic hits into scored QueryHits.

    Applies the WP-22 scoring engine per hit (optionally enriched with
    edge/centrality bonuses from *subgraph*), sorts by the final score,
    truncates to *top_k*, and optionally attaches an Explanation per hit.

    Returns a QueryResponse carrying the hits, the mode label and the
    elapsed latency in milliseconds.
    """
    t0 = time.time()
    enriched = []

    for pid, semantic_score, payload in hits:
        edge_bonus, cent_bonus = 0.0, 0.0
        target_id = payload.get("note_id")

        # Graph bonuses are best-effort; a failing subgraph lookup must not
        # break the whole result list.
        if subgraph and target_id:
            try:
                edge_bonus = float(subgraph.edge_bonus(target_id))
                cent_bonus = float(subgraph.centrality_bonus(target_id))
            except Exception:
                pass

        # Mathematical scoring via the WP-22 engine.
        debug_data = compute_wp22_score(
            semantic_score, payload, edge_bonus, cent_bonus, dynamic_edge_boosts
        )
        enriched.append((pid, semantic_score, payload, debug_data))

    # Sort by the final mathematical score; always keep at least one hit.
    enriched_sorted = sorted(enriched, key=lambda h: h[3]["total"], reverse=True)
    limited_hits = enriched_sorted[: max(1, top_k)]

    results: List[QueryHit] = []
    for pid, s_score, pl, dbg in limited_hits:
        explanation_obj = None
        if explain:
            explanation_obj = _build_explanation(
                semantic_score=float(s_score),
                payload=pl,
                scoring_debug=dbg,
                subgraph=subgraph,
                target_note_id=pl.get("note_id"),
                applied_boosts=dynamic_edge_boosts
            )

        # Normalize the payload's text field across known key variants.
        text_content = pl.get("page_content") or pl.get("text") or pl.get("content", "[Kein Text]")

        results.append(QueryHit(
            node_id=str(pid),
            note_id=str(pl.get("note_id", "unknown")),
            semantic_score=float(s_score),
            edge_bonus=dbg["edge_bonus"],
            centrality_bonus=dbg["cent_bonus"],
            total_score=dbg["total"],
            source={
                "path": pl.get("path"),
                "section": pl.get("section") or pl.get("section_title"),
                "text": text_content
            },
            payload=pl,
            explanation=explanation_obj
        ))

    return QueryResponse(results=results, used_mode=used_mode, latency_ms=int((time.time() - t0) * 1000))
|
||||
|
||||
|
||||
def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
    """
    Main entry point for hybrid search.

    Combines vector search with graph expansion and WP-22 weighting:
    seed hits come from the vector index, their note ids seed a subgraph
    expansion, and edge weights are adjusted by provenance and intent
    boosts before the final scoring pass.
    """
    client, prefix = _get_client_and_prefix()
    vector = list(req.query_vector) if req.query_vector else _get_query_vector(req)
    top_k = req.top_k or 10

    # 1. Semantic seed search
    hits = _semantic_hits(client, prefix, vector, top_k=top_k, filters=req.filters)

    # 2. Graph expansion configuration
    expand_cfg = req.expand if isinstance(req.expand, dict) else {}
    depth = int(expand_cfg.get("depth", 1))
    boost_edges = getattr(req, "boost_edges", {}) or {}

    subgraph: ga.Subgraph | None = None
    if depth > 0 and hits:
        # Collect start ids for the graph traversal (deduplicated via set).
        seed_ids = list({h[2].get("note_id") for h in hits if h[2].get("note_id")})

        if seed_ids:
            try:
                # Load the subgraph from RAM/DB.
                subgraph = ga.expand(client, prefix, seed_ids, depth=depth, edge_types=expand_cfg.get("edge_types"))

                # --- WP-22: weight edges in the in-memory graph before the
                # bonus computation ---
                if subgraph and hasattr(subgraph, "graph"):
                    for _, _, data in subgraph.graph.edges(data=True):
                        # A. Provenance weighting (WP-22 bonus for origin):
                        # explicit links (1.0) > smart (0.9) > rule (0.7).
                        prov = data.get("provenance", "rule")
                        prov_w = 1.0 if prov == "explicit" else (0.9 if prov == "smart" else 0.7)

                        # B. Intent boost multiplier (injected dynamically by
                        # the router via req.boost_edges).
                        kind = data.get("kind")
                        intent_multiplier = boost_edges.get(kind, 1.0)

                        # Final weight = base * provenance * intent.
                        data["weight"] = data.get("weight", 1.0) * prov_w * intent_multiplier

            except Exception as e:
                # Graph failures degrade gracefully to pure semantic ranking.
                logger.error(f"Graph Expansion failed: {e}")
                subgraph = None

    # 3. Scoring & explanation generation
    return _build_hits_from_semantic(hits, top_k, "hybrid", subgraph, req.explain, boost_edges)
|
||||
|
||||
|
||||
def semantic_retrieve(req: QueryRequest) -> QueryResponse:
    """Plain vector search without graph influence (WP-02 fallback)."""
    client, prefix = _get_client_and_prefix()
    limit = req.top_k or 10
    query_vec = _get_query_vector(req)
    semantic = _semantic_hits(client, prefix, query_vec, limit, req.filters)
    return _build_hits_from_semantic(semantic, limit, "semantic", explain=req.explain)
|
||||
|
||||
|
||||
class Retriever:
    """Async-facing facade over the hybrid retrieval pipeline."""

    async def search(self, request: QueryRequest) -> QueryResponse:
        """Run a hybrid (vector + graph) search for *request*."""
        response = hybrid_retrieve(request)
        return response
|
||||
121
app/core/retrieval/retriever_scoring.py
Normal file
121
app/core/retrieval/retriever_scoring.py
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
"""
|
||||
FILE: app/core/retrieval/retriever_scoring.py
|
||||
DESCRIPTION: Mathematische Kern-Logik für das WP-22 Scoring.
|
||||
Berechnet Relevanz-Scores basierend auf Semantik, Graph-Intelligence und Content Lifecycle.
|
||||
MODULARISIERUNG: Verschoben in das retrieval-Paket für WP-14.
|
||||
VERSION: 1.0.2
|
||||
STATUS: Active
|
||||
DEPENDENCIES: app.config, typing
|
||||
"""
|
||||
import os
|
||||
import logging
|
||||
from functools import lru_cache
|
||||
from typing import Any, Dict, Tuple, Optional
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
yaml = None
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@lru_cache
def get_weights() -> Tuple[float, float, float]:
    """
    Return the base weighting (semantic, edge, centrality) from configuration.

    Priority:
        1. config/retriever.yaml (scoring section)
        2. environment variables (RETRIEVER_W_*) via Settings
        3. system defaults (1.0, 0.0, 0.0)

    Cached for the process lifetime via lru_cache — configuration changes
    require a restart to take effect.
    """
    from app.config import get_settings
    settings = get_settings()

    # Load defaults from settings (which themselves read RETRIEVER_W_* env vars).
    sem = float(getattr(settings, "RETRIEVER_W_SEM", 1.0))
    edge = float(getattr(settings, "RETRIEVER_W_EDGE", 0.0))
    cent = float(getattr(settings, "RETRIEVER_W_CENT", 0.0))

    # Optional override via YAML; skipped when PyYAML is unavailable
    # (yaml is None) or the file does not exist.
    config_path = os.getenv("MINDNET_RETRIEVER_CONFIG", "config/retriever.yaml")
    if yaml and os.path.exists(config_path):
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
            scoring = data.get("scoring", {})
            sem = float(scoring.get("semantic_weight", sem))
            edge = float(scoring.get("edge_weight", edge))
            cent = float(scoring.get("centrality_weight", cent))
        except Exception as e:
            # Best-effort: a broken YAML falls back to settings/env defaults.
            logger.warning(f"Retriever Configuration could not be fully loaded from {config_path}: {e}")

    return sem, edge, cent
|
||||
|
||||
def get_status_multiplier(payload: Dict[str, Any]) -> float:
    """
    WP-22 A: content-lifecycle multiplier.

    Ranks information by maturity:
      - stable -> 1.2 (reward for verified knowledge)
      - active -> 1.0 (default weighting; also any unknown status)
      - draft  -> 0.5 (penalty for unfinished fragments)
    """
    lifecycle = str(payload.get("status", "active")).lower().strip()
    multipliers = {"stable": 1.2, "draft": 0.5}
    return multipliers.get(lifecycle, 1.0)
|
||||
|
||||
def compute_wp22_score(
|
||||
semantic_score: float,
|
||||
payload: Dict[str, Any],
|
||||
edge_bonus_raw: float = 0.0,
|
||||
cent_bonus_raw: float = 0.0,
|
||||
dynamic_edge_boosts: Optional[Dict[str, float]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Die zentrale mathematische Scoring-Formel der Mindnet Intelligence.
|
||||
Implementiert das WP-22 Hybrid-Scoring (Semantic * Lifecycle * Graph).
|
||||
|
||||
FORMEL:
|
||||
Score = (Similarity * StatusMult) * (1 + (TypeWeight - 1) + ((EdgeW * EB + CentW * CB) * IntentBoost))
|
||||
|
||||
Returns:
|
||||
Dict mit dem finalen 'total' Score und allen mathematischen Zwischenwerten für den Explanation Layer.
|
||||
"""
|
||||
sem_w, edge_w_cfg, cent_w_cfg = get_weights()
|
||||
status_mult = get_status_multiplier(payload)
|
||||
|
||||
# Retriever Weight (Type Boost aus types.yaml, z.B. 1.1 für Decisions)
|
||||
node_weight = float(payload.get("retriever_weight", 1.0))
|
||||
|
||||
# 1. Berechnung des Base Scores (Semantik gewichtet durch Lifecycle-Status)
|
||||
base_val = float(semantic_score) * status_mult
|
||||
|
||||
# 2. Graph Boost Factor (Teil C: Intent-spezifische Verstärkung)
|
||||
# Erhöht das Gewicht des gesamten Graphen um 50%, wenn ein spezifischer Intent vorliegt.
|
||||
graph_boost_factor = 1.5 if dynamic_edge_boosts and (edge_bonus_raw > 0 or cent_bonus_raw > 0) else 1.0
|
||||
|
||||
# 3. Einzelne Graph-Komponenten berechnen
|
||||
edge_impact_final = (edge_w_cfg * edge_bonus_raw) * graph_boost_factor
|
||||
cent_impact_final = (cent_w_cfg * cent_bonus_raw) * graph_boost_factor
|
||||
|
||||
# 4. Finales Zusammenführen (Merging)
|
||||
# (node_weight - 1.0) sorgt dafür, dass ein Gewicht von 1.0 keinen Einfluss hat (neutral).
|
||||
total = base_val * (1.0 + (node_weight - 1.0) + edge_impact_final + cent_impact_final)
|
||||
|
||||
# Sicherstellen, dass der Score niemals 0 oder negativ ist (Floor)
|
||||
final_score = max(0.0001, float(total))
|
||||
|
||||
return {
|
||||
"total": final_score,
|
||||
"edge_bonus": float(edge_bonus_raw),
|
||||
"cent_bonus": float(cent_bonus_raw),
|
||||
"status_multiplier": status_mult,
|
||||
"graph_boost_factor": graph_boost_factor,
|
||||
"type_impact": node_weight - 1.0,
|
||||
"base_val": base_val,
|
||||
"edge_impact_final": edge_impact_final,
|
||||
"cent_impact_final": cent_impact_final
|
||||
}
|
||||
|
|
@ -1,310 +1,14 @@
|
|||
"""
|
||||
FILE: app/core/retriever.py
|
||||
DESCRIPTION: Haupt-Schnittstelle für die Suche. Orchestriert Vektorsuche und Graph-Expansion.
|
||||
Nutzt retriever_scoring.py für die WP-22 Logik.
|
||||
FIX: TypeError in embed_text (model_name) behoben.
|
||||
FIX: Pydantic ValidationError (Target/Source) behoben.
|
||||
VERSION: 0.6.15 (WP-22 Full & Stable)
|
||||
STATUS: Active
|
||||
DEPENDENCIES: app.config, app.models.dto, app.core.qdrant*, app.core.graph_adapter, app.core.retriever_scoring
|
||||
DESCRIPTION: Proxy-Modul zur Aufrechterhaltung der Abwärtskompatibilität (WP-14).
|
||||
Leitet Retrieval-Anfragen an das neue retrieval-Paket weiter.
|
||||
STATUS: Proxy (Legacy-Support)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
import logging
|
||||
from typing import Any, Dict, List, Tuple, Iterable, Optional
|
||||
|
||||
from app.config import get_settings
|
||||
from app.models.dto import (
|
||||
QueryRequest, QueryResponse, QueryHit,
|
||||
Explanation, ScoreBreakdown, Reason, EdgeDTO
|
||||
)
|
||||
import app.core.qdrant as qdr
|
||||
import app.core.qdrant_points as qp
|
||||
import app.services.embeddings_client as ec
|
||||
import app.core.graph_adapter as ga
|
||||
|
||||
# Mathematische Engine importieren
|
||||
from app.core.retriever_scoring import get_weights, compute_wp22_score
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ==============================================================================
|
||||
# 1. CORE HELPERS & CONFIG LOADERS
|
||||
# ==============================================================================
|
||||
|
||||
def _get_client_and_prefix() -> Tuple[Any, str]:
|
||||
"""Initialisiert Qdrant Client und lädt Collection-Prefix."""
|
||||
cfg = qdr.QdrantConfig.from_env()
|
||||
return qdr.get_client(cfg), cfg.prefix
|
||||
|
||||
|
||||
def _get_query_vector(req: QueryRequest) -> List[float]:
|
||||
"""
|
||||
Vektorisiert die Anfrage.
|
||||
FIX: Enthält try-except Block für unterschiedliche Signaturen von ec.embed_text.
|
||||
"""
|
||||
if req.query_vector:
|
||||
return list(req.query_vector)
|
||||
if not req.query:
|
||||
raise ValueError("Kein Text oder Vektor für die Suche angegeben.")
|
||||
|
||||
settings = get_settings()
|
||||
|
||||
try:
|
||||
# Versuch mit modernem Interface (WP-03 kompatibel)
|
||||
return ec.embed_text(req.query, model_name=settings.MODEL_NAME)
|
||||
except TypeError:
|
||||
# Fallback für Signaturen, die 'model_name' nicht als Keyword akzeptieren
|
||||
logger.debug("ec.embed_text does not accept 'model_name' keyword. Falling back.")
|
||||
return ec.embed_text(req.query)
|
||||
|
||||
|
||||
def _semantic_hits(
|
||||
client: Any,
|
||||
prefix: str,
|
||||
vector: List[float],
|
||||
top_k: int,
|
||||
filters: Optional[Dict] = None
|
||||
) -> List[Tuple[str, float, Dict[str, Any]]]:
|
||||
"""Führt die Vektorsuche durch und konvertiert Qdrant-Points in ein einheitliches Format."""
|
||||
raw_hits = qp.search_chunks_by_vector(client, prefix, vector, top=top_k, filters=filters)
|
||||
# Strikte Typkonvertierung für Stabilität
|
||||
return [(str(hit[0]), float(hit[1]), dict(hit[2] or {})) for hit in raw_hits]
|
||||
|
||||
# ==============================================================================
|
||||
# 2. EXPLANATION LAYER (DEBUG & VERIFIABILITY)
|
||||
# ==============================================================================
|
||||
|
||||
def _build_explanation(
|
||||
semantic_score: float,
|
||||
payload: Dict[str, Any],
|
||||
scoring_debug: Dict[str, Any],
|
||||
subgraph: Optional[ga.Subgraph],
|
||||
target_note_id: Optional[str],
|
||||
applied_boosts: Optional[Dict[str, float]] = None
|
||||
) -> Explanation:
|
||||
"""
|
||||
Transformiert mathematische Scores und Graph-Signale in eine menschenlesbare Erklärung.
|
||||
Behebt Pydantic ValidationErrors durch explizite String-Sicherung.
|
||||
"""
|
||||
_, edge_w_cfg, _ = get_weights()
|
||||
base_val = scoring_debug["base_val"]
|
||||
|
||||
# 1. Detaillierter mathematischer Breakdown
|
||||
breakdown = ScoreBreakdown(
|
||||
semantic_contribution=base_val,
|
||||
edge_contribution=base_val * scoring_debug["edge_impact_final"],
|
||||
centrality_contribution=base_val * scoring_debug["cent_impact_final"],
|
||||
raw_semantic=semantic_score,
|
||||
raw_edge_bonus=scoring_debug["edge_bonus"],
|
||||
raw_centrality=scoring_debug["cent_bonus"],
|
||||
node_weight=float(payload.get("retriever_weight", 1.0)),
|
||||
status_multiplier=scoring_debug["status_multiplier"],
|
||||
graph_boost_factor=scoring_debug["graph_boost_factor"]
|
||||
from .retrieval.retriever import (
|
||||
Retriever,
|
||||
hybrid_retrieve,
|
||||
semantic_retrieve
|
||||
)
|
||||
|
||||
reasons: List[Reason] = []
|
||||
edges_dto: List[EdgeDTO] = []
|
||||
|
||||
# 2. Gründe für Semantik hinzufügen
|
||||
if semantic_score > 0.85:
|
||||
reasons.append(Reason(kind="semantic", message="Sehr hohe textuelle Übereinstimmung.", score_impact=base_val))
|
||||
elif semantic_score > 0.70:
|
||||
reasons.append(Reason(kind="semantic", message="Inhaltliche Übereinstimmung.", score_impact=base_val))
|
||||
|
||||
# 3. Gründe für Typ und Lifecycle
|
||||
type_weight = float(payload.get("retriever_weight", 1.0))
|
||||
if type_weight != 1.0:
|
||||
msg = "Bevorzugt" if type_weight > 1.0 else "De-priorisiert"
|
||||
reasons.append(Reason(kind="type", message=f"{msg} durch Typ-Profil.", score_impact=base_val * (type_weight - 1.0)))
|
||||
|
||||
# 4. Kanten-Verarbeitung (Graph-Intelligence)
|
||||
if subgraph and target_note_id and scoring_debug["edge_bonus"] > 0:
|
||||
raw_edges = []
|
||||
if hasattr(subgraph, "get_incoming_edges"):
|
||||
raw_edges.extend(subgraph.get_incoming_edges(target_note_id) or [])
|
||||
if hasattr(subgraph, "get_outgoing_edges"):
|
||||
raw_edges.extend(subgraph.get_outgoing_edges(target_note_id) or [])
|
||||
|
||||
for edge in raw_edges:
|
||||
# FIX: Zwingende String-Konvertierung für Pydantic-Stabilität
|
||||
src = str(edge.get("source") or "note_root")
|
||||
tgt = str(edge.get("target") or target_note_id or "unknown_target")
|
||||
kind = str(edge.get("kind", "related_to"))
|
||||
prov = str(edge.get("provenance", "rule"))
|
||||
conf = float(edge.get("confidence", 1.0))
|
||||
|
||||
direction = "in" if tgt == target_note_id else "out"
|
||||
|
||||
edge_obj = EdgeDTO(
|
||||
id=f"{src}->{tgt}:{kind}",
|
||||
kind=kind,
|
||||
source=src,
|
||||
target=tgt,
|
||||
weight=conf,
|
||||
direction=direction,
|
||||
provenance=prov,
|
||||
confidence=conf
|
||||
)
|
||||
edges_dto.append(edge_obj)
|
||||
|
||||
# Die 3 wichtigsten Kanten als Begründung formulieren
|
||||
top_edges = sorted(edges_dto, key=lambda e: e.confidence, reverse=True)
|
||||
for e in top_edges[:3]:
|
||||
peer = e.source if e.direction == "in" else e.target
|
||||
prov_txt = "Bestätigte" if e.provenance == "explicit" else "KI-basierte"
|
||||
boost_txt = f" [Boost x{applied_boosts.get(e.kind)}]" if applied_boosts and e.kind in applied_boosts else ""
|
||||
|
||||
reasons.append(Reason(
|
||||
kind="edge",
|
||||
message=f"{prov_txt} Kante '{e.kind}'{boost_txt} von/zu '{peer}'.",
|
||||
score_impact=edge_w_cfg * e.confidence
|
||||
))
|
||||
|
||||
if scoring_debug["cent_bonus"] > 0.01:
|
||||
reasons.append(Reason(kind="centrality", message="Die Notiz ist ein zentraler Informations-Hub.", score_impact=breakdown.centrality_contribution))
|
||||
|
||||
return Explanation(
|
||||
breakdown=breakdown,
|
||||
reasons=reasons,
|
||||
related_edges=edges_dto if edges_dto else None,
|
||||
applied_boosts=applied_boosts
|
||||
)
|
||||
|
||||
# ==============================================================================
|
||||
# 3. CORE RETRIEVAL PIPELINE
|
||||
# ==============================================================================
|
||||
|
||||
def _build_hits_from_semantic(
|
||||
hits: Iterable[Tuple[str, float, Dict[str, Any]]],
|
||||
top_k: int,
|
||||
used_mode: str,
|
||||
subgraph: ga.Subgraph | None = None,
|
||||
explain: bool = False,
|
||||
dynamic_edge_boosts: Dict[str, float] = None
|
||||
) -> QueryResponse:
|
||||
"""Wandelt semantische Roh-Treffer in hochgeladene, bewertete QueryHits um."""
|
||||
t0 = time.time()
|
||||
enriched = []
|
||||
|
||||
for pid, semantic_score, payload in hits:
|
||||
edge_bonus, cent_bonus = 0.0, 0.0
|
||||
target_id = payload.get("note_id")
|
||||
|
||||
if subgraph and target_id:
|
||||
try:
|
||||
edge_bonus = float(subgraph.edge_bonus(target_id))
|
||||
cent_bonus = float(subgraph.centrality_bonus(target_id))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Mathematisches Scoring via WP-22 Engine
|
||||
debug_data = compute_wp22_score(
|
||||
semantic_score, payload, edge_bonus, cent_bonus, dynamic_edge_boosts
|
||||
)
|
||||
enriched.append((pid, semantic_score, payload, debug_data))
|
||||
|
||||
# Sortierung nach finalem mathematischen Score
|
||||
enriched_sorted = sorted(enriched, key=lambda h: h[3]["total"], reverse=True)
|
||||
limited_hits = enriched_sorted[: max(1, top_k)]
|
||||
|
||||
results: List[QueryHit] = []
|
||||
for pid, s_score, pl, dbg in limited_hits:
|
||||
explanation_obj = None
|
||||
if explain:
|
||||
explanation_obj = _build_explanation(
|
||||
semantic_score=float(s_score),
|
||||
payload=pl,
|
||||
scoring_debug=dbg,
|
||||
subgraph=subgraph,
|
||||
target_note_id=pl.get("note_id"),
|
||||
applied_boosts=dynamic_edge_boosts
|
||||
)
|
||||
|
||||
# Payload Text-Feld normalisieren
|
||||
text_content = pl.get("page_content") or pl.get("text") or pl.get("content", "[Kein Text]")
|
||||
|
||||
results.append(QueryHit(
|
||||
node_id=str(pid),
|
||||
note_id=str(pl.get("note_id", "unknown")),
|
||||
semantic_score=float(s_score),
|
||||
edge_bonus=dbg["edge_bonus"],
|
||||
centrality_bonus=dbg["cent_bonus"],
|
||||
total_score=dbg["total"],
|
||||
source={
|
||||
"path": pl.get("path"),
|
||||
"section": pl.get("section") or pl.get("section_title"),
|
||||
"text": text_content
|
||||
},
|
||||
payload=pl,
|
||||
explanation=explanation_obj
|
||||
))
|
||||
|
||||
return QueryResponse(results=results, used_mode=used_mode, latency_ms=int((time.time() - t0) * 1000))
|
||||
|
||||
|
||||
def hybrid_retrieve(req: QueryRequest) -> QueryResponse:
|
||||
"""
|
||||
Die Haupt-Einstiegsfunktion für die hybride Suche.
|
||||
Kombiniert Vektorsuche mit Graph-Expansion, Provenance-Weighting und Intent-Boosting.
|
||||
"""
|
||||
client, prefix = _get_client_and_prefix()
|
||||
vector = list(req.query_vector) if req.query_vector else _get_query_vector(req)
|
||||
top_k = req.top_k or 10
|
||||
|
||||
# 1. Semantische Seed-Suche
|
||||
hits = _semantic_hits(client, prefix, vector, top_k=top_k, filters=req.filters)
|
||||
|
||||
# 2. Graph Expansion Konfiguration
|
||||
expand_cfg = req.expand if isinstance(req.expand, dict) else {}
|
||||
depth = int(expand_cfg.get("depth", 1))
|
||||
boost_edges = getattr(req, "boost_edges", {}) or {}
|
||||
|
||||
subgraph: ga.Subgraph | None = None
|
||||
if depth > 0 and hits:
|
||||
# Start-IDs für den Graph-Traversal sammeln
|
||||
seed_ids = list({h[2].get("note_id") for h in hits if h[2].get("note_id")})
|
||||
|
||||
if seed_ids:
|
||||
try:
|
||||
# Subgraph aus RAM/DB laden
|
||||
subgraph = ga.expand(client, prefix, seed_ids, depth=depth, edge_types=expand_cfg.get("edge_types"))
|
||||
|
||||
# --- WP-22: Kanten-Gewichtung im RAM-Graphen vor Bonus-Berechnung ---
|
||||
if subgraph and hasattr(subgraph, "graph"):
|
||||
for _, _, data in subgraph.graph.edges(data=True):
|
||||
# A. Provenance Weighting (WP-22 Bonus für Herkunft)
|
||||
prov = data.get("provenance", "rule")
|
||||
# Belohnung: Explizite Links (1.0) > Smart (0.9) > Rule (0.7)
|
||||
prov_w = 1.0 if prov == "explicit" else (0.9 if prov == "smart" else 0.7)
|
||||
|
||||
# B. Intent Boost Multiplikator (Vom Router dynamisch injiziert)
|
||||
kind = data.get("kind")
|
||||
intent_multiplier = boost_edges.get(kind, 1.0)
|
||||
|
||||
# Finales Gewicht setzen (Basis * Provenance * Intent)
|
||||
data["weight"] = data.get("weight", 1.0) * prov_w * intent_multiplier
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Graph Expansion failed: {e}")
|
||||
subgraph = None
|
||||
|
||||
# 3. Scoring & Explanation Generierung
|
||||
return _build_hits_from_semantic(hits, top_k, "hybrid", subgraph, req.explain, boost_edges)
|
||||
|
||||
|
||||
def semantic_retrieve(req: QueryRequest) -> QueryResponse:
|
||||
"""Standard Vektorsuche ohne Graph-Einfluss (WP-02 Fallback)."""
|
||||
client, prefix = _get_client_and_prefix()
|
||||
vector = _get_query_vector(req)
|
||||
hits = _semantic_hits(client, prefix, vector, req.top_k or 10, req.filters)
|
||||
return _build_hits_from_semantic(hits, req.top_k or 10, "semantic", explain=req.explain)
|
||||
|
||||
|
||||
class Retriever:
|
||||
"""Schnittstelle für die asynchrone Suche."""
|
||||
async def search(self, request: QueryRequest) -> QueryResponse:
|
||||
"""Führt eine hybride Suche aus."""
|
||||
return hybrid_retrieve(request)
|
||||
# Re-Export für 100% Kompatibilität
|
||||
__all__ = ["Retriever", "hybrid_retrieve", "semantic_retrieve"]
|
||||
|
|
@ -1,120 +1,18 @@
|
|||
"""
|
||||
FILE: app/core/retriever_scoring.py
|
||||
DESCRIPTION: Mathematische Kern-Logik für das WP-22 Scoring.
|
||||
Berechnet Relevanz-Scores basierend auf Semantik, Graph-Intelligence und Content Lifecycle.
|
||||
VERSION: 1.0.1 (WP-22 Full Math Engine)
|
||||
STATUS: Active
|
||||
DEPENDENCIES: app.config, typing
|
||||
DESCRIPTION: Proxy-Modul zur Aufrechterhaltung der Abwärtskompatibilität (WP-14).
|
||||
Leitet Scoring-Berechnungen an das neue retrieval-Paket weiter.
|
||||
STATUS: Proxy (Legacy-Support)
|
||||
"""
|
||||
import os
|
||||
import logging
|
||||
from functools import lru_cache
|
||||
from typing import Any, Dict, Tuple, Optional
|
||||
from .retrieval.retriever_scoring import (
|
||||
get_weights,
|
||||
compute_wp22_score,
|
||||
get_status_multiplier
|
||||
)
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
yaml = None
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@lru_cache
|
||||
def get_weights() -> Tuple[float, float, float]:
|
||||
"""
|
||||
Liefert die Basis-Gewichtung (semantic, edge, centrality) aus der Konfiguration.
|
||||
Priorität:
|
||||
1. config/retriever.yaml (Scoring-Sektion)
|
||||
2. Umgebungsvariablen (RETRIEVER_W_*)
|
||||
3. System-Defaults (1.0, 0.0, 0.0)
|
||||
"""
|
||||
from app.config import get_settings
|
||||
settings = get_settings()
|
||||
|
||||
# Defaults aus Settings laden
|
||||
sem = float(getattr(settings, "RETRIEVER_W_SEM", 1.0))
|
||||
edge = float(getattr(settings, "RETRIEVER_W_EDGE", 0.0))
|
||||
cent = float(getattr(settings, "RETRIEVER_W_CENT", 0.0))
|
||||
|
||||
# Optionaler Override via YAML
|
||||
config_path = os.getenv("MINDNET_RETRIEVER_CONFIG", "config/retriever.yaml")
|
||||
if yaml and os.path.exists(config_path):
|
||||
try:
|
||||
with open(config_path, "r", encoding="utf-8") as f:
|
||||
data = yaml.safe_load(f) or {}
|
||||
scoring = data.get("scoring", {})
|
||||
sem = float(scoring.get("semantic_weight", sem))
|
||||
edge = float(scoring.get("edge_weight", edge))
|
||||
cent = float(scoring.get("centrality_weight", cent))
|
||||
except Exception as e:
|
||||
logger.warning(f"Retriever Configuration could not be fully loaded from {config_path}: {e}")
|
||||
|
||||
return sem, edge, cent
|
||||
|
||||
def get_status_multiplier(payload: Dict[str, Any]) -> float:
|
||||
"""
|
||||
WP-22 A: Content Lifecycle Multiplier.
|
||||
Steuert das Ranking basierend auf dem Reifegrad der Information.
|
||||
|
||||
- stable: 1.2 (Belohnung für verifiziertes Wissen)
|
||||
- active: 1.0 (Standard-Gewichtung)
|
||||
- draft: 0.5 (Bestrafung für unfertige Fragmente)
|
||||
"""
|
||||
status = str(payload.get("status", "active")).lower().strip()
|
||||
if status == "stable":
|
||||
return 1.2
|
||||
if status == "draft":
|
||||
return 0.5
|
||||
return 1.0
|
||||
|
||||
def compute_wp22_score(
|
||||
semantic_score: float,
|
||||
payload: Dict[str, Any],
|
||||
edge_bonus_raw: float = 0.0,
|
||||
cent_bonus_raw: float = 0.0,
|
||||
dynamic_edge_boosts: Optional[Dict[str, float]] = None
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Die zentrale mathematische Scoring-Formel der Mindnet Intelligence.
|
||||
Implementiert das WP-22 Hybrid-Scoring (Semantic * Lifecycle * Graph).
|
||||
|
||||
FORMEL:
|
||||
Score = (Similarity * StatusMult) * (1 + (TypeWeight - 1) + ((EdgeW * EB + CentW * CB) * IntentBoost))
|
||||
|
||||
Returns:
|
||||
Dict mit dem finalen 'total' Score und allen mathematischen Zwischenwerten für den Explanation Layer.
|
||||
"""
|
||||
sem_w, edge_w_cfg, cent_w_cfg = get_weights()
|
||||
status_mult = get_status_multiplier(payload)
|
||||
|
||||
# Retriever Weight (Type Boost aus types.yaml, z.B. 1.1 für Decisions)
|
||||
node_weight = float(payload.get("retriever_weight", 1.0))
|
||||
|
||||
# 1. Berechnung des Base Scores (Semantik gewichtet durch Lifecycle-Status)
|
||||
base_val = float(semantic_score) * status_mult
|
||||
|
||||
# 2. Graph Boost Factor (Teil C: Intent-spezifische Verstärkung)
|
||||
# Erhöht das Gewicht des gesamten Graphen um 50%, wenn ein spezifischer Intent vorliegt.
|
||||
graph_boost_factor = 1.5 if dynamic_edge_boosts and (edge_bonus_raw > 0 or cent_bonus_raw > 0) else 1.0
|
||||
|
||||
# 3. Einzelne Graph-Komponenten berechnen
|
||||
edge_impact_final = (edge_w_cfg * edge_bonus_raw) * graph_boost_factor
|
||||
cent_impact_final = (cent_w_cfg * cent_bonus_raw) * graph_boost_factor
|
||||
|
||||
# 4. Finales Zusammenführen (Merging)
|
||||
# (node_weight - 1.0) sorgt dafür, dass ein Gewicht von 1.0 keinen Einfluss hat (neutral).
|
||||
total = base_val * (1.0 + (node_weight - 1.0) + edge_impact_final + cent_impact_final)
|
||||
|
||||
# Sicherstellen, dass der Score niemals 0 oder negativ ist (Floor)
|
||||
final_score = max(0.0001, float(total))
|
||||
|
||||
return {
|
||||
"total": final_score,
|
||||
"edge_bonus": float(edge_bonus_raw),
|
||||
"cent_bonus": float(cent_bonus_raw),
|
||||
"status_multiplier": status_mult,
|
||||
"graph_boost_factor": graph_boost_factor,
|
||||
"type_impact": node_weight - 1.0,
|
||||
"base_val": base_val,
|
||||
"edge_impact_final": edge_impact_final,
|
||||
"cent_impact_final": cent_impact_final
|
||||
}
|
||||
# Re-Export für 100% Kompatibilität
|
||||
__all__ = [
|
||||
"get_weights",
|
||||
"compute_wp22_score",
|
||||
"get_status_multiplier"
|
||||
]
|
||||
|
|
@ -1,11 +1,14 @@
|
|||
"""
|
||||
FILE: app/services/edge_registry.py
|
||||
DESCRIPTION: Single Source of Truth für Kanten-Typen mit dynamischem Reload.
|
||||
WP-15b: Erweiterte Provenance-Prüfung für die Candidate-Validation.
|
||||
Sichert die Graph-Integrität durch strikte Trennung von System- und Inhaltskanten.
|
||||
WP-22: Fix für absolute Pfade außerhalb des Vaults (Prod-Dictionary).
|
||||
WP-20: Synchronisation mit zentralen Settings (v0.6.2).
|
||||
VERSION: 0.7.5
|
||||
VERSION: 0.8.0
|
||||
STATUS: Active
|
||||
DEPENDENCIES: re, os, json, logging, time, app.config
|
||||
LAST_ANALYSIS: 2025-12-26
|
||||
"""
|
||||
import re
|
||||
import os
|
||||
|
|
@ -19,7 +22,12 @@ from app.config import get_settings
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
class EdgeRegistry:
|
||||
"""
|
||||
Zentraler Verwalter für das Kanten-Vokabular.
|
||||
Implementiert das Singleton-Pattern für konsistente Validierung über alle Services.
|
||||
"""
|
||||
_instance = None
|
||||
# System-Kanten, die nicht durch User oder KI gesetzt werden dürfen
|
||||
FORBIDDEN_SYSTEM_EDGES = {"next", "prev", "belongs_to"}
|
||||
|
||||
def __new__(cls, *args, **kwargs):
|
||||
|
|
@ -51,7 +59,7 @@ class EdgeRegistry:
|
|||
def ensure_latest(self):
|
||||
"""
|
||||
Prüft den Zeitstempel der Vokabular-Datei und lädt bei Bedarf neu.
|
||||
Verhindert den AttributeError in der Ingestion-Pipeline.
|
||||
Verhindert Inkonsistenzen bei Laufzeit-Updates des Dictionaries.
|
||||
"""
|
||||
if not os.path.exists(self.full_vocab_path):
|
||||
logger.error(f"!!! [EDGE-REGISTRY ERROR] File not found: {self.full_vocab_path} !!!")
|
||||
|
|
@ -66,7 +74,10 @@ class EdgeRegistry:
|
|||
logger.error(f"!!! [EDGE-REGISTRY] Error checking file time: {e}")
|
||||
|
||||
def _load_vocabulary(self):
|
||||
"""Parst das Markdown-Wörterbuch und baut die Canonical-Map auf."""
|
||||
"""
|
||||
Parst das Markdown-Wörterbuch und baut die Canonical-Map auf.
|
||||
Erkennt Tabellen-Strukturen und extrahiert fettgedruckte System-Typen.
|
||||
"""
|
||||
self.canonical_map.clear()
|
||||
self.valid_types.clear()
|
||||
|
||||
|
|
@ -101,8 +112,8 @@ class EdgeRegistry:
|
|||
|
||||
def resolve(self, edge_type: str, provenance: str = "explicit", context: dict = None) -> str:
|
||||
"""
|
||||
Validiert einen Kanten-Typ gegen das Vokabular.
|
||||
Loggt unbekannte Typen für die spätere manuelle Pflege.
|
||||
WP-15b: Validiert einen Kanten-Typ gegen das Vokabular und prüft Berechtigungen.
|
||||
Sichert, dass nur strukturelle Prozesse System-Kanten setzen dürfen.
|
||||
"""
|
||||
self.ensure_latest()
|
||||
if not edge_type:
|
||||
|
|
@ -112,20 +123,23 @@ class EdgeRegistry:
|
|||
clean_type = edge_type.lower().strip().replace(" ", "_").replace("-", "_")
|
||||
ctx = context or {}
|
||||
|
||||
# System-Kanten dürfen nicht manuell vergeben werden
|
||||
if provenance == "explicit" and clean_type in self.FORBIDDEN_SYSTEM_EDGES:
|
||||
self._log_issue(clean_type, "forbidden_system_usage", ctx)
|
||||
# WP-15b: System-Kanten dürfen weder manuell noch durch KI/Vererbung gesetzt werden.
|
||||
# Nur Provenienz 'structure' (interne Prozesse) ist autorisiert.
|
||||
# Wir blockieren hier alle Provenienzen außer 'structure'.
|
||||
restricted_provenance = ["explicit", "semantic_ai", "inherited", "global_pool", "rule"]
|
||||
if provenance in restricted_provenance and clean_type in self.FORBIDDEN_SYSTEM_EDGES:
|
||||
self._log_issue(clean_type, f"forbidden_usage_by_{provenance}", ctx)
|
||||
return "related_to"
|
||||
|
||||
# System-Kanten sind nur bei struktureller Provenienz erlaubt
|
||||
# System-Kanten sind NUR bei struktureller Provenienz erlaubt
|
||||
if provenance == "structure" and clean_type in self.FORBIDDEN_SYSTEM_EDGES:
|
||||
return clean_type
|
||||
|
||||
# Mapping auf kanonischen Namen
|
||||
# Mapping auf kanonischen Namen (Alias-Auflösung)
|
||||
if clean_type in self.canonical_map:
|
||||
return self.canonical_map[clean_type]
|
||||
|
||||
# Fallback und Logging
|
||||
# Fallback und Logging unbekannter Typen für Admin-Review
|
||||
self._log_issue(clean_type, "unknown_type", ctx)
|
||||
return clean_type
|
||||
|
||||
|
|
@ -139,12 +153,13 @@ class EdgeRegistry:
|
|||
"error": error_kind,
|
||||
"file": ctx.get("file", "unknown"),
|
||||
"line": ctx.get("line", "unknown"),
|
||||
"note_id": ctx.get("note_id", "unknown")
|
||||
"note_id": ctx.get("note_id", "unknown"),
|
||||
"provenance": ctx.get("provenance", "unknown")
|
||||
}
|
||||
with open(self.unknown_log_path, "a", encoding="utf-8") as f:
|
||||
f.write(json.dumps(entry) + "\n")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Singleton Export
|
||||
# Singleton Export für systemweiten Zugriff
|
||||
registry = EdgeRegistry()
|
||||
|
|
@ -6,12 +6,11 @@ DESCRIPTION: Hybrid-Client für Ollama, Google GenAI (Gemini) und OpenRouter.
|
|||
WP-20 Fix: Bulletproof Prompt-Auflösung für format() Aufrufe.
|
||||
WP-22/JSON: Optionales JSON-Schema + strict (für OpenRouter structured outputs).
|
||||
FIX: Intelligente Rate-Limit Erkennung (429 Handling), v1-API Sync & Timeouts.
|
||||
VERSION: 3.3.7
|
||||
VERSION: 3.3.9
|
||||
STATUS: Active
|
||||
FIX:
|
||||
- Implementiert striktes max_retries Handling für alle Provider (v.a. für Chat-Stabilität).
|
||||
- Synchronisiert Rate-Limit Retries mit dem max_retries Parameter.
|
||||
- Optimiert Logging für sofortige Fehlererkennung.
|
||||
- Importiert clean_llm_text von app.core.registry zur Vermeidung von Circular Imports.
|
||||
- Wendet clean_llm_text auf Text-Antworten in generate_raw_response an.
|
||||
"""
|
||||
import httpx
|
||||
import yaml
|
||||
|
|
@ -25,6 +24,9 @@ from pathlib import Path
|
|||
from typing import Optional, Dict, Any, Literal
|
||||
from app.config import get_settings
|
||||
|
||||
# ENTSCHEIDENDER FIX: Import der neutralen Bereinigungs-Logik (WP-14)
|
||||
from app.core.registry import clean_llm_text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
|
@ -119,22 +121,27 @@ class LLMService:
|
|||
) -> str:
|
||||
"""
|
||||
Haupteinstiegspunkt für LLM-Anfragen mit Priorisierung.
|
||||
Wendet die Bereinigung auf Text-Antworten an.
|
||||
"""
|
||||
target_provider = provider or self.settings.MINDNET_LLM_PROVIDER
|
||||
|
||||
if priority == "background":
|
||||
async with LLMService._background_semaphore:
|
||||
return await self._dispatch(
|
||||
res = await self._dispatch(
|
||||
target_provider, prompt, system, force_json,
|
||||
max_retries, base_delay, model_override,
|
||||
json_schema, json_schema_name, strict_json_schema
|
||||
)
|
||||
# WP-14 Fix: Bereinige Text-Antworten vor Rückgabe
|
||||
return clean_llm_text(res) if not force_json else res
|
||||
|
||||
return await self._dispatch(
|
||||
res = await self._dispatch(
|
||||
target_provider, prompt, system, force_json,
|
||||
max_retries, base_delay, model_override,
|
||||
json_schema, json_schema_name, strict_json_schema
|
||||
)
|
||||
# WP-14 Fix: Bereinige Text-Antworten vor Rückgabe
|
||||
return clean_llm_text(res) if not force_json else res
|
||||
|
||||
async def _dispatch(
|
||||
self,
|
||||
|
|
@ -297,6 +304,7 @@ class LLMService:
|
|||
final_prompt = rag_template.format(context_str=context_str, query=query)
|
||||
|
||||
# RAG Aufrufe im Chat nutzen nun standardmäßig max_retries=2 (überschreibbar)
|
||||
# Durch den Aufruf von generate_raw_response wird die Bereinigung automatisch angewendet.
|
||||
return await self.generate_raw_response(
|
||||
final_prompt,
|
||||
system=system_prompt,
|
||||
|
|
|
|||
|
|
@ -1,199 +0,0 @@
|
|||
"""
|
||||
FILE: app/services/semantic_analyzer.py
|
||||
DESCRIPTION: KI-gestützte Kanten-Validierung. Nutzt LLM (Background-Priority), um Kanten präzise einem Chunk zuzuordnen.
|
||||
WP-20 Fix: Volle Kompatibilität mit der provider-basierten Routing-Logik (OpenRouter Primary).
|
||||
WP-22: Integration von valid_types zur Halluzinations-Vermeidung.
|
||||
FIX: Mistral-sicheres JSON-Parsing (<s> & [OUT] Handling) und 100% Logik-Erhalt.
|
||||
VERSION: 2.2.6
|
||||
STATUS: Active
|
||||
DEPENDENCIES: app.services.llm_service, app.services.edge_registry, json, logging, re
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from typing import List, Optional, Any
|
||||
from dataclasses import dataclass
|
||||
|
||||
# Importe
|
||||
from app.services.llm_service import LLMService
|
||||
# WP-22: Registry für Vokabular-Erzwingung
|
||||
from app.services.edge_registry import registry as edge_registry
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class SemanticAnalyzer:
|
||||
def __init__(self):
|
||||
self.llm = LLMService()
|
||||
|
||||
def _is_valid_edge_string(self, edge_str: str) -> bool:
|
||||
"""
|
||||
Prüft, ob ein String eine valide Kante im Format 'kind:target' ist.
|
||||
Verhindert, dass LLM-Geschwätz als Kante durchrutscht.
|
||||
"""
|
||||
if not isinstance(edge_str, str) or ":" not in edge_str:
|
||||
return False
|
||||
|
||||
parts = edge_str.split(":", 1)
|
||||
kind = parts[0].strip()
|
||||
target = parts[1].strip()
|
||||
|
||||
# Regel 1: Ein 'kind' (Beziehungstyp) darf keine Leerzeichen enthalten.
|
||||
if " " in kind:
|
||||
return False
|
||||
|
||||
# Regel 2: Plausible Länge für den Typ (Vermeidet Sätze als Typ)
|
||||
if len(kind) > 40 or len(kind) < 2:
|
||||
return False
|
||||
|
||||
# Regel 3: Target darf nicht leer sein
|
||||
if not target:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def _extract_json_safely(self, text: str) -> Any:
|
||||
"""
|
||||
Extrahiert JSON-Daten und bereinigt LLM-Steuerzeichen (Mistral/Llama).
|
||||
Implementiert robuste Recovery-Logik für Cloud-Provider.
|
||||
"""
|
||||
if not text:
|
||||
return []
|
||||
|
||||
# 1. Entferne Mistral/Llama Steuerzeichen und Tags
|
||||
clean = text.replace("<s>", "").replace("</s>", "")
|
||||
clean = clean.replace("[OUT]", "").replace("[/OUT]", "")
|
||||
clean = clean.strip()
|
||||
|
||||
# 2. Suche nach Markdown JSON-Blöcken
|
||||
match = re.search(r"```(?:json)?\s*(.*?)\s*```", clean, re.DOTALL)
|
||||
payload = match.group(1) if match else clean
|
||||
|
||||
try:
|
||||
return json.loads(payload.strip())
|
||||
except json.JSONDecodeError:
|
||||
# 3. Recovery: Suche nach der ersten [ und letzten ]
|
||||
start = payload.find('[')
|
||||
end = payload.rfind(']') + 1
|
||||
if start != -1 and end > start:
|
||||
try:
|
||||
return json.loads(payload[start:end])
|
||||
except: pass
|
||||
|
||||
# 4. Zweite Recovery: Suche nach der ersten { und letzten }
|
||||
start_obj = payload.find('{')
|
||||
end_obj = payload.rfind('}') + 1
|
||||
if start_obj != -1 and end_obj > start_obj:
|
||||
try:
|
||||
return json.loads(payload[start_obj:end_obj])
|
||||
except: pass
|
||||
return []
|
||||
|
||||
async def assign_edges_to_chunk(self, chunk_text: str, all_edges: List[str], note_type: str) -> List[str]:
    """
    Send a chunk and a list of candidate edges to the LLM.

    The LLM filters out which of the candidate edges are relevant for this
    chunk. WP-20: primarily uses the configured provider (e.g. OpenRouter).

    Args:
        chunk_text: Plain text of the chunk (truncated to 6000 chars for the prompt).
        all_edges: Candidate edge strings, expected in 'kind:target' format.
        note_type: Logical note type; accepted for interface parity but not
            read inside this method.

    Returns:
        The subset of candidates the LLM confirmed, re-validated against the
        'kind:target' format; [] on empty input or any failure.
    """
    if not all_edges:
        return []

    # 1. Resolve provider and model dynamically from settings.
    provider = self.llm.settings.MINDNET_LLM_PROVIDER
    model = self.llm.settings.OPENROUTER_MODEL if provider == "openrouter" else self.llm.settings.GEMINI_MODEL

    # 2. Load the prompt (provider-specific via get_prompt).
    prompt_template = self.llm.get_prompt("edge_allocation_template", provider)

    if not prompt_template or not isinstance(prompt_template, str):
        # Config did not yield a usable template string; fall back to a
        # minimal built-in recovery template so ingestion keeps working.
        logger.warning("⚠️ [SemanticAnalyzer] Prompt 'edge_allocation_template' ungültig. Nutze Recovery-Template.")
        prompt_template = (
            "TASK: Wähle aus den Kandidaten die relevanten Kanten für den Text.\n"
            "TEXT: {chunk_text}\n"
            "KANDIDATEN: {edge_list}\n"
            "OUTPUT: JSON Liste von Strings [\"kind:target\"]."
        )

    # 3. Prepare template data (vocabulary check against the edge registry).
    edge_registry.ensure_latest()
    valid_types_str = ", ".join(sorted(list(edge_registry.valid_types)))
    edges_str = "\n".join([f"- {e}" for e in all_edges])

    logger.debug(f"🔍 [SemanticAnalyzer] Request: {len(chunk_text)} chars Text, {len(all_edges)} Candidates.")

    # 4. Fill the prompt with an explicit format check (no shortcut): a
    #    malformed template (unknown placeholder) must not crash ingestion.
    try:
        # Cap the text at a reasonable length for the context window.
        final_prompt = prompt_template.format(
            chunk_text=chunk_text[:6000],
            edge_list=edges_str,
            valid_types=valid_types_str
        )
    except Exception as format_err:
        logger.error(f"❌ [SemanticAnalyzer] Prompt Formatting failed: {format_err}")
        return []

    try:
        # 5. LLM call with background priority & semaphore control.
        response_json = await self.llm.generate_raw_response(
            prompt=final_prompt,
            force_json=True,
            max_retries=3,
            base_delay=2.0,
            priority="background",
            provider=provider,
            model_override=model
        )

        # 6. Mistral-safe JSON parsing via helper.
        data = self._extract_json_safely(response_json)

        if not data:
            return []

        # 7. Robust normalisation (list vs dict recovery): some models wrap
        #    the list in an object instead of returning a bare array.
        raw_candidates = []
        if isinstance(data, list):
            raw_candidates = data
        elif isinstance(data, dict):
            logger.info(f"ℹ️ [SemanticAnalyzer] LLM returned dict, trying recovery.")
            for key in ["edges", "results", "kanten", "matches"]:
                if key in data and isinstance(data[key], list):
                    raw_candidates.extend(data[key])
                    break
            # Still empty: fall back to treating key/value pairs as edges.
            if not raw_candidates:
                for k, v in data.items():
                    if isinstance(v, str): raw_candidates.append(f"{k}:{v}")
                    elif isinstance(v, list):
                        for target in v:
                            if isinstance(target, str): raw_candidates.append(f"{k}:{target}")

        # 8. Strict validation against the 'kind:target' edge format.
        valid_edges = []
        for e in raw_candidates:
            e_str = str(e).strip()
            if self._is_valid_edge_string(e_str):
                valid_edges.append(e_str)
            else:
                logger.debug(f" [SemanticAnalyzer] Rejected invalid edge format: '{e_str}'")

        if valid_edges:
            logger.info(f"✅ [SemanticAnalyzer] Assigned {len(valid_edges)} edges to chunk.")
            return valid_edges

    except Exception as e:
        logger.error(f"💥 [SemanticAnalyzer] Critical error during analysis: {e}", exc_info=True)
    # Reached when the LLM confirmed nothing or an error was logged above.
    return []
|
||||
|
||||
async def close(self):
    """Shut down the underlying LLM client, if one is attached."""
    client = self.llm
    if not client:
        return
    await client.close()
|
||||
|
||||
# Lazily-created module-level singleton.
_analyzer_instance = None

def get_semantic_analyzer():
    """Return the shared SemanticAnalyzer, constructing it on first use."""
    global _analyzer_instance
    instance = _analyzer_instance
    if instance is None:
        instance = SemanticAnalyzer()
        _analyzer_instance = instance
    return instance
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
# config/prompts.yaml — Final V2.5.5 (OpenRouter Hardening)
|
||||
# config/prompts.yaml — Final V2.6.0 (WP-15b Candidate-Validation)
|
||||
# WP-20: Optimierte Cloud-Templates zur Unterdrückung von Modell-Geschwätz.
|
||||
# FIX: Explizite Verbote für Einleitungstexte zur Vermeidung von JSON-Parsing-Fehlern.
|
||||
# WP-15b: Integration der binären edge_validation für den Two-Pass Workflow.
|
||||
# OLLAMA: UNVERÄNDERT laut Benutzeranweisung.
|
||||
|
||||
system_prompt: |
|
||||
|
|
@ -215,7 +216,7 @@ edge_extraction:
|
|||
4. Antworte AUSSCHLIESSLICH in validem JSON als Liste von Objekten.
|
||||
|
||||
BEISPIEL:
|
||||
[[ {{"to": "Ziel-Konzept", "kind": "beziehungs_typ"}} ]]
|
||||
[[ {{"to": "Ziel-Konzept", "kind": "beziehungs_typ"}} ]]
|
||||
|
||||
TEXT:
|
||||
"""
|
||||
|
|
@ -227,13 +228,46 @@ edge_extraction:
|
|||
Analysiere '{note_id}'. Extrahiere semantische Beziehungen.
|
||||
ERLAUBTE TYPEN: {valid_types}
|
||||
TEXT: {text}
|
||||
OUTPUT: STRIKT JSON-Array von Objekten: [[{{"to":"Ziel","kind":"typ"}}]]. Kein Text davor/danach. Wenn nichts: [].
|
||||
OUTPUT: STRIKT JSON-Array von Objekten: [[{{"to":"Ziel","kind":"typ"}}]]. Kein Text davor/danach. Wenn nichts: [].
|
||||
openrouter: |
|
||||
TASK: Extrahiere semantische Relationen für '{note_id}'.
|
||||
ERLAUBTE TYPEN: {valid_types}
|
||||
TEXT: {text}
|
||||
ANWEISUNG: Antworte AUSSCHLIESSLICH mit einem JSON-Array von Objekten.
|
||||
FORMAT: [[{{"to":"Ziel-Begriff","kind":"typ"}}]]
|
||||
FORMAT: [[{{"to":"Ziel-Begriff","kind":"typ"}}]]
|
||||
STRIKTES VERBOT: Schreibe keine Einleitung, keine Analyse und keine Erklärungen.
|
||||
Wenn keine Relationen existieren, antworte NUR mit: []
|
||||
OUTPUT:
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# 8. WP-15b: EDGE VALIDATION (Intent: VALIDATE)
|
||||
# ---------------------------------------------------------
|
||||
edge_validation:
|
||||
gemini: |
|
||||
Bewerte die semantische Validität dieser Verbindung im Wissensgraph.
|
||||
|
||||
KONTEXT DER QUELLE (Chunk):
|
||||
"{chunk_text}"
|
||||
|
||||
ZIEL-NOTIZ: "{target_title}"
|
||||
ZIEL-BESCHREIBUNG (Zusammenfassung):
|
||||
"{target_summary}"
|
||||
|
||||
GEPLANTE RELATION: "{edge_kind}"
|
||||
|
||||
FRAGE: Bestätigt der Kontext der Quelle die Beziehung '{edge_kind}' zum Ziel?
|
||||
REGEL: Antworte NUR mit 'YES' oder 'NO'. Keine Erklärungen oder Smalltalk.
|
||||
openrouter: |
|
||||
Verify semantic relation for graph construction.
|
||||
Source Context: {chunk_text}
|
||||
Target Note: {target_title}
|
||||
Target Summary: {target_summary}
|
||||
Proposed Relation: {edge_kind}
|
||||
Instruction: Does the source context support this relation to the target?
|
||||
Result: Respond ONLY with 'YES' or 'NO'.
|
||||
ollama: |
|
||||
Bewerte die semantische Korrektheit dieser Verbindung.
|
||||
QUELLE: {chunk_text}
|
||||
ZIEL: {target_title} ({target_summary})
|
||||
BEZIEHUNG: {edge_kind}
|
||||
Ist diese Verbindung valide? Antworte NUR mit YES oder NO.
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
version: 2.6.0 # Final WP-15 Config: Smart Edges & Strict/Soft Chunking
|
||||
version: 2.7.0 # WP-14 Update: Dynamisierung der Ingestion-Pipeline
|
||||
|
||||
# ==============================================================================
|
||||
# 1. CHUNKING PROFILES
|
||||
|
|
@ -76,7 +76,32 @@ defaults:
|
|||
edge_defaults: []
|
||||
|
||||
# ==============================================================================
|
||||
# 3. TYPE DEFINITIONS
|
||||
# 3. INGESTION SETTINGS (WP-14 Dynamization)
|
||||
# ==============================================================================
|
||||
# Steuert, welche Notizen verarbeitet werden und wie Fallbacks aussehen.
|
||||
ingestion_settings:
|
||||
# Liste der Status-Werte, die beim Import ignoriert werden sollen.
|
||||
ignore_statuses: ["system", "template", "archive", "hidden"]
|
||||
# Standard-Typ, falls kein Typ im Frontmatter angegeben ist.
|
||||
default_note_type: "concept"
|
||||
|
||||
# ==============================================================================
|
||||
# 4. SUMMARY & SCAN SETTINGS
|
||||
# ==============================================================================
|
||||
# Steuert die Tiefe des Pre-Scans für den Context-Cache.
|
||||
summary_settings:
|
||||
max_summary_length: 500
|
||||
pre_scan_depth: 600
|
||||
|
||||
# ==============================================================================
|
||||
# 5. LLM SETTINGS
|
||||
# ==============================================================================
|
||||
# Steuerzeichen und Patterns zur Bereinigung der LLM-Antworten.
|
||||
llm_settings:
|
||||
cleanup_patterns: ["<s>", "</s>", "[OUT]", "[/OUT]", "```json", "```"]
|
||||
|
||||
# ==============================================================================
|
||||
# 6. TYPE DEFINITIONS
|
||||
# ==============================================================================
|
||||
|
||||
types:
|
||||
|
|
|
|||
|
|
@ -2,13 +2,13 @@
|
|||
doc_type: glossary
|
||||
audience: all
|
||||
status: active
|
||||
version: 2.8.0
|
||||
context: "Zentrales Glossar für Mindnet v2.8. Enthält Definitionen zu Hybrid-Cloud Resilienz, WP-76 Quoten-Steuerung und Mistral-safe Parsing."
|
||||
version: 2.8.1
|
||||
context: "Zentrales Glossar für Mindnet v2.8. Enthält Definitionen zu Hybrid-Cloud Resilienz, WP-14 Modularisierung, WP-15b Two-Pass Ingestion und Mistral-safe Parsing."
|
||||
---
|
||||
|
||||
# Mindnet Glossar
|
||||
|
||||
**Quellen:** `01_edge_vocabulary.md`, `llm_service.py`, `ingestion.py`, `edge_registry.py`
|
||||
**Quellen:** `01_edge_vocabulary.md`, `llm_service.py`, `ingestion.py`, `edge_registry.py`, `registry.py`, `qdrant.py`
|
||||
|
||||
## Kern-Entitäten
|
||||
|
||||
|
|
@ -21,11 +21,13 @@ context: "Zentrales Glossar für Mindnet v2.8. Enthält Definitionen zu Hybrid-C
|
|||
## Komponenten
|
||||
|
||||
* **Edge Registry:** Der zentrale Dienst (SSOT), der Kanten-Typen validiert und Aliase in kanonische Typen auflöst. Nutzt `01_edge_vocabulary.md` als Basis.
|
||||
* **LLM Service:** Der Hybrid-Client (v3.3.6), der Anfragen zwischen OpenRouter, Google Gemini und lokalem Ollama routet. Verwaltet Cloud-Timeouts und Quoten-Management.
|
||||
* **Retriever:** Besteht in v2.7+ aus der Orchestrierung (`retriever.py`) und der mathematischen Scoring-Engine (`retriever_scoring.py`).
|
||||
* **LLM Service:** Der Hybrid-Client (v3.3.6), der Anfragen zwischen OpenRouter, Google Gemini und lokalem Ollama routet. Verwaltet Cloud-Timeouts und Quoten-Management. Nutzt zur Text-Bereinigung nun die neutrale `registry.py`, um Circular Imports zu vermeiden.
|
||||
* **Retriever:** Besteht in v2.7+ aus der Orchestrierung (`retriever.py`) und der mathematischen Scoring-Engine (`retriever_scoring.py`). Seit WP-14 im Paket `app.core.retrieval` gekapselt.
|
||||
* **Decision Engine:** Teil des Routers, der Intents erkennt und entsprechende **Boost-Faktoren** für das Retrieval injiziert.
|
||||
* **Traffic Control:** Verwaltet Prioritäten und drosselt Hintergrund-Tasks (z.B. Smart Edges) mittels Semaphoren und Timeouts (45s) zur Vermeidung von System-Hangs.
|
||||
* **Unknown Edges Log:** Die Datei `unknown_edges.jsonl`, in der das System Kanten-Typen protokolliert, die nicht im Dictionary gefunden wurden.
|
||||
* **Database Package (WP-14):** Zentralisiertes Infrastruktur-Paket (`app.core.database`), das den Qdrant-Client (`qdrant.py`) und das Point-Mapping (`qdrant_points.py`) verwaltet.
|
||||
* **LocalBatchCache (WP-15b):** Ein globaler In-Memory-Index, der während des Pass 1 Scans aufgebaut wird und Metadaten (IDs, Titel, Summaries) aller Notizen für die Kantenvalidierung bereithält.
|
||||
|
||||
## Konzepte & Features
|
||||
|
||||
|
|
@ -40,5 +42,9 @@ context: "Zentrales Glossar für Mindnet v2.8. Enthält Definitionen zu Hybrid-C
|
|||
* `explicit`: Vom Mensch gesetzt (Prio 1).
|
||||
* `semantic_ai`: Von der KI im Turbo-Mode extrahiert und validiert (Prio 2).
|
||||
* `structure`: Durch System-Regeln/Matrix erzeugt (Prio 3).
|
||||
* **Smart Edge Allocation:** KI-Verfahren zur Relevanzprüfung von Links für spezifische Textabschnitte.
|
||||
* **Smart Edge Allocation (WP-15b):** KI-Verfahren zur Relevanzprüfung von Links für spezifische Textabschnitte. Validiert Kandidaten semantisch gegen das Ziel im LocalBatchCache.
|
||||
* **Matrix Logic:** Bestimmung des Kanten-Typs basierend auf Quell- und Ziel-Entität (z.B. Erfahrung -> Wert = `based_on`).
|
||||
* **Two-Pass Workflow (WP-15b):** Optimiertes Ingestion-Verfahren:
|
||||
* **Pass 1 (Pre-Scan):** Schnelles Scannen aller Dateien zur Befüllung des LocalBatchCache.
|
||||
* **Pass 2 (Semantic Processing):** Tiefenverarbeitung (Chunking, Embedding, Validierung) nur für geänderte Dateien.
|
||||
* **Circular Import Registry (WP-14):** Entkopplung von Kern-Logik (wie Textbereinigung) in eine neutrale `registry.py`, um Abhängigkeitsschleifen zwischen Diensten und Ingestion-Utilities zu verhindern.
|
||||
|
|
@ -1,19 +1,19 @@
|
|||
---
|
||||
doc_type: technical_reference
|
||||
audience: developer, admin
|
||||
scope: configuration, env, registry, scoring, resilience
|
||||
scope: configuration, env, registry, scoring, resilience, modularization
|
||||
status: active
|
||||
version: 2.8.0
|
||||
context: "Umfassende Referenztabellen für Umgebungsvariablen (inkl. Hybrid-Cloud & WP-76), YAML-Konfigurationen und die Edge Registry Struktur."
|
||||
version: 2.9.1
|
||||
context: "Umfassende Referenztabellen für Umgebungsvariablen (inkl. Hybrid-Cloud & WP-76), YAML-Konfigurationen und die Edge Registry Struktur unter Berücksichtigung von WP-14."
|
||||
---
|
||||
|
||||
# Konfigurations-Referenz
|
||||
|
||||
Dieses Dokument beschreibt alle Steuerungsdateien von Mindnet. In der Version 2.8 wurde die Konfiguration professionalisiert, um die Edge Registry, dynamische Scoring-Parameter (Lifecycle & Intent) sowie die neue Hybrid-Cloud-Resilienz zu unterstützen.
|
||||
Dieses Dokument beschreibt alle Steuerungsdateien von Mindnet. In der Version 2.9.1 wurde die Konfiguration professionalisiert, um die Edge Registry, dynamische Scoring-Parameter (Lifecycle & Intent), die neue Hybrid-Cloud-Resilienz sowie die modulare Datenbank-Infrastruktur (WP-14) zu unterstützen.
|
||||
|
||||
## 1. Environment Variablen (`.env`)
|
||||
|
||||
Diese Variablen steuern die Infrastruktur, Pfade und globale Timeouts.
|
||||
Diese Variablen steuern die Infrastruktur, Pfade und globale Timeouts. Seit der Modularisierung in WP-14 unterstützen sie zudem die explizite Benennung von Vektoren für verschiedene Collections.
|
||||
|
||||
| Variable | Default | Beschreibung |
|
||||
| :--- | :--- | :--- |
|
||||
|
|
@ -21,6 +21,10 @@ Diese Variablen steuern die Infrastruktur, Pfade und globale Timeouts.
|
|||
| `QDRANT_API_KEY` | *(leer)* | Optionaler Key für Absicherung. |
|
||||
| `COLLECTION_PREFIX` | `mindnet` | Namensraum für Collections (erzeugt `{prefix}_notes` etc). |
|
||||
| `VECTOR_DIM` | `768` | **Muss 768 sein** (für Nomic Embeddings). |
|
||||
| `MINDNET_VECTOR_NAME` | `default` | **Neu (WP-14):** Basis-Vektorname für Named Vectors Support. |
|
||||
| `NOTES_VECTOR_NAME` | *(leer)* | **Neu (WP-14):** Spezifischer Vektorname für die Notes-Collection (Override). |
|
||||
| `CHUNKS_VECTOR_NAME` | *(leer)* | **Neu (WP-14):** Spezifischer Vektorname für die Chunks-Collection (Override). |
|
||||
| `EDGES_VECTOR_NAME` | *(leer)* | **Neu (WP-14):** Spezifischer Vektorname für die Edges-Collection (Override). |
|
||||
| `MINDNET_VOCAB_PATH` | *(Pfad)* | **Neu (WP-22):** Absoluter Pfad zur `01_edge_vocabulary.md`. Definiert den Ort des Dictionarys. |
|
||||
| `MINDNET_VAULT_ROOT` | `./vault` | Basis-Pfad für Datei-Operationen. |
|
||||
| `MINDNET_TYPES_FILE` | `config/types.yaml` | Pfad zur Typ-Registry. |
|
||||
|
|
@ -38,23 +42,25 @@ Diese Variablen steuern die Infrastruktur, Pfade und globale Timeouts.
|
|||
| `MINDNET_LLM_MODEL` | `phi3:mini` | Name des lokalen Chat-Modells (Ollama). |
|
||||
| `MINDNET_EMBEDDING_MODEL` | `nomic-embed-text` | Name des Embedding-Modells (Ollama). |
|
||||
| `MINDNET_OLLAMA_URL` | `http://127.0.0.1:11434`| URL zum lokalen LLM-Server. |
|
||||
| `MAX_OLLAMA_CHARS` | `10000`| Maximale Länge des Kontext-Strings, der an das lokale Modell gesendet wird. Verhindert Batch-Decoding-Fehler bei sehr großen Notiz-Historien. |
|
||||
| `MAX_OLLAMA_CHARS` | `10000`| Maximale Länge des Kontext-Strings, der an das lokale Modell gesendet wird. |
|
||||
| `MINDNET_LLM_TIMEOUT` | `300.0` | Timeout in Sekunden für LLM-Anfragen. |
|
||||
| `MINDNET_API_TIMEOUT` | `300.0` | Globales API-Timeout für das Frontend. |
|
||||
| `MINDNET_LL_BACKGROUND_LIMIT`| `2` | **Traffic Control:** Max. parallele Hintergrund-Tasks (Semaphore). |
|
||||
| `MINDNET_CHANGE_DETECTION_MODE` | `full` | `full` (Text + Meta) oder `body` (nur Text). |
|
||||
| `MINDNET_DEFAULT_RETRIEVER_WEIGHT` | `1.0` | **Neu (WP-22):** Systemweiter Standard für das Retriever-Gewicht einer Notiz. |
|
||||
|
||||
---
|
||||
|
||||
## 2. Typ-Registry (`types.yaml`)
|
||||
|
||||
Steuert das Import-Verhalten, Chunking und die Kanten-Logik pro Typ.
|
||||
Steuert das Import-Verhalten, Chunking und die Kanten-Logik pro Typ. Die Auflösung erfolgt zentral über die modularisierte Registry in `app.core.registry`.
|
||||
|
||||
### 2.1 Konfigurations-Hierarchie (Override-Logik)
|
||||
Seit Version 2.7.0 gilt für `chunking_profile` und `retriever_weight` folgende Priorität:
|
||||
1. **Frontmatter (Höchste Prio):** Ein Wert direkt in der Markdown-Datei überschreibt alles.
|
||||
2. **Type Config:** Der Standardwert für den `type` aus `types.yaml`.
|
||||
3. **Global Default:** Fallback aus `defaults` in `types.yaml`.
|
||||
3. **Ingestion Settings (Neu WP-14):** Globale Konfiguration wie `default_chunk_profile` innerhalb des `ingestion_settings` Blocks.
|
||||
4. **Global Default:** Fallback aus `defaults` in `types.yaml`.
|
||||
|
||||
|
||||
## 2.2 Typ-Referenz & Stream-Logik (Vollständige Liste: 28 Typen)
|
||||
|
|
@ -113,7 +119,7 @@ Dieser Stream speichert deine Erlebnisse, Fakten und externes Wissen als Belege.
|
|||
|
||||
## 3. Retriever Config (`retriever.yaml`)
|
||||
|
||||
Steuert die Gewichtung der Scoring-Formel und die neuen Lifecycle-Modifier.
|
||||
Steuert die Gewichtung der Scoring-Formel und die neuen Lifecycle-Modifier. Seit WP-14 ist die mathematische Engine im Paket `app.core.retrieval` gekapselt.
|
||||
|
||||
```yaml
|
||||
version: 1.2
|
||||
|
|
@ -140,43 +146,36 @@ lifecycle_weights:
|
|||
system: 0.0 # Hard Skip via Ingestion
|
||||
|
||||
# Die nachfolgenden Werte überschreiben die Defaults aus app/core/retriever_config.
|
||||
# Wenn neue Kantentypen, z.B. durch Referenzierung innerhalb einer md-Datei im vault anders gewichtet werden sollen, dann muss hier die Konfiguration erfolgen
|
||||
edge_types:
|
||||
# --- KATEGORIE 1: LOGIK-BOOSTS (Relevanz-Treiber) ---
|
||||
# Diese Kanten haben die Kraft, das semantische Ranking aktiv umzugestalten.
|
||||
blocks: 1.6 # Kritisch: Risiken/Blocker müssen sofort sichtbar sein.
|
||||
solves: 1.5 # Zielführend: Lösungen sind primäre Suchziele.
|
||||
depends_on: 1.4 # Logisch: Harte fachliche Abhängigkeit.
|
||||
resulted_in: 1.4 # Kausal: Ergebnisse und unmittelbare Konsequenzen.
|
||||
followed_by: 1.3 # Sequenziell (User): Bewusst gesteuerte Wissenspfade.
|
||||
caused_by: 1.2 # Kausal: Ursachen-Bezug (Basis für Intent-Boost).
|
||||
preceded_by: 1.1 # Sequenziell (User): Rückwärts-Bezug in Logik-Ketten.
|
||||
blocks: 1.6
|
||||
solves: 1.5
|
||||
depends_on: 1.4
|
||||
resulted_in: 1.4
|
||||
followed_by: 1.3
|
||||
caused_by: 1.2
|
||||
preceded_by: 1.1
|
||||
|
||||
# --- KATEGORIE 2: QUALITATIVER KONTEXT (Stabilitäts-Stützen) ---
|
||||
# Diese Kanten liefern wichtigen Kontext, ohne das Ergebnis zu verfälschen.
|
||||
guides: 1.1 # Qualitativ: Prinzipien oder Werte leiten das Thema.
|
||||
part_of: 1.1 # Strukturell: Zieht übergeordnete Kontexte (Parents) mit hoch.
|
||||
based_on: 0.8 # Fundament: Bezug auf Basis-Werte (kalibriert auf Safe-Retrieval).
|
||||
derived_from: 0.6 # Historisch: Dokumentiert die Herkunft von Wissen.
|
||||
uses: 0.6 # Instrumentell: Genutzte Werkzeuge, Methoden oder Ressourcen.
|
||||
guides: 1.1
|
||||
part_of: 1.1
|
||||
based_on: 0.8
|
||||
derived_from: 0.6
|
||||
uses: 0.6
|
||||
|
||||
# --- KATEGORIE 3: THEMATISCHE NÄHE (Ähnlichkeits-Signal) ---
|
||||
# Diese Werte verhindern den "Drift" in fachfremde Bereiche.
|
||||
similar_to: 0.4 # Analytisch: Thematische Nähe (oft KI-generiert).
|
||||
similar_to: 0.4
|
||||
|
||||
# --- KATEGORIE 4: SYSTEM-NUDGES (Technische Struktur) ---
|
||||
# Reine Orientierungshilfen für das System; fast kein Einfluss auf das Ranking.
|
||||
belongs_to: 0.2 # System: Verknüpft Chunks mit der Note (Metadaten-Träger).
|
||||
next: 0.1 # System: Technische Lesereihenfolge der Absätze.
|
||||
prev: 0.1 # System: Technische Lesereihenfolge der Absätze.
|
||||
belongs_to: 0.2
|
||||
next: 0.1
|
||||
prev: 0.1
|
||||
|
||||
# --- KATEGORIE 5: WEICHE ASSOZIATIONEN (Rausch-Unterdrückung) ---
|
||||
# Verhindert, dass lose Verknüpfungen das Ergebnis "verwässern".
|
||||
references: 0.1 # Assoziativ: Einfacher Querverweis oder Erwähnung.
|
||||
related_to: 0.05 # Minimal: Schwächste thematische Verbindung.
|
||||
references: 0.1
|
||||
related_to: 0.05
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
|
||||
## 4. Edge Typen & Registry Referenz
|
||||
|
|
@ -185,7 +184,7 @@ Die `EdgeRegistry` ist die **Single Source of Truth** für das Vokabular.
|
|||
|
||||
### 4.1 Dateistruktur & Speicherort
|
||||
Die Registry erwartet eine Markdown-Datei an folgendem Ort:
|
||||
* **Standard-Pfad:** `<MINDNET_VAULT_ROOT>/01_User_Manual/01_edge_vocabulary.md`.
|
||||
* **Standard-Pfad:** `<MINDNET_VAULT_ROOT>/_system/dictionary/edge_vocabulary.md`.
|
||||
* **Custom-Pfad:** Kann via `.env` Variable `MINDNET_VOCAB_PATH` überschrieben werden.
|
||||
|
||||
### 4.2 Aufbau des Dictionaries (Markdown-Schema)
|
||||
|
|
@ -199,15 +198,10 @@ Die Datei muss eine Markdown-Tabelle enthalten, die vom Regex-Parser gelesen wir
|
|||
| **`caused_by`** | `ausgelöst_durch`, `wegen` | Kausalität: A löst B aus. |
|
||||
```
|
||||
|
||||
**Regeln für die Spalten:**
|
||||
1. **Canonical:** Muss fett gedruckt sein (`**type**` oder `**`type`**`). Dies ist der Wert, der in der DB landet.
|
||||
2. **Aliasse:** Kommagetrennte Liste von Synonymen. Diese werden beim Import automatisch zum Canonical aufgelöst.
|
||||
3. **Beschreibung:** Rein informativ für den Nutzer.
|
||||
|
||||
### 4.3 Verfügbare Kanten-Typen (System-Standard)
|
||||
|
||||
| System-Typ (Canonical) | Erlaubte Aliasse (User) | Beschreibung |
|
||||
| :--------------------- | :--------------------------------------------------- | :-------------------------------------- |
|
||||
| :--- | :--- | :--- |
|
||||
| **`caused_by`** | `ausgelöst_durch`, `wegen`, `ursache_ist` | Kausalität: A löst B aus. |
|
||||
| **`derived_from`** | `abgeleitet_von`, `quelle`, `inspiriert_durch` | Herkunft: A stammt von B. |
|
||||
| **`based_on`** | `basiert_auf`, `fundament`, `grundlage` | Fundament: B baut auf A auf. |
|
||||
|
|
@ -224,12 +218,10 @@ Die Datei muss eine Markdown-Tabelle enthalten, die vom Regex-Parser gelesen wir
|
|||
| **`references`** | *(Kein Alias)* | Standard-Verweis (Fallback). |
|
||||
| **`resulted_in`** | `ergebnis`, `resultat`, `erzeugt` | Herkunft: A erzeugt Ergebnis B |
|
||||
|
||||
**ACHTUNG!** Die Kantentypen
|
||||
**belongs_to**, **next** und **prev** dürfen nicht vom Nutzer gesetzt werden
|
||||
**ACHTUNG!** Die Kantentypen **belongs_to**, **next** und **prev** dürfen nicht vom Nutzer gesetzt werden.
|
||||
|
||||
---
|
||||
|
||||
|
||||
## 5. Decision Engine (`decision_engine.yaml`)
|
||||
|
||||
Die Decision Engine fungiert als zentraler Orchestrator für die Intent-Erkennung und das dynamische Retrieval-Routing. Sie bestimmt, wie das System auf eine Nutzeranfrage reagiert, welche Informationstypen bevorzugt werden und wie der Wissensgraph für die spezifische Situation verformt wird.
|
||||
|
|
@ -323,7 +315,4 @@ strategies:
|
|||
BITTE WÄGE FAKTEN GEGEN FOLGENDE WERTE, PRINZIPIEN UND ZIELE AB:
|
||||
|
||||
# 3. Empathie / "Ich"-Modus
|
||||
|
||||
```
|
||||
|
||||
*Richtwert für Kanten-Boosts: 0.1 (Abwertung) bis 3.0+ (Dominanz gegenüber Text-Match).*
|
||||
|
|
@ -3,15 +3,15 @@ doc_type: technical_reference
|
|||
audience: developer, architect
|
||||
scope: database, qdrant, schema
|
||||
status: active
|
||||
version: 2.7.0
|
||||
context: "Exakte Definition der Datenmodelle (Payloads) in Qdrant und Index-Anforderungen."
|
||||
version: 2.8.0
|
||||
context: "Exakte Definition der Datenmodelle (Payloads) in Qdrant und Index-Anforderungen. Berücksichtigt WP-14 Modularisierung und WP-15b Multi-Hashes."
|
||||
---
|
||||
|
||||
# Technisches Datenmodell (Qdrant Schema)
|
||||
|
||||
## 1. Collections & Namenskonvention
|
||||
|
||||
Mindnet speichert Daten in drei getrennten Qdrant-Collections. Der Prefix ist via ENV `COLLECTION_PREFIX` konfigurierbar (Default: `mindnet`).
|
||||
Mindnet speichert Daten in drei getrennten Qdrant-Collections. Der Prefix ist via ENV `COLLECTION_PREFIX` konfigurierbar (Default: `mindnet`). Die Auflösung erfolgt zentral über `app.core.database.collection_names`.
|
||||
|
||||
Das System nutzt folgende drei Collections:
|
||||
* `{prefix}_notes`: Metadaten der Dateien.
|
||||
|
|
@ -28,9 +28,10 @@ Repräsentiert die Metadaten einer Markdown-Datei (1:1 Beziehung).
|
|||
|
||||
```json
|
||||
{
|
||||
"note_id": "string (keyword)", // UUIDv5 (deterministisch) oder Slug
|
||||
"note_id": "string (keyword)", // UUIDv5 (deterministisch via NAMESPACE_URL)
|
||||
"title": "string (text)", // Titel aus Frontmatter
|
||||
"type": "string (keyword)", // Logischer Typ (z.B. 'project', 'concept')
|
||||
"status": "string (keyword)", // Lifecycle: 'stable', 'active', 'draft', 'system' (WP-22)
|
||||
"retriever_weight": "float", // Effektive Wichtigkeit (Frontmatter > Type > Default)
|
||||
"chunk_profile": "string", // Effektives Profil (Frontmatter > Type > Default)
|
||||
"edge_defaults": ["string"], // Liste der aktiven Default-Kanten
|
||||
|
|
@ -40,7 +41,7 @@ Repräsentiert die Metadaten einer Markdown-Datei (1:1 Beziehung).
|
|||
"updated": "integer", // Timestamp (File Modification Time)
|
||||
"fulltext": "string (no-index)", // Gesamter Text (nur für Recovery/Export)
|
||||
|
||||
// NEU in v2.7: Multi-Hash für flexible Change Detection
|
||||
// Multi-Hash für flexible Change Detection (WP-15b)
|
||||
"hashes": {
|
||||
"body:parsed:canonical": "string", // Hash nur über den Text-Body
|
||||
"full:parsed:canonical": "string" // Hash über Text + Metadaten (Tags, Title, Config)
|
||||
|
|
@ -52,6 +53,7 @@ Repräsentiert die Metadaten einer Markdown-Datei (1:1 Beziehung).
|
|||
Es müssen Payload-Indizes für folgende Felder existieren:
|
||||
* `note_id`
|
||||
* `type`
|
||||
* `status`
|
||||
* `tags`
|
||||
|
||||
---
|
||||
|
|
@ -61,7 +63,7 @@ Es müssen Payload-Indizes für folgende Felder existieren:
|
|||
Die atomare Sucheinheit. Enthält den Vektor.
|
||||
|
||||
**Vektor-Konfiguration:**
|
||||
* Modell: `nomic-embed-text`
|
||||
* Modell: `nomic-embed-text` (via Ollama oder Cloud)
|
||||
* Dimension: **768**
|
||||
* Metrik: Cosine Similarity
|
||||
|
||||
|
|
@ -69,7 +71,7 @@ Die atomare Sucheinheit. Enthält den Vektor.
|
|||
|
||||
```json
|
||||
{
|
||||
"chunk_id": "string (keyword)", // Format: {note_id}#c{index} (z.B. 'abc-123#c01')
|
||||
"chunk_id": "string (keyword)", // Format: UUIDv5 aus {note_id}#c{index}
|
||||
"note_id": "string (keyword)", // Foreign Key zur Note
|
||||
"type": "string (keyword)", // Kopie aus Note (Denormalisiert für Filterung)
|
||||
"text": "string (text)", // Reintext für Anzeige (ohne Overlap)
|
||||
|
|
@ -121,3 +123,4 @@ Es müssen Payload-Indizes für folgende Felder existieren:
|
|||
* `kind`
|
||||
* `scope`
|
||||
* `note_id`
|
||||
```
|
||||
|
|
@ -1,71 +1,77 @@
|
|||
---
|
||||
doc_type: technical_reference
|
||||
audience: developer, devops
|
||||
scope: backend, ingestion, smart_edges, edge_registry
|
||||
scope: backend, ingestion, smart_edges, edge_registry, modularization
|
||||
status: active
|
||||
version: 2.8.1
|
||||
context: "Detaillierte technische Beschreibung der Import-Pipeline, Mistral-safe Parsing und Deep Fallback Resilienz."
|
||||
version: 2.9.0
|
||||
context: "Detaillierte technische Beschreibung der Import-Pipeline, Two-Pass-Workflow (WP-15b) und modularer Datenbank-Architektur (WP-14). Integriert Mistral-safe Parsing und Deep Fallback."
|
||||
---
|
||||
|
||||
# Ingestion Pipeline & Smart Processing
|
||||
|
||||
**Quellen:** `pipeline_playbook.md`, `ingestion.py`, `edge_registry.py`, `01_edge_vocabulary.md`, `llm_service.py`
|
||||
**Quellen:** `pipeline_playbook.md`, `ingestion_processor.py`, `ingestion_db.py`, `ingestion_validation.py`, `registry.py`, `edge_registry.py`
|
||||
|
||||
Die Ingestion transformiert Markdown in den Graphen. Entrypoint: `scripts/import_markdown.py` (CLI) oder `routers/ingest.py` (API). Seit v2.9 nutzt dieser Prozess ein hocheffizientes **Two-Pass-Verfahren**, um globale Kontext-Informationen für die semantische Validierung bereitzustellen, ohne die Idempotenz oder die Change-Detection zu verletzen.
|
||||
|
||||
|
||||
Die Ingestion transformiert Markdown in den Graphen. Entrypoint: `scripts/import_markdown.py` (CLI) oder `routers/ingest.py` (API). Seit v2.8 integriert dieser Prozess eine **intelligente Quoten-Steuerung** (WP-20) und ein **robustes JSON-Parsing** für Cloud-Modelle (Mistral/Gemini).
|
||||
|
||||
## 1. Der Import-Prozess (16-Schritte-Workflow)
|
||||
|
||||
Der Prozess ist **asynchron** und **idempotent**.
|
||||
Der Prozess ist **asynchron**, **idempotent** und wird nun in zwei logische Durchläufe (Passes) unterteilt, um die semantische Genauigkeit zu maximieren.
|
||||
|
||||
### Phase 1: Pre-Scan & Context (Pass 1)
|
||||
1. **Trigger & Async Dispatch:**
|
||||
* **API (`/save`):** Nimmt Request entgegen, validiert und startet Background-Task ("Fire & Forget"). Antwortet sofort mit `202/Queued`.
|
||||
* **CLI:** Iteriert über Dateien und nutzt `asyncio.Semaphore` zur Drosselung.
|
||||
2. **Markdown lesen:** Rekursives Scannen des Vaults.
|
||||
2. **Markdown lesen:** Rekursives Scannen des Vaults zur Erstellung des Dateiinventars.
|
||||
3. **Frontmatter Check & Hard Skip (WP-22):**
|
||||
* Extraktion von `status` und `type`.
|
||||
* **Hard Skip Rule:** Wenn `status` in `['system', 'template', 'archive', 'hidden']` ist, wird die Datei **sofort übersprungen**. Sie wird weder vektorisiert noch in den Graphen aufgenommen.
|
||||
* **Hard Skip Rule:** Wenn `status` in `['system', 'template', 'archive', 'hidden']` ist, wird die Datei für das Deep-Processing übersprungen, ihre Metadaten werden jedoch für den Kontext-Cache erfasst.
|
||||
* Validierung der Pflichtfelder (`id`, `title`) für alle anderen Dateien.
|
||||
4. **Edge Registry Initialisierung (WP-22):**
|
||||
* Laden der Singleton-Instanz der `EdgeRegistry`.
|
||||
* Validierung der Vokabular-Datei unter `MINDNET_VOCAB_PATH`.
|
||||
5. **Config Resolution:**
|
||||
* Bestimmung von `chunking_profile` und `retriever_weight`.
|
||||
5. **Config Resolution (WP-14):**
|
||||
* Bestimmung von `chunking_profile` und `retriever_weight` via zentraler `TypeRegistry`.
|
||||
* **Priorität:** 1. Frontmatter (Override) -> 2. `types.yaml` (Type) -> 3. Global Default.
|
||||
6. **Note-Payload generieren:**
|
||||
* Erstellen des JSON-Objekts inklusive `status` (für Scoring).
|
||||
* **Multi-Hash Calculation:** Berechnet Hashtabellen für `body` (nur Text) und `full` (Text + Metadaten).
|
||||
7. **Change Detection:**
|
||||
* Vergleich des Hashes mit Qdrant.
|
||||
* Strategie wählbar via ENV `MINDNET_CHANGE_DETECTION_MODE` (`full` oder `body`).
|
||||
8. **Chunking anwenden:** Zerlegung des Textes basierend auf dem ermittelten Profil (siehe Kap. 3).
|
||||
9. **Smart Edge Allocation (WP-20):**
|
||||
* Wenn `enable_smart_edge_allocation: true`: Der `SemanticAnalyzer` sendet Chunks an das LLM.
|
||||
* **Traffic Control:** Request nutzt `priority="background"`. Semaphore drosselt die Last.
|
||||
* **Resilienz (Quota Handling):** Erkennt HTTP 429 (Rate-Limit) und pausiert kontrolliert (via `LLM_RATE_LIMIT_WAIT`), bevor ein Cloud-Retry erfolgt.
|
||||
* **Mistral-safe Parsing:** Automatisierte Bereinigung von BOS-Tokens (`<s>`) und Framework-Tags (`[OUT]`) sowie Recovery-Logik für Dictionaries (Suche nach `edges`, `links`, `results`, `kanten`).
|
||||
* **Deep Fallback (v2.11.14):** Erkennt "Silent Refusals" (Data Policy Violations). Liefert die Cloud trotz erfolgreicher Verbindung keine verwertbaren Kanten, wird ein lokaler Fallback via Ollama erzwungen, um Kantenverlust zu vermeiden.
|
||||
10. **Inline-Kanten finden:** Parsing von `[[rel:...]]`.
|
||||
11. **Alias-Auflösung & Kanonisierung (WP-22):**
|
||||
* Jede Kante wird via `edge_registry.resolve()` normalisiert.
|
||||
* Aliase (z.B. `basiert_auf`) werden zu kanonischen Typen (z.B. `based_on`) aufgelöst.
|
||||
6. **LocalBatchCache & Summary Generation (WP-15b):**
|
||||
* Erstellung von Kurz-Zusammenfassungen für jede Note.
|
||||
* Speicherung im `batch_cache` als Referenzrahmen für die spätere Kantenvalidierung.
|
||||
|
||||
### Phase 2: Semantic Processing & Persistence (Pass 2)
|
||||
7. **Note-Payload & Multi-Hash (WP-15b):**
|
||||
* Erstellen des JSON-Objekts inklusive `status`.
|
||||
* **Multi-Hash Calculation:** Berechnet Hashtabellen für `body` (nur Text) und `full` (Text + Metadaten) zur präzisen Änderungskontrolle.
|
||||
8. **Change Detection:**
|
||||
* Vergleich des aktuellen Hashes mit den Daten in Qdrant (Collection `{prefix}_notes`).
|
||||
* Strategie wählbar via ENV `MINDNET_CHANGE_DETECTION_MODE` (`full` oder `body`). Unveränderte Dateien werden hier final übersprungen.
|
||||
9. **Purge Old Artifacts (WP-14):**
|
||||
* Bei Änderungen löscht `purge_artifacts()` via `app.core.ingestion.ingestion_db` alle alten Chunks und Edges der Note.
|
||||
* Die Namensauflösung erfolgt nun über das modularisierte `database`-Paket.
|
||||
10. **Chunking anwenden:** Zerlegung des Textes basierend auf dem ermittelten Profil (siehe Kap. 3).
|
||||
11. **Smart Edge Allocation & Semantic Validation (WP-15b):**
|
||||
* Der `SemanticAnalyzer` schlägt Kanten-Kandidaten vor.
|
||||
* **Validierung:** Jeder Kandidat wird durch das LLM semantisch gegen das Ziel im **LocalBatchCache** geprüft.
|
||||
* **Traffic Control:** Nutzung der neutralen `clean_llm_text` Funktion zur Bereinigung von Steuerzeichen (`<s>`, `[OUT]`).
|
||||
* **Deep Fallback (v2.11.14):** Erkennt "Silent Refusals". Liefert die Cloud keine verwertbaren Kanten, wird ein lokaler Fallback via Ollama erzwungen.
|
||||
12. **Inline-Kanten finden:** Parsing von `[[rel:...]]` und Callouts.
|
||||
13. **Alias-Auflösung & Kanonisierung (WP-22):**
|
||||
* Jede Kante wird via `EdgeRegistry` normalisiert (z.B. `basiert_auf` -> `based_on`).
|
||||
* Unbekannte Typen werden in `unknown_edges.jsonl` protokolliert.
|
||||
12. **Callout-Kanten finden:** Parsing von `> [!edge]`.
|
||||
13. **Default- & Matrix-Edges erzeugen:** Anwendung der `edge_defaults` aus Registry und Matrix-Logik.
|
||||
14. **Strukturkanten erzeugen:** `belongs_to`, `next`, `prev`.
|
||||
15. **Embedding (Async):** Generierung via `nomic-embed-text` (768 Dim).
|
||||
16. **Diagnose:** Integritäts-Check nach dem Lauf.
|
||||
14. **Default- & Strukturkanten:** Anwendung der `edge_defaults` und Erzeugung von Systemkanten (`belongs_to`, `next`, `prev`).
|
||||
15. **Embedding (Async):** Generierung der Vektoren via `nomic-embed-text` (768 Dimensionen).
|
||||
16. **Database Sync (WP-14):** Batch-Upsert aller Points in die Collections `{prefix}_chunks` und `{prefix}_edges` über die zentrale Infrastruktur.
|
||||
|
||||
---
|
||||
|
||||
## 2. Betrieb & CLI Befehle
|
||||
|
||||
### 2.1 Standard-Betrieb (Inkrementell)
|
||||
Für regelmäßige Updates (Cronjob). Erkennt Änderungen via Hash.
|
||||
Erkennt Änderungen via Multi-Hash.
|
||||
|
||||
```bash
|
||||
export QDRANT_URL="http://localhost:6333"
|
||||
export COLLECTION_PREFIX="mindnet"
|
||||
# Steuert, wann eine Datei als "geändert" gilt
|
||||
export MINDNET_CHANGE_DETECTION_MODE="full"
|
||||
|
||||
# Nutzt das Venv der Produktionsumgebung
|
||||
|
|
@ -78,20 +84,13 @@ export MINDNET_CHANGE_DETECTION_MODE="full"
|
|||
```
|
||||
|
||||
> **[!WARNING] Purge-Before-Upsert**
|
||||
> Das Flag `--purge-before-upsert` ist kritisch. Es löscht vor dem Schreiben einer Note ihre alten Chunks/Edges. Ohne dieses Flag entstehen **"Geister-Chunks"** (alte Textabschnitte, die im Markdown gelöscht wurden, aber im Index verbleiben).
|
||||
> Das Flag `--purge-before-upsert` nutzt nun `ingestion_db.purge_artifacts`. Es ist kritisch, um "Geister-Chunks" (verwaiste Daten nach Textlöschung) konsistent aus den spezialisierten Collections zu entfernen.
|
||||
|
||||
### 2.2 Full Rebuild (Clean Slate)
|
||||
Notwendig bei Änderungen an `types.yaml` (z.B. neue Chunking-Profile), der Registry oder Modell-Wechsel.
|
||||
Notwendig bei Änderungen an `types.yaml`, der Registry oder Modell-Wechsel.
|
||||
|
||||
```bash
|
||||
# 0. Modell sicherstellen
|
||||
ollama pull nomic-embed-text
|
||||
|
||||
# 1. Qdrant Collections löschen (Wipe)
|
||||
python3 -m scripts.reset_qdrant --mode wipe --prefix "mindnet" --yes
|
||||
|
||||
# 2. Vollständiger Import (Force)
|
||||
# --force ignoriert alle Hashes und schreibt alles neu
|
||||
# --force ignoriert alle Hashes und erzwingt den vollständigen Two-Pass Workflow
|
||||
python3 -m scripts.import_markdown --vault ./vault --prefix "mindnet" --apply --force
|
||||
```
|
||||
|
||||
|
|
@ -99,22 +98,20 @@ python3 -m scripts.import_markdown --vault ./vault --prefix "mindnet" --apply --
|
|||
|
||||
## 3. Chunking & Payload
|
||||
|
||||
Das Chunking ist profilbasiert und in `types.yaml` konfiguriert.
|
||||
Das Chunking ist profilbasiert und bezieht seine Konfiguration dynamisch aus der `TypeRegistry`.
|
||||
|
||||
### 3.1 Profile und Strategien (Vollständige Referenz)
|
||||
### 3.1 Profile und Strategien
|
||||
|
||||
| Profil | Strategie | Parameter | Einsatzgebiet |
|
||||
| :--- | :--- | :--- | :--- |
|
||||
| `sliding_short` | `sliding_window` | Max: 350, Target: 200 | Kurze Logs, Chats, Risiken. |
|
||||
| `sliding_standard` | `sliding_window` | Max: 650, Target: 450 | Massendaten (Journal, Quellen). |
|
||||
| `sliding_smart_edges`| `sliding_window` | Max: 600, Target: 400 | Fließtexte mit hohem Wert (Projekte). |
|
||||
| `structured_smart_edges` | `by_heading` | `strict: false` (Soft) | Strukturierte Texte, Merging erlaubt. |
|
||||
| `structured_smart_edges_strict` | `by_heading` | `strict: true` (Hard) | **Atomare Einheiten**: Entscheidungen, Werte. |
|
||||
| `structured_smart_edges_strict_L3`| `by_heading` | `strict: true`, `level: 3` | Tief geschachtelte Prinzipien (Tier 2/MP1). |
|
||||
| `sliding_short` | `sliding_window` | Max: 350, Target: 200 | Kurze Logs, Chats. |
|
||||
| `sliding_standard` | `sliding_window` | Max: 650, Target: 450 | Standard-Wissen. |
|
||||
| `sliding_smart_edges`| `sliding_window` | Max: 600, Target: 400 | Fließtexte (Projekte). |
|
||||
| `structured_smart_edges` | `by_heading` | `strict: false` | Strukturierte Texte. |
|
||||
|
||||
### 3.2 Die `by_heading` Logik (v2.9 Hybrid)
|
||||
|
||||
Die Strategie `by_heading` zerlegt Texte anhand ihrer Struktur (Überschriften). Sie unterstützt seit v2.9 ein "Safety Net" gegen zu große Chunks.
|
||||
Die Strategie `by_heading` zerlegt Texte anhand ihrer Struktur (Überschriften). Sie unterstützt ein "Safety Net" gegen zu große Chunks.
|
||||
|
||||
* **Split Level:** Definiert die Tiefe (z.B. `2` = H1 & H2 triggern Split).
|
||||
* **Modus "Strict" (`strict_heading_split: true`):**
|
||||
|
|
@ -126,12 +123,6 @@ Die Strategie `by_heading` zerlegt Texte anhand ihrer Struktur (Überschriften).
|
|||
* **Füll-Logik:** Überschriften *auf* dem Split-Level lösen nur dann einen neuen Chunk aus, wenn der aktuelle Chunk die `target`-Größe erreicht hat.
|
||||
* *Safety Net:* Auch hier greift das `max` Token Limit.
|
||||
|
||||
### 3.3 Payload-Felder (Qdrant)
|
||||
|
||||
* `text`: Der reine Inhalt (Anzeige im UI).
|
||||
* `window`: Inhalt plus Overlap (für Embedding).
|
||||
* `chunk_profile`: Das effektiv genutzte Profil (zur Nachverfolgung).
|
||||
|
||||
---
|
||||
|
||||
## 4. Edge-Erzeugung & Prioritäten (Provenance)
|
||||
|
|
@ -143,7 +134,7 @@ Kanten werden nach Vertrauenswürdigkeit (`provenance`) priorisiert. Die höhere
|
|||
| **1** | Wikilink | `explicit:wikilink` | **1.00** | Harte menschliche Setzung. |
|
||||
| **2** | Inline | `inline:rel` | **0.95** | Typisierte menschliche Kante. |
|
||||
| **3** | Callout | `callout:edge` | **0.90** | Explizite Meta-Information. |
|
||||
| **4** | Semantic AI | `semantic_ai` | **0.90** | KI-extrahierte Verbindung (Mistral-safe). |
|
||||
| **4** | Semantic AI | `semantic_ai` | **0.90** | KI-validiert gegen LocalBatchCache. |
|
||||
| **5** | Type Default | `edge_defaults` | **0.70** | Heuristik aus der Registry. |
|
||||
| **6** | Struktur | `structure` | **1.00** | System-interne Verkettung (`belongs_to`). |
|
||||
|
||||
|
|
@ -151,18 +142,8 @@ Kanten werden nach Vertrauenswürdigkeit (`provenance`) priorisiert. Die höhere
|
|||
|
||||
## 5. Quality Gates & Monitoring
|
||||
|
||||
In v2.7+ wurden Tools zur Überwachung der Datenqualität integriert:
|
||||
**1. Registry Review (WP-14):** Prüfung der `data/logs/unknown_edges.jsonl`. Die zentrale Auflösung via `registry.py` verhindert Inkonsistenzen zwischen Import und Retrieval.
|
||||
|
||||
**1. Registry Review:** Prüfung der `data/logs/unknown_edges.jsonl`. Administratoren sollten hier gelistete Begriffe als Aliase in die `01_edge_vocabulary.md` aufnehmen.
|
||||
**2. Mistral-safe Parsing:** Automatisierte Bereinigung von LLM-Antworten in `ingestion_validation.py`. Stellt sicher, dass semantische Entscheidungen ("YES"/"NO") nicht durch technische Header verfälscht werden.
|
||||
|
||||
**2. Payload Dryrun (Schema-Check):**
|
||||
Simuliert Import, prüft JSON-Schema Konformität.
|
||||
```bash
|
||||
python3 -m scripts.payload_dryrun --vault ./test_vault
|
||||
```
|
||||
|
||||
**3. Full Edge Check (Graph-Integrität):**
|
||||
Prüft Invarianten (z.B. `next` muss reziprok zu `prev` sein).
|
||||
```bash
|
||||
python3 -m scripts.edges_full_check
|
||||
```
|
||||
**3. Purge Integrity:** Validierung, dass vor jedem Upsert alle assoziierten Artefakte in den Collections `{prefix}_chunks` und `{prefix}_edges` gelöscht wurden, um Daten-Duplikate zu vermeiden.
|
||||
|
|
@ -3,13 +3,13 @@ doc_type: technical_reference
|
|||
audience: developer, data_scientist
|
||||
scope: backend, retrieval, scoring, modularization
|
||||
status: active
|
||||
version: 2.7.1
|
||||
context: "Detaillierte Dokumentation der Scoring-Algorithmen, inklusive WP-22 Lifecycle-Modifier, Intent-Boosting und Modularisierung."
|
||||
version: 2.9.0
|
||||
context: "Detaillierte Dokumentation der Scoring-Algorithmen, inklusive WP-22 Lifecycle-Modifier, Intent-Boosting und WP-14 Modularisierung."
|
||||
---
|
||||
|
||||
# Retrieval & Scoring Algorithmen
|
||||
|
||||
Der Retriever unterstützt **Semantic Search** und **Hybrid Search**. Seit v2.4 nutzt Mindnet ein gewichtetes Scoring-Modell, das Semantik, Graphentheorie und Metadaten kombiniert. Mit Version 2.7 (WP-22) wurde dieses Modell um **Lifecycle-Faktoren** und **Intent-Boosting** erweitert sowie die Architektur modularisiert.
|
||||
Der Retriever unterstützt **Semantic Search** und **Hybrid Search**. Seit v2.4 nutzt Mindnet ein gewichtetes Scoring-Modell, das Semantik, Graphentheorie und Metadaten kombiniert. Mit Version 2.7 (WP-22) wurde dieses Modell um **Lifecycle-Faktoren** und **Intent-Boosting** erweitert sowie die Architektur modularisiert (WP-14).
|
||||
|
||||
## 1. Scoring Formel (v2.7.0)
|
||||
|
||||
|
|
@ -37,18 +37,19 @@ $$
|
|||
* **Zweck:** Belohnt Chunks, die "im Thema" vernetzt sind.
|
||||
|
||||
**4. Centrality Bonus ($B_{cent}$):**
|
||||
* **Kontext:** Berechnet im lokalen Subgraphen.
|
||||
* **Kontext:** Berechnet im lokalen Subgraphen via `graph_subgraph.centrality_bonus`.
|
||||
* **Logik:** Vereinfachte PageRank-Metrik (Degree Centrality).
|
||||
* **Zweck:** Belohnt "Hubs" mit vielen Verbindungen zu anderen Treffern.
|
||||
|
||||
### Die WP-22 Erweiterungen (v2.7.0)
|
||||
|
||||
**5. Status Modifier ($M_{status}$):**
|
||||
* **Herkunft:** Feld `status` aus dem Frontmatter.
|
||||
* **Herkunft:** Feld `status` aus dem Frontmatter (verarbeitet in `retriever_scoring.get_status_multiplier`).
|
||||
* **Zweck:** Bestraft unfertiges Wissen (Drafts) oder bevorzugt stabiles Wissen.
|
||||
* **Werte (Auftrag WP-22):** * `stable`: **1.2** (Bonus für Qualität).
|
||||
* `draft`: **0.5** (Malus für Entwürfe).
|
||||
* `system`: Exkludiert (siehe Ingestion).
|
||||
* **Werte (Auftrag WP-22):**
* `stable`: **1.2** (Belohnung für verifiziertes Wissen).
|
||||
* `active`: **1.0** (Standard-Gewichtung).
|
||||
* `draft`: **0.5** (Malus für unfertige Fragmente).
|
||||
* `system`: Exkludiert (siehe Ingestion Lifecycle Filter).
|
||||
|
||||
**6. Intent Boost ($B_{intent}$):**
|
||||
* **Herkunft:** Dynamische Injektion durch die Decision Engine basierend auf der Nutzerfrage.
|
||||
|
|
@ -56,47 +57,61 @@ $$
|
|||
|
||||
---
|
||||
|
||||
## 2. Hybrid Retrieval Flow & Modularisierung
|
||||
## 2. Hybrid Retrieval Flow & Modularisierung (WP-14)
|
||||
|
||||
In v2.7 wurde die Engine in einen Orchestrator (`retriever.py`) und eine Scoring-Engine (`retriever_scoring.py`) aufgeteilt.
|
||||
Seit v2.9 ist die Retrieval-Engine im spezialisierten Paket `app.core.retrieval` gekapselt. Die Zuständigkeiten sind strikt zwischen Orchestrierung und mathematischer Bewertung getrennt.
|
||||
|
||||
**Phase 1: Vector Search (Seed Generation)**
|
||||
* Der Orchestrator sucht Top-K (Standard: 20) Kandidaten via Embeddings in Qdrant.
|
||||
* Der Orchestrator (`retriever.py`) sucht Top-K (Standard: 20) Kandidaten via Embeddings in Qdrant über das modularisierte `app.core.database` Paket.
|
||||
* Diese bilden die "Seeds" für den Graphen.
|
||||
|
||||
**Phase 2: Graph Expansion**
|
||||
* Nutze `graph_adapter.expand(seeds, depth=1)`.
|
||||
* Lade direkte Nachbarn aus der `_edges` Collection.
|
||||
* Konstruiere einen `NetworkX`-Graphen im Speicher.
|
||||
* Nutze die Fassade `app.core.graph_adapter.expand(seeds, depth=1)`.
|
||||
* Diese delegiert an `app.core.graph.graph_subgraph`, um direkte Nachbarn aus der `_edges` Collection zu laden.
|
||||
* Konstruktion eines in-memory Graphen zur Berechnung topologischer Boni.
|
||||
|
||||
**Phase 3: Re-Ranking (Modular)**
|
||||
* Der Orchestrator übergibt den Graphen und die Seeds an die `ScoringEngine`.
|
||||
* Berechne Boni ($B_{edge}$, $B_{cent}$) sowie die neuen Lifecycle- und Intent-Modifier.
|
||||
* Sortierung absteigend nach `TotalScore` und Limitierung auf Top-Resultate (z.B. 5).
|
||||
* Der Orchestrator übergibt den Graphen und die Seeds an die `ScoringEngine` (`retriever_scoring.py`).
|
||||
* Berechnung der finalen Scores unter Berücksichtigung von $B_{edge}$, $B_{cent}$ sowie der Lifecycle- und Intent-Modifier.
|
||||
* Sortierung absteigend nach `TotalScore` und Limitierung auf die angeforderten Top-Resultate.
|
||||
|
||||
---
|
||||
|
||||
## 3. Explanation Layer (WP-22 Update)
|
||||
|
||||
Bei `explain=True` generiert das System eine detaillierte Begründung.
|
||||
Bei `explain=True` generiert das System eine detaillierte Begründung inklusive Provenienz-Informationen.
|
||||
|
||||
**Erweiterte JSON-Struktur:**
|
||||
|
||||
```json
|
||||
{
|
||||
"score_breakdown": {
|
||||
"semantic": 0.85,
|
||||
"type_boost": 1.0,
|
||||
"lifecycle_modifier": 0.5,
|
||||
"edge_bonus": 0.4,
|
||||
"intent_boost": 0.5,
|
||||
"centrality": 0.1
|
||||
"semantic_contribution": 0.85,
|
||||
"edge_contribution": 0.4,
|
||||
"centrality_contribution": 0.1,
|
||||
"raw_semantic": 0.85,
|
||||
"raw_edge_bonus": 0.3,
|
||||
"raw_centrality": 0.1,
|
||||
"node_weight": 1.0,
|
||||
"status_multiplier": 1.2,
|
||||
"graph_boost_factor": 1.5
|
||||
},
|
||||
"reasons": [
|
||||
"Hohe textuelle Übereinstimmung (>0.85).",
|
||||
"Status 'draft' reduziert Relevanz (Modifier 0.5).",
|
||||
"Wird referenziert via 'caused_by' (Intent-Bonus 0.5).",
|
||||
"Bevorzugt, da Typ 'decision' (Gewicht 1.0)."
|
||||
{
|
||||
"kind": "semantic",
|
||||
"message": "Hohe textuelle Übereinstimmung (>0.85).",
|
||||
"score_impact": 0.85
|
||||
},
|
||||
{
|
||||
"kind": "type",
|
||||
"message": "Bevorzugt durch Typ-Profil.",
|
||||
"score_impact": 0.1
|
||||
},
|
||||
{
|
||||
"kind": "edge",
|
||||
"message": "Bestätigte Kante 'caused_by' [Boost x1.5] von 'Note-A'.",
|
||||
"score_impact": 0.4
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
|
@ -105,18 +120,18 @@ Bei `explain=True` generiert das System eine detaillierte Begründung.
|
|||
|
||||
## 4. Konfiguration (`retriever.yaml`)
|
||||
|
||||
Steuert die Gewichtung der mathematischen Komponenten.
|
||||
Steuert die globale Gewichtung der mathematischen Komponenten.
|
||||
|
||||
```yaml
|
||||
scoring:
|
||||
semantic_weight: 1.0 # Basis-Relevanz
|
||||
edge_weight: 0.7 # Graphen-Einfluss
|
||||
centrality_weight: 0.5 # Hub-Einfluss
|
||||
semantic_weight: 1.0 # Basis-Relevanz (W_sem)
|
||||
edge_weight: 0.7 # Graphen-Einfluss (W_edge)
|
||||
centrality_weight: 0.5 # Hub-Einfluss (W_cent)
|
||||
|
||||
# WP-22 Lifecycle Konfiguration (Abgleich mit Auftrag)
|
||||
# WP-22 Lifecycle Konfiguration
|
||||
lifecycle_weights:
|
||||
stable: 1.2 # Bonus für Qualität
|
||||
draft: 0.5 # Malus für Entwürfe
|
||||
stable: 1.2 # Modifier für Qualität
|
||||
draft: 0.5 # Modifier für Entwürfe
|
||||
|
||||
# Kanten-Gewichtung für den Edge-Bonus (Basis)
|
||||
edge_weights:
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
---
|
||||
doc_type: developer_guide
|
||||
audience: developer
|
||||
scope: workflow, testing, architecture, modules
|
||||
scope: workflow, testing, architecture, modules, modularization
|
||||
status: active
|
||||
version: 2.6.1
|
||||
context: "Umfassender Guide für Entwickler: Architektur, Modul-Interna (Deep Dive), Setup, Git-Workflow und Erweiterungs-Anleitungen."
|
||||
version: 2.9.1
|
||||
context: "Umfassender Guide für Entwickler: Modularisierte Architektur (WP-14), Two-Pass Ingestion (WP-15b), Modul-Interna, Setup und Git-Workflow."
|
||||
---
|
||||
|
||||
# Mindnet Developer Guide & Workflow
|
||||
|
|
@ -23,8 +23,6 @@ Dieser Guide ist die zentrale technische Referenz für Mindnet v2.6. Er vereint
|
|||
- [Kern-Philosophie](#kern-philosophie)
|
||||
- [2. Architektur](#2-architektur)
|
||||
- [2.1 High-Level Übersicht](#21-high-level-übersicht)
|
||||
- [2.2 Datenfluss-Muster](#22-datenfluss-muster)
|
||||
- [A. Ingestion (Write)](#a-ingestion-write)
|
||||
- [B. Retrieval (Read)](#b-retrieval-read)
|
||||
- [C. Visualisierung (Graph)](#c-visualisierung-graph)
|
||||
- [3. Physische Architektur](#3-physische-architektur)
|
||||
|
|
@ -84,23 +82,28 @@ graph TD
|
|||
API["main.py"]
|
||||
RouterChat["Chat / RAG"]
|
||||
RouterIngest["Ingest / Write"]
|
||||
CoreRet["Retriever Engine"]
|
||||
CoreIngest["Ingestion Pipeline"]
|
||||
|
||||
subgraph "Core Packages (WP-14)"
|
||||
PkgRet["retrieval/ (Search)"]
|
||||
PkgIng["ingestion/ (Import)"]
|
||||
PkgGra["graph/ (Logic)"]
|
||||
PkgDb["database/ (Infrastr.)"]
|
||||
Registry["registry.py (Neutral)"]
|
||||
end
|
||||
end
|
||||
|
||||
subgraph "Infrastructure & Services"
|
||||
LLM["Ollama (Phi3/Nomic)"]
|
||||
LLM["Ollama / Cloud (Hybrid)"]
|
||||
DB[("Qdrant Vector DB")]
|
||||
FS["File System (.md)"]
|
||||
end
|
||||
|
||||
User <--> UI
|
||||
UI -- "REST (Chat, Save, Feedback)" --> API
|
||||
UI -. "Direct Read (Graph Viz Performance)" .-> DB
|
||||
API -- "Embeddings & Completion" --> LLM
|
||||
API -- "Read/Write" --> DB
|
||||
API -- "Read/Write (Source of Truth)" --> FS
|
||||
```
|
||||
UI -- "REST Call" --> API
|
||||
PkgRet -- "Direct Query" --> PkgDb
|
||||
PkgIng -- "Process & Write" --> PkgDb
|
||||
PkgDb -- "API" --> DB
|
||||
API -- "Inference" --> LLM
```
|
||||
|
||||
### 2.2 Datenfluss-Muster
|
||||
|
||||
|
|
@ -108,14 +111,12 @@ graph TD
|
|||
Vom Markdown zur Vektor-Datenbank.
|
||||
```mermaid
|
||||
graph LR
|
||||
MD["Markdown File"] --> Parser("Parser")
|
||||
Parser --> Chunker("Chunker")
|
||||
Chunker -- "Text Chunks" --> SemAn{"SemanticAnalyzer<br/>(LLM)"}
|
||||
SemAn -- "Smart Edges" --> Embedder("Embedder")
|
||||
Embedder --> DB[("Qdrant<br/>Points")]
|
||||
|
||||
style DB fill:#f9f,stroke:#333,stroke-width:2px
|
||||
style SemAn fill:#ff9,stroke:#333,stroke-width:2px
|
||||
MD["Markdown File"] --> Pass1["Pass 1: Pre-Scan"]
|
||||
Pass1 --> Cache[("LocalBatchCache<br/>(Titles/Summaries)")]
|
||||
MD --> Pass2["Pass 2: Processing"]
|
||||
Cache -- "Context" --> SmartEdges{"Smart Edge<br/>Validation"}
|
||||
SmartEdges --> Embedder("Embedder")
|
||||
Embedder --> DB[("Qdrant Points")]
|
||||
```
|
||||
|
||||
#### B. Retrieval (Read)
|
||||
|
|
@ -123,17 +124,10 @@ Die hybride Suche für Chat & RAG.
|
|||
```mermaid
|
||||
graph LR
|
||||
Query(["Query"]) --> Embed("Embedding")
|
||||
Embed --> Hybrid{"Hybrid Search"}
|
||||
|
||||
subgraph Search Components
|
||||
Vec["Vector Score"]
|
||||
Graph["Graph/Edge Bonus"]
|
||||
end
|
||||
|
||||
Vec --> Hybrid
|
||||
Graph --> Hybrid
|
||||
|
||||
Hybrid --> Rank("Re-Ranking")
|
||||
Embed --> Seed["Seed Search (Vector)"]
|
||||
Seed --> Expand{"Graph Expansion"}
|
||||
Expand --> Scoring["Scoring Engine (WP-22)"]
|
||||
Scoring --> Rank("Final Ranking")
|
||||
Rank --> Ctx["LLM Context"]
|
||||
```
|
||||
|
||||
|
|
@ -170,6 +164,12 @@ Das System ist modular aufgebaut. Hier ist die detaillierte Analyse aller Kompon
|
|||
mindnet/
|
||||
├── app/
|
||||
│ ├── core/ # Business Logic & Algorithms
|
||||
│ │ ├── database/ # WP-14: Qdrant Client & Point Mapping
|
||||
│ │ ├── ingestion/ # WP-14: Pipeline, Multi-Hash, Validation
|
||||
│ │ ├── retrieval/ # WP-14: Search Orchestrator & Scoring
|
||||
│ │ ├── graph/ # WP-14: Subgraph-Logik & Weights
|
||||
│ │ ├── registry.py # SSOT: Circular Import Fix & Text Cleanup
|
||||
│ │ └── *.py (Proxy) # Legacy Bridges für Abwärtskompatibilität
|
||||
│ ├── routers/ # API Interface (FastAPI)
|
||||
│ ├── services/ # External Integrations (LLM, DB)
|
||||
│ ├── models/ # Pydantic DTOs
|
||||
|
|
@ -285,6 +285,8 @@ Folgende Dateien wurden im Audit v2.6 als veraltet, redundant oder "Zombie-Code"
|
|||
| `app/core/type_registry.py` | **Redundant.** Logik in `ingestion.py` integriert. | 🗑️ Löschen |
|
||||
| `app/core/env_vars.py` | **Veraltet.** Ersetzt durch `config.py`. | 🗑️ Löschen |
|
||||
| `app/services/llm_ollama.py` | **Veraltet.** Ersetzt durch `llm_service.py`. | 🗑️ Löschen |
|
||||
| `app/core/type_registry.py` | **Redundant.** Logik in `app/core/registry.py` integriert. | 🗑️ Löschen |
|
||||
| `app/core/ranking.py` | **Redundant.** Logik in `retrieval/retriever_scoring.py` integriert. | 🗑️ Löschen |
|
||||
|
||||
---
|
||||
|
||||
|
|
|
|||
|
|
@ -2,18 +2,14 @@
|
|||
doc_type: roadmap
|
||||
audience: product_owner, developer
|
||||
status: active
|
||||
version: 2.8.0
|
||||
context: "Aktuelle Planung für kommende Features (ab WP16), Release-Strategie und Historie der abgeschlossenen WPs."
|
||||
version: 2.9.1
|
||||
context: "Aktuelle Planung für kommende Features (ab WP16), Release-Strategie und Historie der abgeschlossenen WPs nach WP-14/15b."
|
||||
---
|
||||
|
||||
# Mindnet Active Roadmap
|
||||
|
||||
**Aktueller Stand:** v2.8.0 (Post-WP20/WP76)
|
||||
**Fokus:** Visualisierung, Exploration & Cloud-Resilienz.
|
||||
|
||||
## 1. Programmstatus
|
||||
|
||||
Wir haben mit der Implementierung des Graph Explorers (WP19), der Smart Edge Allocation (WP15) und der hybriden Cloud-Resilienz (WP20) die Basis für ein intelligentes, robustes System gelegt. Der nächste Schritt (WP19a) vertieft die Analyse, während WP16 die "Eingangs-Intelligenz" erhöht.
|
||||
**Aktueller Stand:** v2.9.1 (Post-WP14 / WP-15b)
|
||||
**Fokus:** Modularisierung, Two-Pass Ingestion & Graph Intelligence.
|
||||
|
||||
| Phase | Fokus | Status |
|
||||
| :--- | :--- | :--- |
|
||||
|
|
@ -45,6 +41,8 @@ Eine Übersicht der implementierten Features zum schnellen Auffinden von Funktio
|
|||
| **WP-10** | Web UI | Streamlit-Frontend als Ersatz für das Terminal. |
|
||||
| **WP-10a**| Draft Editor | GUI-Komponente zum Bearbeiten und Speichern generierter Notizen. |
|
||||
| **WP-11** | Backend Intelligence | `nomic-embed-text` (768d) und Matrix-Logik für Kanten-Typisierung. |
|
||||
| **WP-14** | **Modularisierung & Refactoring** | **Ergebnis:** Aufteilung in domänenspezifische Pakete (`database`, `ingestion`, `retrieval`, `graph`). Implementierung von Proxy-Adaptern für Abwärtskompatibilität und `registry.py` zur Lösung von Zirkelbezügen. |
|
||||
| **WP-15b**| **Candidate-Based Validation** | **Ergebnis:** Implementierung des **Two-Pass Workflows**. Einführung des `LocalBatchCache` und binäre semantische Validierung von Kanten-Kandidaten zur Vermeidung von Halluzinationen. |
|
||||
| **WP-15** | Smart Edge Allocation | LLM-Filter für Kanten in Chunks + Traffic Control (Semaphore) + Strict Chunking. |
|
||||
| **WP-19** | Graph Visualisierung | **Frontend Modularisierung:** Umbau auf `ui_*.py`.<br>**Graph Engines:** Parallelbetrieb von Cytoscape (COSE) und Agraph.<br>**Tools:** "Single Source of Truth" Editor, Persistenz via URL. |
|
||||
| **WP-20** | **Cloud Hybrid Mode & Resilienz** | **Ergebnis:** Integration von OpenRouter (Mistral 7B) & Gemini 2.5 Lite. Implementierung von WP-76 (Rate-Limit Wait) & Mistral-safe JSON Parsing. |
|
||||
|
|
@ -59,6 +57,10 @@ Eine Übersicht der implementierten Features zum schnellen Auffinden von Funktio
|
|||
* **Quoten-Management:** Die Nutzung von Free-Tier Modellen (Mistral/OpenRouter) erfordert zwingend eine intelligente Rate-Limit-Erkennung (HTTP 429) mit automatisierten Wartezyklen, um Batch-Prozesse stabil zu halten.
|
||||
* **Parser-Robustheit:** Cloud-Modelle betten JSON oft in technische Steuerzeichen (`<s>`, `[OUT]`) ein. Ein robuster Extraktor mit Recovery-Logik ist essentiell zur Vermeidung von Datenverlust.
|
||||
|
||||
### 2.3 WP-14 & WP-15b Lessons Learned
|
||||
* **Performance:** Der Pre-Scan (Pass 1) ist minimal invasiv, ermöglicht aber in Pass 2 eine drastische Reduktion der LLM-Kosten, da nur noch binär validiert werden muss, anstatt komplexe Extraktionen durchzuführen.
|
||||
* **Wartbarkeit:** Durch die Paket-Struktur können DB-Adapter (z.B. für Qdrant) nun unabhängig von der Business-Logik (Scoring) aktualisiert werden.
|
||||
*
|
||||
---
|
||||
|
||||
## 3. Offene Workpackages (Planung)
|
||||
|
|
@ -93,6 +95,20 @@ Diese Features stehen als nächstes an oder befinden sich in der Umsetzung.
|
|||
- Aufwand: Mittel
|
||||
- Komplexität: Niedrig/Mittel
|
||||
|
||||
|
||||
|
||||
### WP-13 – MCP-Integration & Agenten-Layer
|
||||
**Status:** 🟡 Geplant
|
||||
**Ziel:** mindnet als MCP-Server bereitstellen, damit Agenten (Claude Desktop, OpenAI) standardisierte Tools nutzen können.
|
||||
* **Umfang:** MCP-Server mit Tools (`mindnet_query`, `mindnet_explain`, etc.).
|
||||
|
||||
### WP-14 – Review / Refactoring / Dokumentation
|
||||
**Status:** 🟡 Laufend (Phase E)
|
||||
**Ziel:** Technische Schulden abbauen, die durch schnelle Feature-Entwicklung (WP15/WP19) entstanden sind.
|
||||
* **Refactoring `chunker.py`:** Die Datei ist monolithisch geworden (Parsing, Strategien, LLM-Orchestrierung).
|
||||
* *Lösung:* Aufteilung in ein Package `app/core/chunking/` mit Modulen (`strategies.py`, `orchestration.py`, `utils.py`).
|
||||
* **Dokumentation:** Kontinuierliche Synchronisation von Code und Docs (v2.8 Stand).
|
||||
|
||||
### WP-15b – Candidate-Based Edge Validation & Inheritance
|
||||
**Phase:** B/E (Refactoring & Semantic)
|
||||
**Status:** 🚀 Startklar (Ersatz für WP-15 Logik)
|
||||
|
|
@ -113,19 +129,6 @@ Der bisherige WP-15 Ansatz litt unter Halluzinationen (erfundene Kantentypen), h
|
|||
* **Chunker-Update:** Implementierung einer `propagate_edges`-Logik für "by_heading" und "sliding_window" Strategien.
|
||||
* **Ingestion-Update:** Umstellung von `_perform_smart_edge_allocation` auf einen binären Validierungs-Prompt (VALID/INVALID).
|
||||
|
||||
### WP-19a – Graph Intelligence & Discovery (Sprint-Fokus)
|
||||
**Status:** 🚀 Startklar
|
||||
**Ziel:** Vom "Anschauen" zum "Verstehen". Deep-Dive Werkzeuge für den Graphen.
|
||||
* **Discovery Screen:** Neuer Tab für semantische Suche ("Finde Notizen über Vaterschaft") und Wildcard-Filter.
|
||||
* **Filter-Logik:** "Zeige nur Wege, die zu `type:decision` führen".
|
||||
* **Chunk Inspection:** Umschaltbare Granularität (Notiz vs. Chunk) zur Validierung des Smart Chunkers.
|
||||
|
||||
### WP-14 – Review / Refactoring / Dokumentation
|
||||
**Status:** 🟡 Laufend (Phase E)
|
||||
**Ziel:** Technische Schulden abbauen, die durch schnelle Feature-Entwicklung (WP15/WP19) entstanden sind.
|
||||
* **Refactoring `chunker.py`:** Die Datei ist monolithisch geworden (Parsing, Strategien, LLM-Orchestrierung).
|
||||
* *Lösung:* Aufteilung in ein Package `app/core/chunking/` mit Modulen (`strategies.py`, `orchestration.py`, `utils.py`).
|
||||
* **Dokumentation:** Kontinuierliche Synchronisation von Code und Docs (v2.8 Stand).
|
||||
|
||||
### WP-16 – Auto-Discovery & Intelligent Ingestion
|
||||
**Status:** 🟡 Geplant
|
||||
|
|
@ -153,10 +156,13 @@ Der bisherige WP-15 Ansatz litt unter Halluzinationen (erfundene Kantentypen), h
|
|||
* **Feature:** Cronjob `check_graph_integrity.py`.
|
||||
* **Funktion:** Findet "Dangling Edges" (Links auf gelöschte Notizen) und repariert/löscht sie.
|
||||
|
||||
### WP-13 – MCP-Integration & Agenten-Layer
|
||||
**Status:** 🟡 Geplant
|
||||
**Ziel:** mindnet als MCP-Server bereitstellen, damit Agenten (Claude Desktop, OpenAI) standardisierte Tools nutzen können.
|
||||
* **Umfang:** MCP-Server mit Tools (`mindnet_query`, `mindnet_explain`, etc.).
|
||||
### WP-19a – Graph Intelligence & Discovery (Sprint-Fokus)
|
||||
**Status:** 🚀 Startklar
|
||||
**Ziel:** Vom "Anschauen" zum "Verstehen". Deep-Dive Werkzeuge für den Graphen.
|
||||
* **Discovery Screen:** Neuer Tab für semantische Suche ("Finde Notizen über Vaterschaft") und Wildcard-Filter.
|
||||
* **Filter-Logik:** "Zeige nur Wege, die zu `type:decision` führen".
|
||||
* **Chunk Inspection:** Umschaltbare Granularität (Notiz vs. Chunk) zur Validierung des Smart Chunkers.
|
||||
|
||||
|
||||
### WP-21 – Semantic Graph Routing & Canonical Edges
|
||||
**Status:** 🟡 Geplant
|
||||
|
|
@ -185,44 +191,42 @@ Der bisherige WP-15 Ansatz litt unter Halluzinationen (erfundene Kantentypen), h
|
|||
2. **Single Source of Truth (SSOT):** Die Registry nutzt `01_edge_vocabulary.md` als führende Konfiguration.
|
||||
3. **Self-Learning Loop:** Protokollierung unbekannter Kanten in `unknown_edges.jsonl`.
|
||||
|
||||
## 23: Agentic Multi-Stream Reasoning (Mindnet 2025)
|
||||
### WP-23: Agentic Multi-Stream Reasoning (Mindnet 2025)
|
||||
|
||||
### 1. Zielsetzung & Problemstellung
|
||||
#### 1. Zielsetzung & Problemstellung
|
||||
Das bisherige System basiert auf einem globalen Scoring-Modell, bei dem Notizen unterschiedlicher Typen (z. B. `insight` vs. `belief`) in einem einzigen Retrieval-Topf konkurrieren. Dies führt dazu, dass leiser gewichtete, aber fundamentale Identitätsmerkmale oft durch hochgewichtete aktuelle Erkenntnisse verdrängt werden. Ziel dieses Pakets ist die Einführung einer parallelen **Stream-Architektur**, um die Vielschichtigkeit menschlicher Entscheidungsprozesse (Werte + Erfahrung + Absicht) im LLM-Kontext zu garantieren.
|
||||
|
||||
---
|
||||
|
||||
### 2. Funktionsbeschreibung: Die Streams
|
||||
#### 2. Funktionsbeschreibung: Die Streams
|
||||
Die Daten aus der `types.yaml` werden in drei logische Verarbeitungseinheiten unterteilt:
|
||||
|
||||
#### A. Identity Stream (Die Wahrheitsebene)
|
||||
##### A. Identity Stream (Die Wahrheitsebene)
|
||||
* **Inhalt:** `value`, `belief`, `trait`, `principle`, `need`, `boundary`, `bias`.
|
||||
* **Zweck:** Definition des moralischen Kompasses, der psychologischen Grundbedürfnisse und kognitiven Muster.
|
||||
* **Wirkung:** Liefert das "Warum" hinter jeder Handlung.
|
||||
|
||||
#### B. History Stream (Die Evidenzebene)
|
||||
##### B. History Stream (Die Evidenzebene)
|
||||
* **Inhalt:** `experience`, `event`, `source`, `journal`, `person`.
|
||||
* **Zweck:** Bereitstellung empirischer Belege aus der Vergangenheit und sozialer Kontexte.
|
||||
* **Wirkung:** Verankert die Antwort in real erlebten Mustern und Fakten.
|
||||
|
||||
#### C. Action Stream (Die Dynamikebene)
|
||||
##### C. Action Stream (Die Dynamikebene)
|
||||
* **Inhalt:** `project`, `decision`, `goal`, `task`, `risk`, `motivation`, `habit`, `state`.
|
||||
* **Zweck:** Analyse der aktuellen Richtung, geplanter Vorhaben und des gegenwärtigen Zustands.
|
||||
* **Wirkung:** Liefert den Kontext für die Umsetzung und zukünftige Ziele.
|
||||
|
||||
|
||||
### 3. Technische Wirkungsweise (Solution Sketch)
|
||||
#### 3. Technische Wirkungsweise (Solution Sketch)
|
||||
|
||||
#### Schritt 1: Query-Decomposition
|
||||
##### Schritt 1: Query-Decomposition
|
||||
Ein initialer Klassifizierungs-Agent analysiert die Nutzeranfrage und bestimmt, welcher Stream primär angesprochen werden muss (z. B. "Wie soll ich mich entscheiden?" boostet den Identity Stream).
|
||||
|
||||
#### Schritt 2: Parallel Stream Retrieval
|
||||
##### Schritt 2: Parallel Stream Retrieval
|
||||
Anstelle einer einzelnen Suche werden drei unabhängige Vektor-Suchen mit Typ-Filtern durchgeführt:
|
||||
* **Search_A (Identity):** Top-5 Ergebnisse aus Identitäts-Notizen.
|
||||
* **Search_B (History):** Top-5 Ergebnisse aus biografischen/externen Notizen.
|
||||
* **Search_C (Action):** Top-5 Ergebnisse aus operativen/strategischen Notizen.
|
||||
|
||||
#### Schritt 3: Agentic Synthesis (The Reasoning)
|
||||
##### Schritt 3: Agentic Synthesis (The Reasoning)
|
||||
Ein Synthese-Agent (LLM) erhält die aggregierten Ergebnisse in getrennten Sektionen. Die Anweisung lautet:
|
||||
1. **Prüfung:** Steht das aktuelle Vorhaben (Action) im Einklang mit den Werten (Identity)?
|
||||
2. **Abgleich:** Welche vergangenen Erfahrungen (History) stützen oder widersprechen diesem Weg?
|
||||
|
|
@ -230,12 +234,39 @@ Ein Synthese-Agent (LLM) erhält die aggregierten Ergebnisse in getrennten Sekti
|
|||
|
||||
|
||||
|
||||
### 4. Erwartete Ergebnisse
|
||||
#### 4. Erwartete Ergebnisse
|
||||
* **Höhere Resonanz:** Antworten wirken authentischer, da sie explizit auf das Wertesystem des Nutzers Bezug nehmen.
|
||||
* **Widerspruchs-Erkennung:** Das System kann den Nutzer aktiv warnen, wenn ein Projekt gegen seine `principles` oder `needs` verstößt.
|
||||
* **Robustes Retrieval:** Wichtige Identitäts-Informationen gehen nicht mehr im "Rauschen" von hunderten Journal-Einträgen verloren.
|
||||
---
|
||||
|
||||
### WP-24 – Proactive Discovery & Agentic Knowledge Mining
|
||||
**Status:** 🚀 In Planung (Nächster Architektur-Sprung)
|
||||
**Ziel:** Transformation von Mindnet von einem reaktiven Archiv zu einem aktiven Denkpartner. Das System soll aktiv Wissenslücken schließen und verborgene Querverbindungen in großen Vaults sowie in Chat-Dialogen aufspüren.
|
||||
|
||||
**Herausforderung:**
|
||||
1. **Silo-Effekt:** Bei wachsenden Vaults vergisst der Nutzer existierende Notizen und erstellt redundante Inhalte ohne Verknüpfung.
|
||||
2. **Insight-Verlust:** Im Chat entstehen wertvolle Synthesen, die momentan im flüchtigen Chat-Log vergraben bleiben.
|
||||
|
||||
**Lösungsskizze & Strategie:**
|
||||
|
||||
#### A. Proactive Discovery (Vault-Scanning)
|
||||
Das System nutzt die existierende `candidate_pool` Logik aus WP-15b, befüllt diese jedoch automatisiert:
|
||||
* **Vector Similarity Search**: Beim Import einer Note (oder als periodischer Hintergrundprozess) sucht der neue `RecommenderService` in Qdrant nach den Top-X semantisch ähnlichsten Chunks im gesamten Vault.
|
||||
* **Auto-Injection**: Diese Funde werden automatisch als `related_to` Kandidaten in den `candidate_pool` der neuen Note injiziert.
|
||||
* **WP-15b Filter**: Das LLM validiert diese Vorschläge im zweiten Pass der Ingestion gegen den Kontext. Nur was semantisch wirklich passt, wird als Kante im Graphen persistiert.
|
||||
|
||||
#### B. Agentic Knowledge Mining (Chat-to-Vault)
|
||||
Integration von Informationen aus dem Dialog direkt in den Graphen:
|
||||
* **Intent Detection**: Das Chat-Backend erkennt „notierwürdige“ Informationen (z.B. neue Prinzipien, Strategie-Entwürfe oder Werte-Anpassungen).
|
||||
* **Auto-Drafting**: Das LLM nutzt das `interview_template`, um aus dem Chat-Fragment eine valide Markdown-Datei mit Frontmatter (Status: `draft`) zu generieren.
|
||||
* **Real-Time Linking**: Die neue Datei wird sofort dem „Discovery-Lauf“ (Teil A) unterzogen, um sie mit dem bestehenden Wissensschatz zu vernetzen.
|
||||
* **User Review**: Die generierte Notiz erscheint im `00_Inbox` Ordner. Der Nutzer muss lediglich den Status auf `stable` setzen, um die Entdeckungen final zu integrieren.
|
||||
|
||||
**Erwartete Ergebnisse:**
|
||||
* Eliminierung von Wissens-Silos durch automatische Vernetzung.
|
||||
* Nahtloser Übergang von der Exploration (Chat) zur Konsolidierung (Vault).
|
||||
* Vermeidung von Dubletten durch Ähnlichkeits-Warnungen beim Import.
|
||||
## 4. Abhängigkeiten & Release-Plan
|
||||
|
||||
```mermaid
|
||||
|
|
@ -244,6 +275,8 @@ graph TD
|
|||
WP19a --> WP17(Memory)
|
||||
WP15(Smart Edges) --> WP16(Auto-Discovery)
|
||||
WP15 --> WP14(Refactoring)
|
||||
WP15(Smart Edges) --> WP15b(Candidate Validation)
|
||||
WP15b --> WP24(Proactive Discovery)
|
||||
WP03(Import) --> WP18(Health Check)
|
||||
WP03 --> WP13(MCP)
|
||||
WP04 --> WP13(MCP)
|
||||
|
|
@ -253,4 +286,5 @@ graph TD
|
|||
WP22 --> WP14
|
||||
WP15(Smart Edges) --> WP21
|
||||
WP20(Cloud Hybrid) --> WP15b
|
||||
WP24 --> WP23(Multi-Stream Reasoning)
|
||||
```
|
||||
|
|
@ -316,3 +316,44 @@ Die Gewichtung findet **Pre-Retrieval** (im Scoring-Algorithmus) statt, **nicht*
|
|||
3. Zeige die Erweiterung in `scoring.py` (Status-Gewicht & Dynamic Edge Boosting).
|
||||
|
||||
Bitte bestätige die Übernahme dieses Architektur-Pakets.
|
||||
|
||||
---
|
||||
|
||||
# Übergabe Arbeitspaket: WP-24 – Proactive Discovery & Agentic Knowledge Mining
|
||||
|
||||
## 1. Projekt-Kontext
|
||||
Wir arbeiten an **Mindnet**, einem System für einen "digitalen Zwilling". Das System nutzt einen Wissensgraphen (Qdrant), asynchrone Ingestion und eine hybride LLM-Infrastruktur (Cloud/Lokal).
|
||||
|
||||
## 2. Status Quo (Abgeschlossen: WP-15b)
|
||||
Das Arbeitspaket **WP-15b (Candidate-Based Validation)** wurde gerade erfolgreich implementiert.
|
||||
* **Two-Pass Workflow:** In Pass 1 wird ein globaler `LocalBatchCache` aufgebaut (ID, Titel, Dateiname). In Pass 2 findet eine semantische binäre Validierung (YES/NO) statt.
|
||||
* **Edge Inheritance:** Kanten werden aus Sektionen und Frontmatter an Chunks vererbt.
|
||||
* **Candidate Pool:** Nur Kanten in der Sektion `## Unzugeordnete Kanten` (Provenienz: `global_pool`) werden vom LLM geprüft. Explizite Kanten (`[!edge]` im Text) werden direkt übernommen.
|
||||
|
||||
## 3. Auftrag: WP-24 – Proactive Discovery & Agentic Knowledge Mining
|
||||
Das Ziel ist die Transformation von Mindnet zu einem aktiven Denkpartner.
|
||||
|
||||
### Teil A: Proactive Discovery (Vault-Scanning)
|
||||
* **Mechanismus:** Automatisches Befüllen des `candidate_pool` via Vektor-Ähnlichkeit.
|
||||
* **Logik:** Beim Import einer Note sucht ein neuer Service in Qdrant nach den semantisch ähnlichsten Chunks im Vault und fügt diese als `related_to` Kandidaten hinzu.
|
||||
* **Filter:** Die WP-15b Validierungs-Logik filtert diese Vorschläge anschließend.
|
||||
|
||||
### Teil B: Agentic Knowledge Mining (Chat-to-Vault)
|
||||
* **Mechanismus:** Extraktion notierwürdiger Informationen aus dem Chat.
|
||||
* **Logik:** Erstellung von Markdown-Drafts im `00_Inbox` Ordner basierend auf dem Chat-Kontext unter Nutzung des `interview_template`.
|
||||
|
||||
## 4. Erforderliche Code-Basis (Dateien)
|
||||
Stelle sicher, dass dir folgende Dateien vorliegen, um die Logik zu verstehen und zu erweitern:
|
||||
|
||||
1. **`app/core/ingestion.py` (v2.12.2):** Zentraler Two-Pass Workflow und Validierungsgate.
|
||||
2. **`app/core/chunker.py` (v3.2.0):** Vorbereitung des Candidate-Pools und Vererbungslogik.
|
||||
3. **`scripts/import_markdown.py` (v2.4.1):** Entry-Point und Pre-Scan Harvester für den Cache.
|
||||
4. **`app/core/derive_edges.py` (v2.1.0):** Aggregator für Kanten mit Provenance-Priorisierung.
|
||||
5. **`app/services/edge_registry.py` (v0.8.0):** Validierung gegen das Kanten-Vokabular.
|
||||
6. **`config/prompts.yaml` (v2.6.0):** Enthält die `edge_validation` und `interview_template` Prompts.
|
||||
7. **`06_active_roadmap.md` (v2.9.0):** Enthält die detaillierte Planung für WP-24.
|
||||
|
||||
## 5. Nächste technische Schritte
|
||||
1. Entwurf eines `RecommenderService` für die Vektor-Suche in Qdrant.
|
||||
2. Integration des Services in die `ingestion.py` zur automatischen Befüllung des `candidate_pool`.
|
||||
3. Erweiterung des Chat-Backends um die "Capture-to-Vault" Funktionalität.
|
||||
|
|
@ -92,3 +92,22 @@ Dieses Dokument dient als Referenz für die Entstehungsgeschichte von Mindnet v2
|
|||
* **Graph Explorer:** Einführung von `st-cytoscape` für stabile, nicht-überlappende Layouts (COSE) als Ergänzung zur Legacy-Engine (Agraph).
|
||||
* **Single Source of Truth:** Der Editor lädt Inhalte nun direkt vom Dateisystem statt aus (potenziell veralteten) Vektor-Payloads.
|
||||
* **UX:** Einführung von URL-Persistenz für Layout-Settings und CSS-basiertes Highlighting zur Vermeidung von Re-Renders.
|
||||
|
||||
|
||||
## Phase E+: Architektur-Konsolidierung (WP-14)
|
||||
|
||||
### WP-14 – Modularisierung & Paket-Struktur
|
||||
* **Ziel:** Auflösung technischer Schulden und Beseitigung von Zirkelbezügen (Circular Imports).
|
||||
* **Ergebnis:**
|
||||
* **Domänen-Pakete:** Aufteilung der monolithischen `app/core/` Struktur in spezialisierte Pakete: `database/`, `ingestion/`, `retrieval/` und `graph/`.
|
||||
* **Proxy-Pattern:** Einsatz von Fassaden-Modulen (z. B. `graph_adapter.py`) zur Aufrechterhaltung der Abwärtskompatibilität für bestehende API-Endpunkte.
|
||||
* **Registry-Zentralisierung:** Auslagerung neutraler Hilfsfunktionen (wie `clean_llm_text`) in eine unabhängige `registry.py`, um Abhängigkeitsschleifen zwischen Diensten zu brechen.
|
||||
* **Tech:** Einführung von `__init__.py` Exporten zur Definition sauberer Paket-Schnittstellen.
|
||||
|
||||
### WP-15b – Two-Pass Ingestion & Candidate Validation
|
||||
* **Problem:** Die ursprüngliche Smart Edge Extraktion (WP-15) war teuer und neigte zu Halluzinationen, da sie ohne globalen Kontext operierte.
|
||||
* **Lösung:** Implementierung eines **Two-Pass Workflows**.
|
||||
* **Pass 1 (Pre-Scan):** Schnelles Einlesen aller Notizen zur Erstellung eines `LocalBatchCache` (Metadaten & Summaries).
|
||||
* **Pass 2 (Processing):** Gezielte semantische Verarbeitung nur für geänderte Dateien.
|
||||
* **Feature:** **Binary Validation Gate**. Statt Kanten frei zu erfinden, validiert das LLM nun Kanten-Kandidaten aus einem Pool gegen den Kontext des `LocalBatchCache`. Dies garantiert 100% Konformität mit der Edge Registry.
|
||||
* **Ergebnis:** Höhere Geschwindigkeit durch Reduktion komplexer LLM-Prompts auf binäre Entscheidungen (VALID/INVALID).
|
||||
|
|
@ -2,7 +2,7 @@
|
|||
from __future__ import annotations
|
||||
import argparse, os, json, glob, statistics as stats
|
||||
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
|
||||
from app.core.chunker import assemble_chunks
|
||||
from app.core.chunking import assemble_chunks
|
||||
|
||||
def iter_md(root: str):
|
||||
for p in glob.glob(os.path.join(root, "**", "*.md"), recursive=True):
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ from pathlib import Path
|
|||
# Pfad-Setup
|
||||
sys.path.insert(0, os.path.abspath("."))
|
||||
|
||||
from app.core.chunker import assemble_chunks, _extract_all_edges_from_md
|
||||
from app.core.chunking import assemble_chunks, _extract_all_edges_from_md
|
||||
from app.core.derive_edges import build_edges_for_note
|
||||
|
||||
# Mock für Settings, falls nötig
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
from __future__ import annotations
|
||||
import argparse, os, glob
|
||||
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
|
||||
from app.core.chunker import assemble_chunks
|
||||
from app.core.chunking import assemble_chunks
|
||||
|
||||
def iter_md(root: str):
|
||||
return [p for p in glob.glob(os.path.join(root, "**", "*.md"), recursive=True)]
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from slugify import slugify
|
|||
from app.core.parser import read_markdown, normalize_frontmatter
|
||||
from app.core.parser import FRONTMATTER_RE # für Re-Inject
|
||||
from app.core.validate_note import validate_note_payload
|
||||
from app.core.note_payload import make_note_payload
|
||||
from app.core.ingestion.ingestion_note_payload import make_note_payload
|
||||
|
||||
DATE_IN_NAME = re.compile(r"(?P<y>\d{4})[-_\.]?(?P<m>\d{2})[-_\.]?(?P<d>\d{2})")
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,10 @@
|
|||
"""
|
||||
scripts/import_markdown.py
|
||||
CLI-Tool zum Importieren von Markdown-Dateien in Qdrant.
|
||||
Updated for Mindnet v2.3.6 (Async Ingestion Support).
|
||||
WP-15b: Implementiert den Two-Pass Workflow (Pre-Scan + Processing).
|
||||
Sorgt dafür, dass der LocalBatchCache vor der Verarbeitung robust gefüllt wird.
|
||||
Indiziert Notizen nach ID, Titel und Dateiname für maximale Link-Kompatibilität.
|
||||
VERSION: 2.4.1
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
|
|
@ -11,21 +14,16 @@ import logging
|
|||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
|
||||
import logging
|
||||
# Setzt das Level global auf INFO, damit Sie den Fortschritt sehen
|
||||
# Setzt das Level global auf INFO, damit der Fortschritt im Log sichtbar ist
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s [%(levelname)s] %(message)s')
|
||||
|
||||
# Wenn Sie TIEFE Einblicke wollen, setzen Sie den SemanticAnalyzer spezifisch auf DEBUG:
|
||||
logging.getLogger("app.services.semantic_analyzer").setLevel(logging.DEBUG)
|
||||
|
||||
# Importiere den neuen Async Service
|
||||
# Stellen wir sicher, dass der Pfad stimmt (Pythonpath)
|
||||
# Importiere den neuen Async Service und stelle Python-Pfad sicher
|
||||
import sys
|
||||
sys.path.append(os.getcwd())
|
||||
|
||||
from app.core.ingestion import IngestionService
|
||||
from app.core.parser import pre_scan_markdown
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
|
||||
logger = logging.getLogger("importer")
|
||||
|
||||
async def main_async(args):
|
||||
|
|
@ -34,7 +32,7 @@ async def main_async(args):
|
|||
logger.error(f"Vault path does not exist: {vault_path}")
|
||||
return
|
||||
|
||||
# Service initialisieren (startet Async Clients)
|
||||
# 1. Service initialisieren
|
||||
logger.info(f"Initializing IngestionService (Prefix: {args.prefix})")
|
||||
service = IngestionService(collection_prefix=args.prefix)
|
||||
|
||||
|
|
@ -46,14 +44,42 @@ async def main_async(args):
|
|||
|
||||
logger.info(f"Found {len(files)} markdown files.")
|
||||
|
||||
stats = {"processed": 0, "skipped": 0, "errors": 0}
|
||||
# =========================================================================
|
||||
# PASS 1: Global Pre-Scan (WP-15b Harvester)
|
||||
# Füllt den LocalBatchCache für die semantische Kanten-Validierung.
|
||||
# Nutzt ID, Titel und Filename für robusten Look-up.
|
||||
# =========================================================================
|
||||
logger.info(f"🔍 [Pass 1] Pre-scanning {len(files)} files for global context cache...")
|
||||
for f_path in files:
|
||||
try:
|
||||
ctx = pre_scan_markdown(str(f_path))
|
||||
if ctx:
|
||||
# 1. Look-up via Note ID (UUID oder Frontmatter ID)
|
||||
service.batch_cache[ctx.note_id] = ctx
|
||||
|
||||
# Wir nutzen eine Semaphore, um nicht zu viele Files gleichzeitig zu öffnen/embedden
|
||||
sem = asyncio.Semaphore(5) # Max 5 concurrent files to avoid OOM or Rate Limit
|
||||
# 2. Look-up via Titel (Wichtig für Wikilinks [[Titel]])
|
||||
service.batch_cache[ctx.title] = ctx
|
||||
|
||||
# 3. Look-up via Dateiname (Wichtig für Wikilinks [[Filename]])
|
||||
fname = os.path.splitext(f_path.name)[0]
|
||||
service.batch_cache[fname] = ctx
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"⚠️ Could not pre-scan {f_path.name}: {e}")
|
||||
|
||||
logger.info(f"✅ Context Cache populated for {len(files)} notes.")
|
||||
|
||||
# =========================================================================
|
||||
# PASS 2: Processing (Semantic Batch-Verarbeitung)
|
||||
# Nutzt den gefüllten Cache zur binären Validierung semantischer Kanten.
|
||||
# =========================================================================
|
||||
stats = {"processed": 0, "skipped": 0, "errors": 0}
|
||||
sem = asyncio.Semaphore(5) # Max 5 parallele Dateien für Cloud-Stabilität
|
||||
|
||||
async def process_with_limit(f_path):
|
||||
async with sem:
|
||||
try:
|
||||
# Nutzt den nun gefüllten Batch-Cache in der process_file Logik
|
||||
res = await service.process_file(
|
||||
file_path=str(f_path),
|
||||
vault_root=str(vault_path),
|
||||
|
|
@ -65,8 +91,8 @@ async def main_async(args):
|
|||
except Exception as e:
|
||||
return {"status": "error", "error": str(e), "path": str(f_path)}
|
||||
|
||||
# Batch Processing
|
||||
# Wir verarbeiten in Chunks, um den Progress zu sehen
|
||||
logger.info(f"🚀 [Pass 2] Starting semantic processing in batches...")
|
||||
|
||||
batch_size = 20
|
||||
for i in range(0, len(files), batch_size):
|
||||
batch = files[i:i+batch_size]
|
||||
|
|
@ -92,7 +118,7 @@ def main():
|
|||
load_dotenv()
|
||||
default_prefix = os.getenv("COLLECTION_PREFIX", "mindnet")
|
||||
|
||||
parser = argparse.ArgumentParser(description="Import Vault to Qdrant (Async)")
|
||||
parser = argparse.ArgumentParser(description="Two-Pass Markdown Ingestion for Mindnet")
|
||||
parser.add_argument("--vault", default="./vault", help="Path to vault root")
|
||||
parser.add_argument("--prefix", default=default_prefix, help="Collection prefix")
|
||||
parser.add_argument("--force", action="store_true", help="Force re-index all files")
|
||||
|
|
@ -100,7 +126,7 @@ def main():
|
|||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Starte den Async Loop
|
||||
# Starte den asynchronen Haupt-Loop
|
||||
asyncio.run(main_async(args))
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ from jsonschema import ValidationError
|
|||
from app.core.parser import read_markdown, validate_required_frontmatter, normalize_frontmatter
|
||||
from app.core.note_payload import make_note_payload
|
||||
from app.core.validate_note import validate_note_payload
|
||||
from app.core.ingestion.ingestion_note_payload import make_note_payload
|
||||
|
||||
|
||||
def iter_md_files(root: str, include: str, exclude: list[str]) -> list[str]:
|
||||
# include z.B. "**/*.md"
|
||||
|
|
|
|||
|
|
@ -10,9 +10,9 @@ import argparse, os, json
|
|||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
|
||||
from app.core.note_payload import make_note_payload
|
||||
from app.core.chunker import assemble_chunks
|
||||
from app.core.chunk_payload import make_chunk_payloads
|
||||
from app.core.chunking import assemble_chunks
|
||||
from app.core.ingestion.ingestion_note_payload import make_note_payload
|
||||
from app.core.ingestion.ingestion_chunk_payload import make_chunk_payloads
|
||||
try:
|
||||
from app.core.derive_edges import build_edges_for_note
|
||||
except Exception:
|
||||
|
|
|
|||
|
|
@ -2,9 +2,10 @@
|
|||
from __future__ import annotations
|
||||
import argparse, os, glob, json
|
||||
from app.core.parser import read_markdown, normalize_frontmatter, validate_required_frontmatter
|
||||
from app.core.chunker import assemble_chunks
|
||||
from app.core.chunk_payload import make_chunk_payloads
|
||||
from app.core.note_payload import make_note_payload
|
||||
from app.core.chunking import assemble_chunks
|
||||
from app.core.ingestion.ingestion_note_payload import make_note_payload
|
||||
from app.core.ingestion.ingestion_chunk_payload import make_chunk_payloads
|
||||
|
||||
|
||||
def iter_md(root: str) -> list[str]:
|
||||
return [p for p in glob.glob(os.path.join(root, "**", "*.md"), recursive=True)]
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user